use of org.dishevelled.bio.assembly.gfa1.Gfa1Adapter in project dishevelled by heuermh.
the class ImportGfa1Task method run.
/**
 * Import the GFA 1.0 input file into the current Cytoscape network.
 *
 * <p>The input file is read twice. The first pass caches every segment by id. The second
 * pass collects links, paths (only if <code>loadPaths</code> is set) and traversals, and
 * records each (segment id, orientation) pair referenced by a link or a path. One node is
 * created per referenced pair, named <code>id+</code> or <code>id-</code>, and one directed
 * edge is created per link. If <code>loadPaths</code> is set and at least one path was read,
 * the paths and traversals are handed to <code>assemblyModel</code>.</p>
 *
 * @param taskMonitor task monitor used to report the title and status messages
 * @throws Exception if the input file cannot be read; the second-pass callbacks throw a
 *    RuntimeException when a link or path references a segment id that was not read
 */
@Override
public void run(final TaskMonitor taskMonitor) throws Exception {
    taskMonitor.setTitle("Import a network in Graphical Fragment Assembly (GFA) 1.0 format");

    final Map<String, Segment> segmentsById = new HashMap<String, Segment>();
    final Table<String, Orientation, Segment> segmentsByOrientation = HashBasedTable.create();
    final ListMultimap<String, Traversal> traversalsByPathName = ArrayListMultimap.create();

    taskMonitor.setStatusMessage("Reading segments from file ...");

    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream segments, building cache
        stream(readable, new Gfa1Adapter() {
            @Override
            protected boolean segment(final Segment segment) {
                segmentsById.put(segment.getId(), segment);
                return true;
            }
        });
    }

    taskMonitor.setStatusMessage("Finding reverse orientation references ...");

    final List<Path> paths = new ArrayList<Path>();
    final List<Link> links = new ArrayList<Link>();
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream paths and links, looking for reverse orientation references
        stream(readable, new Gfa1Adapter() {
            // record the referenced (id, orientation) pair once, failing on unknown segment ids
            private void putIfAbsent(final Reference reference) {
                Segment segment = segmentsById.get(reference.getId());
                if (segment == null) {
                    throw new RuntimeException("could not find segment by id " + reference.getId());
                }
                if (!segmentsByOrientation.contains(reference.getId(), reference.getOrientation())) {
                    segmentsByOrientation.put(reference.getId(), reference.getOrientation(), segment);
                }
            }

            @Override
            protected boolean path(final Path path) {
                // path references always contribute nodes, even when paths themselves are not kept
                for (Reference reference : path.getSegments()) {
                    putIfAbsent(reference);
                }
                if (loadPaths) {
                    paths.add(path);
                }
                return true;
            }

            @Override
            protected boolean link(final Link link) {
                putIfAbsent(link.getSource());
                putIfAbsent(link.getTarget());
                links.add(link);
                return true;
            }

            @Override
            protected boolean traversal(final Traversal traversal) {
                traversalsByPathName.put(traversal.getPathName(), traversal);
                return true;
            }
        });
    }
    logger.info("read {} segments, {} links, {} paths, and {} traversals from {}", new Object[] { segmentsById.size(), links.size(), paths.size(), traversalsByPathName.size(), inputFile });

    // the id cache is no longer needed once the orientation table has been populated
    segmentsById.clear();

    taskMonitor.setStatusMessage("Building Cytoscape nodes from segments ...");

    final CyNetwork network = applicationManager.getCurrentNetwork();
    final Map<String, CyNode> nodes = new HashMap<String, CyNode>(segmentsByOrientation.size());

    for (Table.Cell<String, Orientation, Segment> c : segmentsByOrientation.cellSet()) {
        String id = c.getRowKey();
        Orientation orientation = c.getColumnKey();
        Segment segment = c.getValue();
        String name = id + (orientation.isForward() ? "+" : "-");

        if (!nodes.containsKey(name)) {
            CyNode node = network.addNode();
            CyTable nodeTable = network.getDefaultNodeTable();
            CyRow nodeRow = nodeTable.getRow(node.getSUID());

            Integer length = segment.getLengthOpt().orElse(null);
            Integer readCount = segment.getReadCountOpt().orElse(null);
            Integer fragmentCount = segment.getFragmentCountOpt().orElse(null);
            Integer kmerCount = segment.getKmerCountOpt().orElse(null);
            String sequenceChecksum = segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : null;
            String sequenceUri = segment.getSequenceUriOpt().orElse(null);

            setValue(nodeTable, nodeRow, "name", String.class, name);
            setValue(nodeTable, nodeRow, "length", Integer.class, length);
            setValue(nodeTable, nodeRow, "readCount", Integer.class, readCount);
            setValue(nodeTable, nodeRow, "fragmentCount", Integer.class, fragmentCount);
            setValue(nodeTable, nodeRow, "kmerCount", Integer.class, kmerCount);
            setValue(nodeTable, nodeRow, "sequenceChecksum", String.class, sequenceChecksum);
            setValue(nodeTable, nodeRow, "sequenceUri", String.class, sequenceUri);

            // default display length to length
            Integer displayLength = length;

            String sequence = orientation.isForward() ? segment.getSequence() : reverseComplement(segment.getSequence());
            if (sequence != null) {
                Integer sequenceLength = sequence.length();
                String displaySequence = trimFromMiddle(sequence, displaySequenceLimit);
                Integer displaySequenceLength = displaySequence.length();

                if (loadSequences) {
                    setValue(nodeTable, nodeRow, "sequence", String.class, sequence);
                }
                setValue(nodeTable, nodeRow, "sequenceLength", Integer.class, sequenceLength);
                setValue(nodeTable, nodeRow, "displaySequence", String.class, displaySequence);
                setValue(nodeTable, nodeRow, "displaySequenceLength", Integer.class, displaySequenceLength);

                // override display length with sequence length if necessary;
                // compare boxed values with equals, since != on Integer compares references
                if (length == null || !length.equals(sequenceLength)) {
                    displayLength = sequenceLength;
                }
            }

            StringBuilder sb = new StringBuilder();
            sb.append(name);
            if (displayLength != null) {
                sb.append(" ");
                sb.append(displayLength);
                sb.append(" bp");
            }
            // display name is name plus length only; the label continues with the counts below
            String displayName = sb.toString();

            if (readCount != null) {
                sb.append(" ");
                sb.append(readCount);
                sb.append(" reads");
            }
            if (fragmentCount != null) {
                sb.append(" ");
                sb.append(fragmentCount);
                sb.append(" fragments");
            }
            if (kmerCount != null) {
                sb.append(" ");
                sb.append(kmerCount);
                sb.append(" kmers");
            }
            String displayLabel = sb.toString();

            setValue(nodeTable, nodeRow, "displayName", String.class, displayName);
            setValue(nodeTable, nodeRow, "displayLength", Integer.class, displayLength);
            setValue(nodeTable, nodeRow, "displayLabel", String.class, displayLabel);

            nodes.put(name, node);
        }
    }
    logger.info("converted segments and orientation to " + nodes.size() + " nodes");

    segmentsByOrientation.clear();

    taskMonitor.setStatusMessage("Building Cytoscape edges from links ...");

    for (Link link : links) {
        String sourceId = link.getSource().getId();
        String sourceOrientation = link.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = link.getTarget().getId();
        String targetOrientation = link.getTarget().isForwardOrientation() ? "+" : "-";

        // node names were built as id + orientation symbol above
        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        CyEdge edge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(edge.getSUID());

        setValue(edgeTable, edgeRow, "id", String.class, link.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "edge");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "overlap", String.class, link.getOverlapOpt().orElse(null));
        setValue(edgeTable, edgeRow, "readCount", Integer.class, link.getReadCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "fragmentCount", Integer.class, link.getFragmentCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "kmerCount", Integer.class, link.getKmerCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mappingQuality", Integer.class, link.getMappingQualityOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mismatchCount", Integer.class, link.getMismatchCountOpt().orElse(null));
    }
    logger.info("converted links to " + links.size() + " edges");

    nodes.clear();
    links.clear();

    // pass paths to AssemblyApp if requested
    if (loadPaths && !paths.isEmpty()) {
        taskMonitor.setStatusMessage("Loading paths in path view ...");

        assemblyModel.setInputFileName(inputFile.toString());
        assemblyModel.setPaths(paths, traversalsByPathName);
    }
}
use of org.dishevelled.bio.assembly.gfa1.Gfa1Adapter in project dishevelled-bio by heuermh.
the class Gfa1ToGfa2 method call.
/**
 * Convert the GFA 1.0 input to GFA 2.0, writing one GFA 2.0 record per GFA 1.0 header,
 * segment, link, containment and path record.
 *
 * <p>Both the output writer and the input reader are closed when this method exits,
 * whether normally or by exception.</p>
 *
 * @return 0 when streaming completes
 * @throws Exception if the input or output cannot be opened, read or closed; the header
 *    callback throws a RuntimeException when a <code>VN</code> annotation other than
 *    <code>1.0</code> is present
 */
@Override
public Integer call() throws Exception {
    // the writer is opened before the reader, matching the original order of side effects;
    // the reader type is fully qualified so that no additional import is required
    try (PrintWriter w = writer(outputGfa2File);
         java.io.BufferedReader reader = reader(inputGfa1File)) {

        Gfa1Reader.stream(reader, new Gfa1Adapter() {
            @Override
            public boolean header(final Header header) {
                // convert VN:Z:1.0 to VN:Z:2.0 annotation if present
                if (header.getAnnotations().containsKey("VN")) {
                    if (!"1.0".equals(header.getAnnotations().get("VN").getValue())) {
                        throw new RuntimeException("cannot convert input as GFA 1.0, was " + header.getAnnotations().get("VN").getValue());
                    }
                    // copy every annotation except VN, which is replaced by the 2.0 version
                    Map<String, Annotation> annotations = new HashMap<String, Annotation>();
                    annotations.put("VN", new Annotation("VN", "Z", "2.0"));
                    for (Annotation annotation : header.getAnnotations().values()) {
                        if (!"VN".equals(annotation.getName())) {
                            annotations.put(annotation.getName(), annotation);
                        }
                    }
                    Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Header(annotations), w);
                } else {
                    Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Header(header.getAnnotations()), w);
                }
                return true;
            }

            @Override
            public boolean segment(final Segment segment) {
                // length comes from the sequence if present, else the LN annotation, else 0
                if (segment.getSequence() != null) {
                    Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Segment(segment.getId(), segment.getSequence().length(), segment.getSequence(), segment.getAnnotations()), w);
                } else if (segment.getAnnotations().containsKey("LN")) {
                    Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Segment(segment.getId(), Integer.parseInt(segment.getAnnotations().get("LN").getValue()), segment.getSequence(), segment.getAnnotations()), w);
                } else {
                    Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Segment(segment.getId(), 0, segment.getSequence(), segment.getAnnotations()), w);
                }
                return true;
            }

            @Override
            public boolean link(final Link link) {
                // all four edge positions are written with the same placeholder position
                Position unknown = new Position(0, false);
                Alignment alignment = link.getOverlap() == null ? null : Alignment.valueOf(link.getOverlap());
                Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Edge(null, toGfa2Reference(link.getSource()), toGfa2Reference(link.getTarget()), unknown, unknown, unknown, unknown, alignment, link.getAnnotations()), w);
                return true;
            }

            @Override
            public boolean containment(final Containment containment) {
                // only the third position is taken from the containment; the rest are placeholders
                Position unknown = new Position(0, false);
                Position targetStart = new Position(containment.getPosition(), false);
                Alignment alignment = containment.getOverlap() == null ? null : Alignment.valueOf(containment.getOverlap());
                Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Edge(null, toGfa2Reference(containment.getContainer()), toGfa2Reference(containment.getContained()), unknown, unknown, targetStart, unknown, alignment, containment.getAnnotations()), w);
                return true;
            }

            @Override
            public boolean path(final Path path) {
                Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Path(path.getName(), toGfa2References(path.getSegments()), path.getAnnotations()), w);
                return true;
            }
        });

        return 0;
    }
}
use of org.dishevelled.bio.assembly.gfa1.Gfa1Adapter in project dishevelled-bio by heuermh.
the class SegmentsToCytoscapeNodes method call.
/**
 * Write one tab-delimited line per GFA 1.0 segment to the output nodes file, preceded
 * by <code>HEADER</code>.
 *
 * <p>Columns, in order: name, sequence, length, read count, fragment count, kmer count,
 * sequence checksum, sequence URI. A value that is absent is written as an empty field.
 * Both the output writer and the input reader are closed when this method exits.</p>
 *
 * @return 0 when streaming completes
 * @throws Exception if the input or output cannot be opened, read or closed
 */
@Override
public Integer call() throws Exception {
    try (PrintWriter nw = writer(outputNodesFile)) {
        // the header is written before the input is opened, as in the original ordering
        nw.println(HEADER);

        // the reader type is fully qualified so that no additional import is required
        try (java.io.BufferedReader reader = reader(inputGfa1File)) {
            Gfa1Reader.stream(reader, new Gfa1Adapter() {
                @Override
                public boolean segment(final Segment segment) {
                    StringBuilder sb = new StringBuilder();
                    sb.append(segment.getName());
                    sb.append("\t");
                    sb.append(segment.getSequenceOpt().orElse(""));
                    sb.append("\t");
                    sb.append(segment.containsLength() ? segment.getLength() : "");
                    sb.append("\t");
                    sb.append(segment.containsReadCount() ? segment.getReadCount() : "");
                    sb.append("\t");
                    sb.append(segment.containsFragmentCount() ? segment.getFragmentCount() : "");
                    sb.append("\t");
                    sb.append(segment.containsKmerCount() ? segment.getKmerCount() : "");
                    sb.append("\t");
                    sb.append(segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : "");
                    sb.append("\t");
                    sb.append(segment.getSequenceUriOpt().orElse(""));
                    nw.println(sb);
                    return true;
                }
            });
        }

        return 0;
    }
}
use of org.dishevelled.bio.assembly.gfa1.Gfa1Adapter in project dishevelled-bio by heuermh.
the class ExportSegments method call.
/**
 * Export the sequences of GFA 1.0 segments in FASTA format.
 *
 * <p>For every segment that has a sequence, a description line starting with
 * <code>&gt;</code> is written, followed by the sequence wrapped at <code>lineWidth</code>
 * characters per line. Segments without a sequence are skipped.</p>
 *
 * @return 0 when streaming completes
 * @throws Exception if the input or output cannot be opened or read
 */
@Override
public Integer call() throws Exception {
    BufferedReader reader = null;
    PrintWriter writer = null;
    try {
        reader = reader(inputGfa1File);
        writer = writer(outputFastaFile);

        final PrintWriter w = writer;
        // stream from the reader opened above, so the handle closed in the finally block
        // is the one actually used, rather than opening a second, never-closed reader
        Gfa1Reader.stream(reader, new Gfa1Adapter() {
            @Override
            public boolean segment(final Segment segment) {
                if (segment.hasSequence()) {
                    w.print(">");
                    w.println(describeSegment(segment));

                    String sequence = segment.getSequence();
                    // strict upper bound: with i == length the substring is empty and a
                    // spurious blank line would follow any sequence whose length is a
                    // multiple of lineWidth
                    for (int i = 0, length = sequence.length(); i < length; i += lineWidth) {
                        w.println(sequence.substring(i, Math.min(i + lineWidth, length)));
                    }
                }
                return true;
            }
        });

        return 0;
    } finally {
        // best-effort close: a failure while closing the input is deliberately ignored
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // ignore
            }
        }
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception ignored) {
                // ignore
            }
        }
    }
}
use of org.dishevelled.bio.assembly.gfa1.Gfa1Adapter in project dishevelled-bio by heuermh.
the class TraversalsToPropertyGraph method call.
/**
 * Write one comma-separated line per GFA 1.0 traversal to the output edges file,
 * preceded by <code>HEADER</code>.
 *
 * <p>Columns, in order: traversal id, source name, target name, source orientation symbol,
 * target orientation symbol, the literal <code>t</code>, path name, ordinal, overlap
 * (empty if absent). Both the output writer and the input reader are closed when this
 * method exits.</p>
 *
 * @return 0 when streaming completes
 * @throws Exception if the input or output cannot be opened, read or closed; the traversal
 *    callback throws an IllegalArgumentException for a traversal without an identifier
 */
@Override
public Integer call() throws Exception {
    try (PrintWriter ew = writer(outputEdgesFile)) {
        // the header is written before the input is opened, as in the original ordering
        ew.println(HEADER);

        // the reader type is fully qualified so that no additional import is required
        try (java.io.BufferedReader reader = reader(inputGfa1File)) {
            Gfa1Reader.stream(reader, new Gfa1Adapter() {
                @Override
                public boolean traversal(final Traversal traversal) {
                    if (!traversal.containsId()) {
                        throw new IllegalArgumentException("traversal identifiers are required for property graph CSV format");
                    }
                    StringBuilder sb = new StringBuilder();
                    sb.append(traversal.getId());
                    sb.append(",");
                    sb.append(traversal.getSource().getName());
                    sb.append(",");
                    sb.append(traversal.getTarget().getName());
                    sb.append(",");
                    sb.append(traversal.getSource().getOrientation().getSymbol());
                    sb.append(",");
                    sb.append(traversal.getTarget().getOrientation().getSymbol());
                    sb.append(",t,");
                    sb.append(traversal.getPathName());
                    sb.append(",");
                    sb.append(traversal.getOrdinal());
                    sb.append(",");
                    sb.append(traversal.getOverlapOpt().orElse(""));
                    ew.println(sb);
                    return true;
                }
            });
        }

        return 0;
    }
}
Aggregations