Use of org.dishevelled.bio.assembly.gfa2.Orientation in project dishevelled by heuermh.
The run method of class ImportGfa1Task.
/**
 * Import the GFA 1.0 file referenced by {@code inputFile} into the current Cytoscape network.
 *
 * <p>The input file is read twice. The first pass caches every segment by id. The second
 * pass collects links, paths, and traversals, and records each (segment id, orientation)
 * pair that is actually referenced by a link or a path. One Cytoscape node is then created
 * per referenced (segment id, orientation) pair, named {@code id+} or {@code id-}, and one
 * directed Cytoscape edge is created per link. If {@code loadPaths} is set and at least one
 * path was read, the paths and traversals are handed to {@code assemblyModel}.</p>
 *
 * <p>A {@link RuntimeException} is raised from the reader callbacks when a link or path
 * references a segment id that was not seen in the first pass.</p>
 *
 * @param taskMonitor task monitor that receives the title and status messages
 * @throws IllegalStateException if there is no current network to import into
 * @throws Exception if the input file cannot be read or streamed
 */
@Override
public void run(final TaskMonitor taskMonitor) throws Exception {
    taskMonitor.setTitle("Import a network in Graphical Fragment Assembly (GFA) 1.0 format");

    final Map<String, Segment> segmentsById = new HashMap<>();
    final Table<String, Orientation, Segment> segmentsByOrientation = HashBasedTable.create();
    final ListMultimap<String, Traversal> traversalsByPathName = ArrayListMultimap.create();

    taskMonitor.setStatusMessage("Reading segments from file ...");
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // first pass: stream segments, building cache
        stream(readable, new Gfa1Adapter() {
            @Override
            protected boolean segment(final Segment segment) {
                segmentsById.put(segment.getId(), segment);
                return true;
            }
        });
    }

    taskMonitor.setStatusMessage("Finding reverse orientation references ...");
    final List<Path> paths = new ArrayList<>();
    final List<Link> links = new ArrayList<>();
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // second pass: stream paths and links, looking for reverse orientation references
        stream(readable, new Gfa1Adapter() {
            // remember the segment for this (id, orientation) pair the first time it is referenced
            private void putIfAbsent(final Reference reference) {
                Segment segment = segmentsById.get(reference.getId());
                if (segment == null) {
                    throw new RuntimeException("could not find segment by id " + reference.getId());
                }
                if (!segmentsByOrientation.contains(reference.getId(), reference.getOrientation())) {
                    segmentsByOrientation.put(reference.getId(), reference.getOrientation(), segment);
                }
            }

            @Override
            protected boolean path(final Path path) {
                // references are always recorded so nodes exist even when paths are not loaded
                for (Reference reference : path.getSegments()) {
                    putIfAbsent(reference);
                }
                if (loadPaths) {
                    paths.add(path);
                }
                return true;
            }

            @Override
            protected boolean link(final Link link) {
                putIfAbsent(link.getSource());
                putIfAbsent(link.getTarget());
                links.add(link);
                return true;
            }

            @Override
            protected boolean traversal(final Traversal traversal) {
                traversalsByPathName.put(traversal.getPathName(), traversal);
                return true;
            }
        });
    }
    logger.info("read {} segments, {} links, {} paths, and {} traversals from {}", new Object[] { segmentsById.size(), links.size(), paths.size(), traversalsByPathName.size(), inputFile });
    // the id cache is no longer needed; referenced segments live on in segmentsByOrientation
    segmentsById.clear();

    taskMonitor.setStatusMessage("Building Cytoscape nodes from segments ...");
    final CyNetwork network = applicationManager.getCurrentNetwork();
    if (network == null) {
        // fail with a clear message instead of a NullPointerException on the first addNode call
        throw new IllegalStateException("no current network available to import into");
    }
    final CyTable nodeTable = network.getDefaultNodeTable();
    final Map<String, CyNode> nodes = new HashMap<String, CyNode>(segmentsByOrientation.size());
    for (Table.Cell<String, Orientation, Segment> cell : segmentsByOrientation.cellSet()) {
        final Orientation orientation = cell.getColumnKey();
        final Segment segment = cell.getValue();
        final String name = cell.getRowKey() + (orientation.isForward() ? "+" : "-");
        if (nodes.containsKey(name)) {
            continue;
        }

        final CyNode node = network.addNode();
        final CyRow nodeRow = nodeTable.getRow(node.getSUID());

        final Integer length = segment.getLengthOpt().orElse(null);
        final Integer readCount = segment.getReadCountOpt().orElse(null);
        final Integer fragmentCount = segment.getFragmentCountOpt().orElse(null);
        final Integer kmerCount = segment.getKmerCountOpt().orElse(null);
        final String sequenceChecksum = segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : null;
        final String sequenceUri = segment.getSequenceUriOpt().orElse(null);

        setValue(nodeTable, nodeRow, "name", String.class, name);
        setValue(nodeTable, nodeRow, "length", Integer.class, length);
        setValue(nodeTable, nodeRow, "readCount", Integer.class, readCount);
        setValue(nodeTable, nodeRow, "fragmentCount", Integer.class, fragmentCount);
        setValue(nodeTable, nodeRow, "kmerCount", Integer.class, kmerCount);
        setValue(nodeTable, nodeRow, "sequenceChecksum", String.class, sequenceChecksum);
        setValue(nodeTable, nodeRow, "sequenceUri", String.class, sequenceUri);

        // default display length to length
        Integer displayLength = length;

        // never hand a missing sequence to reverseComplement; a null sequence stays null
        final String forwardSequence = segment.getSequence();
        final String sequence = (forwardSequence == null || orientation.isForward())
            ? forwardSequence
            : reverseComplement(forwardSequence);

        if (sequence != null) {
            final Integer sequenceLength = sequence.length();
            final String displaySequence = trimFromMiddle(sequence, displaySequenceLimit);
            final Integer displaySequenceLength = displaySequence.length();

            if (loadSequences) {
                setValue(nodeTable, nodeRow, "sequence", String.class, sequence);
            }
            setValue(nodeTable, nodeRow, "sequenceLength", Integer.class, sequenceLength);
            setValue(nodeTable, nodeRow, "displaySequence", String.class, displaySequence);
            setValue(nodeTable, nodeRow, "displaySequenceLength", Integer.class, displaySequenceLength);

            // override display length with sequence length if necessary;
            // compare boxed values with equals, not by reference
            if (length == null || !length.equals(sequenceLength)) {
                displayLength = sequenceLength;
            }
        }

        // displayName is "name[ N bp]"; displayLabel extends it with the available counts
        final StringBuilder sb = new StringBuilder();
        sb.append(name);
        if (displayLength != null) {
            sb.append(" ").append(displayLength).append(" bp");
        }
        final String displayName = sb.toString();

        if (readCount != null) {
            sb.append(" ").append(readCount).append(" reads");
        }
        if (fragmentCount != null) {
            sb.append(" ").append(fragmentCount).append(" fragments");
        }
        if (kmerCount != null) {
            sb.append(" ").append(kmerCount).append(" kmers");
        }
        final String displayLabel = sb.toString();

        setValue(nodeTable, nodeRow, "displayName", String.class, displayName);
        setValue(nodeTable, nodeRow, "displayLength", Integer.class, displayLength);
        setValue(nodeTable, nodeRow, "displayLabel", String.class, displayLabel);

        nodes.put(name, node);
    }
    logger.info("converted segments and orientation to {} nodes", nodes.size());
    segmentsByOrientation.clear();

    taskMonitor.setStatusMessage("Building Cytoscape edges from links ...");
    final CyTable edgeTable = network.getDefaultEdgeTable();
    for (Link link : links) {
        final String sourceId = link.getSource().getId();
        final String sourceOrientation = link.getSource().isForwardOrientation() ? "+" : "-";
        final String targetId = link.getTarget().getId();
        final String targetOrientation = link.getTarget().isForwardOrientation() ? "+" : "-";

        // node names use the same id + orientation sign scheme as above
        final CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        final CyNode targetNode = nodes.get(targetId + targetOrientation);
        final CyEdge edge = network.addEdge(sourceNode, targetNode, true);
        final CyRow edgeRow = edgeTable.getRow(edge.getSUID());

        setValue(edgeTable, edgeRow, "id", String.class, link.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "edge");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "overlap", String.class, link.getOverlapOpt().orElse(null));
        setValue(edgeTable, edgeRow, "readCount", Integer.class, link.getReadCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "fragmentCount", Integer.class, link.getFragmentCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "kmerCount", Integer.class, link.getKmerCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mappingQuality", Integer.class, link.getMappingQualityOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mismatchCount", Integer.class, link.getMismatchCountOpt().orElse(null));
    }
    logger.info("converted links to {} edges", links.size());
    nodes.clear();
    links.clear();

    // pass paths to AssemblyApp if requested
    if (loadPaths && !paths.isEmpty()) {
        taskMonitor.setStatusMessage("Loading paths in path view ...");
        assemblyModel.setInputFileName(inputFile.toString());
        assemblyModel.setPaths(paths, traversalsByPathName);
    }
}
Use of org.dishevelled.bio.assembly.gfa2.Orientation in project dishevelled by heuermh.
The run method of class ImportGfa2Task.
/**
 * Import the GFA 2.0 file referenced by {@code inputFile} into the current Cytoscape network.
 *
 * <p>The input file is read twice. The first pass caches every segment by id. The second
 * pass collects edges, gaps, and paths, and records each (segment id, orientation) pair that
 * is actually referenced by an edge, a gap, or a path. One Cytoscape node is then created per
 * referenced (segment id, orientation) pair, named {@code id+} or {@code id-}, and one
 * directed Cytoscape edge is created per GFA edge (type {@code "edge"}) and per GFA gap
 * (type {@code "gap"}). If {@code loadPaths} is set and at least one path was read, only the
 * input file name is passed to {@code assemblyModel}; the paths themselves are not yet
 * forwarded.</p>
 *
 * <p>A {@link RuntimeException} is raised from the reader callbacks when an edge, gap, or
 * path references a segment id that was not seen in the first pass.</p>
 *
 * @param taskMonitor task monitor that receives the title and status messages
 * @throws IllegalStateException if there is no current network to import into
 * @throws Exception if the input file cannot be read or streamed
 */
@Override
public void run(final TaskMonitor taskMonitor) throws Exception {
    taskMonitor.setTitle("Import a network in Graphical Fragment Assembly (GFA) 2.0 format");

    final Map<String, Segment> segmentsById = new HashMap<>();

    taskMonitor.setStatusMessage("Reading segments from file ...");
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // first pass: stream segments, building cache
        stream(readable, new Gfa2Adapter() {
            @Override
            protected boolean segment(final Segment segment) {
                segmentsById.put(segment.getId(), segment);
                return true;
            }
        });
    }

    taskMonitor.setStatusMessage("Finding reverse orientation references ...");
    final Table<String, Orientation, Segment> segmentsByOrientation = HashBasedTable.create();
    final List<Edge> edges = new ArrayList<>();
    final List<Gap> gaps = new ArrayList<>();
    final List<Path> paths = new ArrayList<>();
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // second pass: stream edges, gaps, and paths, looking for reverse orientation references
        stream(readable, new Gfa2Adapter() {
            // remember the segment for this (id, orientation) pair the first time it is referenced
            private void putIfAbsent(final Reference reference) {
                Segment segment = segmentsById.get(reference.getId());
                if (segment == null) {
                    throw new RuntimeException("could not find segment by id " + reference.getId());
                }
                if (!segmentsByOrientation.contains(reference.getId(), reference.getOrientation())) {
                    segmentsByOrientation.put(reference.getId(), reference.getOrientation(), segment);
                }
            }

            @Override
            public boolean edge(final Edge edge) {
                putIfAbsent(edge.getSource());
                putIfAbsent(edge.getTarget());
                edges.add(edge);
                return true;
            }

            @Override
            public boolean gap(final Gap gap) {
                putIfAbsent(gap.getSource());
                putIfAbsent(gap.getTarget());
                gaps.add(gap);
                return true;
            }

            @Override
            public boolean path(final Path path) {
                // references are always recorded so nodes exist even when paths are not loaded
                for (Reference reference : path.getReferences()) {
                    putIfAbsent(reference);
                }
                if (loadPaths) {
                    paths.add(path);
                }
                return true;
            }
        });
    }
    logger.info("read {} segments, {} edges, {} gaps, and {} paths from {}", new Object[] { segmentsById.size(), edges.size(), gaps.size(), paths.size(), inputFile });
    // the id cache is no longer needed; referenced segments live on in segmentsByOrientation
    segmentsById.clear();

    taskMonitor.setStatusMessage("Building Cytoscape nodes from segments ...");
    final CyNetwork network = applicationManager.getCurrentNetwork();
    if (network == null) {
        // fail with a clear message instead of a NullPointerException on the first addNode call
        throw new IllegalStateException("no current network available to import into");
    }
    final CyTable nodeTable = network.getDefaultNodeTable();
    final Map<String, CyNode> nodes = new HashMap<String, CyNode>(segmentsByOrientation.size());
    for (Table.Cell<String, Orientation, Segment> cell : segmentsByOrientation.cellSet()) {
        final Orientation orientation = cell.getColumnKey();
        final Segment segment = cell.getValue();
        final String name = cell.getRowKey() + (orientation.isForward() ? "+" : "-");
        if (nodes.containsKey(name)) {
            continue;
        }

        final CyNode node = network.addNode();
        final CyRow nodeRow = nodeTable.getRow(node.getSUID());

        final Integer length = segment.getLength();
        final Integer readCount = segment.getReadCountOpt().orElse(null);
        final Integer fragmentCount = segment.getFragmentCountOpt().orElse(null);
        final Integer kmerCount = segment.getKmerCountOpt().orElse(null);
        final String sequenceChecksum = segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : null;
        final String sequenceUri = segment.getSequenceUriOpt().orElse(null);

        setValue(nodeTable, nodeRow, "name", String.class, name);
        setValue(nodeTable, nodeRow, "length", Integer.class, length);
        setValue(nodeTable, nodeRow, "readCount", Integer.class, readCount);
        setValue(nodeTable, nodeRow, "fragmentCount", Integer.class, fragmentCount);
        setValue(nodeTable, nodeRow, "kmerCount", Integer.class, kmerCount);
        setValue(nodeTable, nodeRow, "sequenceChecksum", String.class, sequenceChecksum);
        setValue(nodeTable, nodeRow, "sequenceUri", String.class, sequenceUri);

        // default display length to length
        Integer displayLength = length;

        // never hand a missing sequence to reverseComplement; a null sequence stays null
        final String forwardSequence = segment.getSequence();
        final String sequence = (forwardSequence == null || orientation.isForward())
            ? forwardSequence
            : reverseComplement(forwardSequence);

        if (sequence != null) {
            final Integer sequenceLength = sequence.length();
            final String displaySequence = trimFromMiddle(sequence, displaySequenceLimit);
            final Integer displaySequenceLength = displaySequence.length();

            if (loadSequences) {
                setValue(nodeTable, nodeRow, "sequence", String.class, sequence);
            }
            setValue(nodeTable, nodeRow, "sequenceLength", Integer.class, sequenceLength);
            setValue(nodeTable, nodeRow, "displaySequence", String.class, displaySequence);
            setValue(nodeTable, nodeRow, "displaySequenceLength", Integer.class, displaySequenceLength);

            // override display length with sequence length if necessary;
            // compare boxed values with equals, not by reference
            if (length == null || !length.equals(sequenceLength)) {
                displayLength = sequenceLength;
            }
        }

        // displayName is "name[ N bp]"; displayLabel extends it with the available counts
        final StringBuilder sb = new StringBuilder();
        sb.append(name);
        if (displayLength != null) {
            sb.append(" ").append(displayLength).append(" bp");
        }
        final String displayName = sb.toString();

        if (readCount != null) {
            sb.append(" ").append(readCount).append(" reads");
        }
        if (fragmentCount != null) {
            sb.append(" ").append(fragmentCount).append(" fragments");
        }
        if (kmerCount != null) {
            sb.append(" ").append(kmerCount).append(" kmers");
        }
        final String displayLabel = sb.toString();

        setValue(nodeTable, nodeRow, "displayName", String.class, displayName);
        setValue(nodeTable, nodeRow, "displayLength", Integer.class, displayLength);
        setValue(nodeTable, nodeRow, "displayLabel", String.class, displayLabel);

        nodes.put(name, node);
    }
    logger.info("converted segments and orientation to {} nodes", nodes.size());
    segmentsByOrientation.clear();

    taskMonitor.setStatusMessage("Building Cytoscape edges from edges and gaps ...");
    final CyTable edgeTable = network.getDefaultEdgeTable();
    for (Edge edge : edges) {
        final String sourceId = edge.getSource().getId();
        final String sourceOrientation = edge.getSource().isForwardOrientation() ? "+" : "-";
        final String targetId = edge.getTarget().getId();
        final String targetOrientation = edge.getTarget().isForwardOrientation() ? "+" : "-";

        // node names use the same id + orientation sign scheme as above
        final CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        final CyNode targetNode = nodes.get(targetId + targetOrientation);
        final CyEdge cyEdge = network.addEdge(sourceNode, targetNode, true);
        final CyRow edgeRow = edgeTable.getRow(cyEdge.getSUID());

        setValue(edgeTable, edgeRow, "id", String.class, edge.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "edge");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "sourceStart", String.class, edge.getSourceStart().toString());
        setValue(edgeTable, edgeRow, "sourceEnd", String.class, edge.getSourceEnd().toString());
        setValue(edgeTable, edgeRow, "targetStart", String.class, edge.getTargetStart().toString());
        setValue(edgeTable, edgeRow, "targetEnd", String.class, edge.getTargetEnd().toString());
        setValue(edgeTable, edgeRow, "alignment", String.class, edge.hasAlignment() ? edge.getAlignment().toString() : null);
        setValue(edgeTable, edgeRow, "readCount", Integer.class, edge.getReadCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "fragmentCount", Integer.class, edge.getFragmentCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "kmerCount", Integer.class, edge.getKmerCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mappingQuality", Integer.class, edge.getMappingQualityOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mismatchCount", Integer.class, edge.getMismatchCountOpt().orElse(null));
    }
    logger.info("converted edges to {} edges", edges.size());

    for (Gap gap : gaps) {
        final String sourceId = gap.getSource().getId();
        final String sourceOrientation = gap.getSource().isForwardOrientation() ? "+" : "-";
        final String targetId = gap.getTarget().getId();
        final String targetOrientation = gap.getTarget().isForwardOrientation() ? "+" : "-";

        final CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        final CyNode targetNode = nodes.get(targetId + targetOrientation);
        final CyEdge cyEdge = network.addEdge(sourceNode, targetNode, true);
        final CyRow edgeRow = edgeTable.getRow(cyEdge.getSUID());

        setValue(edgeTable, edgeRow, "id", String.class, gap.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "gap");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "distance", Integer.class, gap.getDistance());
        setValue(edgeTable, edgeRow, "variance", Integer.class, gap.getVarianceOpt().orElse(null));
    }
    logger.info("converted gaps to {} edges", gaps.size());
    nodes.clear();
    edges.clear();
    gaps.clear();

    // pass paths to AssemblyApp if requested
    if (loadPaths && !paths.isEmpty()) {
        taskMonitor.setStatusMessage("Loading paths in path view ...");
        assemblyModel.setInputFileName(inputFile.toString());
        // todo: convert to gfa1 paths?
        // note paths in gfa2 can have references to segments, edges, or other groups
        // assemblyModel.setPaths(paths, traversalsByPathName);
    }
}
Aggregations