use of org.dishevelled.bio.assembly.gfa1.Traversal in project dishevelled by heuermh.
the class AssemblyModel method traversalsFor.
/**
 * Create and return a list of path traversals for the specified path.
 * <p>
 * One traversal is created for each pair of adjacent segment references in the path.
 * The traversal with ordinal <code>n</code> runs from the segment reference at index
 * <code>n</code> to the one at index <code>n + 1</code>, and is given the overlap at
 * index <code>n</code> of the path overlaps when such an overlap exists, or
 * <code>null</code> otherwise. All traversals are created with empty annotations.
 * </p>
 *
 * @param path path, must not be null
 * @return a list of path traversals for the specified path, empty if the path
 *    has fewer than two segments
 */
static List<Traversal> traversalsFor(final Path path) {
    checkNotNull(path);
    List<Reference> segments = path.getSegments();
    List<String> overlaps = path.getOverlaps();
    int size = segments.size();
    List<Traversal> traversals = new ArrayList<Traversal>(size);
    Map<String, Annotation> emptyAnnotations = Collections.emptyMap();

    for (int i = 1; i < size; i++) {
        int ordinal = i - 1;
        Reference source = segments.get(ordinal);
        Reference target = segments.get(i);
        // the overlap between segments (i - 1) and i lives at index (i - 1); guard on that
        // same index so the final overlap of the path is not dropped
        String overlap = (overlaps != null && ordinal < overlaps.size()) ? overlaps.get(ordinal) : null;
        traversals.add(new Traversal(path.getName(), ordinal, source, target, overlap, emptyAnnotations));
    }
    return traversals;
}
use of org.dishevelled.bio.assembly.gfa1.Traversal in project dishevelled by heuermh.
the class AssemblyModel method setPaths.
/**
 * Set the paths for this assembly model to the specified GFA 1.0 paths.
 * <p>
 * If this model already holds paths, the current path selection, the paths, the
 * traversals, and the traversals-by-path index are cleared first. For each specified
 * path, the traversals registered under its name in <code>traversalsByPathName</code>
 * are used; when there are none, traversals are derived from the path itself via
 * {@link #traversalsFor(Path)}. Afterwards, if any traversals were indexed, the indexed
 * paths are added to the model and the first of them becomes the current path.
 * </p>
 *
 * @param paths zero or more GFA 1.0 paths, must not be null
 * @param traversalsByPathName traversals keyed by path name, must not be null
 */
void setPaths(final Iterable<Path> paths, final ListMultimap<String, Traversal> traversalsByPathName) {
    checkNotNull(paths);
    checkNotNull(traversalsByPathName);

    // drop any previously loaded state before loading the new paths
    boolean alreadyLoaded = !this.paths.isEmpty();
    if (alreadyLoaded) {
        setPath(null);
        this.paths.clear();
        traversals.clear();
        traversalsByPath.clear();
    }

    // prefer traversals read from the input, otherwise derive them from the path
    for (Path path : paths) {
        List<Traversal> provided = traversalsByPathName.get(path.getName());
        if (provided.isEmpty()) {
            traversalsByPath.putAll(path, traversalsFor(path));
        }
        else {
            traversalsByPath.putAll(path, provided);
        }
    }

    // nothing indexed, nothing to show
    if (traversalsByPath.isEmpty()) {
        return;
    }
    Set<Path> indexedPaths = traversalsByPath.keySet();
    paths().addAll(indexedPaths);
    setPath(indexedPaths.iterator().next());
}
use of org.dishevelled.bio.assembly.gfa1.Traversal in project dishevelled by heuermh.
the class ImportGfa1Task method run.
/**
 * Import the input file, in Graphical Fragment Assembly (GFA) 1.0 format, into the
 * current Cytoscape network.
 * <p>
 * The input file is read twice: the first pass caches segments by identifier, the second
 * pass collects links, paths (only if <code>loadPaths</code> is set), and traversals, and
 * records every (segment identifier, orientation) pair referenced by a link or a path.
 * One node is then created per referenced pair and one directed edge per link. Finally,
 * if paths were requested and found, they are handed to the assembly model.
 * </p>
 *
 * @param taskMonitor task monitor used to report the title and status messages
 * @throws Exception if the input file cannot be read, or if a link or path references
 *    a segment identifier that is not present in the input file
 */
@Override
public void run(final TaskMonitor taskMonitor) throws Exception {
    taskMonitor.setTitle("Import a network in Graphical Fragment Assembly (GFA) 1.0 format");

    final Map<String, Segment> segmentsById = new HashMap<String, Segment>();
    final Table<String, Orientation, Segment> segmentsByOrientation = HashBasedTable.create();
    final ListMultimap<String, Traversal> traversalsByPathName = ArrayListMultimap.create();

    taskMonitor.setStatusMessage("Reading segments from file ...");

    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream segments, building cache
        stream(readable, new Gfa1Adapter() {
            @Override
            protected boolean segment(final Segment segment) {
                segmentsById.put(segment.getId(), segment);
                return true;
            }
        });
    }

    taskMonitor.setStatusMessage("Finding reverse orientation references ...");

    final List<Path> paths = new ArrayList<Path>();
    final List<Link> links = new ArrayList<Link>();
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream paths and links, looking for reverse orientation references
        stream(readable, new Gfa1Adapter() {
            // register the referenced (segment id, orientation) pair, failing on unknown segment ids
            private void putIfAbsent(final Reference reference) {
                Segment segment = segmentsById.get(reference.getId());
                if (segment == null) {
                    throw new RuntimeException("could not find segment by id " + reference.getId());
                }
                if (!segmentsByOrientation.contains(reference.getId(), reference.getOrientation())) {
                    segmentsByOrientation.put(reference.getId(), reference.getOrientation(), segment);
                }
            }

            @Override
            protected boolean path(final Path path) {
                // references are registered even when paths themselves are not loaded
                for (Reference reference : path.getSegments()) {
                    putIfAbsent(reference);
                }
                if (loadPaths) {
                    paths.add(path);
                }
                return true;
            }

            @Override
            protected boolean link(final Link link) {
                putIfAbsent(link.getSource());
                putIfAbsent(link.getTarget());
                links.add(link);
                return true;
            }

            @Override
            protected boolean traversal(final Traversal traversal) {
                traversalsByPathName.put(traversal.getPathName(), traversal);
                return true;
            }
        });
    }
    logger.info("read {} segments, {} links, {} paths, and {} traversals from {}", new Object[] { segmentsById.size(), links.size(), paths.size(), traversalsByPathName.size(), inputFile });

    // the segment cache is not read again after this point
    segmentsById.clear();

    taskMonitor.setStatusMessage("Building Cytoscape nodes from segments ...");

    final CyNetwork network = applicationManager.getCurrentNetwork();
    // nodes keyed by segment id plus orientation symbol, e.g. "s1+" or "s1-"
    final Map<String, CyNode> nodes = new HashMap<String, CyNode>(segmentsByOrientation.size());

    for (Table.Cell<String, Orientation, Segment> c : segmentsByOrientation.cellSet()) {
        String id = c.getRowKey();
        Orientation orientation = c.getColumnKey();
        Segment segment = c.getValue();
        String name = id + (orientation.isForward() ? "+" : "-");

        if (!nodes.containsKey(name)) {
            CyNode node = network.addNode();
            CyTable nodeTable = network.getDefaultNodeTable();
            CyRow nodeRow = nodeTable.getRow(node.getSUID());

            Integer length = segment.getLengthOpt().orElse(null);
            Integer readCount = segment.getReadCountOpt().orElse(null);
            Integer fragmentCount = segment.getFragmentCountOpt().orElse(null);
            Integer kmerCount = segment.getKmerCountOpt().orElse(null);
            String sequenceChecksum = segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : null;
            String sequenceUri = segment.getSequenceUriOpt().orElse(null);

            setValue(nodeTable, nodeRow, "name", String.class, name);
            setValue(nodeTable, nodeRow, "length", Integer.class, length);
            setValue(nodeTable, nodeRow, "readCount", Integer.class, readCount);
            setValue(nodeTable, nodeRow, "fragmentCount", Integer.class, fragmentCount);
            setValue(nodeTable, nodeRow, "kmerCount", Integer.class, kmerCount);
            setValue(nodeTable, nodeRow, "sequenceChecksum", String.class, sequenceChecksum);
            setValue(nodeTable, nodeRow, "sequenceUri", String.class, sequenceUri);

            // default display length to length
            Integer displayLength = length;

            String sequence = orientation.isForward() ? segment.getSequence() : reverseComplement(segment.getSequence());
            if (sequence != null) {
                Integer sequenceLength = sequence.length();
                String displaySequence = trimFromMiddle(sequence, displaySequenceLimit);
                Integer displaySequenceLength = displaySequence.length();

                if (loadSequences) {
                    setValue(nodeTable, nodeRow, "sequence", String.class, sequence);
                }
                setValue(nodeTable, nodeRow, "sequenceLength", Integer.class, sequenceLength);
                setValue(nodeTable, nodeRow, "displaySequence", String.class, displaySequence);
                setValue(nodeTable, nodeRow, "displaySequenceLength", Integer.class, displaySequenceLength);

                // override display length with sequence length if necessary;
                // compare boxed values with equals, not by reference
                if (length == null || !length.equals(sequenceLength)) {
                    displayLength = sequenceLength;
                }
            }

            StringBuilder sb = new StringBuilder();
            sb.append(name);
            if (displayLength != null) {
                sb.append("  ");
                sb.append(displayLength);
                sb.append(" bp");
            }
            // display name is name plus length only; the label continues with the counts
            String displayName = sb.toString();

            if (readCount != null) {
                sb.append(" ");
                sb.append(readCount);
                sb.append(" reads");
            }
            if (fragmentCount != null) {
                sb.append(" ");
                sb.append(fragmentCount);
                sb.append(" fragments");
            }
            if (kmerCount != null) {
                sb.append(" ");
                sb.append(kmerCount);
                sb.append(" kmers");
            }
            String displayLabel = sb.toString();

            setValue(nodeTable, nodeRow, "displayName", String.class, displayName);
            setValue(nodeTable, nodeRow, "displayLength", Integer.class, displayLength);
            setValue(nodeTable, nodeRow, "displayLabel", String.class, displayLabel);

            nodes.put(name, node);
        }
    }
    logger.info("converted segments and orientation to {} nodes", nodes.size());

    // the orientation table is not read again after this point
    segmentsByOrientation.clear();

    taskMonitor.setStatusMessage("Building Cytoscape edges from links ...");

    for (Link link : links) {
        String sourceId = link.getSource().getId();
        String sourceOrientation = link.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = link.getTarget().getId();
        String targetOrientation = link.getTarget().isForwardOrientation() ? "+" : "-";

        // node keys use the same id plus orientation symbol scheme as above
        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        CyEdge edge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(edge.getSUID());

        setValue(edgeTable, edgeRow, "id", String.class, link.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "edge");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "overlap", String.class, link.getOverlapOpt().orElse(null));
        setValue(edgeTable, edgeRow, "readCount", Integer.class, link.getReadCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "fragmentCount", Integer.class, link.getFragmentCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "kmerCount", Integer.class, link.getKmerCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mappingQuality", Integer.class, link.getMappingQualityOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mismatchCount", Integer.class, link.getMismatchCountOpt().orElse(null));
    }
    logger.info("converted links to {} edges", links.size());

    nodes.clear();
    links.clear();

    // pass paths to AssemblyApp if requested
    if (loadPaths && !paths.isEmpty()) {
        taskMonitor.setStatusMessage("Loading paths in path view ...");
        assemblyModel.setInputFileName(inputFile.toString());
        assemblyModel.setPaths(paths, traversalsByPathName);
    }
}
use of org.dishevelled.bio.assembly.gfa1.Traversal in project dishevelled-bio by heuermh.
the class TraversalsToPropertyGraph method call.
/**
 * Convert the traversals in the input GFA 1.0 file to property graph CSV format,
 * writing one edge line per traversal to the output edges file after the header.
 * <p>
 * Each line consists of the traversal identifier, source name, target name, source
 * orientation symbol, target orientation symbol, the literal <code>t</code>, path name,
 * ordinal, and overlap (empty if absent), separated by commas.
 * </p>
 *
 * @return zero on success
 * @throws IllegalArgumentException if a traversal does not contain an identifier
 * @throws Exception if the input cannot be read or the output cannot be opened
 */
@Override
public Integer call() throws Exception {
    try (PrintWriter edgesWriter = writer(outputEdgesFile)) {
        edgesWriter.println(HEADER);

        // close the reader here as well; NOTE(review): assumes Gfa1Reader.stream does not
        // close the reader it is given, as the sibling TraversePaths closes its own -- confirm
        try (java.io.BufferedReader gfa1Reader = reader(inputGfa1File)) {
            Gfa1Reader.stream(gfa1Reader, new Gfa1Adapter() {
                @Override
                public boolean traversal(final Traversal traversal) {
                    if (!traversal.containsId()) {
                        throw new IllegalArgumentException("traversal identifiers are required for property graph CSV format");
                    }
                    StringBuilder sb = new StringBuilder();
                    sb.append(traversal.getId());
                    sb.append(",");
                    sb.append(traversal.getSource().getName());
                    sb.append(",");
                    sb.append(traversal.getTarget().getName());
                    sb.append(",");
                    sb.append(traversal.getSource().getOrientation().getSymbol());
                    sb.append(",");
                    sb.append(traversal.getTarget().getOrientation().getSymbol());
                    sb.append(",t,");
                    sb.append(traversal.getPathName());
                    sb.append(",");
                    sb.append(traversal.getOrdinal());
                    sb.append(",");
                    sb.append(traversal.getOverlapOpt().orElse(""));
                    edgesWriter.println(sb);
                    return true;
                }
            });
        }
        return 0;
    }
}
use of org.dishevelled.bio.assembly.gfa1.Traversal in project dishevelled-bio by heuermh.
the class TraversePaths method call.
/**
 * Copy the input GFA 1.0 file to the output GFA 1.0 file, writing after each path
 * record one traversal record per pair of adjacent segment references in that path.
 * <p>
 * The traversal with ordinal <code>n</code> runs from the segment reference at index
 * <code>n</code> to the one at index <code>n + 1</code>, and is given the overlap at
 * index <code>n</code> of the path overlaps when such an overlap exists, or
 * <code>null</code> otherwise.
 * </p>
 *
 * @return zero on success
 * @throws Exception if the input cannot be read or the output cannot be opened
 */
@Override
public Integer call() throws Exception {
    try (BufferedReader reader = reader(inputGfa1File); PrintWriter writer = writer(outputGfa1File)) {
        Gfa1Reader.stream(reader, new Gfa1Listener() {
            @Override
            public boolean record(final Gfa1Record gfa1Record) {
                // every record is passed through unchanged
                Gfa1Writer.write(gfa1Record, writer);

                if (gfa1Record instanceof Path) {
                    Path path = (Path) gfa1Record;
                    List<Reference> segments = path.getSegments();
                    List<String> overlaps = path.getOverlaps();
                    int size = segments.size();

                    for (int i = 1; i < size; i++) {
                        int ordinal = i - 1;
                        Reference source = segments.get(ordinal);
                        Reference target = segments.get(i);
                        // the overlap between segments (i - 1) and i lives at index (i - 1);
                        // guard on that same index so the final overlap is not dropped
                        String overlap = (overlaps != null && ordinal < overlaps.size()) ? overlaps.get(ordinal) : null;
                        Traversal traversal = new Traversal(path.getName(), ordinal, source, target, overlap, EMPTY_ANNOTATIONS);
                        Gfa1Writer.write(traversal, writer);
                    }
                }
                return true;
            }
        });
        return 0;
    }
}
Aggregations