Search in sources :

Example 1 with Traversal

use of org.dishevelled.bio.assembly.gfa1.Traversal in project dishevelled by heuermh.

the class AssemblyModel method traversalsFor.

/**
 * Create and return a list of path traversals for the specified path.
 * <p>
 * One traversal is created for each pair of adjacent segment references in the
 * path, with ordinals starting at zero. The traversal with ordinal <code>k</code>
 * connects segment <code>k</code> to segment <code>k + 1</code> and uses the path
 * overlap at index <code>k</code>, or <code>null</code> if the path has no overlaps
 * or has fewer than <code>k + 1</code> of them. A path with fewer than two segments
 * yields an empty list.
 * </p>
 *
 * @param path path, must not be null
 * @return a list of path traversals for the specified path
 */
static List<Traversal> traversalsFor(final Path path) {
    checkNotNull(path);
    int size = path.getSegments().size();
    List<Traversal> traversals = new ArrayList<Traversal>(size);
    Map<String, Annotation> emptyAnnotations = Collections.emptyMap();
    for (int i = 1; i < size; i++) {
        Reference source = path.getSegments().get(i - 1);
        Reference target = path.getSegments().get(i);
        // the overlap between segments (i - 1) and i is read from index (i - 1), which
        // exists whenever there are at least i overlaps; the previous "size() > i" guard
        // was off by one and always dropped the last overlap of the path
        String overlap = (path.getOverlaps() != null && path.getOverlaps().size() >= i) ? path.getOverlaps().get(i - 1) : null;
        traversals.add(new Traversal(path.getName(), i - 1, source, target, overlap, emptyAnnotations));
    }
    return traversals;
}
Also used : Reference(org.dishevelled.bio.assembly.gfa1.Reference) ArrayList(java.util.ArrayList) Traversal(org.dishevelled.bio.assembly.gfa1.Traversal) Annotation(org.dishevelled.bio.annotation.Annotation)

Example 2 with Traversal

use of org.dishevelled.bio.assembly.gfa1.Traversal in project dishevelled by heuermh.

the class AssemblyModel method setPaths.

/**
 * Replace the paths in this assembly model with the specified GFA 1.0 paths.
 * <p>
 * Any previously loaded paths and traversals are cleared first. For each path,
 * the traversals registered under its name in <code>traversalsByPathName</code>
 * are used if there are any; otherwise traversals are derived from the path via
 * {@link #traversalsFor}. If at least one traversal was stored, the keys of the
 * traversals-by-path map are added to the path list and the first key becomes
 * the current path.
 * </p>
 *
 * @param paths zero or more GFA 1.0 paths, must not be null
 * @param traversalsByPathName traversals keyed by path name, must not be null
 */
void setPaths(final Iterable<Path> paths, final ListMultimap<String, Traversal> traversalsByPathName) {
    checkNotNull(paths);
    checkNotNull(traversalsByPathName);

    // discard previously loaded state, if any
    boolean hasExistingPaths = !this.paths.isEmpty();
    if (hasExistingPaths) {
        setPath(null);
        this.paths.clear();
        traversals.clear();
        traversalsByPath.clear();
    }

    // prefer traversals supplied by the caller, otherwise derive them from the path
    for (Path path : paths) {
        List<Traversal> provided = traversalsByPathName.get(path.getName());
        if (provided.isEmpty()) {
            traversalsByPath.putAll(path, traversalsFor(path));
        }
        else {
            traversalsByPath.putAll(path, provided);
        }
    }

    // nothing to display
    if (traversalsByPath.isEmpty()) {
        return;
    }

    // NOTE(review): only paths present as keys in traversalsByPath are added here;
    // presumably a path that produced no traversals is left out -- confirm
    Set<Path> keys = traversalsByPath.keySet();
    paths().addAll(keys);
    setPath(keys.iterator().next());
}
Also used : Path(org.dishevelled.bio.assembly.gfa1.Path) Traversal(org.dishevelled.bio.assembly.gfa1.Traversal)

Example 3 with Traversal

use of org.dishevelled.bio.assembly.gfa1.Traversal in project dishevelled by heuermh.

the class ImportGfa1Task method run.

/**
 * Import a Graphical Fragment Assembly (GFA) 1.0 file into the current network.
 * <p>
 * The input file is read twice: the first pass caches segments by identifier, the
 * second pass collects links, paths (only if <code>loadPaths</code> is set), and
 * traversals, and records every (segment identifier, orientation) pair referenced by
 * a path or a link. One node is then created per recorded pair and one edge per link.
 * Segments that are not referenced by any path or link do not produce a node.
 * Finally, if <code>loadPaths</code> is set and at least one path was read, the paths
 * and traversals are handed to the assembly model.
 * </p>
 *
 * @param taskMonitor task monitor used to report the title and status messages
 * @throws Exception if an error occurs
 */
@Override
public void run(final TaskMonitor taskMonitor) throws Exception {
    taskMonitor.setTitle("Import a network in Graphical Fragment Assembly (GFA) 1.0 format");
    final Map<String, Segment> segmentsById = new HashMap<String, Segment>();
    final Table<String, Orientation, Segment> segmentsByOrientation = HashBasedTable.create();
    final ListMultimap<String, Traversal> traversalsByPathName = ArrayListMultimap.create();
    taskMonitor.setStatusMessage("Reading segments from file ...");
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream segments, building cache
        stream(readable, new Gfa1Adapter() {

            @Override
            protected boolean segment(final Segment segment) {
                segmentsById.put(segment.getId(), segment);
                return true;
            }
        });
    }
    taskMonitor.setStatusMessage("Finding reverse orientation references ...");
    final List<Path> paths = new ArrayList<Path>();
    final List<Link> links = new ArrayList<Link>();
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream paths and links, looking for reverse orientation references
        stream(readable, new Gfa1Adapter() {

            // record the (id, orientation) pair for the reference, failing if the
            // referenced segment was not seen in the first pass
            private void putIfAbsent(final Reference reference) {
                Segment segment = segmentsById.get(reference.getId());
                if (segment == null) {
                    throw new RuntimeException("could not find segment by id " + reference.getId());
                }
                if (!segmentsByOrientation.contains(reference.getId(), reference.getOrientation())) {
                    segmentsByOrientation.put(reference.getId(), reference.getOrientation(), segment);
                }
            }

            @Override
            protected boolean path(final Path path) {
                // references are recorded even when the paths themselves are not loaded
                for (Reference reference : path.getSegments()) {
                    putIfAbsent(reference);
                }
                if (loadPaths) {
                    paths.add(path);
                }
                return true;
            }

            @Override
            protected boolean link(final Link link) {
                putIfAbsent(link.getSource());
                putIfAbsent(link.getTarget());
                links.add(link);
                return true;
            }

            @Override
            protected boolean traversal(final Traversal traversal) {
                traversalsByPathName.put(traversal.getPathName(), traversal);
                return true;
            }
        });
    }
    logger.info("read {} segments, {} links, {} paths, and {} traversals from {}", new Object[] { segmentsById.size(), links.size(), paths.size(), traversalsByPathName.size(), inputFile });
    // the by-id cache is no longer needed once references have been resolved
    segmentsById.clear();
    taskMonitor.setStatusMessage("Building Cytoscape nodes from segments ...");
    final CyNetwork network = applicationManager.getCurrentNetwork();
    // nodes keyed by segment id followed by "+" or "-" for orientation
    final Map<String, CyNode> nodes = new HashMap<String, CyNode>(segmentsByOrientation.size());
    for (Table.Cell<String, Orientation, Segment> c : segmentsByOrientation.cellSet()) {
        String id = c.getRowKey();
        Orientation orientation = c.getColumnKey();
        Segment segment = c.getValue();
        String name = id + (orientation.isForward() ? "+" : "-");
        if (!nodes.containsKey(name)) {
            CyNode node = network.addNode();
            CyTable nodeTable = network.getDefaultNodeTable();
            CyRow nodeRow = nodeTable.getRow(node.getSUID());
            Integer length = segment.getLengthOpt().orElse(null);
            Integer readCount = segment.getReadCountOpt().orElse(null);
            Integer fragmentCount = segment.getFragmentCountOpt().orElse(null);
            Integer kmerCount = segment.getKmerCountOpt().orElse(null);
            String sequenceChecksum = segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : null;
            String sequenceUri = segment.getSequenceUriOpt().orElse(null);
            setValue(nodeTable, nodeRow, "name", String.class, name);
            setValue(nodeTable, nodeRow, "length", Integer.class, length);
            setValue(nodeTable, nodeRow, "readCount", Integer.class, readCount);
            setValue(nodeTable, nodeRow, "fragmentCount", Integer.class, fragmentCount);
            setValue(nodeTable, nodeRow, "kmerCount", Integer.class, kmerCount);
            setValue(nodeTable, nodeRow, "sequenceChecksum", String.class, sequenceChecksum);
            setValue(nodeTable, nodeRow, "sequenceUri", String.class, sequenceUri);
            // default display length to length
            Integer displayLength = length;
            // NOTE(review): reverseComplement receives the raw sequence before the null
            // check below; confirm it tolerates a null sequence
            String sequence = orientation.isForward() ? segment.getSequence() : reverseComplement(segment.getSequence());
            if (sequence != null) {
                Integer sequenceLength = sequence.length();
                String displaySequence = trimFromMiddle(sequence, displaySequenceLimit);
                Integer displaySequenceLength = displaySequence.length();
                if (loadSequences) {
                    setValue(nodeTable, nodeRow, "sequence", String.class, sequence);
                }
                setValue(nodeTable, nodeRow, "sequenceLength", Integer.class, sequenceLength);
                setValue(nodeTable, nodeRow, "displaySequence", String.class, displaySequence);
                setValue(nodeTable, nodeRow, "displaySequenceLength", Integer.class, displaySequenceLength);
                // override display length with sequence length if necessary; boxed values
                // are compared with equals since != would compare object references
                if (length == null || !length.equals(sequenceLength)) {
                    displayLength = sequenceLength;
                }
            }
            StringBuilder sb = new StringBuilder();
            sb.append(name);
            if (displayLength != null) {
                sb.append("  ");
                sb.append(displayLength);
                sb.append(" bp");
            }
            // display name is name and length only; the label built below adds the counts
            String displayName = sb.toString();
            if (readCount != null) {
                sb.append(" ");
                sb.append(readCount);
                sb.append(" reads");
            }
            if (fragmentCount != null) {
                sb.append(" ");
                sb.append(fragmentCount);
                sb.append(" fragments");
            }
            if (kmerCount != null) {
                sb.append(" ");
                sb.append(kmerCount);
                sb.append(" kmers");
            }
            String displayLabel = sb.toString();
            setValue(nodeTable, nodeRow, "displayName", String.class, displayName);
            setValue(nodeTable, nodeRow, "displayLength", Integer.class, displayLength);
            setValue(nodeTable, nodeRow, "displayLabel", String.class, displayLabel);
            nodes.put(name, node);
        }
    }
    logger.info("converted segments and orientation to " + nodes.size() + " nodes");
    segmentsByOrientation.clear();
    taskMonitor.setStatusMessage("Building Cytoscape edges from links ...");
    for (Link link : links) {
        String sourceId = link.getSource().getId();
        String sourceOrientation = link.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = link.getTarget().getId();
        String targetOrientation = link.getTarget().isForwardOrientation() ? "+" : "-";
        // node keys use the same id + orientation symbol format as the node names above
        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        CyEdge edge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(edge.getSUID());
        setValue(edgeTable, edgeRow, "id", String.class, link.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "edge");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "overlap", String.class, link.getOverlapOpt().orElse(null));
        setValue(edgeTable, edgeRow, "readCount", Integer.class, link.getReadCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "fragmentCount", Integer.class, link.getFragmentCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "kmerCount", Integer.class, link.getKmerCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mappingQuality", Integer.class, link.getMappingQualityOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mismatchCount", Integer.class, link.getMismatchCountOpt().orElse(null));
    }
    logger.info("converted links to " + links.size() + " edges");
    nodes.clear();
    links.clear();
    // pass paths to AssemblyApp if requested
    if (loadPaths && !paths.isEmpty()) {
        taskMonitor.setStatusMessage("Loading paths in path view ...");
        assemblyModel.setInputFileName(inputFile.toString());
        assemblyModel.setPaths(paths, traversalsByPathName);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Traversal(org.dishevelled.bio.assembly.gfa1.Traversal) CyNetwork(org.cytoscape.model.CyNetwork) CyRow(org.cytoscape.model.CyRow) Segment(org.dishevelled.bio.assembly.gfa1.Segment) CyTable(org.cytoscape.model.CyTable) FileReader(java.io.FileReader) CyNode(org.cytoscape.model.CyNode) Path(org.dishevelled.bio.assembly.gfa1.Path) HashBasedTable(com.google.common.collect.HashBasedTable) CyTable(org.cytoscape.model.CyTable) Table(com.google.common.collect.Table) Reference(org.dishevelled.bio.assembly.gfa1.Reference) Orientation(org.dishevelled.bio.assembly.gfa1.Orientation) CyEdge(org.cytoscape.model.CyEdge) BufferedReader(java.io.BufferedReader) Gfa1Adapter(org.dishevelled.bio.assembly.gfa1.Gfa1Adapter) Link(org.dishevelled.bio.assembly.gfa1.Link)

Example 4 with Traversal

use of org.dishevelled.bio.assembly.gfa1.Traversal in project dishevelled-bio by heuermh.

the class TraversalsToPropertyGraph method call.

/**
 * Write the traversals read from the input GFA 1.0 file to the output edges file,
 * one comma-separated line per traversal following a header line.
 * <p>
 * Each line contains, in order: traversal identifier, source name, target name,
 * source orientation symbol, target orientation symbol, the literal <code>t</code>,
 * path name, ordinal, and overlap (empty if absent).
 * </p>
 *
 * @return zero on success
 * @throws IllegalArgumentException if a traversal does not have an identifier
 * @throws Exception if an error occurs
 */
@Override
public Integer call() throws Exception {
    // try-with-resources closes the writer on every exit path, and does not attempt to
    // close a writer that was never created
    try (PrintWriter edgesWriter = writer(outputEdgesFile)) {
        edgesWriter.println(HEADER);
        // NOTE(review): the reader created here is not closed by this method; confirm
        // whether Gfa1Reader.stream closes it
        Gfa1Reader.stream(reader(inputGfa1File), new Gfa1Adapter() {

            @Override
            public boolean traversal(final Traversal traversal) {
                if (!traversal.containsId()) {
                    throw new IllegalArgumentException("traversal identifiers are required for property graph CSV format");
                }
                StringBuilder sb = new StringBuilder();
                sb.append(traversal.getId());
                sb.append(",");
                sb.append(traversal.getSource().getName());
                sb.append(",");
                sb.append(traversal.getTarget().getName());
                sb.append(",");
                sb.append(traversal.getSource().getOrientation().getSymbol());
                sb.append(",");
                sb.append(traversal.getTarget().getOrientation().getSymbol());
                sb.append(",t,");
                sb.append(traversal.getPathName());
                sb.append(",");
                sb.append(traversal.getOrdinal());
                sb.append(",");
                sb.append(traversal.getOverlapOpt().orElse(""));
                edgesWriter.println(sb);
                return true;
            }
        });
        return 0;
    }
}
Also used : Traversal(org.dishevelled.bio.assembly.gfa1.Traversal) Gfa1Adapter(org.dishevelled.bio.assembly.gfa1.Gfa1Adapter) CommandLineParseException(org.dishevelled.commandline.CommandLineParseException) PrintWriter(java.io.PrintWriter)

Example 5 with Traversal

use of org.dishevelled.bio.assembly.gfa1.Traversal in project dishevelled-bio by heuermh.

the class TraversePaths method call.

/**
 * Copy every record from the input GFA 1.0 file to the output file, and after each
 * path record additionally write one traversal record per pair of adjacent segment
 * references in that path.
 * <p>
 * The traversal with ordinal <code>k</code> connects segment <code>k</code> to segment
 * <code>k + 1</code> of the path and uses the path overlap at index <code>k</code>, or
 * <code>null</code> if the path has no overlaps or has fewer than <code>k + 1</code>
 * of them.
 * </p>
 *
 * @return zero on success
 * @throws Exception if an error occurs, including a failure to close the reader
 */
@Override
public Integer call() throws Exception {
    // try-with-resources closes both resources on every exit path, and does not attempt
    // to close a resource that was never created
    try (BufferedReader reader = reader(inputGfa1File); PrintWriter writer = writer(outputGfa1File)) {
        Gfa1Reader.stream(reader, new Gfa1Listener() {

            @Override
            public boolean record(final Gfa1Record gfa1Record) {
                Gfa1Writer.write(gfa1Record, writer);
                if (gfa1Record instanceof Path) {
                    Path path = (Path) gfa1Record;
                    int size = path.getSegments().size();
                    for (int i = 1; i < size; i++) {
                        Reference source = path.getSegments().get(i - 1);
                        Reference target = path.getSegments().get(i);
                        // the overlap between segments (i - 1) and i is read from index
                        // (i - 1), which exists whenever there are at least i overlaps; the
                        // previous "size() > i" guard was off by one and always dropped
                        // the last overlap of the path
                        String overlap = (path.getOverlaps() != null && path.getOverlaps().size() >= i) ? path.getOverlaps().get(i - 1) : null;
                        Traversal traversal = new Traversal(path.getName(), i - 1, source, target, overlap, EMPTY_ANNOTATIONS);
                        Gfa1Writer.write(traversal, writer);
                    }
                }
                return true;
            }
        });
        return 0;
    }
}
Also used : Path(org.dishevelled.bio.assembly.gfa1.Path) Gfa1Record(org.dishevelled.bio.assembly.gfa1.Gfa1Record) Reference(org.dishevelled.bio.assembly.gfa1.Reference) BufferedReader(java.io.BufferedReader) Traversal(org.dishevelled.bio.assembly.gfa1.Traversal) Gfa1Listener(org.dishevelled.bio.assembly.gfa1.Gfa1Listener) CommandLineParseException(org.dishevelled.commandline.CommandLineParseException) PrintWriter(java.io.PrintWriter)

Aggregations

Traversal (org.dishevelled.bio.assembly.gfa1.Traversal): 8, PrintWriter (java.io.PrintWriter): 5, CommandLineParseException (org.dishevelled.commandline.CommandLineParseException): 5, BufferedReader (java.io.BufferedReader): 4, Path (org.dishevelled.bio.assembly.gfa1.Path): 4, Reference (org.dishevelled.bio.assembly.gfa1.Reference): 4, ArrayList (java.util.ArrayList): 3, Gfa1Adapter (org.dishevelled.bio.assembly.gfa1.Gfa1Adapter): 3, Gfa1Listener (org.dishevelled.bio.assembly.gfa1.Gfa1Listener): 3, Gfa1Record (org.dishevelled.bio.assembly.gfa1.Gfa1Record): 3, Link (org.dishevelled.bio.assembly.gfa1.Link): 2, HashBasedTable (com.google.common.collect.HashBasedTable): 1, Table (com.google.common.collect.Table): 1, FileReader (java.io.FileReader): 1, HashMap (java.util.HashMap): 1, AtomicLong (java.util.concurrent.atomic.AtomicLong): 1, CyEdge (org.cytoscape.model.CyEdge): 1, CyNetwork (org.cytoscape.model.CyNetwork): 1, CyNode (org.cytoscape.model.CyNode): 1, CyRow (org.cytoscape.model.CyRow): 1