Search in sources :

Example 1 with Orientation

use of org.dishevelled.bio.assembly.gfa2.Orientation in project dishevelled by heuermh.

the class ImportGfa1Task method run.

@Override
public void run(final TaskMonitor taskMonitor) throws Exception {
    taskMonitor.setTitle("Import a network in Graphical Fragment Assembly (GFA) 1.0 format");
    final Map<String, Segment> segmentsById = new HashMap<String, Segment>();
    final Table<String, Orientation, Segment> segmentsByOrientation = HashBasedTable.create();
    final ListMultimap<String, Traversal> traversalsByPathName = ArrayListMultimap.create();
    taskMonitor.setStatusMessage("Reading segments from file ...");
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream segments, building cache
        stream(readable, new Gfa1Adapter() {

            @Override
            protected boolean segment(final Segment segment) {
                segmentsById.put(segment.getId(), segment);
                return true;
            }
        });
    }
    taskMonitor.setStatusMessage("Finding reverse orientation references ...");
    final List<Path> paths = new ArrayList<Path>();
    final List<Link> links = new ArrayList<Link>();
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream paths and links, looking for reverse orientation references
        stream(readable, new Gfa1Adapter() {

            private void putIfAbsent(final Reference reference) {
                Segment segment = segmentsById.get(reference.getId());
                if (segment == null) {
                    throw new RuntimeException("could not find segment by id " + reference.getId());
                }
                if (!segmentsByOrientation.contains(reference.getId(), reference.getOrientation())) {
                    segmentsByOrientation.put(reference.getId(), reference.getOrientation(), segment);
                }
            }

            @Override
            protected boolean path(final Path path) {
                for (Reference reference : path.getSegments()) {
                    putIfAbsent(reference);
                }
                if (loadPaths) {
                    paths.add(path);
                }
                return true;
            }

            @Override
            protected boolean link(final Link link) {
                putIfAbsent(link.getSource());
                putIfAbsent(link.getTarget());
                links.add(link);
                return true;
            }

            @Override
            protected boolean traversal(final Traversal traversal) {
                traversalsByPathName.put(traversal.getPathName(), traversal);
                return true;
            }
        });
    }
    logger.info("read {} segments, {} links, {} paths, and {} traversals from {}", new Object[] { segmentsById.size(), links.size(), paths.size(), traversalsByPathName.size(), inputFile });
    segmentsById.clear();
    taskMonitor.setStatusMessage("Building Cytoscape nodes from segments ...");
    final CyNetwork network = applicationManager.getCurrentNetwork();
    final Map<String, CyNode> nodes = new HashMap<String, CyNode>(segmentsByOrientation.size());
    for (Table.Cell<String, Orientation, Segment> c : segmentsByOrientation.cellSet()) {
        String id = c.getRowKey();
        Orientation orientation = c.getColumnKey();
        Segment segment = c.getValue();
        String name = id + (orientation.isForward() ? "+" : "-");
        if (!nodes.containsKey(name)) {
            CyNode node = network.addNode();
            CyTable nodeTable = network.getDefaultNodeTable();
            CyRow nodeRow = nodeTable.getRow(node.getSUID());
            Integer length = segment.getLengthOpt().orElse(null);
            Integer readCount = segment.getReadCountOpt().orElse(null);
            Integer fragmentCount = segment.getFragmentCountOpt().orElse(null);
            Integer kmerCount = segment.getKmerCountOpt().orElse(null);
            String sequenceChecksum = segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : null;
            String sequenceUri = segment.getSequenceUriOpt().orElse(null);
            setValue(nodeTable, nodeRow, "name", String.class, name);
            setValue(nodeTable, nodeRow, "length", Integer.class, length);
            setValue(nodeTable, nodeRow, "readCount", Integer.class, readCount);
            setValue(nodeTable, nodeRow, "fragmentCount", Integer.class, fragmentCount);
            setValue(nodeTable, nodeRow, "kmerCount", Integer.class, kmerCount);
            setValue(nodeTable, nodeRow, "sequenceChecksum", String.class, sequenceChecksum);
            setValue(nodeTable, nodeRow, "sequenceUri", String.class, sequenceUri);
            // default display length to length
            Integer displayLength = length;
            String sequence = orientation.isForward() ? segment.getSequence() : reverseComplement(segment.getSequence());
            if (sequence != null) {
                Integer sequenceLength = sequence.length();
                String displaySequence = trimFromMiddle(sequence, displaySequenceLimit);
                Integer displaySequenceLength = displaySequence.length();
                if (loadSequences) {
                    setValue(nodeTable, nodeRow, "sequence", String.class, sequence);
                }
                setValue(nodeTable, nodeRow, "sequenceLength", Integer.class, sequenceLength);
                setValue(nodeTable, nodeRow, "displaySequence", String.class, displaySequence);
                setValue(nodeTable, nodeRow, "displaySequenceLength", Integer.class, displaySequenceLength);
                // override display length with sequence length if necessary
                if (length == null || length != sequenceLength) {
                    displayLength = sequenceLength;
                }
            }
            StringBuilder sb = new StringBuilder();
            sb.append(name);
            if (displayLength != null) {
                sb.append("  ");
                sb.append(displayLength);
                sb.append(" bp");
            }
            String displayName = sb.toString();
            if (readCount != null) {
                sb.append(" ");
                sb.append(readCount);
                sb.append(" reads");
            }
            if (fragmentCount != null) {
                sb.append(" ");
                sb.append(fragmentCount);
                sb.append(" fragments");
            }
            if (kmerCount != null) {
                sb.append(" ");
                sb.append(kmerCount);
                sb.append(" kmers");
            }
            String displayLabel = sb.toString();
            setValue(nodeTable, nodeRow, "displayName", String.class, displayName);
            setValue(nodeTable, nodeRow, "displayLength", Integer.class, displayLength);
            setValue(nodeTable, nodeRow, "displayLabel", String.class, displayLabel);
            nodes.put(name, node);
        }
    }
    logger.info("converted segments and orientation to " + nodes.size() + " nodes");
    segmentsByOrientation.clear();
    taskMonitor.setStatusMessage("Building Cytoscape edges from links ...");
    for (Link link : links) {
        String sourceId = link.getSource().getId();
        String sourceOrientation = link.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = link.getTarget().getId();
        String targetOrientation = link.getTarget().isForwardOrientation() ? "+" : "-";
        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        CyEdge edge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(edge.getSUID());
        setValue(edgeTable, edgeRow, "id", String.class, link.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "edge");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "overlap", String.class, link.getOverlapOpt().orElse(null));
        setValue(edgeTable, edgeRow, "readCount", Integer.class, link.getReadCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "fragmentCount", Integer.class, link.getFragmentCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "kmerCount", Integer.class, link.getKmerCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mappingQuality", Integer.class, link.getMappingQualityOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mismatchCount", Integer.class, link.getMismatchCountOpt().orElse(null));
    }
    logger.info("converted links to " + links.size() + " edges");
    nodes.clear();
    links.clear();
    // pass paths to AssemblyApp if requested
    if (loadPaths && !paths.isEmpty()) {
        taskMonitor.setStatusMessage("Loading paths in path view ...");
        assemblyModel.setInputFileName(inputFile.toString());
        assemblyModel.setPaths(paths, traversalsByPathName);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Traversal(org.dishevelled.bio.assembly.gfa1.Traversal) CyNetwork(org.cytoscape.model.CyNetwork) CyRow(org.cytoscape.model.CyRow) Segment(org.dishevelled.bio.assembly.gfa1.Segment) CyTable(org.cytoscape.model.CyTable) FileReader(java.io.FileReader) CyNode(org.cytoscape.model.CyNode) Path(org.dishevelled.bio.assembly.gfa1.Path) HashBasedTable(com.google.common.collect.HashBasedTable) CyTable(org.cytoscape.model.CyTable) Table(com.google.common.collect.Table) Reference(org.dishevelled.bio.assembly.gfa1.Reference) Orientation(org.dishevelled.bio.assembly.gfa1.Orientation) CyEdge(org.cytoscape.model.CyEdge) BufferedReader(java.io.BufferedReader) Gfa1Adapter(org.dishevelled.bio.assembly.gfa1.Gfa1Adapter) Link(org.dishevelled.bio.assembly.gfa1.Link)

Example 2 with Orientation

use of org.dishevelled.bio.assembly.gfa2.Orientation in project dishevelled by heuermh.

the class ImportGfa2Task method run.

@Override
public void run(final TaskMonitor taskMonitor) throws Exception {
    taskMonitor.setTitle("Import a network in Graphical Fragment Assembly (GFA) 2.0 format");
    final Map<String, Segment> segmentsById = new HashMap<String, Segment>();
    taskMonitor.setStatusMessage("Reading segments from file ...");
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream segments, building cache
        stream(readable, new Gfa2Adapter() {

            @Override
            protected boolean segment(final Segment segment) {
                segmentsById.put(segment.getId(), segment);
                return true;
            }
        });
    }
    taskMonitor.setStatusMessage("Finding reverse orientation references ...");
    final Table<String, Orientation, Segment> segmentsByOrientation = HashBasedTable.create();
    final List<Edge> edges = new ArrayList<Edge>();
    final List<Gap> gaps = new ArrayList<Gap>();
    final List<Path> paths = new ArrayList<Path>();
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream edges, gaps, and paths, looking for reverse orientation references
        stream(readable, new Gfa2Adapter() {

            private void putIfAbsent(final Reference reference) {
                Segment segment = segmentsById.get(reference.getId());
                if (segment == null) {
                    throw new RuntimeException("could not find segment by id " + reference.getId());
                }
                if (!segmentsByOrientation.contains(reference.getId(), reference.getOrientation())) {
                    segmentsByOrientation.put(reference.getId(), reference.getOrientation(), segment);
                }
            }

            @Override
            public boolean edge(final Edge edge) {
                putIfAbsent(edge.getSource());
                putIfAbsent(edge.getTarget());
                edges.add(edge);
                return true;
            }

            @Override
            public boolean gap(final Gap gap) {
                putIfAbsent(gap.getSource());
                putIfAbsent(gap.getTarget());
                gaps.add(gap);
                return true;
            }

            @Override
            public boolean path(final Path path) {
                for (Reference reference : path.getReferences()) {
                    putIfAbsent(reference);
                }
                if (loadPaths) {
                    paths.add(path);
                }
                return true;
            }
        });
    }
    logger.info("read {} segments, {} edges, {} gaps, and {} paths from {}", new Object[] { segmentsById.size(), edges.size(), gaps.size(), paths.size(), inputFile });
    segmentsById.clear();
    taskMonitor.setStatusMessage("Building Cytoscape nodes from segments ...");
    final CyNetwork network = applicationManager.getCurrentNetwork();
    final Map<String, CyNode> nodes = new HashMap<String, CyNode>(segmentsByOrientation.size());
    for (Table.Cell<String, Orientation, Segment> c : segmentsByOrientation.cellSet()) {
        String id = c.getRowKey();
        Orientation orientation = c.getColumnKey();
        Segment segment = c.getValue();
        String name = id + (orientation.isForward() ? "+" : "-");
        if (!nodes.containsKey(name)) {
            CyNode node = network.addNode();
            CyTable nodeTable = network.getDefaultNodeTable();
            CyRow nodeRow = nodeTable.getRow(node.getSUID());
            Integer length = segment.getLength();
            Integer readCount = segment.getReadCountOpt().orElse(null);
            Integer fragmentCount = segment.getFragmentCountOpt().orElse(null);
            Integer kmerCount = segment.getKmerCountOpt().orElse(null);
            String sequenceChecksum = segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : null;
            String sequenceUri = segment.getSequenceUriOpt().orElse(null);
            setValue(nodeTable, nodeRow, "name", String.class, name);
            setValue(nodeTable, nodeRow, "length", Integer.class, length);
            setValue(nodeTable, nodeRow, "readCount", Integer.class, readCount);
            setValue(nodeTable, nodeRow, "fragmentCount", Integer.class, fragmentCount);
            setValue(nodeTable, nodeRow, "kmerCount", Integer.class, kmerCount);
            setValue(nodeTable, nodeRow, "sequenceChecksum", String.class, sequenceChecksum);
            setValue(nodeTable, nodeRow, "sequenceUri", String.class, sequenceUri);
            // default display length to length
            Integer displayLength = length;
            String sequence = orientation.isForward() ? segment.getSequence() : reverseComplement(segment.getSequence());
            if (sequence != null) {
                Integer sequenceLength = sequence.length();
                String displaySequence = trimFromMiddle(sequence, displaySequenceLimit);
                Integer displaySequenceLength = displaySequence.length();
                if (loadSequences) {
                    setValue(nodeTable, nodeRow, "sequence", String.class, sequence);
                }
                setValue(nodeTable, nodeRow, "sequenceLength", Integer.class, sequenceLength);
                setValue(nodeTable, nodeRow, "displaySequence", String.class, displaySequence);
                setValue(nodeTable, nodeRow, "displaySequenceLength", Integer.class, displaySequenceLength);
                // override display length with sequence length if necessary
                if (length == null || length != sequenceLength) {
                    displayLength = sequenceLength;
                }
            }
            StringBuilder sb = new StringBuilder();
            sb.append(name);
            if (displayLength != null) {
                sb.append("  ");
                sb.append(displayLength);
                sb.append(" bp");
            }
            String displayName = sb.toString();
            if (readCount != null) {
                sb.append(" ");
                sb.append(readCount);
                sb.append(" reads");
            }
            if (fragmentCount != null) {
                sb.append(" ");
                sb.append(fragmentCount);
                sb.append(" fragments");
            }
            if (kmerCount != null) {
                sb.append(" ");
                sb.append(kmerCount);
                sb.append(" kmers");
            }
            String displayLabel = sb.toString();
            setValue(nodeTable, nodeRow, "displayName", String.class, displayName);
            setValue(nodeTable, nodeRow, "displayLength", Integer.class, displayLength);
            setValue(nodeTable, nodeRow, "displayLabel", String.class, displayLabel);
            nodes.put(name, node);
        }
    }
    logger.info("converted segments and orientation to " + nodes.size() + " nodes");
    segmentsByOrientation.clear();
    taskMonitor.setStatusMessage("Building Cytoscape edges from edges and gaps ...");
    for (Edge edge : edges) {
        String sourceId = edge.getSource().getId();
        String sourceOrientation = edge.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = edge.getTarget().getId();
        String targetOrientation = edge.getTarget().isForwardOrientation() ? "+" : "-";
        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        CyEdge cyEdge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(cyEdge.getSUID());
        setValue(edgeTable, edgeRow, "id", String.class, edge.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "edge");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "sourceStart", String.class, edge.getSourceStart().toString());
        setValue(edgeTable, edgeRow, "sourceEnd", String.class, edge.getSourceEnd().toString());
        setValue(edgeTable, edgeRow, "targetStart", String.class, edge.getTargetStart().toString());
        setValue(edgeTable, edgeRow, "targetEnd", String.class, edge.getTargetEnd().toString());
        setValue(edgeTable, edgeRow, "alignment", String.class, edge.hasAlignment() ? edge.getAlignment().toString() : null);
        setValue(edgeTable, edgeRow, "readCount", Integer.class, edge.getReadCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "fragmentCount", Integer.class, edge.getFragmentCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "kmerCount", Integer.class, edge.getKmerCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mappingQuality", Integer.class, edge.getMappingQualityOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mismatchCount", Integer.class, edge.getMismatchCountOpt().orElse(null));
    }
    logger.info("converted edges to " + edges.size() + " edges");
    for (Gap gap : gaps) {
        String sourceId = gap.getSource().getId();
        String sourceOrientation = gap.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = gap.getTarget().getId();
        String targetOrientation = gap.getTarget().isForwardOrientation() ? "+" : "-";
        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        CyEdge edge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(edge.getSUID());
        setValue(edgeTable, edgeRow, "id", String.class, gap.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "gap");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "distance", Integer.class, gap.getDistance());
        setValue(edgeTable, edgeRow, "variance", Integer.class, gap.getVarianceOpt().orElse(null));
    }
    logger.info("converted gaps to " + gaps.size() + " edges");
    nodes.clear();
    edges.clear();
    gaps.clear();
    // pass paths to AssemblyApp if requested
    if (loadPaths && !paths.isEmpty()) {
        taskMonitor.setStatusMessage("Loading paths in path view ...");
        assemblyModel.setInputFileName(inputFile.toString());
    // todo: convert to gfa1 paths?
    // note paths in gfa2 can have references to segments, edges, or other groups
    // assemblyModel.setPaths(paths, traversalsByPathName);
    }
}
Also used : Gfa2Adapter(org.dishevelled.bio.assembly.gfa2.Gfa2Adapter) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CyNetwork(org.cytoscape.model.CyNetwork) CyRow(org.cytoscape.model.CyRow) Segment(org.dishevelled.bio.assembly.gfa2.Segment) CyTable(org.cytoscape.model.CyTable) FileReader(java.io.FileReader) CyNode(org.cytoscape.model.CyNode) Path(org.dishevelled.bio.assembly.gfa2.Path) HashBasedTable(com.google.common.collect.HashBasedTable) CyTable(org.cytoscape.model.CyTable) Table(com.google.common.collect.Table) Reference(org.dishevelled.bio.assembly.gfa2.Reference) Orientation(org.dishevelled.bio.assembly.gfa2.Orientation) CyEdge(org.cytoscape.model.CyEdge) Gap(org.dishevelled.bio.assembly.gfa2.Gap) BufferedReader(java.io.BufferedReader) CyEdge(org.cytoscape.model.CyEdge) Edge(org.dishevelled.bio.assembly.gfa2.Edge)

Aggregations

HashBasedTable (com.google.common.collect.HashBasedTable)2 Table (com.google.common.collect.Table)2 BufferedReader (java.io.BufferedReader)2 FileReader (java.io.FileReader)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 CyEdge (org.cytoscape.model.CyEdge)2 CyNetwork (org.cytoscape.model.CyNetwork)2 CyNode (org.cytoscape.model.CyNode)2 CyRow (org.cytoscape.model.CyRow)2 CyTable (org.cytoscape.model.CyTable)2 Gfa1Adapter (org.dishevelled.bio.assembly.gfa1.Gfa1Adapter)1 Link (org.dishevelled.bio.assembly.gfa1.Link)1 Orientation (org.dishevelled.bio.assembly.gfa1.Orientation)1 Path (org.dishevelled.bio.assembly.gfa1.Path)1 Reference (org.dishevelled.bio.assembly.gfa1.Reference)1 Segment (org.dishevelled.bio.assembly.gfa1.Segment)1 Traversal (org.dishevelled.bio.assembly.gfa1.Traversal)1 Edge (org.dishevelled.bio.assembly.gfa2.Edge)1 Gap (org.dishevelled.bio.assembly.gfa2.Gap)1