Search in sources :

Example 1 with Alignment

use of org.dishevelled.bio.assembly.gfa2.Alignment in project dishevelled-bio by heuermh.

The method call of the class Gfa1ToGfa2.

/**
 * Convert the GFA 1.0 input to GFA 2.0, writing each converted record to the output
 * as it is read.
 *
 * <p>Headers, segments, links, containments, and paths are each mapped to their
 * GFA 2.0 counterpart (header, segment, edge, edge, and path respectively).</p>
 *
 * @return <code>0</code> once the input has been streamed completely
 * @throws RuntimeException if the input header has a <code>VN</code> annotation
 *    whose value is not <code>1.0</code>
 * @throws Exception if the input cannot be read or the output cannot be opened
 */
@Override
public Integer call() throws Exception {
    // try-with-resources closes the writer on every exit path and does nothing when
    // opening it failed, so there is no null writer to close and no exception to swallow
    try (PrintWriter w = writer(outputGfa2File)) {
        Gfa1Reader.stream(reader(inputGfa1File), new Gfa1Adapter() {

            @Override
            public boolean header(final Header header) {
                final Annotation version = header.getAnnotations().get("VN");
                if (version == null) {
                    // no version annotation to convert, pass annotations through unchanged
                    Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Header(header.getAnnotations()), w);
                    return true;
                }
                if (!"1.0".equals(version.getValue())) {
                    throw new RuntimeException("cannot convert input as GFA 1.0, was " + version.getValue());
                }
                // convert VN:Z:1.0 to VN:Z:2.0 annotation, keeping all other annotations
                final Map<String, Annotation> annotations = new HashMap<String, Annotation>();
                annotations.put("VN", new Annotation("VN", "Z", "2.0"));
                for (Annotation annotation : header.getAnnotations().values()) {
                    if (!"VN".equals(annotation.getName())) {
                        annotations.put(annotation.getName(), annotation);
                    }
                }
                Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Header(annotations), w);
                return true;
            }

            @Override
            public boolean segment(final Segment segment) {
                final String sequence = segment.getSequence();
                // length comes from the sequence if present, else the LN annotation, else 0
                final int length;
                if (sequence != null) {
                    length = sequence.length();
                } else if (segment.getAnnotations().containsKey("LN")) {
                    length = Integer.parseInt(segment.getAnnotations().get("LN").getValue());
                } else {
                    length = 0;
                }
                Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Segment(segment.getId(), length, sequence, segment.getAnnotations()), w);
                return true;
            }

            @Override
            public boolean link(final Link link) {
                // all four edge positions are written as position 0 for a link
                Position unknown = new Position(0, false);
                Alignment alignment = link.getOverlap() == null ? null : Alignment.valueOf(link.getOverlap());
                Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Edge(null, toGfa2Reference(link.getSource()), toGfa2Reference(link.getTarget()), unknown, unknown, unknown, unknown, alignment, link.getAnnotations()), w);
                return true;
            }

            @Override
            public boolean containment(final Containment containment) {
                // only the target start is taken from the containment, the other positions are written as 0
                Position unknown = new Position(0, false);
                Position targetStart = new Position(containment.getPosition(), false);
                Alignment alignment = containment.getOverlap() == null ? null : Alignment.valueOf(containment.getOverlap());
                Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Edge(null, toGfa2Reference(containment.getContainer()), toGfa2Reference(containment.getContained()), unknown, unknown, targetStart, unknown, alignment, containment.getAnnotations()), w);
                return true;
            }

            @Override
            public boolean path(final Path path) {
                Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Path(path.getName(), toGfa2References(path.getSegments()), path.getAnnotations()), w);
                return true;
            }
        });
        return 0;
    }
}
Also used : Path(org.dishevelled.bio.assembly.gfa1.Path) Position(org.dishevelled.bio.assembly.gfa2.Position) Annotation(org.dishevelled.bio.annotation.Annotation) Segment(org.dishevelled.bio.assembly.gfa1.Segment) CommandLineParseException(org.dishevelled.commandline.CommandLineParseException) Alignment(org.dishevelled.bio.assembly.gfa2.Alignment) Header(org.dishevelled.bio.assembly.gfa1.Header) Gfa1Adapter(org.dishevelled.bio.assembly.gfa1.Gfa1Adapter) HashMap(java.util.HashMap) Map(java.util.Map) Link(org.dishevelled.bio.assembly.gfa1.Link) PrintWriter(java.io.PrintWriter) Containment(org.dishevelled.bio.assembly.gfa1.Containment)

Example 2 with Alignment

use of org.dishevelled.bio.assembly.gfa2.Alignment in project dishevelled by heuermh.

The method run of the class ImportGfa2Task.

/**
 * Import the GFA 2.0 input file into the current Cytoscape network.
 *
 * <p>The input file is streamed twice: the first pass caches every segment by id,
 * the second pass collects edges, gaps, and (if requested) paths while recording
 * which segment id and orientation pairs they reference. One Cytoscape node is then
 * created per referenced segment id and orientation, and one directed Cytoscape edge
 * per GFA 2.0 edge and per gap. Segments that are not referenced by any edge, gap,
 * or path do not produce a node.</p>
 *
 * @param taskMonitor task monitor, receives the title and status messages
 * @throws RuntimeException if an edge, gap, or path references a segment id that
 *    was not read from the input file
 * @throws Exception if the input file cannot be read
 */
@Override
public void run(final TaskMonitor taskMonitor) throws Exception {
    taskMonitor.setTitle("Import a network in Graphical Fragment Assembly (GFA) 2.0 format");
    final Map<String, Segment> segmentsById = new HashMap<String, Segment>();
    taskMonitor.setStatusMessage("Reading segments from file ...");
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream segments, building cache
        stream(readable, new Gfa2Adapter() {

            @Override
            protected boolean segment(final Segment segment) {
                segmentsById.put(segment.getId(), segment);
                return true;
            }
        });
    }
    taskMonitor.setStatusMessage("Finding reverse orientation references ...");
    final Table<String, Orientation, Segment> segmentsByOrientation = HashBasedTable.create();
    final List<Edge> edges = new ArrayList<Edge>();
    final List<Gap> gaps = new ArrayList<Gap>();
    final List<Path> paths = new ArrayList<Path>();
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream edges, gaps, and paths, looking for reverse orientation references
        stream(readable, new Gfa2Adapter() {

            // register the referenced segment under the referenced orientation, failing on unknown ids
            private void putIfAbsent(final Reference reference) {
                Segment segment = segmentsById.get(reference.getId());
                if (segment == null) {
                    throw new RuntimeException("could not find segment by id " + reference.getId());
                }
                if (!segmentsByOrientation.contains(reference.getId(), reference.getOrientation())) {
                    segmentsByOrientation.put(reference.getId(), reference.getOrientation(), segment);
                }
            }

            @Override
            public boolean edge(final Edge edge) {
                putIfAbsent(edge.getSource());
                putIfAbsent(edge.getTarget());
                edges.add(edge);
                return true;
            }

            @Override
            public boolean gap(final Gap gap) {
                putIfAbsent(gap.getSource());
                putIfAbsent(gap.getTarget());
                gaps.add(gap);
                return true;
            }

            @Override
            public boolean path(final Path path) {
                // references are registered even when paths themselves are not loaded
                for (Reference reference : path.getReferences()) {
                    putIfAbsent(reference);
                }
                if (loadPaths) {
                    paths.add(path);
                }
                return true;
            }
        });
    }
    logger.info("read {} segments, {} edges, {} gaps, and {} paths from {}", new Object[] { segmentsById.size(), edges.size(), gaps.size(), paths.size(), inputFile });
    // the id cache is no longer needed once the orientation table is built
    segmentsById.clear();
    taskMonitor.setStatusMessage("Building Cytoscape nodes from segments ...");
    final CyNetwork network = applicationManager.getCurrentNetwork();
    final Map<String, CyNode> nodes = new HashMap<String, CyNode>(segmentsByOrientation.size());
    for (Table.Cell<String, Orientation, Segment> c : segmentsByOrientation.cellSet()) {
        String id = c.getRowKey();
        Orientation orientation = c.getColumnKey();
        Segment segment = c.getValue();
        // node name is the segment id suffixed with its orientation, e.g. id+ or id-
        String name = id + (orientation.isForward() ? "+" : "-");
        if (!nodes.containsKey(name)) {
            CyNode node = network.addNode();
            CyTable nodeTable = network.getDefaultNodeTable();
            CyRow nodeRow = nodeTable.getRow(node.getSUID());
            Integer length = segment.getLength();
            Integer readCount = segment.getReadCountOpt().orElse(null);
            Integer fragmentCount = segment.getFragmentCountOpt().orElse(null);
            Integer kmerCount = segment.getKmerCountOpt().orElse(null);
            String sequenceChecksum = segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : null;
            String sequenceUri = segment.getSequenceUriOpt().orElse(null);
            setValue(nodeTable, nodeRow, "name", String.class, name);
            setValue(nodeTable, nodeRow, "length", Integer.class, length);
            setValue(nodeTable, nodeRow, "readCount", Integer.class, readCount);
            setValue(nodeTable, nodeRow, "fragmentCount", Integer.class, fragmentCount);
            setValue(nodeTable, nodeRow, "kmerCount", Integer.class, kmerCount);
            setValue(nodeTable, nodeRow, "sequenceChecksum", String.class, sequenceChecksum);
            setValue(nodeTable, nodeRow, "sequenceUri", String.class, sequenceUri);
            // default display length to length
            Integer displayLength = length;
            // NOTE(review): reverseComplement is called even if the segment has no sequence;
            // presumably it returns null for null input, confirm against its implementation
            String sequence = orientation.isForward() ? segment.getSequence() : reverseComplement(segment.getSequence());
            if (sequence != null) {
                Integer sequenceLength = sequence.length();
                String displaySequence = trimFromMiddle(sequence, displaySequenceLimit);
                Integer displaySequenceLength = displaySequence.length();
                if (loadSequences) {
                    setValue(nodeTable, nodeRow, "sequence", String.class, sequence);
                }
                setValue(nodeTable, nodeRow, "sequenceLength", Integer.class, sequenceLength);
                setValue(nodeTable, nodeRow, "displaySequence", String.class, displaySequence);
                setValue(nodeTable, nodeRow, "displaySequenceLength", Integer.class, displaySequenceLength);
                // override display length with sequence length if necessary;
                // compare boxed values with equals, != would compare object identity
                if (length == null || !length.equals(sequenceLength)) {
                    displayLength = sequenceLength;
                }
            }
            StringBuilder sb = new StringBuilder();
            sb.append(name);
            if (displayLength != null) {
                sb.append("  ");
                sb.append(displayLength);
                sb.append(" bp");
            }
            // display name is name and length only; the label continues with the counts below
            String displayName = sb.toString();
            if (readCount != null) {
                sb.append(" ");
                sb.append(readCount);
                sb.append(" reads");
            }
            if (fragmentCount != null) {
                sb.append(" ");
                sb.append(fragmentCount);
                sb.append(" fragments");
            }
            if (kmerCount != null) {
                sb.append(" ");
                sb.append(kmerCount);
                sb.append(" kmers");
            }
            String displayLabel = sb.toString();
            setValue(nodeTable, nodeRow, "displayName", String.class, displayName);
            setValue(nodeTable, nodeRow, "displayLength", Integer.class, displayLength);
            setValue(nodeTable, nodeRow, "displayLabel", String.class, displayLabel);
            nodes.put(name, node);
        }
    }
    logger.info("converted segments and orientation to {} nodes", nodes.size());
    segmentsByOrientation.clear();
    taskMonitor.setStatusMessage("Building Cytoscape edges from edges and gaps ...");
    for (Edge edge : edges) {
        String sourceId = edge.getSource().getId();
        String sourceOrientation = edge.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = edge.getTarget().getId();
        String targetOrientation = edge.getTarget().isForwardOrientation() ? "+" : "-";
        // look up nodes by the same id plus orientation name used when they were created
        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        CyEdge cyEdge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(cyEdge.getSUID());
        setValue(edgeTable, edgeRow, "id", String.class, edge.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "edge");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "sourceStart", String.class, edge.getSourceStart().toString());
        setValue(edgeTable, edgeRow, "sourceEnd", String.class, edge.getSourceEnd().toString());
        setValue(edgeTable, edgeRow, "targetStart", String.class, edge.getTargetStart().toString());
        setValue(edgeTable, edgeRow, "targetEnd", String.class, edge.getTargetEnd().toString());
        setValue(edgeTable, edgeRow, "alignment", String.class, edge.hasAlignment() ? edge.getAlignment().toString() : null);
        setValue(edgeTable, edgeRow, "readCount", Integer.class, edge.getReadCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "fragmentCount", Integer.class, edge.getFragmentCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "kmerCount", Integer.class, edge.getKmerCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mappingQuality", Integer.class, edge.getMappingQualityOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mismatchCount", Integer.class, edge.getMismatchCountOpt().orElse(null));
    }
    logger.info("converted edges to {} edges", edges.size());
    for (Gap gap : gaps) {
        String sourceId = gap.getSource().getId();
        String sourceOrientation = gap.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = gap.getTarget().getId();
        String targetOrientation = gap.getTarget().isForwardOrientation() ? "+" : "-";
        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        // gaps are also stored as Cytoscape edges, distinguished by the "type" column
        CyEdge edge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(edge.getSUID());
        setValue(edgeTable, edgeRow, "id", String.class, gap.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "gap");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "distance", Integer.class, gap.getDistance());
        setValue(edgeTable, edgeRow, "variance", Integer.class, gap.getVarianceOpt().orElse(null));
    }
    logger.info("converted gaps to {} edges", gaps.size());
    nodes.clear();
    edges.clear();
    gaps.clear();
    // pass paths to AssemblyApp if requested
    if (loadPaths && !paths.isEmpty()) {
        taskMonitor.setStatusMessage("Loading paths in path view ...");
        assemblyModel.setInputFileName(inputFile.toString());
        // todo: convert to gfa1 paths?
        // note paths in gfa2 can have references to segments, edges, or other groups
        // assemblyModel.setPaths(paths, traversalsByPathName);
    }
}
Also used : Gfa2Adapter(org.dishevelled.bio.assembly.gfa2.Gfa2Adapter) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CyNetwork(org.cytoscape.model.CyNetwork) CyRow(org.cytoscape.model.CyRow) Segment(org.dishevelled.bio.assembly.gfa2.Segment) CyTable(org.cytoscape.model.CyTable) FileReader(java.io.FileReader) CyNode(org.cytoscape.model.CyNode) Path(org.dishevelled.bio.assembly.gfa2.Path) HashBasedTable(com.google.common.collect.HashBasedTable) CyTable(org.cytoscape.model.CyTable) Table(com.google.common.collect.Table) Reference(org.dishevelled.bio.assembly.gfa2.Reference) Orientation(org.dishevelled.bio.assembly.gfa2.Orientation) CyEdge(org.cytoscape.model.CyEdge) Gap(org.dishevelled.bio.assembly.gfa2.Gap) BufferedReader(java.io.BufferedReader) CyEdge(org.cytoscape.model.CyEdge) Edge(org.dishevelled.bio.assembly.gfa2.Edge)

Aggregations

HashMap (java.util.HashMap)2 HashBasedTable (com.google.common.collect.HashBasedTable)1 Table (com.google.common.collect.Table)1 BufferedReader (java.io.BufferedReader)1 FileReader (java.io.FileReader)1 PrintWriter (java.io.PrintWriter)1 ArrayList (java.util.ArrayList)1 Map (java.util.Map)1 CyEdge (org.cytoscape.model.CyEdge)1 CyNetwork (org.cytoscape.model.CyNetwork)1 CyNode (org.cytoscape.model.CyNode)1 CyRow (org.cytoscape.model.CyRow)1 CyTable (org.cytoscape.model.CyTable)1 Annotation (org.dishevelled.bio.annotation.Annotation)1 Containment (org.dishevelled.bio.assembly.gfa1.Containment)1 Gfa1Adapter (org.dishevelled.bio.assembly.gfa1.Gfa1Adapter)1 Header (org.dishevelled.bio.assembly.gfa1.Header)1 Link (org.dishevelled.bio.assembly.gfa1.Link)1 Path (org.dishevelled.bio.assembly.gfa1.Path)1 Segment (org.dishevelled.bio.assembly.gfa1.Segment)1