Search in sources :

Example 1 with Gfa1Adapter

use of org.dishevelled.bio.assembly.gfa1.Gfa1Adapter in project dishevelled by heuermh.

the class ImportGfa1Task method run.

/**
 * Import a GFA 1.0 file into the current Cytoscape network.
 *
 * <p>The input file is read twice. The first pass caches every segment by id.
 * The second pass collects links, traversals and (when <code>loadPaths</code> is set)
 * paths, and records each (segment id, orientation) pair referenced by a path or a link.
 * One node is then created per referenced pair, named <code>id+</code> or <code>id-</code>,
 * and one directed edge is created per link. Segments that are never referenced by a
 * path or a link do not produce a node.</p>
 *
 * @param taskMonitor task monitor that receives the title and status messages
 * @throws Exception if the input file cannot be read, or (as a <code>RuntimeException</code>)
 *    if a path or link references a segment id that is not present in the file
 */
@Override
public void run(final TaskMonitor taskMonitor) throws Exception {
    taskMonitor.setTitle("Import a network in Graphical Fragment Assembly (GFA) 1.0 format");
    final Map<String, Segment> segmentsById = new HashMap<String, Segment>();
    final Table<String, Orientation, Segment> segmentsByOrientation = HashBasedTable.create();
    final ListMultimap<String, Traversal> traversalsByPathName = ArrayListMultimap.create();
    taskMonitor.setStatusMessage("Reading segments from file ...");
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // first pass: stream segments, building cache
        stream(readable, new Gfa1Adapter() {

            @Override
            protected boolean segment(final Segment segment) {
                segmentsById.put(segment.getId(), segment);
                return true;
            }
        });
    }
    taskMonitor.setStatusMessage("Finding reverse orientation references ...");
    final List<Path> paths = new ArrayList<Path>();
    final List<Link> links = new ArrayList<Link>();
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // second pass: stream paths and links, looking for reverse orientation references
        stream(readable, new Gfa1Adapter() {

            // register the referenced (id, orientation) pair once; fail fast on unknown ids
            private void putIfAbsent(final Reference reference) {
                Segment segment = segmentsById.get(reference.getId());
                if (segment == null) {
                    throw new RuntimeException("could not find segment by id " + reference.getId());
                }
                if (!segmentsByOrientation.contains(reference.getId(), reference.getOrientation())) {
                    segmentsByOrientation.put(reference.getId(), reference.getOrientation(), segment);
                }
            }

            @Override
            protected boolean path(final Path path) {
                for (Reference reference : path.getSegments()) {
                    putIfAbsent(reference);
                }
                // references are always registered; the path itself is kept only on request
                if (loadPaths) {
                    paths.add(path);
                }
                return true;
            }

            @Override
            protected boolean link(final Link link) {
                putIfAbsent(link.getSource());
                putIfAbsent(link.getTarget());
                links.add(link);
                return true;
            }

            @Override
            protected boolean traversal(final Traversal traversal) {
                traversalsByPathName.put(traversal.getPathName(), traversal);
                return true;
            }
        });
    }
    logger.info("read {} segments, {} links, {} paths, and {} traversals from {}", new Object[] { segmentsById.size(), links.size(), paths.size(), traversalsByPathName.size(), inputFile });
    // the id cache is no longer needed once orientation references are resolved
    segmentsById.clear();
    taskMonitor.setStatusMessage("Building Cytoscape nodes from segments ...");
    final CyNetwork network = applicationManager.getCurrentNetwork();
    final Map<String, CyNode> nodes = new HashMap<String, CyNode>(segmentsByOrientation.size());
    for (Table.Cell<String, Orientation, Segment> c : segmentsByOrientation.cellSet()) {
        String id = c.getRowKey();
        Orientation orientation = c.getColumnKey();
        Segment segment = c.getValue();
        String name = id + (orientation.isForward() ? "+" : "-");
        if (!nodes.containsKey(name)) {
            CyNode node = network.addNode();
            CyTable nodeTable = network.getDefaultNodeTable();
            CyRow nodeRow = nodeTable.getRow(node.getSUID());
            Integer length = segment.getLengthOpt().orElse(null);
            Integer readCount = segment.getReadCountOpt().orElse(null);
            Integer fragmentCount = segment.getFragmentCountOpt().orElse(null);
            Integer kmerCount = segment.getKmerCountOpt().orElse(null);
            String sequenceChecksum = segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : null;
            String sequenceUri = segment.getSequenceUriOpt().orElse(null);
            setValue(nodeTable, nodeRow, "name", String.class, name);
            setValue(nodeTable, nodeRow, "length", Integer.class, length);
            setValue(nodeTable, nodeRow, "readCount", Integer.class, readCount);
            setValue(nodeTable, nodeRow, "fragmentCount", Integer.class, fragmentCount);
            setValue(nodeTable, nodeRow, "kmerCount", Integer.class, kmerCount);
            setValue(nodeTable, nodeRow, "sequenceChecksum", String.class, sequenceChecksum);
            setValue(nodeTable, nodeRow, "sequenceUri", String.class, sequenceUri);
            // default display length to length
            Integer displayLength = length;
            // a segment may carry no sequence; only reverse complement when there is one
            String forwardSequence = segment.getSequence();
            String sequence = (forwardSequence == null || orientation.isForward()) ? forwardSequence : reverseComplement(forwardSequence);
            if (sequence != null) {
                Integer sequenceLength = sequence.length();
                String displaySequence = trimFromMiddle(sequence, displaySequenceLimit);
                Integer displaySequenceLength = displaySequence.length();
                if (loadSequences) {
                    setValue(nodeTable, nodeRow, "sequence", String.class, sequence);
                }
                setValue(nodeTable, nodeRow, "sequenceLength", Integer.class, sequenceLength);
                setValue(nodeTable, nodeRow, "displaySequence", String.class, displaySequence);
                setValue(nodeTable, nodeRow, "displaySequenceLength", Integer.class, displaySequenceLength);
                // override display length with sequence length if necessary;
                // compare boxed values with equals, != would compare object identity
                if (length == null || !length.equals(sequenceLength)) {
                    displayLength = sequenceLength;
                }
            }
            StringBuilder sb = new StringBuilder();
            sb.append(name);
            if (displayLength != null) {
                sb.append("  ");
                sb.append(displayLength);
                sb.append(" bp");
            }
            // display name is the name plus length only; the label below also adds counts
            String displayName = sb.toString();
            if (readCount != null) {
                sb.append(" ");
                sb.append(readCount);
                sb.append(" reads");
            }
            if (fragmentCount != null) {
                sb.append(" ");
                sb.append(fragmentCount);
                sb.append(" fragments");
            }
            if (kmerCount != null) {
                sb.append(" ");
                sb.append(kmerCount);
                sb.append(" kmers");
            }
            String displayLabel = sb.toString();
            setValue(nodeTable, nodeRow, "displayName", String.class, displayName);
            setValue(nodeTable, nodeRow, "displayLength", Integer.class, displayLength);
            setValue(nodeTable, nodeRow, "displayLabel", String.class, displayLabel);
            nodes.put(name, node);
        }
    }
    logger.info("converted segments and orientation to " + nodes.size() + " nodes");
    segmentsByOrientation.clear();
    taskMonitor.setStatusMessage("Building Cytoscape edges from links ...");
    for (Link link : links) {
        String sourceId = link.getSource().getId();
        String sourceOrientation = link.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = link.getTarget().getId();
        String targetOrientation = link.getTarget().isForwardOrientation() ? "+" : "-";
        // node names are id + orientation symbol, matching the keys built above
        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        CyEdge edge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(edge.getSUID());
        setValue(edgeTable, edgeRow, "id", String.class, link.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "edge");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "overlap", String.class, link.getOverlapOpt().orElse(null));
        setValue(edgeTable, edgeRow, "readCount", Integer.class, link.getReadCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "fragmentCount", Integer.class, link.getFragmentCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "kmerCount", Integer.class, link.getKmerCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mappingQuality", Integer.class, link.getMappingQualityOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mismatchCount", Integer.class, link.getMismatchCountOpt().orElse(null));
    }
    logger.info("converted links to " + links.size() + " edges");
    nodes.clear();
    links.clear();
    // pass paths to AssemblyApp if requested
    if (loadPaths && !paths.isEmpty()) {
        taskMonitor.setStatusMessage("Loading paths in path view ...");
        assemblyModel.setInputFileName(inputFile.toString());
        assemblyModel.setPaths(paths, traversalsByPathName);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Traversal(org.dishevelled.bio.assembly.gfa1.Traversal) CyNetwork(org.cytoscape.model.CyNetwork) CyRow(org.cytoscape.model.CyRow) Segment(org.dishevelled.bio.assembly.gfa1.Segment) CyTable(org.cytoscape.model.CyTable) FileReader(java.io.FileReader) CyNode(org.cytoscape.model.CyNode) Path(org.dishevelled.bio.assembly.gfa1.Path) HashBasedTable(com.google.common.collect.HashBasedTable) CyTable(org.cytoscape.model.CyTable) Table(com.google.common.collect.Table) Reference(org.dishevelled.bio.assembly.gfa1.Reference) Orientation(org.dishevelled.bio.assembly.gfa1.Orientation) CyEdge(org.cytoscape.model.CyEdge) BufferedReader(java.io.BufferedReader) Gfa1Adapter(org.dishevelled.bio.assembly.gfa1.Gfa1Adapter) Link(org.dishevelled.bio.assembly.gfa1.Link)

Example 2 with Gfa1Adapter

use of org.dishevelled.bio.assembly.gfa1.Gfa1Adapter in project dishevelled-bio by heuermh.

the class Gfa1ToGfa2 method call.

/**
 * Convert the GFA 1.0 input to GFA 2.0, writing each converted record as it is read.
 *
 * <p>Headers, segments, links, containments and paths are converted; a <code>VN</code>
 * header annotation of <code>1.0</code> is rewritten to <code>2.0</code>. Links and
 * containments become GFA 2.0 edges with unknown (zero) positions, except that a
 * containment's position is used as the target start.</p>
 *
 * @return 0 on success
 * @throws Exception if reading or writing fails, or (as a <code>RuntimeException</code>)
 *    if the header declares a version other than <code>1.0</code>
 */
@Override
public Integer call() throws Exception {
    // try-with-resources closes both streams on every exit path; the previous finally block
    // dereferenced a possibly-null writer and never closed the reader
    try (PrintWriter writer = writer(outputGfa2File);
         java.io.BufferedReader reader = reader(inputGfa1File)) {
        Gfa1Reader.stream(reader, new Gfa1Adapter() {

            @Override
            public boolean header(final Header header) {
                // convert VN:Z:1.0 to VN:Z:2.0 annotation if present
                if (header.getAnnotations().containsKey("VN")) {
                    final String version = header.getAnnotations().get("VN").getValue();
                    if (!"1.0".equals(version)) {
                        throw new RuntimeException("cannot convert input as GFA 1.0, was " + version);
                    }
                    Map<String, Annotation> annotations = new HashMap<String, Annotation>();
                    annotations.put("VN", new Annotation("VN", "Z", "2.0"));
                    // carry every other annotation over unchanged
                    for (Annotation annotation : header.getAnnotations().values()) {
                        if (!"VN".equals(annotation.getName())) {
                            annotations.put(annotation.getName(), annotation);
                        }
                    }
                    Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Header(annotations), writer);
                } else {
                    Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Header(header.getAnnotations()), writer);
                }
                return true;
            }

            @Override
            public boolean segment(final Segment segment) {
                // length comes from the sequence when present, else the LN annotation, else 0
                final String sequence = segment.getSequence();
                final int length;
                if (sequence != null) {
                    length = sequence.length();
                } else if (segment.getAnnotations().containsKey("LN")) {
                    length = Integer.parseInt(segment.getAnnotations().get("LN").getValue());
                } else {
                    length = 0;
                }
                Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Segment(segment.getId(), length, sequence, segment.getAnnotations()), writer);
                return true;
            }

            @Override
            public boolean link(final Link link) {
                Position unknown = new Position(0, false);
                Alignment alignment = link.getOverlap() == null ? null : Alignment.valueOf(link.getOverlap());
                Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Edge(null, toGfa2Reference(link.getSource()), toGfa2Reference(link.getTarget()), unknown, unknown, unknown, unknown, alignment, link.getAnnotations()), writer);
                return true;
            }

            @Override
            public boolean containment(final Containment containment) {
                Position unknown = new Position(0, false);
                Position targetStart = new Position(containment.getPosition(), false);
                Alignment alignment = containment.getOverlap() == null ? null : Alignment.valueOf(containment.getOverlap());
                Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Edge(null, toGfa2Reference(containment.getContainer()), toGfa2Reference(containment.getContained()), unknown, unknown, targetStart, unknown, alignment, containment.getAnnotations()), writer);
                return true;
            }

            @Override
            public boolean path(final Path path) {
                Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Path(path.getName(), toGfa2References(path.getSegments()), path.getAnnotations()), writer);
                return true;
            }
        });
        return 0;
    }
}
Also used : Path(org.dishevelled.bio.assembly.gfa1.Path) Position(org.dishevelled.bio.assembly.gfa2.Position) Annotation(org.dishevelled.bio.annotation.Annotation) Segment(org.dishevelled.bio.assembly.gfa1.Segment) CommandLineParseException(org.dishevelled.commandline.CommandLineParseException) Alignment(org.dishevelled.bio.assembly.gfa2.Alignment) Header(org.dishevelled.bio.assembly.gfa1.Header) Gfa1Adapter(org.dishevelled.bio.assembly.gfa1.Gfa1Adapter) HashMap(java.util.HashMap) Map(java.util.Map) Link(org.dishevelled.bio.assembly.gfa1.Link) PrintWriter(java.io.PrintWriter) Containment(org.dishevelled.bio.assembly.gfa1.Containment)

Example 3 with Gfa1Adapter

use of org.dishevelled.bio.assembly.gfa1.Gfa1Adapter in project dishevelled-bio by heuermh.

the class SegmentsToCytoscapeNodes method call.

/**
 * Write one tab-separated line per GFA 1.0 segment to the output nodes file,
 * preceded by the <code>HEADER</code> line.
 *
 * <p>Columns, in order: name, sequence, length, read count, fragment count,
 * kmer count, sequence checksum, sequence URI. Missing values are written as
 * empty fields.</p>
 *
 * @return 0 on success
 * @throws Exception if reading or writing fails
 */
@Override
public Integer call() throws Exception {
    // try-with-resources closes both streams on every exit path; the previous finally block
    // dereferenced a possibly-null writer and never closed the reader
    try (PrintWriter nodesWriter = writer(outputNodesFile);
         java.io.BufferedReader reader = reader(inputGfa1File)) {
        nodesWriter.println(HEADER);
        Gfa1Reader.stream(reader, new Gfa1Adapter() {

            @Override
            public boolean segment(final Segment segment) {
                StringBuilder sb = new StringBuilder();
                sb.append(segment.getName());
                sb.append("\t");
                sb.append(segment.getSequenceOpt().orElse(""));
                sb.append("\t");
                sb.append(segment.containsLength() ? segment.getLength() : "");
                sb.append("\t");
                sb.append(segment.containsReadCount() ? segment.getReadCount() : "");
                sb.append("\t");
                sb.append(segment.containsFragmentCount() ? segment.getFragmentCount() : "");
                sb.append("\t");
                sb.append(segment.containsKmerCount() ? segment.getKmerCount() : "");
                sb.append("\t");
                sb.append(segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : "");
                sb.append("\t");
                sb.append(segment.getSequenceUriOpt().orElse(""));
                nodesWriter.println(sb);
                return true;
            }
        });
        return 0;
    }
}
Also used : Gfa1Adapter(org.dishevelled.bio.assembly.gfa1.Gfa1Adapter) Segment(org.dishevelled.bio.assembly.gfa1.Segment) CommandLineParseException(org.dishevelled.commandline.CommandLineParseException) PrintWriter(java.io.PrintWriter)

Example 4 with Gfa1Adapter

use of org.dishevelled.bio.assembly.gfa1.Gfa1Adapter in project dishevelled-bio by heuermh.

the class ExportSegments method call.

/**
 * Export every GFA 1.0 segment that has a sequence as a FASTA record.
 *
 * <p>Each record is a <code>&gt;</code> description line followed by the sequence
 * wrapped at <code>lineWidth</code> characters per line.</p>
 *
 * @return 0 on success
 * @throws Exception if reading or writing fails
 */
@Override
public Integer call() throws Exception {
    // a single reader is opened and streamed; previously a second, never-closed reader
    // was opened for streaming while the first one went unused
    try (BufferedReader reader = reader(inputGfa1File);
         PrintWriter writer = writer(outputFastaFile)) {
        Gfa1Reader.stream(reader, new Gfa1Adapter() {

            @Override
            public boolean segment(final Segment segment) {
                if (segment.hasSequence()) {
                    writer.print(">");
                    writer.println(describeSegment(segment));
                    String sequence = segment.getSequence();
                    // strict upper bound: with <= an empty trailing line was written whenever
                    // the sequence length was an exact multiple of lineWidth
                    for (int i = 0, length = sequence.length(); i < length; i += lineWidth) {
                        writer.println(sequence.substring(i, Math.min(i + lineWidth, length)));
                    }
                }
                return true;
            }
        });
        return 0;
    }
}
Also used : BufferedReader(java.io.BufferedReader) Gfa1Adapter(org.dishevelled.bio.assembly.gfa1.Gfa1Adapter) Segment(org.dishevelled.bio.assembly.gfa1.Segment) CommandLineParseException(org.dishevelled.commandline.CommandLineParseException) PrintWriter(java.io.PrintWriter)

Example 5 with Gfa1Adapter

use of org.dishevelled.bio.assembly.gfa1.Gfa1Adapter in project dishevelled-bio by heuermh.

the class TraversalsToPropertyGraph method call.

/**
 * Write one comma-separated edge line per GFA 1.0 traversal to the output edges file,
 * preceded by the <code>HEADER</code> line.
 *
 * <p>Columns, in order: traversal id, source name, target name, source orientation symbol,
 * target orientation symbol, the literal <code>t</code>, path name, ordinal, overlap
 * (empty when absent).</p>
 *
 * @return 0 on success
 * @throws Exception if reading or writing fails, or (as an
 *    <code>IllegalArgumentException</code>) if a traversal has no identifier
 */
@Override
public Integer call() throws Exception {
    // try-with-resources closes both streams on every exit path; the previous finally block
    // dereferenced a possibly-null writer and never closed the reader
    try (PrintWriter edgesWriter = writer(outputEdgesFile);
         java.io.BufferedReader reader = reader(inputGfa1File)) {
        edgesWriter.println(HEADER);
        Gfa1Reader.stream(reader, new Gfa1Adapter() {

            @Override
            public boolean traversal(final Traversal traversal) {
                if (!traversal.containsId()) {
                    throw new IllegalArgumentException("traversal identifiers are required for property graph CSV format");
                }
                StringBuilder sb = new StringBuilder();
                sb.append(traversal.getId());
                sb.append(",");
                sb.append(traversal.getSource().getName());
                sb.append(",");
                sb.append(traversal.getTarget().getName());
                sb.append(",");
                sb.append(traversal.getSource().getOrientation().getSymbol());
                sb.append(",");
                sb.append(traversal.getTarget().getOrientation().getSymbol());
                sb.append(",t,");
                sb.append(traversal.getPathName());
                sb.append(",");
                sb.append(traversal.getOrdinal());
                sb.append(",");
                sb.append(traversal.getOverlapOpt().orElse(""));
                edgesWriter.println(sb);
                return true;
            }
        });
        return 0;
    }
}
Also used : Traversal(org.dishevelled.bio.assembly.gfa1.Traversal) Gfa1Adapter(org.dishevelled.bio.assembly.gfa1.Gfa1Adapter) CommandLineParseException(org.dishevelled.commandline.CommandLineParseException) PrintWriter(java.io.PrintWriter)

Aggregations

- Gfa1Adapter (org.dishevelled.bio.assembly.gfa1.Gfa1Adapter): 9
- PrintWriter (java.io.PrintWriter): 8
- CommandLineParseException (org.dishevelled.commandline.CommandLineParseException): 8
- Segment (org.dishevelled.bio.assembly.gfa1.Segment): 5
- Link (org.dishevelled.bio.assembly.gfa1.Link): 4
- Traversal (org.dishevelled.bio.assembly.gfa1.Traversal): 3
- BufferedReader (java.io.BufferedReader): 2
- HashMap (java.util.HashMap): 2
- Path (org.dishevelled.bio.assembly.gfa1.Path): 2
- HashBasedTable (com.google.common.collect.HashBasedTable): 1
- Table (com.google.common.collect.Table): 1
- FileReader (java.io.FileReader): 1
- ArrayList (java.util.ArrayList): 1
- Map (java.util.Map): 1
- CyEdge (org.cytoscape.model.CyEdge): 1
- CyNetwork (org.cytoscape.model.CyNetwork): 1
- CyNode (org.cytoscape.model.CyNode): 1
- CyRow (org.cytoscape.model.CyRow): 1
- CyTable (org.cytoscape.model.CyTable): 1
- Annotation (org.dishevelled.bio.annotation.Annotation): 1