Search in sources :

Example 6 with Reference

use of org.dishevelled.bio.assembly.gfa2.Reference in project dishevelled-bio by heuermh.

the class TraversePaths method call.

@Override
public Integer call() throws Exception {
    BufferedReader reader = null;
    PrintWriter writer = null;
    try {
        reader = reader(inputGfa1File);
        writer = writer(outputGfa1File);
        final PrintWriter w = writer;
        Gfa1Reader.stream(reader, new Gfa1Listener() {

            @Override
            public boolean record(final Gfa1Record gfa1Record) {
                Gfa1Writer.write(gfa1Record, w);
                if (gfa1Record instanceof Path) {
                    Path path = (Path) gfa1Record;
                    int size = path.getSegments().size();
                    Reference source = null;
                    Reference target = null;
                    String overlap = null;
                    for (int i = 0; i < size; i++) {
                        target = path.getSegments().get(i);
                        if (i > 0) {
                            overlap = (path.getOverlaps() != null && path.getOverlaps().size() > i) ? path.getOverlaps().get(i - 1) : null;
                        }
                        if (source != null) {
                            Traversal traversal = new Traversal(path.getName(), i - 1, source, target, overlap, EMPTY_ANNOTATIONS);
                            Gfa1Writer.write(traversal, w);
                        }
                        source = target;
                    }
                }
                return true;
            }
        });
        return 0;
    } finally {
        try {
            reader.close();
        } catch (Exception e) {
        // ignore
        }
        try {
            writer.close();
        } catch (Exception e) {
        // ignore
        }
    }
}
Also used : Path(org.dishevelled.bio.assembly.gfa1.Path) Gfa1Record(org.dishevelled.bio.assembly.gfa1.Gfa1Record) Reference(org.dishevelled.bio.assembly.gfa1.Reference) BufferedReader(java.io.BufferedReader) Traversal(org.dishevelled.bio.assembly.gfa1.Traversal) Gfa1Listener(org.dishevelled.bio.assembly.gfa1.Gfa1Listener) CommandLineParseException(org.dishevelled.commandline.CommandLineParseException) PrintWriter(java.io.PrintWriter)

Example 7 with Reference

use of org.dishevelled.bio.assembly.gfa2.Reference in project dishevelled by heuermh.

the class ImportGfa2Task method run.

@Override
public void run(final TaskMonitor taskMonitor) throws Exception {
    taskMonitor.setTitle("Import a network in Graphical Fragment Assembly (GFA) 2.0 format");
    final Map<String, Segment> segmentsById = new HashMap<String, Segment>();
    taskMonitor.setStatusMessage("Reading segments from file ...");
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream segments, building cache
        stream(readable, new Gfa2Adapter() {

            @Override
            protected boolean segment(final Segment segment) {
                segmentsById.put(segment.getId(), segment);
                return true;
            }
        });
    }
    taskMonitor.setStatusMessage("Finding reverse orientation references ...");
    final Table<String, Orientation, Segment> segmentsByOrientation = HashBasedTable.create();
    final List<Edge> edges = new ArrayList<Edge>();
    final List<Gap> gaps = new ArrayList<Gap>();
    final List<Path> paths = new ArrayList<Path>();
    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream edges, gaps, and paths, looking for reverse orientation references
        stream(readable, new Gfa2Adapter() {

            private void putIfAbsent(final Reference reference) {
                Segment segment = segmentsById.get(reference.getId());
                if (segment == null) {
                    throw new RuntimeException("could not find segment by id " + reference.getId());
                }
                if (!segmentsByOrientation.contains(reference.getId(), reference.getOrientation())) {
                    segmentsByOrientation.put(reference.getId(), reference.getOrientation(), segment);
                }
            }

            @Override
            public boolean edge(final Edge edge) {
                putIfAbsent(edge.getSource());
                putIfAbsent(edge.getTarget());
                edges.add(edge);
                return true;
            }

            @Override
            public boolean gap(final Gap gap) {
                putIfAbsent(gap.getSource());
                putIfAbsent(gap.getTarget());
                gaps.add(gap);
                return true;
            }

            @Override
            public boolean path(final Path path) {
                for (Reference reference : path.getReferences()) {
                    putIfAbsent(reference);
                }
                if (loadPaths) {
                    paths.add(path);
                }
                return true;
            }
        });
    }
    logger.info("read {} segments, {} edges, {} gaps, and {} paths from {}", new Object[] { segmentsById.size(), edges.size(), gaps.size(), paths.size(), inputFile });
    segmentsById.clear();
    taskMonitor.setStatusMessage("Building Cytoscape nodes from segments ...");
    final CyNetwork network = applicationManager.getCurrentNetwork();
    final Map<String, CyNode> nodes = new HashMap<String, CyNode>(segmentsByOrientation.size());
    for (Table.Cell<String, Orientation, Segment> c : segmentsByOrientation.cellSet()) {
        String id = c.getRowKey();
        Orientation orientation = c.getColumnKey();
        Segment segment = c.getValue();
        String name = id + (orientation.isForward() ? "+" : "-");
        if (!nodes.containsKey(name)) {
            CyNode node = network.addNode();
            CyTable nodeTable = network.getDefaultNodeTable();
            CyRow nodeRow = nodeTable.getRow(node.getSUID());
            Integer length = segment.getLength();
            Integer readCount = segment.getReadCountOpt().orElse(null);
            Integer fragmentCount = segment.getFragmentCountOpt().orElse(null);
            Integer kmerCount = segment.getKmerCountOpt().orElse(null);
            String sequenceChecksum = segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : null;
            String sequenceUri = segment.getSequenceUriOpt().orElse(null);
            setValue(nodeTable, nodeRow, "name", String.class, name);
            setValue(nodeTable, nodeRow, "length", Integer.class, length);
            setValue(nodeTable, nodeRow, "readCount", Integer.class, readCount);
            setValue(nodeTable, nodeRow, "fragmentCount", Integer.class, fragmentCount);
            setValue(nodeTable, nodeRow, "kmerCount", Integer.class, kmerCount);
            setValue(nodeTable, nodeRow, "sequenceChecksum", String.class, sequenceChecksum);
            setValue(nodeTable, nodeRow, "sequenceUri", String.class, sequenceUri);
            // default display length to length
            Integer displayLength = length;
            String sequence = orientation.isForward() ? segment.getSequence() : reverseComplement(segment.getSequence());
            if (sequence != null) {
                Integer sequenceLength = sequence.length();
                String displaySequence = trimFromMiddle(sequence, displaySequenceLimit);
                Integer displaySequenceLength = displaySequence.length();
                if (loadSequences) {
                    setValue(nodeTable, nodeRow, "sequence", String.class, sequence);
                }
                setValue(nodeTable, nodeRow, "sequenceLength", Integer.class, sequenceLength);
                setValue(nodeTable, nodeRow, "displaySequence", String.class, displaySequence);
                setValue(nodeTable, nodeRow, "displaySequenceLength", Integer.class, displaySequenceLength);
                // override display length with sequence length if necessary
                if (length == null || length != sequenceLength) {
                    displayLength = sequenceLength;
                }
            }
            StringBuilder sb = new StringBuilder();
            sb.append(name);
            if (displayLength != null) {
                sb.append("  ");
                sb.append(displayLength);
                sb.append(" bp");
            }
            String displayName = sb.toString();
            if (readCount != null) {
                sb.append(" ");
                sb.append(readCount);
                sb.append(" reads");
            }
            if (fragmentCount != null) {
                sb.append(" ");
                sb.append(fragmentCount);
                sb.append(" fragments");
            }
            if (kmerCount != null) {
                sb.append(" ");
                sb.append(kmerCount);
                sb.append(" kmers");
            }
            String displayLabel = sb.toString();
            setValue(nodeTable, nodeRow, "displayName", String.class, displayName);
            setValue(nodeTable, nodeRow, "displayLength", Integer.class, displayLength);
            setValue(nodeTable, nodeRow, "displayLabel", String.class, displayLabel);
            nodes.put(name, node);
        }
    }
    logger.info("converted segments and orientation to " + nodes.size() + " nodes");
    segmentsByOrientation.clear();
    taskMonitor.setStatusMessage("Building Cytoscape edges from edges and gaps ...");
    for (Edge edge : edges) {
        String sourceId = edge.getSource().getId();
        String sourceOrientation = edge.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = edge.getTarget().getId();
        String targetOrientation = edge.getTarget().isForwardOrientation() ? "+" : "-";
        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        CyEdge cyEdge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(cyEdge.getSUID());
        setValue(edgeTable, edgeRow, "id", String.class, edge.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "edge");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "sourceStart", String.class, edge.getSourceStart().toString());
        setValue(edgeTable, edgeRow, "sourceEnd", String.class, edge.getSourceEnd().toString());
        setValue(edgeTable, edgeRow, "targetStart", String.class, edge.getTargetStart().toString());
        setValue(edgeTable, edgeRow, "targetEnd", String.class, edge.getTargetEnd().toString());
        setValue(edgeTable, edgeRow, "alignment", String.class, edge.hasAlignment() ? edge.getAlignment().toString() : null);
        setValue(edgeTable, edgeRow, "readCount", Integer.class, edge.getReadCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "fragmentCount", Integer.class, edge.getFragmentCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "kmerCount", Integer.class, edge.getKmerCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mappingQuality", Integer.class, edge.getMappingQualityOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mismatchCount", Integer.class, edge.getMismatchCountOpt().orElse(null));
    }
    logger.info("converted edges to " + edges.size() + " edges");
    for (Gap gap : gaps) {
        String sourceId = gap.getSource().getId();
        String sourceOrientation = gap.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = gap.getTarget().getId();
        String targetOrientation = gap.getTarget().isForwardOrientation() ? "+" : "-";
        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        CyEdge edge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(edge.getSUID());
        setValue(edgeTable, edgeRow, "id", String.class, gap.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "gap");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "distance", Integer.class, gap.getDistance());
        setValue(edgeTable, edgeRow, "variance", Integer.class, gap.getVarianceOpt().orElse(null));
    }
    logger.info("converted gaps to " + gaps.size() + " edges");
    nodes.clear();
    edges.clear();
    gaps.clear();
    // pass paths to AssemblyApp if requested
    if (loadPaths && !paths.isEmpty()) {
        taskMonitor.setStatusMessage("Loading paths in path view ...");
        assemblyModel.setInputFileName(inputFile.toString());
    // todo: convert to gfa1 paths?
    // note paths in gfa2 can have references to segments, edges, or other groups
    // assemblyModel.setPaths(paths, traversalsByPathName);
    }
}
Also used : Gfa2Adapter(org.dishevelled.bio.assembly.gfa2.Gfa2Adapter) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CyNetwork(org.cytoscape.model.CyNetwork) CyRow(org.cytoscape.model.CyRow) Segment(org.dishevelled.bio.assembly.gfa2.Segment) CyTable(org.cytoscape.model.CyTable) FileReader(java.io.FileReader) CyNode(org.cytoscape.model.CyNode) Path(org.dishevelled.bio.assembly.gfa2.Path) HashBasedTable(com.google.common.collect.HashBasedTable) CyTable(org.cytoscape.model.CyTable) Table(com.google.common.collect.Table) Reference(org.dishevelled.bio.assembly.gfa2.Reference) Orientation(org.dishevelled.bio.assembly.gfa2.Orientation) CyEdge(org.cytoscape.model.CyEdge) Gap(org.dishevelled.bio.assembly.gfa2.Gap) BufferedReader(java.io.BufferedReader) CyEdge(org.cytoscape.model.CyEdge) Edge(org.dishevelled.bio.assembly.gfa2.Edge)

Example 8 with Reference

use of org.dishevelled.bio.assembly.gfa2.Reference in project molgenis-emx2 by molgenis.

the class SqlColumnRefArrayExecutor method createReferenceExistsCheck.

/**
 * trigger on this column to check if foreign key exists. Might be composite key, i.e., list of
 * columns
 */
private static void createReferenceExistsCheck(DSLContext jooq, Column column) {
    String schemaName = column.getSchema().getName();
    Name thisTable = name(schemaName, column.getTable().getTableName());
    Name toTable = name(column.getRefSchema(), column.getRefTableName());
    String functionName = getReferenceExistsCheckName(column);
    List<Reference> references = column.getReferences();
    String fromColumns = references.stream().map(r -> name(r.getName()).toString()).collect(Collectors.joining(","));
    String toColumns = references.stream().map(r -> name(r.getRefTo()).toString()).collect(Collectors.joining(","));
    String errorColumns = references.stream().map(r -> "COALESCE(error_row." + name(r.getRefTo()).toString() + ",'NULL')").collect(Collectors.joining("||','||"));
    String exceptFilter = references.stream().map(r -> {
        if (r.isOverlappingRef()) {
            return name(r.getRefTo()) + " = NEW." + name(r.getName());
        } else {
            return name(r.getRefTo()) + " = ANY (NEW." + name(r.getName()) + ")";
        }
    }).collect(Collectors.joining(" AND "));
    String unnestRefs = references.stream().map(r -> {
        // can be overlapping with non_array reference
        if (r.isOverlappingRef()) {
            return "NEW." + name(r.getName()) + " AS " + name(r.getRefTo());
        } else {
            return "UNNEST(NEW." + name(r.getName()) + ") AS " + name(r.getRefTo());
        }
    }).collect(Collectors.joining(","));
    String nonRefLinkFieldsAreNotNull = references.stream().filter(r -> !r.isOverlapping()).map(r2 -> "error_row." + name(r2.getRefTo()) + " IS NOT NULL ").collect(Collectors.joining(" OR "));
    jooq.execute("CREATE OR REPLACE FUNCTION {0}() RETURNS trigger AS $BODY$ " + "\nDECLARE error_row RECORD;" + "\nBEGIN" + "\n\tFOR error_row IN SELECT {1} EXCEPT SELECT {2} FROM {3} WHERE {10} LOOP" + // exclude if only refLink fields are set
    "\n\t\tIF {11} THEN" + "\n\t\t\tRAISE EXCEPTION USING ERRCODE='23503', MESSAGE = 'insert or update on table \"'||{9}||'\" violates foreign key (ref_array) constraint'" + " , DETAIL = 'Key ('||{6}||')=('|| {5} ||') is not present in table \"'||{7}||'\", column(s)('||{8}||')';" + "\n\t\tEND IF;" + "\n\tEND LOOP;" + "\n\tRETURN NEW;" + "\nEND; $BODY$ LANGUAGE plpgsql;", // 0
    name(schemaName, functionName), // 1
    keyword(unnestRefs), // 2
    keyword(toColumns), // 3
    toTable, // 4
    thisTable, // 5
    keyword(errorColumns), // 6
    inline(fromColumns), // 7
    inline(column.getRefTableName()), // 8
    inline(toColumns), // 9
    inline(column.getTableName()), // 10
    keyword(exceptFilter), // 11
    keyword(nonRefLinkFieldsAreNotNull));
    // add the trigger
    jooq.execute("CREATE CONSTRAINT TRIGGER {0} " + "\n\tAFTER INSERT OR UPDATE OF {1} ON {2} FROM {3}" + "\n\tDEFERRABLE INITIALLY IMMEDIATE " + "\n\tFOR EACH ROW EXECUTE PROCEDURE {4}()", name(functionName), keyword(fromColumns), thisTable, toTable, name(column.getTable().getSchema().getName(), functionName));
}
Also used : Column(org.molgenis.emx2.Column) SqlColumnExecutor.validateColumn(org.molgenis.emx2.sql.SqlColumnExecutor.validateColumn) List(java.util.List) DSL(org.jooq.impl.DSL) Name(org.jooq.Name) Collection(java.util.Collection) Reference(org.molgenis.emx2.Reference) DSLContext(org.jooq.DSLContext) Collectors(java.util.stream.Collectors) Reference(org.molgenis.emx2.Reference) Name(org.jooq.Name)

Example 9 with Reference

use of org.dishevelled.bio.assembly.gfa2.Reference in project dishevelled-bio by heuermh.

the class ReassemblePaths method call.

@Override
public Integer call() throws Exception {
    BufferedReader reader = null;
    PrintWriter writer = null;
    try {
        reader = reader(inputGfa1File);
        writer = writer(outputGfa1File);
        final PrintWriter w = writer;
        final List<Path> paths = new ArrayList<Path>();
        final ListMultimap<String, Traversal> traversalsByPathName = ArrayListMultimap.create();
        Gfa1Reader.stream(reader, new Gfa1Listener() {

            @Override
            public boolean record(final Gfa1Record gfa1Record) {
                if (gfa1Record instanceof Path) {
                    Path path = (Path) gfa1Record;
                    paths.add(path);
                } else if (gfa1Record instanceof Traversal) {
                    Traversal traversal = (Traversal) gfa1Record;
                    traversalsByPathName.put(traversal.getPathName(), traversal);
                } else {
                    Gfa1Writer.write(gfa1Record, w);
                }
                return true;
            }
        });
        for (Path path : paths) {
            List<Traversal> traversals = traversalsByPathName.get(path.getName());
            Collections.sort(traversals, new Comparator<Traversal>() {

                @Override
                public int compare(final Traversal t0, final Traversal t1) {
                    return t0.getOrdinal() - t1.getOrdinal();
                }
            });
            List<Reference> segments = new ArrayList<Reference>();
            List<String> overlaps = new ArrayList<String>();
            for (Traversal traversal : traversals) {
                if (segments.isEmpty()) {
                    segments.add(traversal.getSource());
                }
                segments.add(traversal.getTarget());
                if (traversal.hasOverlap()) {
                    overlaps.add(traversal.getOverlap());
                }
            }
            Gfa1Writer.write(new Path(path.getName(), segments, overlaps.isEmpty() ? null : overlaps, path.getAnnotations()), w);
        }
        return 0;
    } finally {
        try {
            reader.close();
        } catch (Exception e) {
        // ignore
        }
        try {
            writer.close();
        } catch (Exception e) {
        // ignore
        }
    }
}
Also used : Path(org.dishevelled.bio.assembly.gfa1.Path) Reference(org.dishevelled.bio.assembly.gfa1.Reference) ArrayList(java.util.ArrayList) Traversal(org.dishevelled.bio.assembly.gfa1.Traversal) Gfa1Listener(org.dishevelled.bio.assembly.gfa1.Gfa1Listener) CommandLineParseException(org.dishevelled.commandline.CommandLineParseException) Gfa1Record(org.dishevelled.bio.assembly.gfa1.Gfa1Record) BufferedReader(java.io.BufferedReader) PrintWriter(java.io.PrintWriter)

Aggregations

BufferedReader (java.io.BufferedReader)4 ArrayList (java.util.ArrayList)4 Reference (org.dishevelled.bio.assembly.gfa1.Reference)4 Traversal (org.dishevelled.bio.assembly.gfa1.Traversal)4 Path (org.dishevelled.bio.assembly.gfa1.Path)3 HashBasedTable (com.google.common.collect.HashBasedTable)2 Table (com.google.common.collect.Table)2 FileReader (java.io.FileReader)2 PrintWriter (java.io.PrintWriter)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Collectors (java.util.stream.Collectors)2 CyEdge (org.cytoscape.model.CyEdge)2 CyNetwork (org.cytoscape.model.CyNetwork)2 CyNode (org.cytoscape.model.CyNode)2 CyRow (org.cytoscape.model.CyRow)2 CyTable (org.cytoscape.model.CyTable)2 Gfa1Listener (org.dishevelled.bio.assembly.gfa1.Gfa1Listener)2 Gfa1Record (org.dishevelled.bio.assembly.gfa1.Gfa1Record)2 CommandLineParseException (org.dishevelled.commandline.CommandLineParseException)2