Use of org.dishevelled.bio.assembly.gfa2.Reference in project dishevelled-bio by heuermh.
In class TraversePaths, method call.
/**
 * Copies every GFA 1.0 record from the input to the output and, directly after
 * each path record, writes one traversal record per consecutive pair of segments
 * in that path.
 *
 * <p>The traversal for segments {@code i - 1} and {@code i} gets ordinal
 * {@code i - 1} and, when the path supplies one, the overlap at index
 * {@code i - 1}.
 *
 * @return {@code 0} when the input has been streamed completely
 * @throws Exception if opening, reading, or writing fails
 */
@Override
public Integer call() throws Exception {
    BufferedReader reader = null;
    PrintWriter writer = null;
    try {
        reader = reader(inputGfa1File);
        writer = writer(outputGfa1File);

        // the anonymous listener can only capture a final local
        final PrintWriter w = writer;
        Gfa1Reader.stream(reader, new Gfa1Listener() {
            @Override
            public boolean record(final Gfa1Record gfa1Record) {
                // every record is passed through unchanged
                Gfa1Writer.write(gfa1Record, w);

                if (gfa1Record instanceof Path) {
                    Path path = (Path) gfa1Record;
                    int size = path.getSegments().size();

                    Reference source = null;
                    Reference target = null;
                    String overlap = null;
                    for (int i = 0; i < size; i++) {
                        target = path.getSegments().get(i);
                        if (i > 0) {
                            // the overlap between segments i - 1 and i lives at index i - 1,
                            // so it exists whenever the list holds at least i entries
                            // (the previous "size() > i" test dropped the last overlap)
                            boolean hasOverlap = path.getOverlaps() != null && path.getOverlaps().size() >= i;
                            overlap = hasOverlap ? path.getOverlaps().get(i - 1) : null;
                        }
                        if (source != null) {
                            Traversal traversal = new Traversal(path.getName(), i - 1, source, target, overlap, EMPTY_ANNOTATIONS);
                            Gfa1Writer.write(traversal, w);
                        }
                        source = target;
                    }
                }
                return true;
            }
        });
        return 0;
    } finally {
        // best-effort cleanup: a failure to close must not mask the result or
        // an exception already in flight
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // ignore
            }
        }
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception ignored) {
                // ignore
            }
        }
    }
}
Use of org.dishevelled.bio.assembly.gfa2.Reference in project dishevelled by heuermh.
In class ImportGfa2Task, method run.
/**
 * Imports a Graphical Fragment Assembly (GFA) 2.0 file into the current Cytoscape network.
 *
 * <p>The input file is streamed twice. The first pass caches all segments by id. The
 * second pass collects edges, gaps, and (if {@code loadPaths} is set) paths, and records
 * every (segment id, orientation) pair that is actually referenced. One Cytoscape node is
 * then created per referenced pair, named {@code id+} or {@code id-}, and one directed
 * Cytoscape edge per GFA edge and per GFA gap.
 *
 * <p>If an edge, gap, or path references an id that is not a cached segment, the listener
 * throws a {@code RuntimeException}.
 *
 * @param taskMonitor monitor that receives the title and progress status messages
 * @throws Exception if reading the input file or building the network fails
 */
@Override
public void run(final TaskMonitor taskMonitor) throws Exception {
    taskMonitor.setTitle("Import a network in Graphical Fragment Assembly (GFA) 2.0 format");

    final Map<String, Segment> segmentsById = new HashMap<String, Segment>();

    taskMonitor.setStatusMessage("Reading segments from file ...");

    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream segments, building cache
        stream(readable, new Gfa2Adapter() {
            @Override
            protected boolean segment(final Segment segment) {
                segmentsById.put(segment.getId(), segment);
                return true;
            }
        });
    }

    taskMonitor.setStatusMessage("Finding reverse orientation references ...");

    final Table<String, Orientation, Segment> segmentsByOrientation = HashBasedTable.create();
    final List<Edge> edges = new ArrayList<Edge>();
    final List<Gap> gaps = new ArrayList<Gap>();
    final List<Path> paths = new ArrayList<Path>();

    try (BufferedReader readable = new BufferedReader(new FileReader(inputFile))) {
        // stream edges, gaps, and paths, looking for reverse orientation references
        stream(readable, new Gfa2Adapter() {
            // registers the referenced (id, orientation) pair once; fails on unknown ids
            private void putIfAbsent(final Reference reference) {
                Segment segment = segmentsById.get(reference.getId());
                if (segment == null) {
                    throw new RuntimeException("could not find segment by id " + reference.getId());
                }
                if (!segmentsByOrientation.contains(reference.getId(), reference.getOrientation())) {
                    segmentsByOrientation.put(reference.getId(), reference.getOrientation(), segment);
                }
            }

            @Override
            public boolean edge(final Edge edge) {
                putIfAbsent(edge.getSource());
                putIfAbsent(edge.getTarget());
                edges.add(edge);
                return true;
            }

            @Override
            public boolean gap(final Gap gap) {
                putIfAbsent(gap.getSource());
                putIfAbsent(gap.getTarget());
                gaps.add(gap);
                return true;
            }

            @Override
            public boolean path(final Path path) {
                // path references are registered even when the paths themselves are not kept
                for (Reference reference : path.getReferences()) {
                    putIfAbsent(reference);
                }
                if (loadPaths) {
                    paths.add(path);
                }
                return true;
            }
        });
    }
    logger.info("read {} segments, {} edges, {} gaps, and {} paths from {}", new Object[] { segmentsById.size(), edges.size(), gaps.size(), paths.size(), inputFile });
    // the id cache is no longer needed once the orientation table has been built
    segmentsById.clear();

    taskMonitor.setStatusMessage("Building Cytoscape nodes from segments ...");

    final CyNetwork network = applicationManager.getCurrentNetwork();
    final Map<String, CyNode> nodes = new HashMap<String, CyNode>(segmentsByOrientation.size());

    for (Table.Cell<String, Orientation, Segment> c : segmentsByOrientation.cellSet()) {
        String id = c.getRowKey();
        Orientation orientation = c.getColumnKey();
        Segment segment = c.getValue();
        String name = id + (orientation.isForward() ? "+" : "-");

        if (!nodes.containsKey(name)) {
            CyNode node = network.addNode();
            CyTable nodeTable = network.getDefaultNodeTable();
            CyRow nodeRow = nodeTable.getRow(node.getSUID());

            Integer length = segment.getLength();
            Integer readCount = segment.getReadCountOpt().orElse(null);
            Integer fragmentCount = segment.getFragmentCountOpt().orElse(null);
            Integer kmerCount = segment.getKmerCountOpt().orElse(null);
            String sequenceChecksum = segment.containsSequenceChecksum() ? String.valueOf(segment.getSequenceChecksum()) : null;
            String sequenceUri = segment.getSequenceUriOpt().orElse(null);

            setValue(nodeTable, nodeRow, "name", String.class, name);
            setValue(nodeTable, nodeRow, "length", Integer.class, length);
            setValue(nodeTable, nodeRow, "readCount", Integer.class, readCount);
            setValue(nodeTable, nodeRow, "fragmentCount", Integer.class, fragmentCount);
            setValue(nodeTable, nodeRow, "kmerCount", Integer.class, kmerCount);
            setValue(nodeTable, nodeRow, "sequenceChecksum", String.class, sequenceChecksum);
            setValue(nodeTable, nodeRow, "sequenceUri", String.class, sequenceUri);

            // default display length to length
            Integer displayLength = length;

            // the sequence is treated as optional below, so do not hand a missing
            // sequence to reverseComplement for reverse-orientation nodes
            String forwardSequence = segment.getSequence();
            String sequence = (forwardSequence == null || orientation.isForward()) ? forwardSequence : reverseComplement(forwardSequence);
            if (sequence != null) {
                Integer sequenceLength = sequence.length();
                String displaySequence = trimFromMiddle(sequence, displaySequenceLimit);
                Integer displaySequenceLength = displaySequence.length();

                if (loadSequences) {
                    setValue(nodeTable, nodeRow, "sequence", String.class, sequence);
                }
                setValue(nodeTable, nodeRow, "sequenceLength", Integer.class, sequenceLength);
                setValue(nodeTable, nodeRow, "displaySequence", String.class, displaySequence);
                setValue(nodeTable, nodeRow, "displaySequenceLength", Integer.class, displaySequenceLength);

                // override display length with sequence length if necessary;
                // both operands are boxed, so compare by value rather than with !=
                if (length == null || !length.equals(sequenceLength)) {
                    displayLength = sequenceLength;
                }
            }

            StringBuilder sb = new StringBuilder();
            sb.append(name);
            if (displayLength != null) {
                sb.append("  ");
                sb.append(displayLength);
                sb.append(" bp");
            }
            // display name is name plus length only; the label continues with the counts
            String displayName = sb.toString();

            if (readCount != null) {
                sb.append(" ");
                sb.append(readCount);
                sb.append(" reads");
            }
            if (fragmentCount != null) {
                sb.append(" ");
                sb.append(fragmentCount);
                sb.append(" fragments");
            }
            if (kmerCount != null) {
                sb.append(" ");
                sb.append(kmerCount);
                sb.append(" kmers");
            }
            String displayLabel = sb.toString();

            setValue(nodeTable, nodeRow, "displayName", String.class, displayName);
            setValue(nodeTable, nodeRow, "displayLength", Integer.class, displayLength);
            setValue(nodeTable, nodeRow, "displayLabel", String.class, displayLabel);

            nodes.put(name, node);
        }
    }
    logger.info("converted segments and orientation to " + nodes.size() + " nodes");
    segmentsByOrientation.clear();

    taskMonitor.setStatusMessage("Building Cytoscape edges from edges and gaps ...");

    for (Edge edge : edges) {
        String sourceId = edge.getSource().getId();
        String sourceOrientation = edge.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = edge.getTarget().getId();
        String targetOrientation = edge.getTarget().isForwardOrientation() ? "+" : "-";

        // node keys use the same id + orientation suffix as the node names built above
        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        CyEdge cyEdge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(cyEdge.getSUID());

        setValue(edgeTable, edgeRow, "id", String.class, edge.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "edge");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "sourceStart", String.class, edge.getSourceStart().toString());
        setValue(edgeTable, edgeRow, "sourceEnd", String.class, edge.getSourceEnd().toString());
        setValue(edgeTable, edgeRow, "targetStart", String.class, edge.getTargetStart().toString());
        setValue(edgeTable, edgeRow, "targetEnd", String.class, edge.getTargetEnd().toString());
        setValue(edgeTable, edgeRow, "alignment", String.class, edge.hasAlignment() ? edge.getAlignment().toString() : null);
        setValue(edgeTable, edgeRow, "readCount", Integer.class, edge.getReadCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "fragmentCount", Integer.class, edge.getFragmentCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "kmerCount", Integer.class, edge.getKmerCountOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mappingQuality", Integer.class, edge.getMappingQualityOpt().orElse(null));
        setValue(edgeTable, edgeRow, "mismatchCount", Integer.class, edge.getMismatchCountOpt().orElse(null));
    }
    logger.info("converted edges to " + edges.size() + " edges");

    for (Gap gap : gaps) {
        String sourceId = gap.getSource().getId();
        String sourceOrientation = gap.getSource().isForwardOrientation() ? "+" : "-";
        String targetId = gap.getTarget().getId();
        String targetOrientation = gap.getTarget().isForwardOrientation() ? "+" : "-";

        CyNode sourceNode = nodes.get(sourceId + sourceOrientation);
        CyNode targetNode = nodes.get(targetId + targetOrientation);
        CyEdge edge = network.addEdge(sourceNode, targetNode, true);
        CyTable edgeTable = network.getDefaultEdgeTable();
        CyRow edgeRow = edgeTable.getRow(edge.getSUID());

        setValue(edgeTable, edgeRow, "id", String.class, gap.getIdOpt().orElse(null));
        setValue(edgeTable, edgeRow, "type", String.class, "gap");
        setValue(edgeTable, edgeRow, "sourceId", String.class, sourceId);
        setValue(edgeTable, edgeRow, "sourceOrientation", String.class, sourceOrientation);
        setValue(edgeTable, edgeRow, "targetId", String.class, targetId);
        setValue(edgeTable, edgeRow, "targetOrientation", String.class, targetOrientation);
        setValue(edgeTable, edgeRow, "distance", Integer.class, gap.getDistance());
        setValue(edgeTable, edgeRow, "variance", Integer.class, gap.getVarianceOpt().orElse(null));
    }
    logger.info("converted gaps to " + gaps.size() + " edges");

    nodes.clear();
    edges.clear();
    gaps.clear();

    // pass paths to AssemblyApp if requested
    if (loadPaths && !paths.isEmpty()) {
        taskMonitor.setStatusMessage("Loading paths in path view ...");

        assemblyModel.setInputFileName(inputFile.toString());

        // todo: convert to gfa1 paths?
        // note paths in gfa2 can have references to segments, edges, or other groups
        // assemblyModel.setPaths(paths, traversalsByPathName);
    }
}
Use of org.dishevelled.bio.assembly.gfa2.Reference in project molgenis-emx2 by molgenis.
In class SqlColumnRefArrayExecutor, method createReferenceExistsCheck.
/**
 * Creates a PL/pgSQL trigger function, and a deferrable constraint trigger that calls it,
 * to check that the values of this ref_array column exist in the referenced table. The key
 * may be composite, i.e. made up of several columns.
 *
 * <p>The function selects the NEW values (unnested, except for overlapping references),
 * subtracts the matching rows of the referenced table, and raises error code 23503 for any
 * remaining row that satisfies the generated "is not null" condition.
 *
 * @param jooq context used to execute the two DDL statements
 * @param column the referencing column the trigger is created for
 */
private static void createReferenceExistsCheck(DSLContext jooq, Column column) {
    String schema = column.getSchema().getName();
    Name sourceTable = name(schema, column.getTable().getTableName());
    Name targetTable = name(column.getRefSchema(), column.getRefTableName());
    String triggerFunction = getReferenceExistsCheckName(column);
    List<Reference> refs = column.getReferences();

    // comma-separated, quoted column lists on the referencing and referenced side
    String sourceColumnList =
        refs.stream().map(ref -> name(ref.getName()).toString()).collect(Collectors.joining(","));
    String targetColumnList =
        refs.stream().map(ref -> name(ref.getRefTo()).toString()).collect(Collectors.joining(","));

    // SQL expression that renders the offending key values for the error detail
    String errorValueExpression =
        refs.stream()
            .map(ref -> "COALESCE(error_row." + name(ref.getRefTo()).toString() + ",'NULL')")
            .collect(Collectors.joining("||','||"));

    // WHERE clause of the EXCEPT branch; overlapping references are compared directly
    String matchCondition =
        refs.stream()
            .map(
                ref ->
                    ref.isOverlappingRef()
                        ? name(ref.getRefTo()) + " = NEW." + name(ref.getName())
                        : name(ref.getRefTo()) + " = ANY (NEW." + name(ref.getName()) + ")")
            .collect(Collectors.joining(" AND "));

    // select list over NEW; can be overlapping with non_array reference
    String newValueSelectList =
        refs.stream()
            .map(
                ref ->
                    ref.isOverlappingRef()
                        ? "NEW." + name(ref.getName()) + " AS " + name(ref.getRefTo())
                        : "UNNEST(NEW." + name(ref.getName()) + ") AS " + name(ref.getRefTo()))
            .collect(Collectors.joining(","));

    // only raise when at least one non-overlapping field is set
    String anyOwnFieldSet =
        refs.stream()
            .filter(ref -> !ref.isOverlapping())
            .map(ref -> "error_row." + name(ref.getRefTo()) + " IS NOT NULL ")
            .collect(Collectors.joining(" OR "));

    String functionSql =
        "CREATE OR REPLACE FUNCTION {0}() RETURNS trigger AS $BODY$ "
            + "\nDECLARE error_row RECORD;"
            + "\nBEGIN"
            + "\n\tFOR error_row IN SELECT {1} EXCEPT SELECT {2} FROM {3} WHERE {10} LOOP"
            // exclude if only refLink fields are set
            + "\n\t\tIF {11} THEN"
            + "\n\t\t\tRAISE EXCEPTION USING ERRCODE='23503', MESSAGE = 'insert or update on table \"'||{9}||'\" violates foreign key (ref_array) constraint'"
            + " , DETAIL = 'Key ('||{6}||')=('|| {5} ||') is not present in table \"'||{7}||'\", column(s)('||{8}||')';"
            + "\n\t\tEND IF;"
            + "\n\tEND LOOP;"
            + "\n\tRETURN NEW;"
            + "\nEND; $BODY$ LANGUAGE plpgsql;";

    jooq.execute(
        functionSql,
        name(schema, triggerFunction), // {0}
        keyword(newValueSelectList), // {1}
        keyword(targetColumnList), // {2}
        targetTable, // {3}
        sourceTable, // {4} (not referenced by the template above)
        keyword(errorValueExpression), // {5}
        inline(sourceColumnList), // {6}
        inline(column.getRefTableName()), // {7}
        inline(targetColumnList), // {8}
        inline(column.getTableName()), // {9}
        keyword(matchCondition), // {10}
        keyword(anyOwnFieldSet)); // {11}

    // add the trigger
    String triggerSql =
        "CREATE CONSTRAINT TRIGGER {0} "
            + "\n\tAFTER INSERT OR UPDATE OF {1} ON {2} FROM {3}"
            + "\n\tDEFERRABLE INITIALLY IMMEDIATE "
            + "\n\tFOR EACH ROW EXECUTE PROCEDURE {4}()";

    jooq.execute(
        triggerSql,
        name(triggerFunction),
        keyword(sourceColumnList),
        sourceTable,
        targetTable,
        name(column.getTable().getSchema().getName(), triggerFunction));
}
Use of org.dishevelled.bio.assembly.gfa2.Reference in project dishevelled-bio by heuermh.
In class ReassemblePaths, method call.
/**
 * Rebuilds the path records of a GFA 1.0 file from its traversal records.
 *
 * <p>While streaming the input, path and traversal records are collected and every
 * other record is written straight to the output. Afterwards each collected path is
 * written once, with its segments and overlaps taken from the traversals that carry
 * its name, ordered by ordinal. A path for which no traversal was read is written
 * back unchanged.
 *
 * @return {@code 0} when all records have been written
 * @throws Exception if opening, reading, or writing fails
 */
@Override
public Integer call() throws Exception {
    BufferedReader reader = null;
    PrintWriter writer = null;
    try {
        reader = reader(inputGfa1File);
        writer = writer(outputGfa1File);

        // the anonymous listener can only capture final locals
        final PrintWriter w = writer;
        final List<Path> paths = new ArrayList<Path>();
        final ListMultimap<String, Traversal> traversalsByPathName = ArrayListMultimap.create();
        Gfa1Reader.stream(reader, new Gfa1Listener() {
            @Override
            public boolean record(final Gfa1Record gfa1Record) {
                if (gfa1Record instanceof Path) {
                    Path path = (Path) gfa1Record;
                    paths.add(path);
                } else if (gfa1Record instanceof Traversal) {
                    Traversal traversal = (Traversal) gfa1Record;
                    traversalsByPathName.put(traversal.getPathName(), traversal);
                } else {
                    Gfa1Writer.write(gfa1Record, w);
                }
                return true;
            }
        });

        for (Path path : paths) {
            List<Traversal> traversals = traversalsByPathName.get(path.getName());

            // no traversals to reassemble from (e.g. a single-segment path): keep the
            // path as read rather than emitting it with an empty segment list
            if (traversals.isEmpty()) {
                Gfa1Writer.write(path, w);
                continue;
            }

            Collections.sort(traversals, new Comparator<Traversal>() {
                @Override
                public int compare(final Traversal t0, final Traversal t1) {
                    // Integer.compare cannot overflow the way subtraction can
                    return Integer.compare(t0.getOrdinal(), t1.getOrdinal());
                }
            });

            List<Reference> segments = new ArrayList<Reference>();
            List<String> overlaps = new ArrayList<String>();
            for (Traversal traversal : traversals) {
                // the first traversal contributes its source as well as its target
                if (segments.isEmpty()) {
                    segments.add(traversal.getSource());
                }
                segments.add(traversal.getTarget());
                if (traversal.hasOverlap()) {
                    overlaps.add(traversal.getOverlap());
                }
            }
            Gfa1Writer.write(new Path(path.getName(), segments, overlaps.isEmpty() ? null : overlaps, path.getAnnotations()), w);
        }
        return 0;
    } finally {
        // best-effort cleanup: a failure to close must not mask the result or
        // an exception already in flight
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // ignore
            }
        }
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception ignored) {
                // ignore
            }
        }
    }
}
Aggregations