use of au.edu.wehi.idsv.BreakendSummary in project gridss by PapenfussLab.
the class BedpeRecord method parseBreakend.
private static BreakendSummary parseBreakend(SAMSequenceDictionary dict, String chr, String strStart, String strEnd, String strDirection, BreakendDirection defaultDirection) {
// convert back to 1-based
int start = Integer.parseInt(strStart) + 1;
int end = Integer.parseInt(strEnd);
BreakendDirection dir = defaultDirection;
if (strDirection.equals("+")) {
dir = BreakendDirection.Forward;
} else if (strDirection.equals("-")) {
dir = BreakendDirection.Backward;
}
SAMSequenceRecord seq = dict.getSequence(chr);
if (seq == null) {
throw new IllegalArgumentException(String.format("Contig %s missing from reference genome", chr));
}
return new BreakendSummary(seq.getSequenceIndex(), dir, (start + end) / 2, start, end);
}
use of au.edu.wehi.idsv.BreakendSummary in project gridss by PapenfussLab.
the class NonReferenceContigAssembler method callContig.
/**
* Calls the contig.
* Note: to call the contig, additional graph nodes may be required to be loaded. No loading checks
* (such as flushing excessively dense intervals) are performed on these additional loaded nodes.
* @param contig to call
* @return SAMRecord containing breakend assembly, null if a valid break-end assembly contig
* could not be created from this contig
*/
private SAMRecord callContig(ArrayDeque<KmerPathSubnode> rawcontig) {
if (rawcontig == null)
return null;
ArrayDeque<KmerPathSubnode> contig = rawcontig;
if (containsKmerRepeat(contig)) {
// recalculate the called contig, this may break the contig at the repeated kmer
MisassemblyFixer fixed = new MisassemblyFixer(contig);
contig = new ArrayDeque<KmerPathSubnode>(fixed.correctMisassignedEvidence(evidenceTracker.support(contig)));
}
if (contig.isEmpty())
return null;
int contigLength = contig.stream().mapToInt(sn -> sn.length()).sum();
int targetAnchorLength = Math.max(Math.min(contigLength, maxExpectedBreakendLength()), maxAnchorLength);
KmerPathNodePath startAnchorPath = new KmerPathNodePath(contig.getFirst(), false, targetAnchorLength + maxEvidenceSupportIntervalWidth + contig.getFirst().length());
startAnchorPath.greedyTraverse(true, false);
ArrayDeque<KmerPathSubnode> startingAnchor = startAnchorPath.headNode().asSubnodes();
startingAnchor.removeLast();
// make sure we have enough of the graph loaded so that when
// we traverse forward, our anchor sequence will be fully defined
advanceUnderlying(contig.getLast().lastEnd() + targetAnchorLength + minDistanceFromNextPositionForEvidenceToBeFullyLoaded());
KmerPathNodePath endAnchorPath = new KmerPathNodePath(contig.getLast(), true, targetAnchorLength + maxEvidenceSupportIntervalWidth + contig.getLast().length());
endAnchorPath.greedyTraverse(true, false);
ArrayDeque<KmerPathSubnode> endingAnchor = endAnchorPath.headNode().asSubnodes();
endingAnchor.removeFirst();
List<KmerPathSubnode> fullContig = new ArrayList<KmerPathSubnode>(contig.size() + startingAnchor.size() + endingAnchor.size());
fullContig.addAll(startingAnchor);
fullContig.addAll(contig);
fullContig.addAll(endingAnchor);
byte[] bases = KmerEncodingHelper.baseCalls(fullContig.stream().flatMap(sn -> sn.node().pathKmers().stream()).collect(Collectors.toList()), k);
byte[] quals = DeBruijnGraphBase.kmerWeightsToBaseQuals(k, fullContig.stream().flatMapToInt(sn -> sn.node().pathWeights().stream().mapToInt(Integer::intValue)).toArray());
assert (quals.length == bases.length);
// left aligned anchor position although it shouldn't matter since anchoring should be a single base wide
int startAnchorPosition = startingAnchor.size() == 0 ? 0 : startingAnchor.getLast().lastStart() + k - 1;
int endAnchorPosition = endingAnchor.size() == 0 ? 0 : endingAnchor.getFirst().firstStart();
int startAnchorBaseCount = startingAnchor.size() == 0 ? 0 : startingAnchor.stream().mapToInt(n -> n.length()).sum() + k - 1;
int endAnchorBaseCount = endingAnchor.size() == 0 ? 0 : endingAnchor.stream().mapToInt(n -> n.length()).sum() + k - 1;
int startBasesToTrim = Math.max(0, startAnchorBaseCount - targetAnchorLength);
int endingBasesToTrim = Math.max(0, endAnchorBaseCount - targetAnchorLength);
bases = Arrays.copyOfRange(bases, startBasesToTrim, bases.length - endingBasesToTrim);
quals = Arrays.copyOfRange(quals, startBasesToTrim, quals.length - endingBasesToTrim);
Set<KmerEvidence> evidence = evidenceTracker.untrack(contig);
List<DirectedEvidence> evidenceIds = evidence.stream().map(e -> e.evidence()).collect(Collectors.toList());
SAMRecord assembledContig;
if (startingAnchor.size() == 0 && endingAnchor.size() == 0) {
assert (startBasesToTrim == 0);
assert (endingBasesToTrim == 0);
// unanchored
if (evidence.size() > 0) {
BreakendSummary be = Models.calculateBreakend(aes.getContext().getLinear(), evidence.stream().map(e -> e.evidence().getBreakendSummary()).collect(Collectors.toList()), evidence.stream().map(e -> ScalingHelper.toScaledWeight(e.evidenceQuality())).collect(Collectors.toList()));
assembledContig = AssemblyFactory.createUnanchoredBreakend(aes.getContext(), aes, assemblyNameGenerator, be, evidenceIds, bases, quals);
if (evidence.stream().anyMatch(e -> e.isAnchored())) {
log.debug(String.format("Unanchored assembly %s at %s:%d contains anchored evidence", assembledContig.getReadName(), contigName, contig.getFirst().firstStart()));
}
} else {
// Can't call contig because we don't known the supporting breakend positions
assembledContig = null;
}
} else if (startingAnchor.size() == 0) {
// end anchored
assembledContig = AssemblyFactory.createAnchoredBreakend(aes.getContext(), aes, assemblyNameGenerator, BreakendDirection.Backward, evidenceIds, referenceIndex, endAnchorPosition, endAnchorBaseCount - endingBasesToTrim, bases, quals);
} else if (endingAnchor.size() == 0) {
// start anchored
assembledContig = AssemblyFactory.createAnchoredBreakend(aes.getContext(), aes, assemblyNameGenerator, BreakendDirection.Forward, evidenceIds, referenceIndex, startAnchorPosition, startAnchorBaseCount - startBasesToTrim, bases, quals);
} else {
if (startAnchorBaseCount + endAnchorBaseCount >= quals.length) {
// no unanchored bases - not an SV assembly
assembledContig = null;
} else {
assembledContig = AssemblyFactory.createAnchoredBreakpoint(aes.getContext(), aes, assemblyNameGenerator, evidenceIds, referenceIndex, startAnchorPosition, startAnchorBaseCount - startBasesToTrim, referenceIndex, endAnchorPosition, endAnchorBaseCount - endingBasesToTrim, bases, quals);
}
}
if (contigLength > maxExpectedBreakendLength()) {
log.debug(String.format("Called breakend contig %s at %s:%d-%d of length %d when maximum expected breakend contig length is %d", assembledContig == null ? "" : assembledContig.getReadName(), contigName, rawcontig.getFirst().firstStart(), rawcontig.getLast().lastEnd(), contigLength, maxExpectedBreakendLength()));
}
if (assembledContig != null) {
if (aes.getContext().getConfig().getVisualisation().assemblyGraph) {
try {
PositionalExporter.exportDot(new File(aes.getContext().getConfig().getVisualisation().directory, "assembly." + contigName + "." + assembledContig.getReadName() + ".dot"), k, graphByPosition, fullContig);
} catch (Exception ex) {
log.debug(ex, "Error exporting assembly ", assembledContig != null ? assembledContig.getReadName() : "(null)", " ", contigName);
}
}
if (aes.getContext().getConfig().getVisualisation().assemblyGraphFullSize) {
try {
PositionalExporter.exportNodeDot(new File(aes.getContext().getConfig().getVisualisation().directory, "assembly.fullsize." + contigName + "." + assembledContig.getReadName() + ".dot"), k, graphByPosition, fullContig);
} catch (Exception ex) {
log.debug(ex, "Error exporting assembly ", assembledContig != null ? assembledContig.getReadName() : "(null)", " ", contigName);
}
}
if (aes.getContext().getConfig().getVisualisation().assemblyContigMemoization) {
File file = new File(aes.getContext().getConfig().getVisualisation().directory, "assembly.path.memoization." + contigName + "." + Integer.toString(pathExportCount.incrementAndGet()) + ".csv");
try {
bestContigCaller.exportState(file);
} catch (IOException e) {
log.debug(e, " Unable to export assembly path memoization to ", file, " ", contigName);
}
}
}
stats.contigNodes = contig.size();
stats.truncatedNodes = rawcontig.size() - contig.size();
stats.contigStartPosition = contig.getFirst().firstStart();
stats.startAnchorNodes = startingAnchor.size();
stats.endAnchorNodes = endingAnchor.size();
if (exportTracker != null) {
exportTracker.trackAssembly(bestContigCaller);
}
// remove all evidence contributing to this assembly from the graph
if (evidence.size() > 0) {
removeFromGraph(evidence);
if (Defaults.SANITY_CHECK_MEMOIZATION) {
bestContigCaller.sanityCheck(graphByPosition);
}
} else {
if (!MessageThrottler.Current.shouldSupress(log, "unsupported paths")) {
log.error("Sanity check failure: found path with no support. Attempting to recover by direct node removal ", contigName);
}
for (KmerPathSubnode n : contig) {
removeFromGraph(n.node(), true);
}
}
contigsCalled++;
if (assembledContig != null) {
called.add(assembledContig);
}
return assembledContig;
}
use of au.edu.wehi.idsv.BreakendSummary in project gridss by PapenfussLab.
the class KmerEvidence method create.
public static KmerEvidence create(int k, SingleReadEvidence sre) {
if (!sre.isBreakendExact()) {
throw new NotImplementedException("reassembly of XNX placeholder contigs");
}
byte[] aseq = sre.getAnchorSequence();
byte[] beseq = sre.getBreakendSequence();
byte[] aqual = sre.getAnchorQuality();
byte[] bequal = sre.getBreakendQuality();
byte[] seq;
byte[] qual;
BreakendSummary bs = sre.getBreakendSummary();
int positionOffset;
int firstAnchoredBase;
int anchoredBases = aseq.length;
if (bs.direction == BreakendDirection.Forward) {
seq = ArrayUtils.addAll(aseq, beseq);
qual = ArrayUtils.addAll(aqual, bequal);
positionOffset = -(aseq.length - 1);
firstAnchoredBase = 0;
} else {
seq = ArrayUtils.addAll(beseq, aseq);
qual = ArrayUtils.addAll(bequal, aqual);
firstAnchoredBase = beseq.length;
positionOffset = -beseq.length;
}
if (k > seq.length) {
return null;
}
return new KmerEvidence(sre, bs.start + positionOffset, bs.end + positionOffset, k, firstAnchoredBase, firstAnchoredBase + anchoredBases - (k - 1), seq, qual, false, false, sre.getBreakendQual());
}
use of au.edu.wehi.idsv.BreakendSummary in project gridss by PapenfussLab.
the class Models method calculateBreakend.
/**
* Calculates the most likely breakend interval for the given evidence
* @param evidence
* @return breakend interval with highest total evidence quality
*/
public static BreakendSummary calculateBreakend(LinearGenomicCoordinate lgc, List<BreakendSummary> bs, List<Long> weights) {
if (bs == null || bs.size() == 0)
throw new IllegalArgumentException("No evidence supplied");
if (weights.size() != bs.size())
throw new IllegalArgumentException("Array lenght mismatch");
Node fwd = maximalInterval(lgc, BreakendDirection.Forward, bs, weights);
Node bwd = maximalInterval(lgc, BreakendDirection.Backward, bs, weights);
if (fwd == null && bwd == null) {
// all evidence is insignificant, just return something as we're going to get filtered anyway
BreakendSummary fallback = bs.get(0);
if (fallback instanceof BreakpointSummary) {
BreakpointSummary bp = (BreakpointSummary) fallback;
fallback = bp.localBreakend();
}
return fallback;
}
Node node = fwd;
BreakendDirection dir = BreakendDirection.Forward;
if (fwd == null || (bwd != null && fwd.weight < bwd.weight)) {
node = bwd;
dir = BreakendDirection.Backward;
}
assert (lgc.getReferenceIndex(node.start) == lgc.getReferenceIndex(node.stop));
int start = lgc.getReferencePosition(node.start);
int end = lgc.getReferencePosition(node.stop);
return new BreakendSummary(lgc.getReferenceIndex(node.start), dir, MathUtil.average(start, end), start, end);
}
use of au.edu.wehi.idsv.BreakendSummary in project gridss by PapenfussLab.
the class Models method maximalInterval.
private static Node maximalInterval(LinearGenomicCoordinate lgc, BreakendDirection dir, List<BreakendSummary> breaks, List<Long> weights) {
MaximumCliqueIntervalGraph calc = new MaximumCliqueIntervalGraph();
List<Node> nodes = new ArrayList<Node>(breaks.size());
for (int i = 0; i < breaks.size(); i++) {
BreakendSummary bs = breaks.get(i);
long weight = weights.get(i);
if (bs == null)
continue;
if (bs.direction != dir)
continue;
if (weight > 0) {
nodes.add(new Node(lgc.getLinearCoordinate(bs.referenceIndex, bs.start), lgc.getLinearCoordinate(bs.referenceIndex, bs.end), weight));
}
}
if (nodes.size() == 0)
return null;
return calc.calculateMaximumClique(nodes);
}
Aggregations