Search in sources :

Example 11 with BreakendSummary

use of au.edu.wehi.idsv.BreakendSummary in project gridss by PapenfussLab.

the class BedpeRecord method parseBreakend.

private static BreakendSummary parseBreakend(SAMSequenceDictionary dict, String chr, String strStart, String strEnd, String strDirection, BreakendDirection defaultDirection) {
    // convert back to 1-based
    int start = Integer.parseInt(strStart) + 1;
    int end = Integer.parseInt(strEnd);
    BreakendDirection dir = defaultDirection;
    if (strDirection.equals("+")) {
        dir = BreakendDirection.Forward;
    } else if (strDirection.equals("-")) {
        dir = BreakendDirection.Backward;
    }
    SAMSequenceRecord seq = dict.getSequence(chr);
    if (seq == null) {
        throw new IllegalArgumentException(String.format("Contig %s missing from reference genome", chr));
    }
    return new BreakendSummary(seq.getSequenceIndex(), dir, (start + end) / 2, start, end);
}
Also used : BreakendDirection(au.edu.wehi.idsv.BreakendDirection) BreakendSummary(au.edu.wehi.idsv.BreakendSummary) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord)

Example 12 with BreakendSummary

use of au.edu.wehi.idsv.BreakendSummary in project gridss by PapenfussLab.

the class NonReferenceContigAssembler method callContig.

/**
 * Calls the contig.
 * Note: to call the contig, additional graph nodes may be required to be loaded. No loading checks
 * (such as flushing excessively dense intervals) are performed on these additional loaded nodes.
 * @param contig to call
 * @return SAMRecord containing breakend assembly, null if a valid break-end assembly contig
 * could not be created from this contig
 */
private SAMRecord callContig(ArrayDeque<KmerPathSubnode> rawcontig) {
    if (rawcontig == null)
        return null;
    ArrayDeque<KmerPathSubnode> contig = rawcontig;
    if (containsKmerRepeat(contig)) {
        // recalculate the called contig, this may break the contig at the repeated kmer
        MisassemblyFixer fixed = new MisassemblyFixer(contig);
        contig = new ArrayDeque<KmerPathSubnode>(fixed.correctMisassignedEvidence(evidenceTracker.support(contig)));
    }
    if (contig.isEmpty())
        return null;
    int contigLength = contig.stream().mapToInt(sn -> sn.length()).sum();
    int targetAnchorLength = Math.max(Math.min(contigLength, maxExpectedBreakendLength()), maxAnchorLength);
    KmerPathNodePath startAnchorPath = new KmerPathNodePath(contig.getFirst(), false, targetAnchorLength + maxEvidenceSupportIntervalWidth + contig.getFirst().length());
    startAnchorPath.greedyTraverse(true, false);
    ArrayDeque<KmerPathSubnode> startingAnchor = startAnchorPath.headNode().asSubnodes();
    startingAnchor.removeLast();
    // make sure we have enough of the graph loaded so that when
    // we traverse forward, our anchor sequence will be fully defined
    advanceUnderlying(contig.getLast().lastEnd() + targetAnchorLength + minDistanceFromNextPositionForEvidenceToBeFullyLoaded());
    KmerPathNodePath endAnchorPath = new KmerPathNodePath(contig.getLast(), true, targetAnchorLength + maxEvidenceSupportIntervalWidth + contig.getLast().length());
    endAnchorPath.greedyTraverse(true, false);
    ArrayDeque<KmerPathSubnode> endingAnchor = endAnchorPath.headNode().asSubnodes();
    endingAnchor.removeFirst();
    List<KmerPathSubnode> fullContig = new ArrayList<KmerPathSubnode>(contig.size() + startingAnchor.size() + endingAnchor.size());
    fullContig.addAll(startingAnchor);
    fullContig.addAll(contig);
    fullContig.addAll(endingAnchor);
    byte[] bases = KmerEncodingHelper.baseCalls(fullContig.stream().flatMap(sn -> sn.node().pathKmers().stream()).collect(Collectors.toList()), k);
    byte[] quals = DeBruijnGraphBase.kmerWeightsToBaseQuals(k, fullContig.stream().flatMapToInt(sn -> sn.node().pathWeights().stream().mapToInt(Integer::intValue)).toArray());
    assert (quals.length == bases.length);
    // left aligned anchor position although it shouldn't matter since anchoring should be a single base wide
    int startAnchorPosition = startingAnchor.size() == 0 ? 0 : startingAnchor.getLast().lastStart() + k - 1;
    int endAnchorPosition = endingAnchor.size() == 0 ? 0 : endingAnchor.getFirst().firstStart();
    int startAnchorBaseCount = startingAnchor.size() == 0 ? 0 : startingAnchor.stream().mapToInt(n -> n.length()).sum() + k - 1;
    int endAnchorBaseCount = endingAnchor.size() == 0 ? 0 : endingAnchor.stream().mapToInt(n -> n.length()).sum() + k - 1;
    int startBasesToTrim = Math.max(0, startAnchorBaseCount - targetAnchorLength);
    int endingBasesToTrim = Math.max(0, endAnchorBaseCount - targetAnchorLength);
    bases = Arrays.copyOfRange(bases, startBasesToTrim, bases.length - endingBasesToTrim);
    quals = Arrays.copyOfRange(quals, startBasesToTrim, quals.length - endingBasesToTrim);
    Set<KmerEvidence> evidence = evidenceTracker.untrack(contig);
    List<DirectedEvidence> evidenceIds = evidence.stream().map(e -> e.evidence()).collect(Collectors.toList());
    SAMRecord assembledContig;
    if (startingAnchor.size() == 0 && endingAnchor.size() == 0) {
        assert (startBasesToTrim == 0);
        assert (endingBasesToTrim == 0);
        // unanchored
        if (evidence.size() > 0) {
            BreakendSummary be = Models.calculateBreakend(aes.getContext().getLinear(), evidence.stream().map(e -> e.evidence().getBreakendSummary()).collect(Collectors.toList()), evidence.stream().map(e -> ScalingHelper.toScaledWeight(e.evidenceQuality())).collect(Collectors.toList()));
            assembledContig = AssemblyFactory.createUnanchoredBreakend(aes.getContext(), aes, assemblyNameGenerator, be, evidenceIds, bases, quals);
            if (evidence.stream().anyMatch(e -> e.isAnchored())) {
                log.debug(String.format("Unanchored assembly %s at %s:%d contains anchored evidence", assembledContig.getReadName(), contigName, contig.getFirst().firstStart()));
            }
        } else {
            // Can't call contig because we don't known the supporting breakend positions
            assembledContig = null;
        }
    } else if (startingAnchor.size() == 0) {
        // end anchored
        assembledContig = AssemblyFactory.createAnchoredBreakend(aes.getContext(), aes, assemblyNameGenerator, BreakendDirection.Backward, evidenceIds, referenceIndex, endAnchorPosition, endAnchorBaseCount - endingBasesToTrim, bases, quals);
    } else if (endingAnchor.size() == 0) {
        // start anchored
        assembledContig = AssemblyFactory.createAnchoredBreakend(aes.getContext(), aes, assemblyNameGenerator, BreakendDirection.Forward, evidenceIds, referenceIndex, startAnchorPosition, startAnchorBaseCount - startBasesToTrim, bases, quals);
    } else {
        if (startAnchorBaseCount + endAnchorBaseCount >= quals.length) {
            // no unanchored bases - not an SV assembly
            assembledContig = null;
        } else {
            assembledContig = AssemblyFactory.createAnchoredBreakpoint(aes.getContext(), aes, assemblyNameGenerator, evidenceIds, referenceIndex, startAnchorPosition, startAnchorBaseCount - startBasesToTrim, referenceIndex, endAnchorPosition, endAnchorBaseCount - endingBasesToTrim, bases, quals);
        }
    }
    if (contigLength > maxExpectedBreakendLength()) {
        log.debug(String.format("Called breakend contig %s at %s:%d-%d of length %d when maximum expected breakend contig length is %d", assembledContig == null ? "" : assembledContig.getReadName(), contigName, rawcontig.getFirst().firstStart(), rawcontig.getLast().lastEnd(), contigLength, maxExpectedBreakendLength()));
    }
    if (assembledContig != null) {
        if (aes.getContext().getConfig().getVisualisation().assemblyGraph) {
            try {
                PositionalExporter.exportDot(new File(aes.getContext().getConfig().getVisualisation().directory, "assembly." + contigName + "." + assembledContig.getReadName() + ".dot"), k, graphByPosition, fullContig);
            } catch (Exception ex) {
                log.debug(ex, "Error exporting assembly ", assembledContig != null ? assembledContig.getReadName() : "(null)", " ", contigName);
            }
        }
        if (aes.getContext().getConfig().getVisualisation().assemblyGraphFullSize) {
            try {
                PositionalExporter.exportNodeDot(new File(aes.getContext().getConfig().getVisualisation().directory, "assembly.fullsize." + contigName + "." + assembledContig.getReadName() + ".dot"), k, graphByPosition, fullContig);
            } catch (Exception ex) {
                log.debug(ex, "Error exporting assembly ", assembledContig != null ? assembledContig.getReadName() : "(null)", " ", contigName);
            }
        }
        if (aes.getContext().getConfig().getVisualisation().assemblyContigMemoization) {
            File file = new File(aes.getContext().getConfig().getVisualisation().directory, "assembly.path.memoization." + contigName + "." + Integer.toString(pathExportCount.incrementAndGet()) + ".csv");
            try {
                bestContigCaller.exportState(file);
            } catch (IOException e) {
                log.debug(e, " Unable to export assembly path memoization to ", file, " ", contigName);
            }
        }
    }
    stats.contigNodes = contig.size();
    stats.truncatedNodes = rawcontig.size() - contig.size();
    stats.contigStartPosition = contig.getFirst().firstStart();
    stats.startAnchorNodes = startingAnchor.size();
    stats.endAnchorNodes = endingAnchor.size();
    if (exportTracker != null) {
        exportTracker.trackAssembly(bestContigCaller);
    }
    // remove all evidence contributing to this assembly from the graph
    if (evidence.size() > 0) {
        removeFromGraph(evidence);
        if (Defaults.SANITY_CHECK_MEMOIZATION) {
            bestContigCaller.sanityCheck(graphByPosition);
        }
    } else {
        if (!MessageThrottler.Current.shouldSupress(log, "unsupported paths")) {
            log.error("Sanity check failure: found path with no support. Attempting to recover by direct node removal ", contigName);
        }
        for (KmerPathSubnode n : contig) {
            removeFromGraph(n.node(), true);
        }
    }
    contigsCalled++;
    if (assembledContig != null) {
        called.add(assembledContig);
    }
    return assembledContig;
}
Also used : AssemblyEvidenceSource(au.edu.wehi.idsv.AssemblyEvidenceSource) Arrays(java.util.Arrays) RangeSet(com.google.common.collect.RangeSet) KmerEncodingHelper(au.edu.wehi.idsv.debruijn.KmerEncodingHelper) SortedSet(java.util.SortedSet) BreakendSummary(au.edu.wehi.idsv.BreakendSummary) ScalingHelper(au.edu.wehi.idsv.graph.ScalingHelper) ContigStats(au.edu.wehi.idsv.visualisation.PositionalDeBruijnGraphTracker.ContigStats) MessageThrottler(au.edu.wehi.idsv.util.MessageThrottler) Defaults(au.edu.wehi.idsv.Defaults) PeekingIterator(com.google.common.collect.PeekingIterator) TreeRangeSet(com.google.common.collect.TreeRangeSet) TreeSet(java.util.TreeSet) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) Models(au.edu.wehi.idsv.model.Models) IntervalUtil(au.edu.wehi.idsv.util.IntervalUtil) Lists(com.google.common.collect.Lists) DeBruijnGraphBase(au.edu.wehi.idsv.debruijn.DeBruijnGraphBase) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Long2ObjectOpenHashMap(it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap) PositionalExporter(au.edu.wehi.idsv.visualisation.PositionalExporter) LongSet(it.unimi.dsi.fastutil.longs.LongSet) AssemblyChunkTelemetry(au.edu.wehi.idsv.visualisation.AssemblyTelemetry.AssemblyChunkTelemetry) IdentityHashMap(java.util.IdentityHashMap) AssemblyIdGenerator(au.edu.wehi.idsv.AssemblyIdGenerator) Iterator(java.util.Iterator) Collection(java.util.Collection) Range(com.google.common.collect.Range) Set(java.util.Set) IOException(java.io.IOException) BreakendDirection(au.edu.wehi.idsv.BreakendDirection) SanityCheckFailureException(au.edu.wehi.idsv.SanityCheckFailureException) Collectors(java.util.stream.Collectors) DirectedEvidence(au.edu.wehi.idsv.DirectedEvidence) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) PositionalDeBruijnGraphTracker(au.edu.wehi.idsv.visualisation.PositionalDeBruijnGraphTracker) List(java.util.List) Log(htsjdk.samtools.util.Log) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet) ObjectOpenCustomHashSet(it.unimi.dsi.fastutil.objects.ObjectOpenCustomHashSet) AssemblyFactory(au.edu.wehi.idsv.AssemblyFactory) Entry(java.util.Map.Entry) Queue(java.util.Queue) Long2ObjectMap(it.unimi.dsi.fastutil.longs.Long2ObjectMap) ArrayDeque(java.util.ArrayDeque) ArrayList(java.util.ArrayList) BreakendSummary(au.edu.wehi.idsv.BreakendSummary) IOException(java.io.IOException) IOException(java.io.IOException) SanityCheckFailureException(au.edu.wehi.idsv.SanityCheckFailureException) DirectedEvidence(au.edu.wehi.idsv.DirectedEvidence) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Example 13 with BreakendSummary

use of au.edu.wehi.idsv.BreakendSummary in project gridss by PapenfussLab.

the class KmerEvidence method create.

public static KmerEvidence create(int k, SingleReadEvidence sre) {
    if (!sre.isBreakendExact()) {
        throw new NotImplementedException("reassembly of XNX placeholder contigs");
    }
    byte[] aseq = sre.getAnchorSequence();
    byte[] beseq = sre.getBreakendSequence();
    byte[] aqual = sre.getAnchorQuality();
    byte[] bequal = sre.getBreakendQuality();
    byte[] seq;
    byte[] qual;
    BreakendSummary bs = sre.getBreakendSummary();
    int positionOffset;
    int firstAnchoredBase;
    int anchoredBases = aseq.length;
    if (bs.direction == BreakendDirection.Forward) {
        seq = ArrayUtils.addAll(aseq, beseq);
        qual = ArrayUtils.addAll(aqual, bequal);
        positionOffset = -(aseq.length - 1);
        firstAnchoredBase = 0;
    } else {
        seq = ArrayUtils.addAll(beseq, aseq);
        qual = ArrayUtils.addAll(bequal, aqual);
        firstAnchoredBase = beseq.length;
        positionOffset = -beseq.length;
    }
    if (k > seq.length) {
        return null;
    }
    return new KmerEvidence(sre, bs.start + positionOffset, bs.end + positionOffset, k, firstAnchoredBase, firstAnchoredBase + anchoredBases - (k - 1), seq, qual, false, false, sre.getBreakendQual());
}
Also used : NotImplementedException(org.apache.commons.lang3.NotImplementedException) BreakendSummary(au.edu.wehi.idsv.BreakendSummary)

Example 14 with BreakendSummary

use of au.edu.wehi.idsv.BreakendSummary in project gridss by PapenfussLab.

the class Models method calculateBreakend.

/**
 * Calculates the most likely breakend interval for the given evidence
 * @param evidence
 * @return breakend interval with highest total evidence quality
 */
public static BreakendSummary calculateBreakend(LinearGenomicCoordinate lgc, List<BreakendSummary> bs, List<Long> weights) {
    if (bs == null || bs.size() == 0)
        throw new IllegalArgumentException("No evidence supplied");
    if (weights.size() != bs.size())
        throw new IllegalArgumentException("Array lenght mismatch");
    Node fwd = maximalInterval(lgc, BreakendDirection.Forward, bs, weights);
    Node bwd = maximalInterval(lgc, BreakendDirection.Backward, bs, weights);
    if (fwd == null && bwd == null) {
        // all evidence is insignificant, just return something as we're going to get filtered anyway
        BreakendSummary fallback = bs.get(0);
        if (fallback instanceof BreakpointSummary) {
            BreakpointSummary bp = (BreakpointSummary) fallback;
            fallback = bp.localBreakend();
        }
        return fallback;
    }
    Node node = fwd;
    BreakendDirection dir = BreakendDirection.Forward;
    if (fwd == null || (bwd != null && fwd.weight < bwd.weight)) {
        node = bwd;
        dir = BreakendDirection.Backward;
    }
    assert (lgc.getReferenceIndex(node.start) == lgc.getReferenceIndex(node.stop));
    int start = lgc.getReferencePosition(node.start);
    int end = lgc.getReferencePosition(node.stop);
    return new BreakendSummary(lgc.getReferenceIndex(node.start), dir, MathUtil.average(start, end), start, end);
}
Also used : Node(au.edu.wehi.idsv.graph.MaximumCliqueIntervalGraph.Node) BreakendDirection(au.edu.wehi.idsv.BreakendDirection) BreakendSummary(au.edu.wehi.idsv.BreakendSummary) BreakpointSummary(au.edu.wehi.idsv.BreakpointSummary)

Example 15 with BreakendSummary

use of au.edu.wehi.idsv.BreakendSummary in project gridss by PapenfussLab.

the class Models method maximalInterval.

private static Node maximalInterval(LinearGenomicCoordinate lgc, BreakendDirection dir, List<BreakendSummary> breaks, List<Long> weights) {
    MaximumCliqueIntervalGraph calc = new MaximumCliqueIntervalGraph();
    List<Node> nodes = new ArrayList<Node>(breaks.size());
    for (int i = 0; i < breaks.size(); i++) {
        BreakendSummary bs = breaks.get(i);
        long weight = weights.get(i);
        if (bs == null)
            continue;
        if (bs.direction != dir)
            continue;
        if (weight > 0) {
            nodes.add(new Node(lgc.getLinearCoordinate(bs.referenceIndex, bs.start), lgc.getLinearCoordinate(bs.referenceIndex, bs.end), weight));
        }
    }
    if (nodes.size() == 0)
        return null;
    return calc.calculateMaximumClique(nodes);
}
Also used : MaximumCliqueIntervalGraph(au.edu.wehi.idsv.graph.MaximumCliqueIntervalGraph) Node(au.edu.wehi.idsv.graph.MaximumCliqueIntervalGraph.Node) ArrayList(java.util.ArrayList) BreakendSummary(au.edu.wehi.idsv.BreakendSummary)

Aggregations

BreakendSummary (au.edu.wehi.idsv.BreakendSummary)19 Test (org.junit.Test)13 BreakpointSummary (au.edu.wehi.idsv.BreakpointSummary)10 MockDirectedEvidence (au.edu.wehi.idsv.MockDirectedEvidence)5 MockDirectedBreakpoint (au.edu.wehi.idsv.MockDirectedBreakpoint)4 ArrayList (java.util.ArrayList)4 AssemblyEvidenceSource (au.edu.wehi.idsv.AssemblyEvidenceSource)3 BreakendDirection (au.edu.wehi.idsv.BreakendDirection)3 DirectedEvidence (au.edu.wehi.idsv.DirectedEvidence)3 ProcessingContext (au.edu.wehi.idsv.ProcessingContext)2 SequentialIdGenerator (au.edu.wehi.idsv.SequentialIdGenerator)2 SingleReadEvidence (au.edu.wehi.idsv.SingleReadEvidence)2 Node (au.edu.wehi.idsv.graph.MaximumCliqueIntervalGraph.Node)2 AssemblyFactory (au.edu.wehi.idsv.AssemblyFactory)1 AssemblyIdGenerator (au.edu.wehi.idsv.AssemblyIdGenerator)1 Defaults (au.edu.wehi.idsv.Defaults)1 IdsvVariantContext (au.edu.wehi.idsv.IdsvVariantContext)1 IdsvVariantContextBuilder (au.edu.wehi.idsv.IdsvVariantContextBuilder)1 SanityCheckFailureException (au.edu.wehi.idsv.SanityCheckFailureException)1 VariantContextDirectedEvidence (au.edu.wehi.idsv.VariantContextDirectedEvidence)1