Search in sources :

Example 1 with Log

use of htsjdk.samtools.util.Log in project gridss by PapenfussLab.

the class SAMRecordUtil method calculateSATags.

private static void calculateSATags(List<SAMRecord> list) {
    // TODO use CC, CP, HI, IH tags if they are present
    // TODO break ties in an other other than just overwriting the previous
    // alignment that
    // finishes at a given position
    SortedMap<Integer, SAMRecord> start = new TreeMap<Integer, SAMRecord>();
    SortedMap<Integer, SAMRecord> end = new TreeMap<Integer, SAMRecord>();
    for (SAMRecord r : list) {
        start.put(getFirstAlignedBaseReadOffset(r), r);
        end.put(getLastAlignedBaseReadOffset(r), r);
    }
    for (SAMRecord r : list) {
        if (r.getAttribute(SAMTag.SA.name()) != null)
            continue;
        ArrayDeque<SAMRecord> alignment = new ArrayDeque<SAMRecord>();
        SAMRecord prev = r;
        while (prev != null) {
            SortedMap<Integer, SAMRecord> before = end.headMap(getFirstAlignedBaseReadOffset(prev));
            if (before.isEmpty()) {
                prev = null;
            } else {
                prev = before.get(before.lastKey());
                alignment.addFirst(prev);
            }
        }
        SAMRecord next = r;
        while (next != null) {
            SortedMap<Integer, SAMRecord> after = start.tailMap(getLastAlignedBaseReadOffset(next) + 1);
            if (after.isEmpty()) {
                next = null;
            } else {
                next = after.get(after.firstKey());
                alignment.addLast(next);
            }
        }
        if (alignment.size() == 0) {
            r.setAttribute(SAMTag.SA.name(), null);
        } else {
            List<SAMRecord> aln = new ArrayList<>(alignment);
            Collections.sort(aln, new Ordering<SAMRecord>() {

                @Override
                public int compare(SAMRecord arg0, SAMRecord arg1) {
                    // element points to the primary line.
                    return Booleans.compare(arg0.getSupplementaryAlignmentFlag(), arg1.getSupplementaryAlignmentFlag());
                }
            });
            StringBuilder sb = new StringBuilder();
            for (SAMRecord chim : aln) {
                sb.append(new ChimericAlignment(chim));
                sb.append(";");
            }
            sb.deleteCharAt(sb.length() - 1);
            r.setAttribute(SAMTag.SA.name(), sb.toString());
        }
    }
    Set<ChimericAlignment> referencedReads = list.stream().flatMap(r -> ChimericAlignment.getChimericAlignments(r.getStringAttribute(SAMTag.SA.name())).stream()).collect(Collectors.toSet());
    // validate SA tags
    for (SAMRecord r : list) {
        referencedReads.remove(new ChimericAlignment(r));
    }
    if (!referencedReads.isEmpty()) {
        if (!MessageThrottler.Current.shouldSupress(log, "Missing records referred to by SA tags")) {
            log.warn(String.format("SA tag of read %s refers to missing alignments %s", list.get(0).getReadName(), referencedReads));
        }
    }
}
Also used : Booleans(com.google.common.primitives.Booleans) TextCigarCodec(htsjdk.samtools.TextCigarCodec) Cigar(htsjdk.samtools.Cigar) Arrays(java.util.Arrays) Iterables(com.google.common.collect.Iterables) SequenceUtil(htsjdk.samtools.util.SequenceUtil) SAMUtils(htsjdk.samtools.SAMUtils) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) HashMap(java.util.HashMap) ArrayUtils(org.apache.commons.lang3.ArrayUtils) MessageThrottler(au.edu.wehi.idsv.util.MessageThrottler) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) IntervalUtil(au.edu.wehi.idsv.util.IntervalUtil) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SAMTag(htsjdk.samtools.SAMTag) Lists(com.google.common.collect.Lists) StringUtil(htsjdk.samtools.util.StringUtil) ReferenceLookup(au.edu.wehi.idsv.picard.ReferenceLookup) PairOrientation(htsjdk.samtools.SamPairUtil.PairOrientation) ImmutableSet(com.google.common.collect.ImmutableSet) SAMException(htsjdk.samtools.SAMException) Set(java.util.Set) SamPairUtil(htsjdk.samtools.SamPairUtil) BreakendDirection(au.edu.wehi.idsv.BreakendDirection) Alignment(au.edu.wehi.idsv.alignment.Alignment) Collectors(java.util.stream.Collectors) ComparisonChain(com.google.common.collect.ComparisonChain) Sets(com.google.common.collect.Sets) Bytes(com.google.common.primitives.Bytes) SAMRecord(htsjdk.samtools.SAMRecord) MathUtil(au.edu.wehi.idsv.util.MathUtil) List(java.util.List) AlignerFactory(au.edu.wehi.idsv.alignment.AlignerFactory) Log(htsjdk.samtools.util.Log) TreeMap(java.util.TreeMap) Ordering(com.google.common.collect.Ordering) SAMRecordCoordinateComparator(htsjdk.samtools.SAMRecordCoordinateComparator) Optional(java.util.Optional) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) ArrayDeque(java.util.ArrayDeque) Comparator(java.util.Comparator) Collections(java.util.Collections) SortedMap(java.util.SortedMap) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) ArrayDeque(java.util.ArrayDeque) SAMRecord(htsjdk.samtools.SAMRecord)

Example 2 with Log

use of htsjdk.samtools.util.Log in project gridss by PapenfussLab.

the class NonReferenceContigAssembler method callContig.

/**
 * Calls the contig.
 * Note: to call the contig, additional graph nodes may be required to be loaded. No loading checks
 * (such as flushing excessively dense intervals) are performed on these additional loaded nodes.
 * @param contig to call
 * @return SAMRecord containing breakend assembly, null if a valid break-end assembly contig
 * could not be created from this contig
 */
private SAMRecord callContig(ArrayDeque<KmerPathSubnode> rawcontig) {
    if (rawcontig == null)
        return null;
    ArrayDeque<KmerPathSubnode> contig = rawcontig;
    if (containsKmerRepeat(contig)) {
        // recalculate the called contig, this may break the contig at the repeated kmer
        MisassemblyFixer fixed = new MisassemblyFixer(contig);
        contig = new ArrayDeque<KmerPathSubnode>(fixed.correctMisassignedEvidence(evidenceTracker.support(contig)));
    }
    if (contig.isEmpty())
        return null;
    int contigLength = contig.stream().mapToInt(sn -> sn.length()).sum();
    int targetAnchorLength = Math.max(Math.min(contigLength, maxExpectedBreakendLength()), maxAnchorLength);
    KmerPathNodePath startAnchorPath = new KmerPathNodePath(contig.getFirst(), false, targetAnchorLength + maxEvidenceSupportIntervalWidth + contig.getFirst().length());
    startAnchorPath.greedyTraverse(true, false);
    ArrayDeque<KmerPathSubnode> startingAnchor = startAnchorPath.headNode().asSubnodes();
    startingAnchor.removeLast();
    // make sure we have enough of the graph loaded so that when
    // we traverse forward, our anchor sequence will be fully defined
    advanceUnderlying(contig.getLast().lastEnd() + targetAnchorLength + minDistanceFromNextPositionForEvidenceToBeFullyLoaded());
    KmerPathNodePath endAnchorPath = new KmerPathNodePath(contig.getLast(), true, targetAnchorLength + maxEvidenceSupportIntervalWidth + contig.getLast().length());
    endAnchorPath.greedyTraverse(true, false);
    ArrayDeque<KmerPathSubnode> endingAnchor = endAnchorPath.headNode().asSubnodes();
    endingAnchor.removeFirst();
    List<KmerPathSubnode> fullContig = new ArrayList<KmerPathSubnode>(contig.size() + startingAnchor.size() + endingAnchor.size());
    fullContig.addAll(startingAnchor);
    fullContig.addAll(contig);
    fullContig.addAll(endingAnchor);
    byte[] bases = KmerEncodingHelper.baseCalls(fullContig.stream().flatMap(sn -> sn.node().pathKmers().stream()).collect(Collectors.toList()), k);
    byte[] quals = DeBruijnGraphBase.kmerWeightsToBaseQuals(k, fullContig.stream().flatMapToInt(sn -> sn.node().pathWeights().stream().mapToInt(Integer::intValue)).toArray());
    assert (quals.length == bases.length);
    // left aligned anchor position although it shouldn't matter since anchoring should be a single base wide
    int startAnchorPosition = startingAnchor.size() == 0 ? 0 : startingAnchor.getLast().lastStart() + k - 1;
    int endAnchorPosition = endingAnchor.size() == 0 ? 0 : endingAnchor.getFirst().firstStart();
    int startAnchorBaseCount = startingAnchor.size() == 0 ? 0 : startingAnchor.stream().mapToInt(n -> n.length()).sum() + k - 1;
    int endAnchorBaseCount = endingAnchor.size() == 0 ? 0 : endingAnchor.stream().mapToInt(n -> n.length()).sum() + k - 1;
    int startBasesToTrim = Math.max(0, startAnchorBaseCount - targetAnchorLength);
    int endingBasesToTrim = Math.max(0, endAnchorBaseCount - targetAnchorLength);
    bases = Arrays.copyOfRange(bases, startBasesToTrim, bases.length - endingBasesToTrim);
    quals = Arrays.copyOfRange(quals, startBasesToTrim, quals.length - endingBasesToTrim);
    Set<KmerEvidence> evidence = evidenceTracker.untrack(contig);
    List<DirectedEvidence> evidenceIds = evidence.stream().map(e -> e.evidence()).collect(Collectors.toList());
    SAMRecord assembledContig;
    if (startingAnchor.size() == 0 && endingAnchor.size() == 0) {
        assert (startBasesToTrim == 0);
        assert (endingBasesToTrim == 0);
        // unanchored
        if (evidence.size() > 0) {
            BreakendSummary be = Models.calculateBreakend(aes.getContext().getLinear(), evidence.stream().map(e -> e.evidence().getBreakendSummary()).collect(Collectors.toList()), evidence.stream().map(e -> ScalingHelper.toScaledWeight(e.evidenceQuality())).collect(Collectors.toList()));
            assembledContig = AssemblyFactory.createUnanchoredBreakend(aes.getContext(), aes, assemblyNameGenerator, be, evidenceIds, bases, quals);
            if (evidence.stream().anyMatch(e -> e.isAnchored())) {
                log.debug(String.format("Unanchored assembly %s at %s:%d contains anchored evidence", assembledContig.getReadName(), contigName, contig.getFirst().firstStart()));
            }
        } else {
            // Can't call contig because we don't known the supporting breakend positions
            assembledContig = null;
        }
    } else if (startingAnchor.size() == 0) {
        // end anchored
        assembledContig = AssemblyFactory.createAnchoredBreakend(aes.getContext(), aes, assemblyNameGenerator, BreakendDirection.Backward, evidenceIds, referenceIndex, endAnchorPosition, endAnchorBaseCount - endingBasesToTrim, bases, quals);
    } else if (endingAnchor.size() == 0) {
        // start anchored
        assembledContig = AssemblyFactory.createAnchoredBreakend(aes.getContext(), aes, assemblyNameGenerator, BreakendDirection.Forward, evidenceIds, referenceIndex, startAnchorPosition, startAnchorBaseCount - startBasesToTrim, bases, quals);
    } else {
        if (startAnchorBaseCount + endAnchorBaseCount >= quals.length) {
            // no unanchored bases - not an SV assembly
            assembledContig = null;
        } else {
            assembledContig = AssemblyFactory.createAnchoredBreakpoint(aes.getContext(), aes, assemblyNameGenerator, evidenceIds, referenceIndex, startAnchorPosition, startAnchorBaseCount - startBasesToTrim, referenceIndex, endAnchorPosition, endAnchorBaseCount - endingBasesToTrim, bases, quals);
        }
    }
    if (contigLength > maxExpectedBreakendLength()) {
        log.debug(String.format("Called breakend contig %s at %s:%d-%d of length %d when maximum expected breakend contig length is %d", assembledContig == null ? "" : assembledContig.getReadName(), contigName, rawcontig.getFirst().firstStart(), rawcontig.getLast().lastEnd(), contigLength, maxExpectedBreakendLength()));
    }
    if (assembledContig != null) {
        if (aes.getContext().getConfig().getVisualisation().assemblyGraph) {
            try {
                PositionalExporter.exportDot(new File(aes.getContext().getConfig().getVisualisation().directory, "assembly." + contigName + "." + assembledContig.getReadName() + ".dot"), k, graphByPosition, fullContig);
            } catch (Exception ex) {
                log.debug(ex, "Error exporting assembly ", assembledContig != null ? assembledContig.getReadName() : "(null)", " ", contigName);
            }
        }
        if (aes.getContext().getConfig().getVisualisation().assemblyGraphFullSize) {
            try {
                PositionalExporter.exportNodeDot(new File(aes.getContext().getConfig().getVisualisation().directory, "assembly.fullsize." + contigName + "." + assembledContig.getReadName() + ".dot"), k, graphByPosition, fullContig);
            } catch (Exception ex) {
                log.debug(ex, "Error exporting assembly ", assembledContig != null ? assembledContig.getReadName() : "(null)", " ", contigName);
            }
        }
        if (aes.getContext().getConfig().getVisualisation().assemblyContigMemoization) {
            File file = new File(aes.getContext().getConfig().getVisualisation().directory, "assembly.path.memoization." + contigName + "." + Integer.toString(pathExportCount.incrementAndGet()) + ".csv");
            try {
                bestContigCaller.exportState(file);
            } catch (IOException e) {
                log.debug(e, " Unable to export assembly path memoization to ", file, " ", contigName);
            }
        }
    }
    stats.contigNodes = contig.size();
    stats.truncatedNodes = rawcontig.size() - contig.size();
    stats.contigStartPosition = contig.getFirst().firstStart();
    stats.startAnchorNodes = startingAnchor.size();
    stats.endAnchorNodes = endingAnchor.size();
    if (exportTracker != null) {
        exportTracker.trackAssembly(bestContigCaller);
    }
    // remove all evidence contributing to this assembly from the graph
    if (evidence.size() > 0) {
        removeFromGraph(evidence);
        if (Defaults.SANITY_CHECK_MEMOIZATION) {
            bestContigCaller.sanityCheck(graphByPosition);
        }
    } else {
        if (!MessageThrottler.Current.shouldSupress(log, "unsupported paths")) {
            log.error("Sanity check failure: found path with no support. Attempting to recover by direct node removal ", contigName);
        }
        for (KmerPathSubnode n : contig) {
            removeFromGraph(n.node(), true);
        }
    }
    contigsCalled++;
    if (assembledContig != null) {
        called.add(assembledContig);
    }
    return assembledContig;
}
Also used : AssemblyEvidenceSource(au.edu.wehi.idsv.AssemblyEvidenceSource) Arrays(java.util.Arrays) RangeSet(com.google.common.collect.RangeSet) KmerEncodingHelper(au.edu.wehi.idsv.debruijn.KmerEncodingHelper) SortedSet(java.util.SortedSet) BreakendSummary(au.edu.wehi.idsv.BreakendSummary) ScalingHelper(au.edu.wehi.idsv.graph.ScalingHelper) ContigStats(au.edu.wehi.idsv.visualisation.PositionalDeBruijnGraphTracker.ContigStats) MessageThrottler(au.edu.wehi.idsv.util.MessageThrottler) Defaults(au.edu.wehi.idsv.Defaults) PeekingIterator(com.google.common.collect.PeekingIterator) TreeRangeSet(com.google.common.collect.TreeRangeSet) TreeSet(java.util.TreeSet) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) Models(au.edu.wehi.idsv.model.Models) IntervalUtil(au.edu.wehi.idsv.util.IntervalUtil) Lists(com.google.common.collect.Lists) DeBruijnGraphBase(au.edu.wehi.idsv.debruijn.DeBruijnGraphBase) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Long2ObjectOpenHashMap(it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap) PositionalExporter(au.edu.wehi.idsv.visualisation.PositionalExporter) LongSet(it.unimi.dsi.fastutil.longs.LongSet) AssemblyChunkTelemetry(au.edu.wehi.idsv.visualisation.AssemblyTelemetry.AssemblyChunkTelemetry) IdentityHashMap(java.util.IdentityHashMap) AssemblyIdGenerator(au.edu.wehi.idsv.AssemblyIdGenerator) Iterator(java.util.Iterator) Collection(java.util.Collection) Range(com.google.common.collect.Range) Set(java.util.Set) IOException(java.io.IOException) BreakendDirection(au.edu.wehi.idsv.BreakendDirection) SanityCheckFailureException(au.edu.wehi.idsv.SanityCheckFailureException) Collectors(java.util.stream.Collectors) DirectedEvidence(au.edu.wehi.idsv.DirectedEvidence) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) PositionalDeBruijnGraphTracker(au.edu.wehi.idsv.visualisation.PositionalDeBruijnGraphTracker) List(java.util.List) Log(htsjdk.samtools.util.Log) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet) ObjectOpenCustomHashSet(it.unimi.dsi.fastutil.objects.ObjectOpenCustomHashSet) AssemblyFactory(au.edu.wehi.idsv.AssemblyFactory) Entry(java.util.Map.Entry) Queue(java.util.Queue) Long2ObjectMap(it.unimi.dsi.fastutil.longs.Long2ObjectMap) ArrayDeque(java.util.ArrayDeque) ArrayList(java.util.ArrayList) BreakendSummary(au.edu.wehi.idsv.BreakendSummary) IOException(java.io.IOException) IOException(java.io.IOException) SanityCheckFailureException(au.edu.wehi.idsv.SanityCheckFailureException) DirectedEvidence(au.edu.wehi.idsv.DirectedEvidence) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Aggregations

BreakendDirection (au.edu.wehi.idsv.BreakendDirection)2 IntervalUtil (au.edu.wehi.idsv.util.IntervalUtil)2 MessageThrottler (au.edu.wehi.idsv.util.MessageThrottler)2 Lists (com.google.common.collect.Lists)2 SAMRecord (htsjdk.samtools.SAMRecord)2 Log (htsjdk.samtools.util.Log)2 ArrayDeque (java.util.ArrayDeque)2 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 List (java.util.List)2 AssemblyEvidenceSource (au.edu.wehi.idsv.AssemblyEvidenceSource)1 AssemblyFactory (au.edu.wehi.idsv.AssemblyFactory)1 AssemblyIdGenerator (au.edu.wehi.idsv.AssemblyIdGenerator)1 BreakendSummary (au.edu.wehi.idsv.BreakendSummary)1 Defaults (au.edu.wehi.idsv.Defaults)1 DirectedEvidence (au.edu.wehi.idsv.DirectedEvidence)1 SanityCheckFailureException (au.edu.wehi.idsv.SanityCheckFailureException)1 AlignerFactory (au.edu.wehi.idsv.alignment.AlignerFactory)1 Alignment (au.edu.wehi.idsv.alignment.Alignment)1 DeBruijnGraphBase (au.edu.wehi.idsv.debruijn.DeBruijnGraphBase)1