Search in sources :

Example 1 with Alignment

use of au.edu.wehi.idsv.alignment.Alignment in project gridss by PapenfussLab.

the class SAMRecordUtil method calculateSATags.

private static void calculateSATags(List<SAMRecord> list) {
    // TODO use CC, CP, HI, IH tags if they are present
    // TODO break ties in an other other than just overwriting the previous
    // alignment that
    // finishes at a given position
    SortedMap<Integer, SAMRecord> start = new TreeMap<Integer, SAMRecord>();
    SortedMap<Integer, SAMRecord> end = new TreeMap<Integer, SAMRecord>();
    for (SAMRecord r : list) {
        start.put(getFirstAlignedBaseReadOffset(r), r);
        end.put(getLastAlignedBaseReadOffset(r), r);
    }
    for (SAMRecord r : list) {
        if (r.getAttribute(SAMTag.SA.name()) != null)
            continue;
        ArrayDeque<SAMRecord> alignment = new ArrayDeque<SAMRecord>();
        SAMRecord prev = r;
        while (prev != null) {
            SortedMap<Integer, SAMRecord> before = end.headMap(getFirstAlignedBaseReadOffset(prev));
            if (before.isEmpty()) {
                prev = null;
            } else {
                prev = before.get(before.lastKey());
                alignment.addFirst(prev);
            }
        }
        SAMRecord next = r;
        while (next != null) {
            SortedMap<Integer, SAMRecord> after = start.tailMap(getLastAlignedBaseReadOffset(next) + 1);
            if (after.isEmpty()) {
                next = null;
            } else {
                next = after.get(after.firstKey());
                alignment.addLast(next);
            }
        }
        if (alignment.size() == 0) {
            r.setAttribute(SAMTag.SA.name(), null);
        } else {
            List<SAMRecord> aln = new ArrayList<>(alignment);
            Collections.sort(aln, new Ordering<SAMRecord>() {

                @Override
                public int compare(SAMRecord arg0, SAMRecord arg1) {
                    // element points to the primary line.
                    return Booleans.compare(arg0.getSupplementaryAlignmentFlag(), arg1.getSupplementaryAlignmentFlag());
                }
            });
            StringBuilder sb = new StringBuilder();
            for (SAMRecord chim : aln) {
                sb.append(new ChimericAlignment(chim));
                sb.append(";");
            }
            sb.deleteCharAt(sb.length() - 1);
            r.setAttribute(SAMTag.SA.name(), sb.toString());
        }
    }
    Set<ChimericAlignment> referencedReads = list.stream().flatMap(r -> ChimericAlignment.getChimericAlignments(r.getStringAttribute(SAMTag.SA.name())).stream()).collect(Collectors.toSet());
    // validate SA tags
    for (SAMRecord r : list) {
        referencedReads.remove(new ChimericAlignment(r));
    }
    if (!referencedReads.isEmpty()) {
        if (!MessageThrottler.Current.shouldSupress(log, "Missing records referred to by SA tags")) {
            log.warn(String.format("SA tag of read %s refers to missing alignments %s", list.get(0).getReadName(), referencedReads));
        }
    }
}
Also used : Booleans(com.google.common.primitives.Booleans) TextCigarCodec(htsjdk.samtools.TextCigarCodec) Cigar(htsjdk.samtools.Cigar) Arrays(java.util.Arrays) Iterables(com.google.common.collect.Iterables) SequenceUtil(htsjdk.samtools.util.SequenceUtil) SAMUtils(htsjdk.samtools.SAMUtils) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) HashMap(java.util.HashMap) ArrayUtils(org.apache.commons.lang3.ArrayUtils) MessageThrottler(au.edu.wehi.idsv.util.MessageThrottler) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) IntervalUtil(au.edu.wehi.idsv.util.IntervalUtil) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SAMTag(htsjdk.samtools.SAMTag) Lists(com.google.common.collect.Lists) StringUtil(htsjdk.samtools.util.StringUtil) ReferenceLookup(au.edu.wehi.idsv.picard.ReferenceLookup) PairOrientation(htsjdk.samtools.SamPairUtil.PairOrientation) ImmutableSet(com.google.common.collect.ImmutableSet) SAMException(htsjdk.samtools.SAMException) Set(java.util.Set) SamPairUtil(htsjdk.samtools.SamPairUtil) BreakendDirection(au.edu.wehi.idsv.BreakendDirection) Alignment(au.edu.wehi.idsv.alignment.Alignment) Collectors(java.util.stream.Collectors) ComparisonChain(com.google.common.collect.ComparisonChain) Sets(com.google.common.collect.Sets) Bytes(com.google.common.primitives.Bytes) SAMRecord(htsjdk.samtools.SAMRecord) MathUtil(au.edu.wehi.idsv.util.MathUtil) List(java.util.List) AlignerFactory(au.edu.wehi.idsv.alignment.AlignerFactory) Log(htsjdk.samtools.util.Log) TreeMap(java.util.TreeMap) Ordering(com.google.common.collect.Ordering) SAMRecordCoordinateComparator(htsjdk.samtools.SAMRecordCoordinateComparator) Optional(java.util.Optional) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) ArrayDeque(java.util.ArrayDeque) Comparator(java.util.Comparator) Collections(java.util.Collections) SortedMap(java.util.SortedMap) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) ArrayDeque(java.util.ArrayDeque) SAMRecord(htsjdk.samtools.SAMRecord)

Example 2 with Alignment

use of au.edu.wehi.idsv.alignment.Alignment in project gridss by PapenfussLab.

the class SAMRecordUtil method realign.

/**
 * Performs local realignment of the given SAMRecord in a window around the
 * alignment location
 *
 * @param reference
 *            reference genome
 * @param read
 *            read
 * @param windowSize
 *            number of bases to extend window around alignment
 * @param extendWindowForClippedBases
 *            extend window for soft clipped bases
 * @return copy of realigned read if alignment changed, read if realignment
 *         did not change the alignment
 */
public static SAMRecord realign(ReferenceLookup reference, SAMRecord read, int windowSize, boolean extendWindowForClippedBases) {
    if (read.getReadUnmappedFlag())
        return read;
    SAMSequenceRecord refSeq = reference.getSequenceDictionary().getSequence(read.getReferenceIndex());
    // find reference bounds of read. Negative deletions mean we can't just
    // use
    // getAlignmentStart() and getAlignmentEnd()
    int pos = read.getAlignmentStart();
    int start = pos;
    int end = pos;
    for (CigarElement ce : read.getCigar().getCigarElements()) {
        if (ce.getOperator().consumesReferenceBases()) {
            pos += ce.getLength();
        }
        start = Math.min(start, pos);
        end = Math.max(end, pos - 1);
    }
    // extend bounds
    start -= windowSize;
    end += windowSize;
    if (extendWindowForClippedBases) {
        start -= getStartSoftClipLength(read);
        end += getEndSoftClipLength(read);
    }
    // don't overrun contig bounds
    start = Math.max(1, start);
    end = Math.min(refSeq.getSequenceLength(), end);
    byte[] ass = read.getReadBases();
    byte[] ref = reference.getSubsequenceAt(refSeq.getSequenceName(), start, end).getBases();
    if (ass == null || ref == null || ass.length == 0 || ref.length == 0) {
        return read;
    }
    // is encountered
    for (int i = 0; i < ass.length; i++) {
        if (!htsjdk.samtools.util.SequenceUtil.isValidBase(ass[i])) {
            ass[i] = 'N';
        }
    }
    for (int i = 0; i < ref.length; i++) {
        if (!htsjdk.samtools.util.SequenceUtil.isValidBase(ref[i])) {
            ref[i] = 'N';
        }
    }
    Alignment alignment = null;
    if (ass != null && ass.length > 0 && ref != null && ref.length > 0) {
        try {
            alignment = AlignerFactory.create().align_smith_waterman(ass, ref);
        } catch (Exception e) {
            // swallow and log alignment error
            if (!MessageThrottler.Current.shouldSupress(log, "local realignment failures")) {
                log.error(e, String.format("Error aligning %s to %s:%d-%d", read.getReadName(), refSeq.getSequenceName(), start, end));
            }
        }
    }
    if (alignment == null) {
        return read;
    }
    Cigar cigar = TextCigarCodec.decode(alignment.getCigar());
    int alignmentStart = start + alignment.getStartPosition();
    if (alignmentStart == read.getAlignmentStart() && cigar.equals(read.getCigar())) {
        return read;
    }
    SAMRecord copy = SAMRecordUtil.clone(read);
    if (!cigar.equals(read.getCigar())) {
        copy.setCigar(cigar);
        copy.setAttribute(SAMTag.OC.name(), read.getCigarString());
    }
    if (alignmentStart != read.getAlignmentStart()) {
        copy.setAlignmentStart(alignmentStart);
        copy.setAttribute(SAMTag.OP.name(), read.getAlignmentStart());
    }
    return copy;
}
Also used : Alignment(au.edu.wehi.idsv.alignment.Alignment) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) CigarElement(htsjdk.samtools.CigarElement) SAMException(htsjdk.samtools.SAMException)

Aggregations

Alignment (au.edu.wehi.idsv.alignment.Alignment)2 Cigar (htsjdk.samtools.Cigar)2 CigarElement (htsjdk.samtools.CigarElement)2 SAMException (htsjdk.samtools.SAMException)2 SAMRecord (htsjdk.samtools.SAMRecord)2 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)2 BreakendDirection (au.edu.wehi.idsv.BreakendDirection)1 AlignerFactory (au.edu.wehi.idsv.alignment.AlignerFactory)1 ReferenceLookup (au.edu.wehi.idsv.picard.ReferenceLookup)1 IntervalUtil (au.edu.wehi.idsv.util.IntervalUtil)1 MathUtil (au.edu.wehi.idsv.util.MathUtil)1 MessageThrottler (au.edu.wehi.idsv.util.MessageThrottler)1 ComparisonChain (com.google.common.collect.ComparisonChain)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Iterables (com.google.common.collect.Iterables)1 Lists (com.google.common.collect.Lists)1 Ordering (com.google.common.collect.Ordering)1 Sets (com.google.common.collect.Sets)1 Booleans (com.google.common.primitives.Booleans)1 Bytes (com.google.common.primitives.Bytes)1