Search in sources :

Example 61 with CigarElement

use of htsjdk.samtools.CigarElement in project gridss by PapenfussLab.

the class JAlignerAligner method alignmentToCigar.

/**
 * Converts an alignment of a read (seq2) against a reference sequence (seq1) to a read cigar
 * @param alignment computed alignment
 * @return CIGAR of seq2 relative to reference seq1
 */
private static Cigar alignmentToCigar(jaligner.Alignment alignment) {
    List<CigarElement> cigar = new ArrayList<CigarElement>();
    if (alignment.getStart2() > 0) {
        cigar.add(new CigarElement(alignment.getStart2(), CigarOperator.SOFT_CLIP));
    }
    char[] seq1 = alignment.getSequence1();
    char[] seq2 = alignment.getSequence2();
    int length = 0;
    CigarOperator op = CigarOperator.MATCH_OR_MISMATCH;
    for (int i = 0; i < seq1.length; i++) {
        CigarOperator currentOp;
        if (seq1[i] == jaligner.Alignment.GAP) {
            currentOp = CigarOperator.INSERTION;
        } else if (seq2[i] == jaligner.Alignment.GAP) {
            currentOp = CigarOperator.DELETION;
        } else {
            currentOp = CigarOperator.MATCH_OR_MISMATCH;
        }
        if (currentOp != op) {
            if (length > 0) {
                cigar.add(new CigarElement(length, op));
            }
            op = currentOp;
            length = 0;
        }
        length++;
    }
    cigar.add(new CigarElement(length, op));
    int basesConsumed = new Cigar(cigar).getReadLength();
    int seqLength = alignment.getOriginalSequence2().length();
    if (basesConsumed != seqLength) {
        cigar.add(new CigarElement(seqLength - basesConsumed, CigarOperator.SOFT_CLIP));
    }
    return new Cigar(cigar);
}
Also used : Cigar(htsjdk.samtools.Cigar) ArrayList(java.util.ArrayList) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement)

Example 62 with CigarElement

use of htsjdk.samtools.CigarElement in project gridss by PapenfussLab.

the class BamToBed method getSpanningDeletion.

public static List<SimpleBEDFeature> getSpanningDeletion(final SAMRecord r, final int minMapq, final int minIndelSize, final int maxMappedBases, final int minIndelComponentSize) {
    List<SimpleBEDFeature> list = new ArrayList<SimpleBEDFeature>();
    if (r.getMappingQuality() < minMapq)
        return list;
    List<CigarElement> ce = r.getCigar().getCigarElements();
    int offset = 0;
    for (int i = 0; i < ce.size(); i++) {
        CigarElement e = ce.get(i);
        if (e.getOperator() == CigarOperator.D) {
            if (maxMappedBases > 0) {
                throw new RuntimeException("Indel merging via MAX_INTERVENING_MAPPED_BASES not yet implemented.");
            }
            if (e.getLength() > minIndelSize) {
                list.add(new SimpleBEDFeature(r.getAlignmentStart() + offset, r.getAlignmentStart() + offset + e.getLength(), r.getReferenceName()));
            }
        }
        if (e.getOperator().consumesReferenceBases()) {
            offset += e.getLength();
        }
    }
    return list;
}
Also used : SimpleBEDFeature(htsjdk.tribble.bed.SimpleBEDFeature) ArrayList(java.util.ArrayList) CigarElement(htsjdk.samtools.CigarElement)

Example 63 with CigarElement

use of htsjdk.samtools.CigarElement in project gridss by PapenfussLab.

the class LongReadSupportFinder method countDeletions.

private int countDeletions(SAMRecord r, int minDeletionSize, int startPosition, int endPosition) {
    int position = r.getAlignmentStart();
    int count = 0;
    for (CigarElement ce : r.getCigar().getCigarElements()) {
        if (position >= endPosition)
            return count;
        if (position >= startPosition && position <= endPosition) {
            if (ce.getOperator() == CigarOperator.D && ce.getLength() > minDeletionSize && position + ce.getLength() < endPosition) {
                // deletion contained within the interval in question
                count += ce.getLength();
            }
        }
        if (ce.getOperator().consumesReferenceBases()) {
            position += ce.getLength();
        }
    }
    return count;
}
Also used : CigarElement(htsjdk.samtools.CigarElement)

Example 64 with CigarElement

use of htsjdk.samtools.CigarElement in project gridss by PapenfussLab.

the class AssemblyFactory method truncateAnchorToContigBounds.

private static void truncateAnchorToContigBounds(ProcessingContext processContext, SAMRecord r) {
    int end = processContext.getDictionary().getSequences().get(r.getReferenceIndex()).getSequenceLength();
    if (r.getAlignmentStart() < 1) {
        int basesToTruncate = 1 - r.getAlignmentStart();
        ArrayList<CigarElement> cigar = new ArrayList<>(r.getCigar().getCigarElements());
        int len = cigar.get(0).getLength() - basesToTruncate;
        if (len < 0) {
            if (!MessageThrottler.Current.shouldSupress(log, "truncating assembly to contig bounds")) {
                log.warn(String.format("Attempted to truncate %d bases from start of %s with CIGAR %s", basesToTruncate, r.getReadName(), r.getCigarString()));
            }
        } else {
            cigar.set(0, new CigarElement(len, cigar.get(0).getOperator()));
            r.setAlignmentStart(1);
            r.setCigar(new Cigar(cigar));
            byte[] b = r.getReadBases();
            if (b != null && b != SAMRecord.NULL_SEQUENCE) {
                r.setReadBases(Arrays.copyOfRange(b, basesToTruncate, b.length));
            }
            byte[] q = r.getBaseQualities();
            if (q != null && q != SAMRecord.NULL_QUALS) {
                r.setBaseQualities(Arrays.copyOfRange(q, basesToTruncate, q.length));
            }
        }
    }
    if (r.getAlignmentEnd() > end) {
        int basesToTruncate = r.getAlignmentEnd() - end;
        ArrayList<CigarElement> cigar = new ArrayList<>(r.getCigar().getCigarElements());
        CigarElement ce = cigar.get(cigar.size() - 1);
        int len = ce.getLength() - basesToTruncate;
        if (len < 1) {
            if (!MessageThrottler.Current.shouldSupress(log, "truncating assembly to contig bounds")) {
                log.warn(String.format("Attempted to truncate %d bases from end of %s with CIGAR %s", basesToTruncate, r.getReadName(), r.getCigarString()));
            }
        } else {
            ce = new CigarElement(len, ce.getOperator());
            cigar.set(cigar.size() - 1, ce);
            r.setCigar(new Cigar(cigar));
            byte[] b = r.getReadBases();
            if (b != null && b != SAMRecord.NULL_SEQUENCE) {
                r.setReadBases(Arrays.copyOf(b, b.length - basesToTruncate));
            }
            byte[] q = r.getBaseQualities();
            if (q != null && q != SAMRecord.NULL_QUALS) {
                r.setBaseQualities(Arrays.copyOf(q, q.length - basesToTruncate));
            }
        }
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) ArrayList(java.util.ArrayList) CigarElement(htsjdk.samtools.CigarElement)

Example 65 with CigarElement

use of htsjdk.samtools.CigarElement in project jvarkit by lindenb.

the class BamToSVG method printSamRecord.

private void printSamRecord(XMLStreamWriter w, final SAMRecord record, final Map<Integer, Counter<Character>> consensus) throws XMLStreamException {
    double mid_y = this.featureHeight / 2.0;
    final double y_h95 = this.featureHeight * 0.90;
    final double y_top5 = (this.featureHeight - y_h95) / 2.0;
    final double y_bot5 = y_top5 + y_h95;
    final double arrow_w = this.featureHeight / 3.0;
    w.writeStartElement(SVG.NS, "g");
    String title = record.getReadName();
    w.writeAttribute("title", title);
    /* print that sam record */
    final int unclipped_start = record.getUnclippedStart();
    Cigar cigar = record.getCigar();
    if (cigar == null)
        return;
    byte[] bases = record.getReadBases();
    if (bases == null)
        return;
    byte[] qualities = record.getBaseQualities();
    if (qualities == null)
        return;
    int readPos = 0;
    Map<Integer, String> pos2insertions = new HashMap<Integer, String>();
    List<CigarElement> cigarElements = cigar.getCigarElements();
    /* find position of arrow */
    int arrow_cigar_index = -1;
    for (int cidx = 0; cidx < cigarElements.size(); cidx++) {
        CigarElement ce = cigarElements.get(cidx);
        CigarOperator op = ce.getOperator();
        switch(op) {
            // threw
            case H:
            // threw
            case S:
                if (!this.showClipping)
                    break;
            case M:
            case EQ:
            case X:
                {
                    arrow_cigar_index = cidx;
                }
            default:
                break;
        }
        if (record.getReadNegativeStrandFlag() && arrow_cigar_index != -1) {
            break;
        }
    }
    int refPos = unclipped_start;
    /* loop over cigar string */
    for (int cidx = 0; cidx < cigarElements.size(); cidx++) {
        CigarElement ce = cigarElements.get(cidx);
        CigarOperator op = ce.getOperator();
        boolean in_clip = false;
        switch(ce.getOperator()) {
            case D:
            case N:
                {
                    int c_start = trim(refPos);
                    int c_end = trim(refPos + ce.getLength());
                    if (c_start < c_end) {
                        w.writeEmptyElement("line");
                        w.writeAttribute("class", "indel");
                        w.writeAttribute("title", op.name() + ce.getLength());
                        w.writeAttribute("x1", format(baseToPixel(c_start)));
                        w.writeAttribute("x2", format(baseToPixel(c_end)));
                        w.writeAttribute("y1", format(mid_y));
                        w.writeAttribute("y2", format(mid_y));
                    }
                    refPos += ce.getLength();
                    break;
                }
            case I:
                {
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; i < ce.getLength(); ++i) {
                        sb.append((char) bases[readPos++]);
                    }
                    pos2insertions.put(refPos, sb.toString());
                    break;
                }
            case H:
                {
                    if (!this.showClipping) {
                        refPos += ce.getLength();
                        break;
                    }
                    in_clip = true;
                // NO break;
                }
            case S:
                {
                    if (!this.showClipping) {
                        readPos += ce.getLength();
                        refPos += ce.getLength();
                        break;
                    }
                    in_clip = true;
                // NO break;
                }
            case X:
            case EQ:
            case M:
                {
                    int match_start = refPos;
                    int match_end = refPos + ce.getLength();
                    // print sam background
                    StringBuilder sb = new StringBuilder();
                    if (record.getReadNegativeStrandFlag() && match_start >= this.interval.start && match_start <= this.interval.end && cidx == arrow_cigar_index) {
                        sb.append(" M ").append(format(baseToPixel(match_start) + arrow_w)).append(',').append(y_top5);
                        sb.append(" h ").append(format(baseToPixel(trim(match_end)) - baseToPixel(match_start) - arrow_w));
                        sb.append(" v ").append(format(y_h95));
                        sb.append(" h ").append(format(-(baseToPixel(trim(match_end)) - baseToPixel(match_start) - arrow_w)));
                        sb.append(" L ").append(format(baseToPixel(match_start))).append(',').append(mid_y);
                        sb.append(" Z");
                    } else if (!record.getReadNegativeStrandFlag() && match_end >= this.interval.start && match_end <= this.interval.end && cidx == arrow_cigar_index) {
                        sb.append(" M ").append(format(baseToPixel(match_end) - arrow_w)).append(',').append(y_top5);
                        sb.append(" h ").append(format(-(baseToPixel(match_end) - baseToPixel(trim(match_start)) - arrow_w)));
                        sb.append(" v ").append(format(y_h95));
                        sb.append(" h ").append(format(baseToPixel(match_end) - baseToPixel(trim(match_start)) - arrow_w));
                        sb.append(" L ").append(format(baseToPixel(match_end))).append(',').append(mid_y);
                        sb.append(" Z");
                    } else {
                        sb.append(" M ").append(format(baseToPixel(trim(match_start)))).append(',').append(y_top5);
                        sb.append(" h ").append(format(baseToPixel(trim(match_end)) - baseToPixel(trim(match_start))));
                        sb.append(" v ").append(format(y_h95));
                        sb.append(" h ").append(format(-(baseToPixel(trim(match_end)) - baseToPixel(trim(match_start)))));
                        sb.append(" Z");
                    }
                    w.writeEmptyElement("path");
                    w.writeAttribute("d", sb.toString().trim());
                    if (ce.getOperator() == CigarOperator.H || ce.getOperator() == CigarOperator.S) {
                        w.writeAttribute("style", "fill:yellow;");
                    } else {
                        w.writeAttribute("style", "fill:url(#f" + record.getFlags() + ");");
                    }
                    if (op.consumesReadBases()) {
                        for (int i = 0; i < ce.getLength(); ++i) {
                            char ca = (char) bases[readPos];
                            if (!in_clip) {
                                Counter<Character> count = consensus.get(refPos + i);
                                if (count == null) {
                                    count = new Counter<>();
                                    consensus.put(refPos + i, count);
                                }
                                count.incr(ca);
                            }
                            char cb = 'N';
                            if (genomicSequence != null) {
                                cb = Character.toUpperCase(genomicSequence.charAt(refPos + i - 1));
                            }
                            if (cb != 'N' && ca != 'N' && cb != ca && !in_clip) {
                                w.writeEmptyElement("use");
                                w.writeAttribute("x", format(baseToPixel(refPos + i)));
                                // w.writeAttribute("y",format(y_top));never needed
                                w.writeAttribute("xlink", XLINK.NS, "href", "#mut");
                            }
                            if (this.interval.contains(refPos + i) && this.featureWidth > 5) {
                                // int qual = qualities[readPos];
                                w.writeEmptyElement("use");
                                w.writeAttribute("x", format(baseToPixel(refPos + i)));
                                // w.writeAttribute("y",format(y_top));never needed
                                w.writeAttribute("xlink", XLINK.NS, "href", "#b" + ca);
                            }
                            readPos++;
                        }
                    }
                    refPos += ce.getLength();
                    break;
                }
            default:
                {
                    throw new RuntimeException("Unknown SAM op: " + ce.getOperator());
                }
        }
    }
    for (Integer pos : pos2insertions.keySet()) {
        if (pos < this.interval.start)
            continue;
        if (pos > this.interval.end)
            continue;
        String insertion = pos2insertions.get(pos);
        w.writeEmptyElement("line");
        double x = baseToPixel(pos);
        w.writeAttribute("title", "Insertion " + insertion);
        w.writeAttribute("class", "insert");
        w.writeAttribute("x1", format(x));
        w.writeAttribute("x2", format(x));
        w.writeAttribute("y1", format(y_top5));
        w.writeAttribute("y2", format(y_bot5));
    }
    // g
    w.writeEndElement();
}
Also used : HashMap(java.util.HashMap) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) Cigar(htsjdk.samtools.Cigar) Counter(com.github.lindenb.jvarkit.util.Counter)

Aggregations

CigarElement (htsjdk.samtools.CigarElement)164 Cigar (htsjdk.samtools.Cigar)97 ArrayList (java.util.ArrayList)50 SAMRecord (htsjdk.samtools.SAMRecord)49 CigarOperator (htsjdk.samtools.CigarOperator)34 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)32 SamReader (htsjdk.samtools.SamReader)31 SAMFileHeader (htsjdk.samtools.SAMFileHeader)24 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)22 Test (org.testng.annotations.Test)19 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)17 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)17 File (java.io.File)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Interval (htsjdk.samtools.util.Interval)14 IOException (java.io.IOException)14 VisibleForTesting (com.google.common.annotations.VisibleForTesting)13 List (java.util.List)13 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)13