use of htsjdk.samtools.CigarElement in project gridss by PapenfussLab.
the class JAlignerAligner method alignmentToCigar.
/**
* Converts an alignment of a read (seq2) against a reference sequence (seq1) to a read cigar
* @param alignment computed alignment
* @return CIGAR of seq2 relative to reference seq1
*/
private static Cigar alignmentToCigar(jaligner.Alignment alignment) {
List<CigarElement> cigar = new ArrayList<CigarElement>();
if (alignment.getStart2() > 0) {
cigar.add(new CigarElement(alignment.getStart2(), CigarOperator.SOFT_CLIP));
}
char[] seq1 = alignment.getSequence1();
char[] seq2 = alignment.getSequence2();
int length = 0;
CigarOperator op = CigarOperator.MATCH_OR_MISMATCH;
for (int i = 0; i < seq1.length; i++) {
CigarOperator currentOp;
if (seq1[i] == jaligner.Alignment.GAP) {
currentOp = CigarOperator.INSERTION;
} else if (seq2[i] == jaligner.Alignment.GAP) {
currentOp = CigarOperator.DELETION;
} else {
currentOp = CigarOperator.MATCH_OR_MISMATCH;
}
if (currentOp != op) {
if (length > 0) {
cigar.add(new CigarElement(length, op));
}
op = currentOp;
length = 0;
}
length++;
}
cigar.add(new CigarElement(length, op));
int basesConsumed = new Cigar(cigar).getReadLength();
int seqLength = alignment.getOriginalSequence2().length();
if (basesConsumed != seqLength) {
cigar.add(new CigarElement(seqLength - basesConsumed, CigarOperator.SOFT_CLIP));
}
return new Cigar(cigar);
}
use of htsjdk.samtools.CigarElement in project gridss by PapenfussLab.
the class BamToBed method getSpanningDeletion.
public static List<SimpleBEDFeature> getSpanningDeletion(final SAMRecord r, final int minMapq, final int minIndelSize, final int maxMappedBases, final int minIndelComponentSize) {
List<SimpleBEDFeature> list = new ArrayList<SimpleBEDFeature>();
if (r.getMappingQuality() < minMapq)
return list;
List<CigarElement> ce = r.getCigar().getCigarElements();
int offset = 0;
for (int i = 0; i < ce.size(); i++) {
CigarElement e = ce.get(i);
if (e.getOperator() == CigarOperator.D) {
if (maxMappedBases > 0) {
throw new RuntimeException("Indel merging via MAX_INTERVENING_MAPPED_BASES not yet implemented.");
}
if (e.getLength() > minIndelSize) {
list.add(new SimpleBEDFeature(r.getAlignmentStart() + offset, r.getAlignmentStart() + offset + e.getLength(), r.getReferenceName()));
}
}
if (e.getOperator().consumesReferenceBases()) {
offset += e.getLength();
}
}
return list;
}
use of htsjdk.samtools.CigarElement in project gridss by PapenfussLab.
the class LongReadSupportFinder method countDeletions.
private int countDeletions(SAMRecord r, int minDeletionSize, int startPosition, int endPosition) {
int position = r.getAlignmentStart();
int count = 0;
for (CigarElement ce : r.getCigar().getCigarElements()) {
if (position >= endPosition)
return count;
if (position >= startPosition && position <= endPosition) {
if (ce.getOperator() == CigarOperator.D && ce.getLength() > minDeletionSize && position + ce.getLength() < endPosition) {
// deletion contained within the interval in question
count += ce.getLength();
}
}
if (ce.getOperator().consumesReferenceBases()) {
position += ce.getLength();
}
}
return count;
}
use of htsjdk.samtools.CigarElement in project gridss by PapenfussLab.
the class AssemblyFactory method truncateAnchorToContigBounds.
private static void truncateAnchorToContigBounds(ProcessingContext processContext, SAMRecord r) {
int end = processContext.getDictionary().getSequences().get(r.getReferenceIndex()).getSequenceLength();
if (r.getAlignmentStart() < 1) {
int basesToTruncate = 1 - r.getAlignmentStart();
ArrayList<CigarElement> cigar = new ArrayList<>(r.getCigar().getCigarElements());
int len = cigar.get(0).getLength() - basesToTruncate;
if (len < 0) {
if (!MessageThrottler.Current.shouldSupress(log, "truncating assembly to contig bounds")) {
log.warn(String.format("Attempted to truncate %d bases from start of %s with CIGAR %s", basesToTruncate, r.getReadName(), r.getCigarString()));
}
} else {
cigar.set(0, new CigarElement(len, cigar.get(0).getOperator()));
r.setAlignmentStart(1);
r.setCigar(new Cigar(cigar));
byte[] b = r.getReadBases();
if (b != null && b != SAMRecord.NULL_SEQUENCE) {
r.setReadBases(Arrays.copyOfRange(b, basesToTruncate, b.length));
}
byte[] q = r.getBaseQualities();
if (q != null && q != SAMRecord.NULL_QUALS) {
r.setBaseQualities(Arrays.copyOfRange(q, basesToTruncate, q.length));
}
}
}
if (r.getAlignmentEnd() > end) {
int basesToTruncate = r.getAlignmentEnd() - end;
ArrayList<CigarElement> cigar = new ArrayList<>(r.getCigar().getCigarElements());
CigarElement ce = cigar.get(cigar.size() - 1);
int len = ce.getLength() - basesToTruncate;
if (len < 1) {
if (!MessageThrottler.Current.shouldSupress(log, "truncating assembly to contig bounds")) {
log.warn(String.format("Attempted to truncate %d bases from end of %s with CIGAR %s", basesToTruncate, r.getReadName(), r.getCigarString()));
}
} else {
ce = new CigarElement(len, ce.getOperator());
cigar.set(cigar.size() - 1, ce);
r.setCigar(new Cigar(cigar));
byte[] b = r.getReadBases();
if (b != null && b != SAMRecord.NULL_SEQUENCE) {
r.setReadBases(Arrays.copyOf(b, b.length - basesToTruncate));
}
byte[] q = r.getBaseQualities();
if (q != null && q != SAMRecord.NULL_QUALS) {
r.setBaseQualities(Arrays.copyOf(q, q.length - basesToTruncate));
}
}
}
}
use of htsjdk.samtools.CigarElement in project jvarkit by lindenb.
the class BamToSVG method printSamRecord.
private void printSamRecord(XMLStreamWriter w, final SAMRecord record, final Map<Integer, Counter<Character>> consensus) throws XMLStreamException {
double mid_y = this.featureHeight / 2.0;
final double y_h95 = this.featureHeight * 0.90;
final double y_top5 = (this.featureHeight - y_h95) / 2.0;
final double y_bot5 = y_top5 + y_h95;
final double arrow_w = this.featureHeight / 3.0;
w.writeStartElement(SVG.NS, "g");
String title = record.getReadName();
w.writeAttribute("title", title);
/* print that sam record */
final int unclipped_start = record.getUnclippedStart();
Cigar cigar = record.getCigar();
if (cigar == null)
return;
byte[] bases = record.getReadBases();
if (bases == null)
return;
byte[] qualities = record.getBaseQualities();
if (qualities == null)
return;
int readPos = 0;
Map<Integer, String> pos2insertions = new HashMap<Integer, String>();
List<CigarElement> cigarElements = cigar.getCigarElements();
/* find position of arrow */
int arrow_cigar_index = -1;
for (int cidx = 0; cidx < cigarElements.size(); cidx++) {
CigarElement ce = cigarElements.get(cidx);
CigarOperator op = ce.getOperator();
switch(op) {
// threw
case H:
// threw
case S:
if (!this.showClipping)
break;
case M:
case EQ:
case X:
{
arrow_cigar_index = cidx;
}
default:
break;
}
if (record.getReadNegativeStrandFlag() && arrow_cigar_index != -1) {
break;
}
}
int refPos = unclipped_start;
/* loop over cigar string */
for (int cidx = 0; cidx < cigarElements.size(); cidx++) {
CigarElement ce = cigarElements.get(cidx);
CigarOperator op = ce.getOperator();
boolean in_clip = false;
switch(ce.getOperator()) {
case D:
case N:
{
int c_start = trim(refPos);
int c_end = trim(refPos + ce.getLength());
if (c_start < c_end) {
w.writeEmptyElement("line");
w.writeAttribute("class", "indel");
w.writeAttribute("title", op.name() + ce.getLength());
w.writeAttribute("x1", format(baseToPixel(c_start)));
w.writeAttribute("x2", format(baseToPixel(c_end)));
w.writeAttribute("y1", format(mid_y));
w.writeAttribute("y2", format(mid_y));
}
refPos += ce.getLength();
break;
}
case I:
{
StringBuilder sb = new StringBuilder();
for (int i = 0; i < ce.getLength(); ++i) {
sb.append((char) bases[readPos++]);
}
pos2insertions.put(refPos, sb.toString());
break;
}
case H:
{
if (!this.showClipping) {
refPos += ce.getLength();
break;
}
in_clip = true;
// NO break;
}
case S:
{
if (!this.showClipping) {
readPos += ce.getLength();
refPos += ce.getLength();
break;
}
in_clip = true;
// NO break;
}
case X:
case EQ:
case M:
{
int match_start = refPos;
int match_end = refPos + ce.getLength();
// print sam background
StringBuilder sb = new StringBuilder();
if (record.getReadNegativeStrandFlag() && match_start >= this.interval.start && match_start <= this.interval.end && cidx == arrow_cigar_index) {
sb.append(" M ").append(format(baseToPixel(match_start) + arrow_w)).append(',').append(y_top5);
sb.append(" h ").append(format(baseToPixel(trim(match_end)) - baseToPixel(match_start) - arrow_w));
sb.append(" v ").append(format(y_h95));
sb.append(" h ").append(format(-(baseToPixel(trim(match_end)) - baseToPixel(match_start) - arrow_w)));
sb.append(" L ").append(format(baseToPixel(match_start))).append(',').append(mid_y);
sb.append(" Z");
} else if (!record.getReadNegativeStrandFlag() && match_end >= this.interval.start && match_end <= this.interval.end && cidx == arrow_cigar_index) {
sb.append(" M ").append(format(baseToPixel(match_end) - arrow_w)).append(',').append(y_top5);
sb.append(" h ").append(format(-(baseToPixel(match_end) - baseToPixel(trim(match_start)) - arrow_w)));
sb.append(" v ").append(format(y_h95));
sb.append(" h ").append(format(baseToPixel(match_end) - baseToPixel(trim(match_start)) - arrow_w));
sb.append(" L ").append(format(baseToPixel(match_end))).append(',').append(mid_y);
sb.append(" Z");
} else {
sb.append(" M ").append(format(baseToPixel(trim(match_start)))).append(',').append(y_top5);
sb.append(" h ").append(format(baseToPixel(trim(match_end)) - baseToPixel(trim(match_start))));
sb.append(" v ").append(format(y_h95));
sb.append(" h ").append(format(-(baseToPixel(trim(match_end)) - baseToPixel(trim(match_start)))));
sb.append(" Z");
}
w.writeEmptyElement("path");
w.writeAttribute("d", sb.toString().trim());
if (ce.getOperator() == CigarOperator.H || ce.getOperator() == CigarOperator.S) {
w.writeAttribute("style", "fill:yellow;");
} else {
w.writeAttribute("style", "fill:url(#f" + record.getFlags() + ");");
}
if (op.consumesReadBases()) {
for (int i = 0; i < ce.getLength(); ++i) {
char ca = (char) bases[readPos];
if (!in_clip) {
Counter<Character> count = consensus.get(refPos + i);
if (count == null) {
count = new Counter<>();
consensus.put(refPos + i, count);
}
count.incr(ca);
}
char cb = 'N';
if (genomicSequence != null) {
cb = Character.toUpperCase(genomicSequence.charAt(refPos + i - 1));
}
if (cb != 'N' && ca != 'N' && cb != ca && !in_clip) {
w.writeEmptyElement("use");
w.writeAttribute("x", format(baseToPixel(refPos + i)));
// w.writeAttribute("y",format(y_top));never needed
w.writeAttribute("xlink", XLINK.NS, "href", "#mut");
}
if (this.interval.contains(refPos + i) && this.featureWidth > 5) {
// int qual = qualities[readPos];
w.writeEmptyElement("use");
w.writeAttribute("x", format(baseToPixel(refPos + i)));
// w.writeAttribute("y",format(y_top));never needed
w.writeAttribute("xlink", XLINK.NS, "href", "#b" + ca);
}
readPos++;
}
}
refPos += ce.getLength();
break;
}
default:
{
throw new RuntimeException("Unknown SAM op: " + ce.getOperator());
}
}
}
for (Integer pos : pos2insertions.keySet()) {
if (pos < this.interval.start)
continue;
if (pos > this.interval.end)
continue;
String insertion = pos2insertions.get(pos);
w.writeEmptyElement("line");
double x = baseToPixel(pos);
w.writeAttribute("title", "Insertion " + insertion);
w.writeAttribute("class", "insert");
w.writeAttribute("x1", format(x));
w.writeAttribute("x2", format(x));
w.writeAttribute("y1", format(y_top5));
w.writeAttribute("y2", format(y_bot5));
}
// g
w.writeEndElement();
}
Aggregations