use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class HaplotypeUnitTest method testComplexInsertionAllele.
@Test
public void testComplexInsertionAllele() {
final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
final ArrayList<CigarElement> h1CigarList = new ArrayList<>();
h1CigarList.add(new CigarElement(4, CigarOperator.M));
h1CigarList.add(new CigarElement(10, CigarOperator.I));
h1CigarList.add(new CigarElement(8, CigarOperator.M));
h1CigarList.add(new CigarElement(3, CigarOperator.D));
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("C", "CACTT", 6, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
basicInsertTest("G", "GACTT", 16, h1Cigar, bases, h1bases);
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class HaplotypeUnitTest method testSimpleDeletionAllele.
@Test
public void testSimpleDeletionAllele() {
final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
final ArrayList<CigarElement> h1CigarList = new ArrayList<>();
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("ACTGG", "A", 0, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAGTCAACTGGTCAACTGGTCA";
basicInsertTest("AACTG", "A", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
basicInsertTest("ACTGG", "A", 16, h1Cigar, bases, h1bases);
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class HaplotypeUnitTest method testSimpleSNPAllele.
@Test
public void testSimpleSNPAllele() {
final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";
final ArrayList<CigarElement> h1CigarList = new ArrayList<>();
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AGTGGTCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("C", "G", 1, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCTACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("A", "T", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAAATGGTCAACTGGTCA";
basicInsertTest("C", "A", 17, h1Cigar, bases, h1bases);
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class GappedAlignmentSplitter method split.
/**
* Splits a gapped alignment into multiple alignment regions, based on input sensitivity: i.e. if the gap(s) in the input alignment
* is equal to or larger than the size specified by {@code sensitivity}, two alignment regions will be generated.
*
* To fulfill this functionality, needs to accomplish three key tasks correctly:
* <ul>
* <li>generate appropriate cigar,</li>
* <li>infer reference coordinates,</li>
* <li>infer contig coordinates</li>
* </ul>
* As an example, an alignment with CIGAR "397S118M2D26M6I50M7I26M1I8M13D72M398S", when {@code sensitivity} is set to "1", should be split into 5 alignments
* <ul>
* <li>"397S118M594S"</li>
* <li>"515S26M568S"</li>
* <li>"547S50M512S"</li>
* <li>"631S8M470S"</li>
* <li>"639S72M398S"</li>
* </ul>
* On the other hand, an alignment with CIGAR "10M10D10M60I10M10I10M50D10M", when {@code sensitivity} is set to "50", should be split into 3 alignments
* <ul>
* <li>"10M10D10M100S"</li>
* <li>"80S10M10I10M10S"</li>
* <li>"110S10M"</li>
* </ul>
* And when an alignment has hard clipping adjacent to soft clippings, e.g. "1H2S3M5I10M20D6M7S8H", it should be split into alignments with CIGAR resembling the original CIGAR as much as possible:
* <ul>
* <li>"1H2S3M28S8H"</li>
* <li>"1H10S10M13S8H"</li>
* <li>"1H20S6M7S8H"</li>
* </ul>
*
* @return an iterable of size >= 1. if size==1, the returned iterable contains only the input (i.e. either no gap or hasn't reached sensitivity)
*/
@VisibleForTesting
public static Iterable<AlignedAssembly.AlignmentInterval> split(final AlignedAssembly.AlignmentInterval oneRegion, final int sensitivity, final int unclippedContigLen) {
final List<CigarElement> cigarElements = checkCigarAndConvertTerminalInsertionToSoftClip(oneRegion.cigarAlong5to3DirectionOfContig);
if (cigarElements.size() == 1)
return new ArrayList<>(Collections.singletonList(oneRegion));
// blunt guess
final List<AlignedAssembly.AlignmentInterval> result = new ArrayList<>(3);
final int originalMapQ = oneRegion.mapQual;
final List<CigarElement> cigarMemoryList = new ArrayList<>();
final int clippedNBasesFromStart = SVVariantDiscoveryUtils.getNumClippedBases(true, cigarElements);
final int hardClippingAtBeginning = cigarElements.get(0).getOperator() == CigarOperator.H ? cigarElements.get(0).getLength() : 0;
final int hardClippingAtEnd = (cigarElements.get(cigarElements.size() - 1).getOperator() == CigarOperator.H) ? cigarElements.get(cigarElements.size() - 1).getLength() : 0;
final CigarElement hardClippingAtBeginningMaybeNull = hardClippingAtBeginning == 0 ? null : new CigarElement(hardClippingAtBeginning, CigarOperator.H);
int contigIntervalStart = 1 + clippedNBasesFromStart;
// we are walking along the contig following the cigar, which indicates that we might be walking backwards on the reference if oneRegion.forwardStrand==false
int refBoundary1stInTheDirectionOfContig = oneRegion.forwardStrand ? oneRegion.referenceInterval.getStart() : oneRegion.referenceInterval.getEnd();
for (final CigarElement cigarElement : cigarElements) {
final CigarOperator op = cigarElement.getOperator();
final int operatorLen = cigarElement.getLength();
switch(op) {
case M:
case EQ:
case X:
case S:
case H:
cigarMemoryList.add(cigarElement);
break;
case I:
case D:
if (operatorLen < sensitivity) {
cigarMemoryList.add(cigarElement);
break;
}
// collapse cigar memory list into a single cigar for ref & contig interval computation
final Cigar memoryCigar = new Cigar(cigarMemoryList);
final int effectiveReadLen = memoryCigar.getReadLength() + SVVariantDiscoveryUtils.getTotalHardClipping(memoryCigar) - SVVariantDiscoveryUtils.getNumClippedBases(true, memoryCigar);
// task 1: infer reference interval taking into account of strand
final SimpleInterval referenceInterval;
if (oneRegion.forwardStrand) {
referenceInterval = new SimpleInterval(oneRegion.referenceInterval.getContig(), refBoundary1stInTheDirectionOfContig, refBoundary1stInTheDirectionOfContig + (memoryCigar.getReferenceLength() - 1));
} else {
referenceInterval = new SimpleInterval(oneRegion.referenceInterval.getContig(), // step backward
refBoundary1stInTheDirectionOfContig - (memoryCigar.getReferenceLength() - 1), refBoundary1stInTheDirectionOfContig);
}
// task 2: infer contig interval
final int contigIntervalEnd = contigIntervalStart + effectiveReadLen - 1;
// task 3: now add trailing cigar element and create the real cigar for the to-be-returned AR
cigarMemoryList.add(new CigarElement(unclippedContigLen - contigIntervalEnd - hardClippingAtEnd, CigarOperator.S));
if (hardClippingAtEnd != 0) {
// be faithful to hard clipping (as the accompanying bases have been hard-clipped away)
cigarMemoryList.add(new CigarElement(hardClippingAtEnd, CigarOperator.H));
}
final Cigar cigarForNewAlignmentInterval = new Cigar(cigarMemoryList);
final AlignedAssembly.AlignmentInterval split = new AlignedAssembly.AlignmentInterval(referenceInterval, contigIntervalStart, contigIntervalEnd, cigarForNewAlignmentInterval, oneRegion.forwardStrand, originalMapQ, SVConstants.DiscoveryStepConstants.ARTIFICIAL_MISMATCH);
result.add(split);
// update cigar memory
cigarMemoryList.clear();
if (hardClippingAtBeginningMaybeNull != null) {
// be faithful about hard clippings
cigarMemoryList.add(hardClippingAtBeginningMaybeNull);
}
cigarMemoryList.add(new CigarElement(contigIntervalEnd - hardClippingAtBeginning + (op.consumesReadBases() ? operatorLen : 0), CigarOperator.S));
// update pointers into reference and contig
final int refBoundaryAdvance = op.consumesReadBases() ? memoryCigar.getReferenceLength() : memoryCigar.getReferenceLength() + operatorLen;
refBoundary1stInTheDirectionOfContig += oneRegion.forwardStrand ? refBoundaryAdvance : -refBoundaryAdvance;
contigIntervalStart += op.consumesReadBases() ? effectiveReadLen + operatorLen : effectiveReadLen;
break;
default:
// TODO: 1/20/17 still not quite sure if this is quite right, it doesn't blow up on NA12878 WGS, but who knows what happens in the future
throw new GATKException("Alignment CIGAR contains an unexpected N or P element: " + oneRegion.toPackedString());
}
}
if (result.isEmpty()) {
return new ArrayList<>(Collections.singletonList(oneRegion));
}
final SimpleInterval lastReferenceInterval;
if (oneRegion.forwardStrand) {
lastReferenceInterval = new SimpleInterval(oneRegion.referenceInterval.getContig(), refBoundary1stInTheDirectionOfContig, oneRegion.referenceInterval.getEnd());
} else {
lastReferenceInterval = new SimpleInterval(oneRegion.referenceInterval.getContig(), oneRegion.referenceInterval.getStart(), refBoundary1stInTheDirectionOfContig);
}
final Cigar lastForwardStrandCigar = new Cigar(cigarMemoryList);
int clippedNBasesFromEnd = SVVariantDiscoveryUtils.getNumClippedBases(false, cigarElements);
result.add(new AlignedAssembly.AlignmentInterval(lastReferenceInterval, contigIntervalStart, unclippedContigLen - clippedNBasesFromEnd, lastForwardStrandCigar, oneRegion.forwardStrand, originalMapQ, SVConstants.DiscoveryStepConstants.ARTIFICIAL_MISMATCH));
return result;
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class BreakpointComplications method extractCigarForTandup.
/**
* Given a {@link AlignedAssembly.AlignmentInterval} from a pair of ARs that forms a {@link ChimericAlignment} signalling a tandem duplication,
* extract a CIGAR from the {@link AlignedAssembly.AlignmentInterval#cigarAlong5to3DirectionOfContig}
* that corresponds to the alignment between the suspected repeated sequence on reference between
* [{@code alignmentIntervalTwoReferenceIntervalSpanBegin}, {@code alignmentIntervalOneReferenceIntervalSpanEnd}],
* and the sequence in {@link AlignedAssembly.AlignmentInterval#referenceInterval}.
*/
@VisibleForTesting
static Cigar extractCigarForTandup(final AlignedAssembly.AlignmentInterval contigRegion, final int alignmentIntervalOneReferenceIntervalSpanEnd, final int alignmentIntervalTwoReferenceIntervalSpanBegin) {
final List<CigarElement> elementList = contigRegion.cigarAlong5to3DirectionOfContig.getCigarElements();
final List<CigarElement> result = new ArrayList<>(elementList.size());
final int refStart = contigRegion.referenceInterval.getStart(), refEnd = contigRegion.referenceInterval.getEnd();
final boolean isForwardStrand = contigRegion.forwardStrand;
boolean initiatedCollection = false;
int refPos = isForwardStrand ? refStart : refEnd;
for (final CigarElement cigarElement : elementList) {
final CigarOperator operator = cigarElement.getOperator();
if (!operator.isClipping()) {
final int opLen = cigarElement.getLength();
refPos += operator.consumesReferenceBases() ? (isForwardStrand ? opLen : -opLen) : 0;
final int offsetIntoRepeatRegion = isForwardStrand ? refPos - alignmentIntervalTwoReferenceIntervalSpanBegin : alignmentIntervalOneReferenceIntervalSpanEnd - refPos;
final int overshootOutOfRepeatRegion = isForwardStrand ? refPos - alignmentIntervalOneReferenceIntervalSpanEnd - 1 : alignmentIntervalTwoReferenceIntervalSpanBegin - refPos - 1;
if (offsetIntoRepeatRegion > 0) {
if (overshootOutOfRepeatRegion <= 0) {
result.add(initiatedCollection ? cigarElement : new CigarElement(offsetIntoRepeatRegion, operator));
initiatedCollection = true;
} else {
result.add(new CigarElement(opLen - overshootOutOfRepeatRegion, operator));
break;
}
}
}
}
return new Cigar(result);
}
Aggregations