use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.
the class LocusIteratorByStateUnitTest method testWholeIndelReadRepresentedTest.
/**
 * Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly
 */
@Test
public void testWholeIndelReadRepresentedTest() {
    final int firstLocus = 44367788, secondLocus = firstLocus + 1;

    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, secondLocus, 1);
    read1.setBases(Utils.dupBytes((byte) 'A', 1));
    read1.setBaseQualities(Utils.dupBytes((byte) '@', 1));
    read1.setCigar("1I");

    List<GATKRead> reads = Arrays.asList(read1);

    // create the iterator by state with the fake reads and fake records
    LocusIteratorByState li = makeLIBS(reads, null, false, header);
    while (li.hasNext()) {
        final AlignmentContext alignmentContext = li.next();
        final ReadPileup p = alignmentContext.getBasePileup();
        Assert.assertEquals(p.size(), 1);
        final PileupElement pe = p.iterator().next();
        Assert.assertTrue(pe.isBeforeInsertion());
        Assert.assertFalse(pe.isAfterInsertion());
        Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "A");
    }

    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 0, secondLocus, 10);
    read2.setBases(Utils.dupBytes((byte) 'A', 10));
    read2.setBaseQualities(Utils.dupBytes((byte) '@', 10));
    read2.setCigar("10I");

    reads = Arrays.asList(read2);

    // create the iterator by state with the fake reads and fake records
    li = makeLIBS(reads, null, false, header);
    while (li.hasNext()) {
        final AlignmentContext alignmentContext = li.next();
        final ReadPileup p = alignmentContext.getBasePileup();
        Assert.assertEquals(p.size(), 1);
        final PileupElement pe = p.iterator().next();
        Assert.assertTrue(pe.isBeforeInsertion());
        Assert.assertFalse(pe.isAfterInsertion());
        Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "AAAAAAAAAA");
    }
}
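The makeLIBS helper is not shown on this page. A minimal sketch of what it might look like, assuming the six-argument LocusIteratorByState constructor used in getPileupsOverReference further down; the parameter meanings and the sample-name derivation are guesses from the call sites, not the actual GATK test code (GATK types are assumed to be imported as in the surrounding test class):

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMReadGroupRecord;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

// Hypothetical reconstruction of the makeLIBS test helper -- not the actual GATK code.
private static LocusIteratorByState makeLIBS(final List<GATKRead> reads,
                                             final DownsamplingMethod downsampling,
                                             final boolean includeReadsWithDeletionAtLoci,
                                             final SAMFileHeader header) {
    // Derive the sample names from the header's read groups.
    final Set<String> samples = header.getReadGroups().stream()
            .map(SAMReadGroupRecord::getSample)
            .collect(Collectors.toSet());
    return new LocusIteratorByState(reads.iterator(),
            downsampling == null ? LocusIteratorByState.NO_DOWNSAMPLING : downsampling,
            false, // keepUniqueReadList (assumed)
            samples,
            header,
            includeReadsWithDeletionAtLoci);
}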
use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.
the class ReferenceConfidenceModel method calculateRefConfidence.
/**
 * Calculate the reference confidence for a single sample given its read data
 *
 * Returns a list of variant contexts, one for each position in {@code activeRegion.getLoc()}, each containing
 * detailed information about the certainty that the sample is hom-ref for each base in the region.
 *
 * @param refHaplotype the reference haplotype, used to get the reference bases across activeRegion.getLoc()
 * @param calledHaplotypes a list of haplotypes that segregate in this region, for realignment of the reads in the
 *                         readLikelihoods, corresponding to each read's best haplotype. Must contain the refHaplotype.
 * @param paddedReferenceLoc the location of refHaplotype (which might be larger than activeRegion.getLoc())
 * @param activeRegion the active region we want to get the reference confidence over
 * @param readLikelihoods a map from a single sample to its PerReadAlleleLikelihoodMap for each haplotype in calledHaplotypes
 * @param ploidyModel indicates the ploidy of each sample in {@code readLikelihoods}.
 * @param variantCalls calls made in this region. The return result will contain any variant call in this list in the
 *                     correct order by genomic position, and any variant in this list will stop us emitting a ref confidence
 *                     under any position it covers (for SNPs and insertions that is 1 bp, but for deletions it's the entire ref span)
 * @return an ordered list of variant contexts that spans activeRegion.getLoc() and includes both reference confidence
 *         contexts as well as calls from variantCalls if any were provided
 */
public List<VariantContext> calculateRefConfidence(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes,
                                                   final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion,
                                                   final ReadLikelihoods<Haplotype> readLikelihoods, final PloidyModel ploidyModel,
                                                   final List<VariantContext> variantCalls) {
    Utils.nonNull(refHaplotype, "refHaplotype cannot be null");
    Utils.nonNull(calledHaplotypes, "calledHaplotypes cannot be null");
    Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
    Utils.nonNull(paddedReferenceLoc, "paddedReferenceLoc cannot be null");
    Utils.nonNull(activeRegion, "activeRegion cannot be null");
    Utils.nonNull(readLikelihoods, "readLikelihoods cannot be null");
    Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());
    Utils.validateArg(refHaplotype.length() == activeRegion.getExtendedSpan().size(), () -> "refHaplotype length " + refHaplotype.length() + " and activeRegion extended span size " + activeRegion.getExtendedSpan().size() + " are different");
    Utils.nonNull(ploidyModel, "the ploidy model cannot be null");

    // the first sample = the only sample in reference-confidence mode.
    final int ploidy = ploidyModel.samplePloidy(0);

    final SimpleInterval refSpan = activeRegion.getSpan();
    final List<ReadPileup> refPileups = getPileupsOverReference(refHaplotype, calledHaplotypes, paddedReferenceLoc, activeRegion, refSpan, readLikelihoods);
    final byte[] ref = refHaplotype.getBases();
    final List<VariantContext> results = new ArrayList<>(refSpan.size());
    final String sampleName = readLikelihoods.getSample(0);
    final int globalRefOffset = refSpan.getStart() - activeRegion.getExtendedSpan().getStart();

    for (final ReadPileup pileup : refPileups) {
        final Locatable curPos = pileup.getLocation();
        final int offset = curPos.getStart() - refSpan.getStart();
        final VariantContext overlappingSite = getOverlappingVariantContext(curPos, variantCalls);
        if (overlappingSite != null && overlappingSite.getStart() == curPos.getStart()) {
            results.add(overlappingSite);
        } else {
            // otherwise emit a reference confidence variant context
            results.add(makeReferenceConfidenceVariantContext(ploidy, ref, sampleName, globalRefOffset, pileup, curPos, offset));
        }
    }
    return results;
}
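Two coordinate systems are in play above: the reference haplotype's bases are indexed from the start of the extended (padded) span, while the pileups walk the unpadded active span. A toy illustration of the offset arithmetic with made-up coordinates (the two offsets are presumably combined inside makeReferenceConfidenceVariantContext, which is not shown on this page):

// Illustrative numbers only -- not taken from the snippet above.
final int extendedSpanStart = 1000;  // activeRegion.getExtendedSpan().getStart()
final int refSpanStart = 1100;       // activeRegion.getSpan().getStart()
final int pileupPosition = 1150;     // curPos.getStart() for some pileup

final int globalRefOffset = refSpanStart - extendedSpanStart;  // 100
final int offset = pileupPosition - refSpanStart;              // 50
// The reference base under this pileup would then be ref[globalRefOffset + offset] == ref[150].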
use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.
the class ReferenceConfidenceModel method getPileupsOverReference.
/**
 * Get a list of pileups covering the entire active region span, in order, one for each position
 */
private List<ReadPileup> getPileupsOverReference(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes,
                                                 final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion,
                                                 final SimpleInterval activeRegionSpan, final ReadLikelihoods<Haplotype> readLikelihoods) {
    Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
    Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());

    final List<GATKRead> reads = activeRegion.getReads();
    final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, samples.asSetOfSamples(), activeRegion.getHeader(), true);

    final int startPos = activeRegionSpan.getStart();
    final List<ReadPileup> pileups = new ArrayList<>(activeRegionSpan.getEnd() - startPos);
    AlignmentContext next = libs.advanceToLocus(startPos, true);
    for (int curPos = startPos; curPos <= activeRegionSpan.getEnd(); curPos++) {
        if (next != null && next.getLocation().getStart() == curPos) {
            pileups.add(next.getBasePileup());
            next = libs.hasNext() ? libs.next() : null;
        } else {
            // no data, so we create empty pileups
            pileups.add(new ReadPileup(new SimpleInterval(activeRegionSpan.getContig(), curPos, curPos)));
        }
    }
    return pileups;
}
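Because missing loci are filled with empty pileups, the returned list is strictly positional: index i covers locus startPos + i. A usage sketch, assuming the same variables as in calculateRefConfidence above are in scope:

// Sketch only: look up the pileup at an arbitrary locus inside the active span.
final List<ReadPileup> pileups = getPileupsOverReference(refHaplotype, calledHaplotypes,
        paddedReferenceLoc, activeRegion, activeRegion.getSpan(), readLikelihoods);
final int locus = activeRegion.getSpan().getStart() + 5;  // any position inside the span
final ReadPileup atLocus = pileups.get(locus - activeRegion.getSpan().getStart());
// Holds even for loci with no read data, which get empty pileups at the right location.
assert atLocus.getLocation().getStart() == locus;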
use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.
the class CalculateMixingFractions method apply.
@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext refContext, final FeatureContext fc) {
    if (!isBiallelicSingletonHetSnp(vc)) {
        return;
    }
    final Optional<String> variantSample = StreamSupport.stream(vc.getGenotypes().spliterator(), false)
            .filter(Genotype::isHet)
            .map(Genotype::getSampleName)
            .findFirst();
    if (!variantSample.isPresent()) {
        return;
    }

    final List<GATKRead> reads = new ArrayList<>();
    final List<Integer> offsets = new ArrayList<>();
    for (final GATKRead read : readsContext) {
        if (read.failsVendorQualityCheck()) {
            continue;
        }
        // step the alignment state machine forward until it reaches (or passes) the variant start
        final AlignmentStateMachine asm = new AlignmentStateMachine(read);
        while (asm.stepForwardOnGenome() != null && asm.getGenomePosition() < vc.getStart()) {
        }
        if (asm.getGenomePosition() == vc.getStart()) {
            reads.add(read);
            offsets.add(asm.getReadOffset());
        }
    }

    final ReadPileup pileup = new ReadPileup(vc, reads, offsets);
    final byte altBase = vc.getAlternateAllele(0).getBases()[0];
    final long altCount = StreamSupport.stream(pileup.spliterator(), false)
            .filter(pe -> pe.getBase() == altBase)
            .count();
    final long totalCount = pileup.size();
    sampleCounts.get(variantSample.get()).addCounts(altCount, totalCount);
}
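The sampleCounts map's value type is not shown on this page. A hypothetical sketch of the shape it could have, given only the addCounts(altCount, totalCount) call above; the class name and the altFraction helper are inventions for illustration, not the GATK source:

// Hypothetical per-sample accumulator -- the real class behind sampleCounts is not shown here.
final class AltAndTotalCounts {
    private long altCount;
    private long totalCount;

    void addCounts(final long alt, final long total) {
        altCount += alt;
        totalCount += total;
    }

    // Observed fraction of alt-supporting bases across the sample's singleton het sites.
    double altFraction() {
        return totalCount == 0 ? 0.0 : (double) altCount / totalCount;
    }
}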
use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.
the class Pileup method apply.
@Override
public void apply(AlignmentContext alignmentContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    final String features = getFeaturesString(featureContext);
    final ReadPileup basePileup = alignmentContext.getBasePileup();
    final StringBuilder s = new StringBuilder();
    s.append(String.format("%s %s", basePileup.getPileupString(hasReference() ? (char) referenceContext.getBase() : 'N'), features));
    if (outputInsertLength) {
        s.append(" ").append(insertLengthOutput(basePileup));
    }
    if (showVerbose) {
        s.append(" ").append(createVerboseOutput(basePileup));
    }
    s.append("\n");
    out.print(s.toString());
}
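Neither insertLengthOutput nor createVerboseOutput appears on this page. A plausible sketch of the former, assuming it joins each pileup read's fragment (insert) length into a comma-separated list; this is a guess at the helper, not the actual GATK implementation:

// Hypothetical sketch -- the real GATK helper may differ.
private static String insertLengthOutput(final ReadPileup pileup) {
    return pileup.getReads().stream()
            .map(read -> String.valueOf(read.getFragmentLength()))
            .collect(java.util.stream.Collectors.joining(","));
}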