use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.
the class ASEReadCounter method apply.
@Override
public void apply(AlignmentContext alignmentContext, ReferenceContext referenceContext, FeatureContext featureContext) {
final String contig = alignmentContext.getContig();
final long position = alignmentContext.getPosition();
final char refAllele = (char) referenceContext.getBase();
final List<VariantContext> VCs = featureContext.getValues(variants);
if (VCs != null && VCs.size() > 1) {
throw new UserException("More then one variant context at position: " + contig + ":" + position);
}
if (VCs == null || VCs.isEmpty()) {
return;
}
final VariantContext vc = VCs.get(0);
if (!vc.isBiallelic()) {
logger.warn("Ignoring site: cannot run ASE on non-biallelic sites: " + vc.toString());
return;
}
if (vc.getHetCount() < 1) {
logger.warn("Ignoring site: variant is not het at postion: " + contig + ":" + position);
return;
}
if (vc.getNAlleles() == 1 || vc.getAlternateAllele(0).getBases().length == 0) {
throw new UserException("The file of variant sites must contain heterozygous sites and cannot be a GVCF file containing <NON_REF> alleles.");
}
final char altAllele = (char) vc.getAlternateAllele(0).getBases()[0];
final String siteID = vc.getID();
final ReadPileup pileup = filterPileup(alignmentContext.getBasePileup(), countType);
// count up the depths of all and QC+ bases
final String line = calculateLineForSite(pileup, siteID, refAllele, altAllele);
if (line != null) {
outputStream.println(line);
}
}
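The helpers filterPileup and calculateLineForSite are not shown here. As a rough sketch of the per-site counting step (the method name and output columns below are illustrative assumptions, not ASEReadCounter's actual format), ReadPileup can be iterated directly because it implements Iterable&lt;PileupElement&gt;:
// Illustrative sketch only: count how many pileup bases match the ref and alt alleles.
// Requires org.broadinstitute.hellbender.utils.pileup.PileupElement.
private String sketchLineForSite(final ReadPileup pileup, final String siteID, final char refAllele, final char altAllele) {
    int refCount = 0;
    int altCount = 0;
    for (final PileupElement element : pileup) {
        final char base = (char) element.getBase();
        if (base == refAllele) {
            refCount++;
        } else if (base == altAllele) {
            altCount++;
        }
    }
    // skip sites with no usable coverage, mirroring the null check on 'line' above
    if (refCount + altCount == 0) {
        return null;
    }
    return String.join("\t", siteID, String.valueOf(refCount), String.valueOf(altCount));
}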
use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk-protected by broadinstitute.
the class ReferenceConfidenceModelUnitTest method testCalcNIndelInformativeReads.
@Test(dataProvider = "CalcNIndelInformativeReadsData")
public void testCalcNIndelInformativeReads(final String readBases, final String ref, final int maxIndelSize, final List<Integer> expected) {
final byte qual = (byte) 30;
final byte[] quals = Utils.dupBytes(qual, readBases.length());
for (int i = 0; i < readBases.length(); i++) {
final GATKRead read = ArtificialReadUtils.createArtificialRead(readBases.getBytes(), quals, readBases.length() + "M");
final SimpleInterval loc = new SimpleInterval("20", i + 1, i + 1);
final ReadPileup pileup = new ReadPileup(loc, Collections.singletonList(read), i);
final int actual = model.calcNIndelInformativeReads(pileup, i, ref.getBytes(), maxIndelSize);
Assert.assertEquals(actual, (int) expected.get(i), "failed at position " + i);
}
}
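The three-argument ReadPileup constructor used above takes a locus, a list of reads, and a single per-read base offset, so each read contributes the base at that offset to the pileup. A minimal standalone sketch (the bases and contig name are made up for illustration):
// Build a one-read pileup at 20:5 from base offset 4 of the read.
final byte[] bases = "ACGTACGT".getBytes();
final byte[] quals = Utils.dupBytes((byte) 30, bases.length);
final GATKRead read = ArtificialReadUtils.createArtificialRead(bases, quals, "8M");
final ReadPileup pileup = new ReadPileup(new SimpleInterval("20", 5, 5), Collections.singletonList(read), 4);
for (final PileupElement pe : pileup) {
    System.out.println((char) pe.getBase()); // prints 'A', i.e. bases[4]
}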
use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.
the class HaplotypeCallerGenotypingEngineUnitTest method testAddMiscellaneousAllele.
@Test(dataProvider = "AddMiscellaneousDataProvider", enabled = false)
public void testAddMiscellaneousAllele(final String readBases, final int readOffset, final String ref, final int refOffset, final String referenceAllele, final String[] alternatives, final double[] likelihoods, final double[] expected) {
final byte baseQual = (byte) 30;
final byte[] baseQuals = Utils.dupBytes(baseQual, readBases.length());
final GATKRead read = ArtificialReadUtils.createArtificialRead(readBases.getBytes(), baseQuals, readBases.length() + "M");
final Locatable loc = new SimpleInterval("20", refOffset, refOffset);
final ReadPileup pileup = new ReadPileup(loc, Collections.singletonList(read), readOffset);
final VariantContextBuilder vcb = new VariantContextBuilder();
final GenotypeBuilder gb = new GenotypeBuilder();
final List<String> alleleStrings = new ArrayList<>(1 + alternatives.length);
alleleStrings.add(referenceAllele);
alleleStrings.addAll(Arrays.asList(alternatives));
gb.AD(new int[] { 1 });
gb.DP(1);
gb.PL(likelihoods);
vcb.alleles(alleleStrings);
vcb.loc("20", refOffset, refOffset + referenceAllele.length() - 1);
vcb.genotypes(gb.make());
final VariantContext vc = vcb.make();
// The call under test is commented out and the test is disabled (enabled = false above);
// updatedVc is therefore left null, and the assertions below would throw a
// NullPointerException if the test were re-enabled as-is.
// GenotypingEngine.addMiscellaneousAllele(vc, pileup, ref.getBytes(), 0);
final VariantContext updatedVc = null;
final GenotypeLikelihoods updatedLikelihoods = updatedVc.getGenotype(0).getLikelihoods();
Assert.assertEquals(updatedLikelihoods.getAsVector().length, expected.length);
final double[] updatedLikelihoodsArray = updatedVc.getGenotype(0).getLikelihoods().getAsVector();
for (int i = 0; i < updatedLikelihoodsArray.length; i++) {
Assert.assertEquals(updatedLikelihoodsArray[i], expected[i], 0.0001);
}
Allele altAllele = null;
for (final Allele allele : updatedVc.getAlleles()) {
    if (allele.isSymbolic() && allele.getBaseString().equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME)) {
        altAllele = allele;
    }
}
Assert.assertNotNull(altAllele);
}
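The final loop scans the alleles for the symbolic &lt;NON_REF&gt; allele. With Java streams the same check can be written more compactly (a stylistic alternative, behavior unchanged):
final Allele nonRefAllele = updatedVc.getAlleles().stream()
        .filter(a -> a.isSymbolic() && a.getBaseString().equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME))
        .findFirst()
        .orElse(null);
Assert.assertNotNull(nonRefAllele);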
use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk-protected by broadinstitute.
the class ReferenceConfidenceModel method getPileupsOverReference.
/**
* Get a list of pileups that span the entire active region span, in order, one for each position
*/
private List<ReadPileup> getPileupsOverReference(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes, final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion, final SimpleInterval activeRegionSpan, final ReadLikelihoods<Haplotype> readLikelihoods) {
Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());
final List<GATKRead> reads = activeRegion.getReads();
final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, samples.asSetOfSamples(), activeRegion.getHeader(), true);
final int startPos = activeRegionSpan.getStart();
final List<ReadPileup> pileups = new ArrayList<>(activeRegionSpan.getEnd() - startPos);
AlignmentContext next = libs.advanceToLocus(startPos, true);
for (int curPos = startPos; curPos <= activeRegionSpan.getEnd(); curPos++) {
if (next != null && next.getLocation().getStart() == curPos) {
pileups.add(next.getBasePileup());
next = libs.hasNext() ? libs.next() : null;
} else {
// no data, so we create empty pileups
pileups.add(new ReadPileup(new SimpleInterval(activeRegionSpan.getContig(), curPos, curPos)));
}
}
return pileups;
}
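Because the returned list is dense and ordered over the active-region span, callers can index it by simple offset arithmetic. A usage sketch (variable names are illustrative):
// The pileup covering genomic position pos sits at index pos - span start.
final List<ReadPileup> pileups = getPileupsOverReference(refHaplotype, calledHaplotypes, paddedReferenceLoc, activeRegion, activeRegionSpan, readLikelihoods);
final int pos = activeRegionSpan.getStart() + 10; // some position inside the span
final ReadPileup pileupAtPos = pileups.get(pos - activeRegionSpan.getStart());
System.out.println(pos + " depth = " + pileupAtPos.size()); // ReadPileup.size() is the read count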
use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk-protected by broadinstitute.
the class ReferenceConfidenceModel method calculateRefConfidence.
/**
* Calculate the reference confidence for a single sample given its read data
*
* Returns a list of variant contexts, one for each position in {@code activeRegion.getLoc()}, each containing
* detailed information about the certainty that the sample is hom-ref for each base in the region.
*
* @param refHaplotype the reference haplotype, used to get the reference bases across activeRegion.getLoc()
* @param calledHaplotypes a list of haplotypes that segregate in this region, for realignment of the reads in the
* readLikelihoods, corresponding to each read's best haplotype. Must contain the refHaplotype.
* @param paddedReferenceLoc the location of refHaplotype (which might be larger than activeRegion.getLoc())
* @param activeRegion the active region we want to get the reference confidence over
* @param readLikelihoods a map from a single sample to its PerReadAlleleLikelihoodMap for each haplotype in calledHaplotypes
* @param ploidyModel indicates the ploidy of each sample in {@code readLikelihoods}.
* @param variantCalls calls made in this region. The return result will contain any variant call in this list in the
* correct order by genomic position, and any variant in this list will stop us emitting a ref confidence
* under any position it covers (for SNPs and insertions that is 1 bp, but for deletions it's the entire ref span)
* @return an ordered list of variant contexts that spans activeRegion.getLoc() and includes both reference confidence
* contexts as well as calls from variantCalls if any were provided
*/
public List<VariantContext> calculateRefConfidence(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes, final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion, final ReadLikelihoods<Haplotype> readLikelihoods, final PloidyModel ploidyModel, final List<VariantContext> variantCalls) {
Utils.nonNull(refHaplotype, "refHaplotype cannot be null");
Utils.nonNull(calledHaplotypes, "calledHaplotypes cannot be null");
Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
Utils.nonNull(paddedReferenceLoc, "paddedReferenceLoc cannot be null");
Utils.nonNull(activeRegion, "activeRegion cannot be null");
Utils.nonNull(readLikelihoods, "readLikelihoods cannot be null");
Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());
Utils.validateArg(refHaplotype.length() == activeRegion.getExtendedSpan().size(), () -> "refHaplotype " + refHaplotype.length() + " and activeRegion extended span size " + activeRegion.getExtendedSpan().size() + " are different");
Utils.nonNull(ploidyModel, "the ploidy model cannot be null");
// the first sample = the only sample in reference-confidence mode.
final int ploidy = ploidyModel.samplePloidy(0);
final SimpleInterval refSpan = activeRegion.getSpan();
final List<ReadPileup> refPileups = getPileupsOverReference(refHaplotype, calledHaplotypes, paddedReferenceLoc, activeRegion, refSpan, readLikelihoods);
final byte[] ref = refHaplotype.getBases();
final List<VariantContext> results = new ArrayList<>(refSpan.size());
final String sampleName = readLikelihoods.getSample(0);
final int globalRefOffset = refSpan.getStart() - activeRegion.getExtendedSpan().getStart();
for (final ReadPileup pileup : refPileups) {
final Locatable curPos = pileup.getLocation();
final int offset = curPos.getStart() - refSpan.getStart();
final VariantContext overlappingSite = getOverlappingVariantContext(curPos, variantCalls);
if (overlappingSite != null && overlappingSite.getStart() == curPos.getStart()) {
results.add(overlappingSite);
} else {
// otherwise emit a reference confidence variant context
results.add(makeReferenceConfidenceVariantContext(ploidy, ref, sampleName, globalRefOffset, pileup, curPos, offset));
}
}
return results;
}
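getOverlappingVariantContext is not shown above. A hedged sketch of what such a helper plausibly does (an assumption about its behavior, not the verbatim GATK implementation): scan variantCalls for a call whose reference span covers the position, which is what makes deletions suppress ref-confidence emission across their whole span.
// Sketch only: return a call from variantCalls whose [start, end] span covers curPos.
private static VariantContext getOverlappingVariantContextSketch(final Locatable curPos, final List<VariantContext> variantCalls) {
    VariantContext overlapping = null;
    for (final VariantContext vc : variantCalls) {
        final boolean covers = vc.getContig().equals(curPos.getContig())
                && vc.getStart() <= curPos.getStart()
                && curPos.getStart() <= vc.getEnd();
        if (covers && (overlapping == null || vc.getStart() < overlapping.getStart())) {
            overlapping = vc; // prefer the earliest-starting overlapping call
        }
    }
    return overlapping;
}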