use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.
the class GenotypingEngine method calculateGenotypes.
/**
 * Main entry function, shared across genotypers (namely UG and HC), to calculate genotypes of a given VC with its corresponding GLs.
 *
 * @param features Features
 * @param refContext Reference context
 * @param rawContext Raw context
 * @param stratifiedContexts Stratified alignment contexts
 * @param vc Input VC
 * @param model GL calculation model
 * @param inheritAttributesFromInputVC Whether the output VC should inherit attributes from the input VC
 * @return VC with assigned genotypes
 */
protected VariantCallContext calculateGenotypes(final FeatureContext features, final ReferenceContext refContext, final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, final VariantContext vc, final GenotypeLikelihoodsCalculationModel model, final boolean inheritAttributesFromInputVC, final ReadLikelihoods<Allele> likelihoods, final SAMFileHeader header) {
    final boolean limitedContext = features == null || refContext == null || rawContext == null || stratifiedContexts == null;
    // if the input VC can't be genotyped, exit with either a null VCC or, in the case where we need to emit all sites, an empty call
    if (hasTooManyAlternativeAlleles(vc) || vc.getNSamples() == 0) {
        return emptyCallContext(features, refContext, rawContext, header);
    }
    final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
    final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
    VariantContext reducedVC = vc;
    if (maxAltAlleles < vc.getAlternateAlleles().size()) {
        final List<Allele> allelesToKeep = AlleleSubsettingUtils.calculateMostLikelyAlleles(vc, defaultPloidy, maxAltAlleles);
        final GenotypesContext reducedGenotypes = allelesToKeep.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) : AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), allelesToKeep, GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
        reducedVC = new VariantContextBuilder(vc).alleles(allelesToKeep).genotypes(reducedGenotypes).make();
    }
    final AFCalculator afCalculator = configuration.genotypeArgs.USE_NEW_AF_CALCULATOR ? newAFCalculator : afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles);
    final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(reducedVC, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc, defaultPloidy, model));
    final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc);
    // posterior probability that at least one alt allele exists in the samples
    final double probOfAtLeastOneAltAllele = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());
    // note: the trailing + 0.0 is necessary because -10 * 0.0 => -0.0, which isn't nice
    final double log10Confidence = !outputAlternativeAlleles.siteIsMonomorphic || configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs ? AFresult.getLog10PosteriorOfAFEq0() + 0.0 : AFresult.getLog10PosteriorOfAFGT0() + 0.0;
    // Adding 0.0 removes -0.0 occurrences.
    final double phredScaledConfidence = (-10.0 * log10Confidence) + 0.0;
    // skip this if we are already looking at a vc with NON_REF as the first alt allele, i.e. if we are in GenotypeGVCFs
    if (!passesEmitThreshold(phredScaledConfidence, outputAlternativeAlleles.siteIsMonomorphic) && !forceSiteEmission() && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles)) {
        // technically, at this point our confidence in a reference call isn't accurately estimated
        // because it didn't take into account samples with no data, so let's get a better estimate
        final double[] AFpriors = getAlleleFrequencyPriors(vc, defaultPloidy, model);
        final int INDEX_FOR_AC_EQUALS_1 = 1;
        return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, AFpriors[INDEX_FOR_AC_EQUALS_1], true, probOfAtLeastOneAltAllele);
    }
    // start constructing the resulting VC
    final List<Allele> outputAlleles = outputAlternativeAlleles.outputAlleles(vc.getReference());
    final VariantContextBuilder builder = new VariantContextBuilder(callSourceString(), vc.getContig(), vc.getStart(), vc.getEnd(), outputAlleles);
    builder.log10PError(log10Confidence);
    if (!passesCallThreshold(phredScaledConfidence)) {
        builder.filter(GATKVCFConstants.LOW_QUAL_FILTER_NAME);
    }
    // create the genotypes
    // TODO: omit subsetting if the output alleles are not a proper subset of vc.getAlleles()
    final GenotypesContext genotypes = outputAlleles.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) : AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
    // calculating strand bias involves overwriting data structures, so we do it last
    final Map<String, Object> attributes = composeCallAttributes(inheritAttributesFromInputVC, vc, rawContext, stratifiedContexts, features, refContext, outputAlternativeAlleles.alternativeAlleleMLECounts(), outputAlternativeAlleles.siteIsMonomorphic, AFresult, outputAlternativeAlleles.outputAlleles(vc.getReference()), genotypes, model, likelihoods);
    VariantContext vcCall = builder.genotypes(genotypes).attributes(attributes).make();
    if (annotationEngine != null && !limitedContext) {
        // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
        // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
        final ReadPileup pileup = rawContext.getBasePileup();
        stratifiedContexts = AlignmentContext.splitContextBySampleName(pileup, header);
        vcCall = annotationEngine.annotateContext(vcCall, features, refContext, likelihoods, a -> true);
    }
    // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end);
    // limitedContext callers need to handle allele trimming on their own to keep their alleles in sync
    if (outputAlleles.size() != vc.getAlleles().size() && !limitedContext) {
        vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
    }
    return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, probOfAtLeastOneAltAllele));
}
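For reference, a minimal sketch (not GATK source) of the pileup-splitting step used in the annotation branch above: the raw, unfiltered base pileup is re-stratified per sample name before the annotation engine runs. It only rearranges calls already shown (getBasePileup and the static splitContextBySampleName); the method name stratifyForAnnotation and the assumption that the caller supplies rawContext and header are illustrative.
// Hedged sketch, not GATK source: re-derive per-sample alignment contexts from the raw,
// unfiltered pileup before annotation, exactly as the branch above does.
// `rawContext` and `header` are assumed to be supplied by the enclosing caller.
private static Map<String, AlignmentContext> stratifyForAnnotation(final AlignmentContext rawContext, final SAMFileHeader header) {
    // the unfiltered, unBAQed base pileup at the current locus
    final ReadPileup pileup = rawContext.getBasePileup();
    // split by the sample name recorded in each read's read group
    return AlignmentContext.splitContextBySampleName(pileup, header);
}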
use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.
the class PileupUnitTest method testInsertLengthOutput.
@Test
public void testInsertLengthOutput() throws Exception {
    // create two artificial reads
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final SimpleInterval loc = new SimpleInterval("1:1");
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, 10);
    read1.setFragmentLength(100);
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 0, 1, 10);
    read2.setFragmentLength(50);
    // generate the pileup
    final ReadPileup pileup = new ReadPileup(loc, Arrays.asList(read1, read2), 1);
    // test one read
    Assert.assertEquals(Pileup.insertLengthOutput(pileup.makeFilteredPileup(p -> p.getRead().getName().equals("read1"))), "100");
    Assert.assertEquals(Pileup.insertLengthOutput(pileup.makeFilteredPileup(p -> p.getRead().getName().equals("read2"))), "50");
    // test two reads
    Assert.assertEquals(Pileup.insertLengthOutput(pileup), "100,50");
    // test an empty pileup
    Assert.assertEquals(Pileup.insertLengthOutput(new ReadPileup(loc)), "");
}
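A hypothetical companion test (not part of the GATK sources) showing how makeFilteredPileup can narrow a pileup before formatting it; the test name, the 75 bp threshold, and the use of GATKRead.getFragmentLength() are illustrative assumptions.
@Test
public void testInsertLengthOutputAfterFiltering() throws Exception {
    // same artificial fixtures as above: two reads with fragment lengths 100 and 50
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final SimpleInterval loc = new SimpleInterval("1:1");
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, 10);
    read1.setFragmentLength(100);
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 0, 1, 10);
    read2.setFragmentLength(50);
    final ReadPileup pileup = new ReadPileup(loc, Arrays.asList(read1, read2), 1);
    // keep only elements whose read reports a fragment length of at least 75; only read1 qualifies
    final ReadPileup longFragments = pileup.makeFilteredPileup(p -> p.getRead().getFragmentLength() >= 75);
    Assert.assertEquals(Pileup.insertLengthOutput(longFragments), "100");
}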
use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.
the class LocusIteratorByState method lazyLoadNextAlignmentContext.
/**
 * Creates the next alignment context from the given state. Note that this is implemented as a
 * lazy-load method: nextAlignmentContext MUST be null in order for this method to advance to the
 * next entry.
 */
private void lazyLoadNextAlignmentContext() {
    while (nextAlignmentContext == null && readStates.hasNext()) {
        readStates.collectPendingReads();
        final Locatable location = getLocation();
        final Map<String, ReadPileup> fullPileupPerSample = new LinkedHashMap<>();
        for (final Map.Entry<String, PerSampleReadStateManager> sampleStatePair : readStates) {
            final String sample = sampleStatePair.getKey();
            final PerSampleReadStateManager readState = sampleStatePair.getValue();
            final Iterator<AlignmentStateMachine> iterator = readState.iterator();
            final List<PileupElement> pile = new ArrayList<>(readState.size());
            while (iterator.hasNext()) {
                // state object with the read/offset information
                final AlignmentStateMachine state = iterator.next();
                final GATKRead read = state.getRead();
                final CigarOperator op = state.getCigarOperator();
                if (!includeReadsWithNsAtLoci && op == CigarOperator.N) {
                    continue;
                }
                if (!dontIncludeReadInPileup(read, location.getStart())) {
                    if (!includeReadsWithDeletionAtLoci && op == CigarOperator.D) {
                        continue;
                    }
                    pile.add(state.makePileupElement());
                }
            }
            if (!pile.isEmpty()) {
                // if this pileup added at least one base, add it to the full pileup
                fullPileupPerSample.put(sample, new ReadPileup(location, pile));
            }
        }
        // critical - must be called after we get the current state offsets and location
        readStates.updateReadStates();
        if (!fullPileupPerSample.isEmpty()) {
            // if we got reads with non-D/N over the current position, we are done
            nextAlignmentContext = new AlignmentContext(location, new ReadPileup(location, fullPileupPerSample));
        }
    }
}
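A short sketch, not taken from the GATK sources, of the two ReadPileup constructors exercised above: one builds a single-sample pileup from a list of PileupElements, the other joins per-sample pileups into one multi-sample pileup keyed by sample name. The helper's signature and the sample names "tumor" and "normal" are illustrative assumptions.
// Hedged sketch: combine per-sample element lists into one multi-sample ReadPileup at a locus.
// `location`, `tumorElements`, and `normalElements` are hypothetical inputs, not GATK fields.
private static ReadPileup joinSamplePileups(final Locatable location, final List<PileupElement> tumorElements, final List<PileupElement> normalElements) {
    final Map<String, ReadPileup> perSample = new LinkedHashMap<>();
    // one pileup per sample, built from that sample's PileupElements
    perSample.put("tumor", new ReadPileup(location, tumorElements));
    perSample.put("normal", new ReadPileup(location, normalElements));
    // the map-based constructor yields a single pileup spanning all samples at this position
    return new ReadPileup(location, perSample);
}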
use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.
the class AlignmentContext method splitContextBySampleName.
/**
 * Splits the given AlignmentContext into a stratified AlignmentContext per sample, keyed by sample name
 * rather than by sample object.
 *
 * @param assumedSingleSample If non-null, assume this is the only sample in our pileup and return immediately.
 *                            If null, get the list of samples from the provided header and do the work of splitting by sample.
 * @return a Map of sample name to AlignmentContext; samples without coverage are not included
 */
public Map<String, AlignmentContext> splitContextBySampleName(final String assumedSingleSample, final SAMFileHeader header) {
    if (assumedSingleSample != null) {
        return Collections.singletonMap(assumedSingleSample, this);
    }
    final Locatable loc = this.getLocation();
    // this will throw a user error if there are samples without an RG/sample name
    final Map<String, ReadPileup> pileups = this.getBasePileup().splitBySample(header, assumedSingleSample);
    final Map<String, AlignmentContext> contexts = new LinkedHashMap<>(pileups.size());
    for (final Map.Entry<String, ReadPileup> entry : pileups.entrySet()) {
        // Don't add empty pileups to the split context.
        if (entry.getValue().isEmpty()) {
            continue;
        }
        contexts.put(entry.getKey(), new AlignmentContext(loc, entry.getValue()));
    }
    return contexts;
}
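A brief usage sketch from a hypothetical caller (not GATK code): `ac` and `header` are assumed to be an existing multi-sample AlignmentContext and its SAMFileHeader, and the sample name "sample1" is made up for illustration; passing null for assumedSingleSample forces the real per-sample split.
// Hedged usage sketch: split a multi-sample context, then check per-sample coverage at this locus.
// `ac`, `header`, and the sample name "sample1" are assumptions made for illustration.
final Map<String, AlignmentContext> bySample = ac.splitContextBySampleName(null, header);
// samples with no coverage are absent from the map, so containsKey doubles as a coverage check
final boolean sampleOneHasCoverage = bySample.containsKey("sample1");
final ReadPileup samplePileup = sampleOneHasCoverage ? bySample.get("sample1").getBasePileup() : null;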
use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.
the class AlignmentContextUnitTest method testNoSample.
@Test(expectedExceptions = UserException.ReadMissingReadGroup.class)
public void testNoSample() throws Exception {
    final SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(readGroupOne);
    final Locatable loc = new SimpleInterval("chr1", 1, 1);
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, 10);
    read1.setReadGroup(readGroupOne.getId());
    final ReadPileup pileup = new ReadPileup(loc, Arrays.asList(read1), 1);
    final AlignmentContext ac = new AlignmentContext(loc, pileup);
    ac.splitContextBySampleName(header);
}
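For contrast, a hypothetical passing counterpart (not in the GATK test suite): when the read group does carry a sample name, set via htsjdk's SAMReadGroupRecord.setSample, the split succeeds. The test name, the sample name "sample1", and the assumption that the single-argument overload returns the same per-sample map as the two-argument form shown earlier are all illustrative.
@Test
public void testSplitWithSampleName() throws Exception {
    final SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
    readGroupOne.setSample("sample1"); // the missing piece that makes testNoSample above throw
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(readGroupOne);
    final Locatable loc = new SimpleInterval("chr1", 1, 1);
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, 10);
    read1.setReadGroup(readGroupOne.getId());
    final ReadPileup pileup = new ReadPileup(loc, Arrays.asList(read1), 1);
    final AlignmentContext ac = new AlignmentContext(loc, pileup);
    // assumed to return a Map<String, AlignmentContext> keyed by sample name
    final Map<String, AlignmentContext> bySample = ac.splitContextBySampleName(header);
    Assert.assertEquals(bySample.size(), 1);
    Assert.assertTrue(bySample.containsKey("sample1"));
}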