use of org.broadinstitute.hellbender.engine.AlignmentContext in project gatk-protected by broadinstitute.
the class ReferenceConfidenceModel method getPileupsOverReference.
/**
 * Get a list of pileups that span the entire active region span, in order, one for each position
 */
private List<ReadPileup> getPileupsOverReference(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes, final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion, final SimpleInterval activeRegionSpan, final ReadLikelihoods<Haplotype> readLikelihoods) {
    Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
    Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());
    final List<GATKRead> reads = activeRegion.getReads();
    final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, samples.asSetOfSamples(), activeRegion.getHeader(), true);
    final int startPos = activeRegionSpan.getStart();
    final List<ReadPileup> pileups = new ArrayList<>(activeRegionSpan.getEnd() - startPos);
    AlignmentContext next = libs.advanceToLocus(startPos, true);
    for (int curPos = startPos; curPos <= activeRegionSpan.getEnd(); curPos++) {
        if (next != null && next.getLocation().getStart() == curPos) {
            pileups.add(next.getBasePileup());
            next = libs.hasNext() ? libs.next() : null;
        } else {
            // no data at this position, so we create an empty pileup
            pileups.add(new ReadPileup(new SimpleInterval(activeRegionSpan.getContig(), curPos, curPos)));
        }
    }
    return pileups;
}
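For orientation, a hedged sketch of how a caller can index into the returned list: list index 0 corresponds to activeRegionSpan.getStart(), and uncovered positions hold empty pileups rather than nulls. The locals below (span, pileups, position) are illustrative assumptions, not GATK API:

// Hypothetical lookup of the pileup at one position inside the span;
// 'pileups' is the result of getPileupsOverReference, 'span' its activeRegionSpan argument.
final int position = span.getStart() + 10;                         // some position inside the span
final ReadPileup pileup = pileups.get(position - span.getStart()); // one pileup per position, in order
final int depth = pileup.size();                                   // 0 where no reads covered the locus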
use of org.broadinstitute.hellbender.engine.AlignmentContext in project gatk by broadinstitute.
the class GenotypingEngine method emptyCallContext.
/**
 * Produces an empty variant-call context to output when there is not enough data provided to call anything.
 *
 * @param features feature context
 * @param ref the reference context
 * @param rawContext the read alignment at that location
 * @return may be null if not enough information is provided to make even an empty call, for example when
 * we have limited context (i.e. any of the tracker, reference or alignment is {@code null})
 */
protected final VariantCallContext emptyCallContext(final FeatureContext features, final ReferenceContext ref, final AlignmentContext rawContext, final SAMFileHeader header) {
    if (features == null || ref == null || rawContext == null || !forceSiteEmission()) {
        return null;
    }
    VariantContext vc;
    if (configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES) {
        final VariantContext ggaVc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(features, rawContext.getLocation(), false, logger, configuration.alleles);
        if (ggaVc == null) {
            return null;
        }
        vc = new VariantContextBuilder(callSourceString(), ref.getInterval().getContig(), ggaVc.getStart(), ggaVc.getEnd(), ggaVc.getAlleles()).make();
    } else {
        // deal with bad/non-standard reference bases
        if (!Allele.acceptableAlleleBases(new byte[] { ref.getBase() })) {
            return null;
        }
        final Set<Allele> alleles = new LinkedHashSet<>(Collections.singleton(Allele.create(ref.getBase(), true)));
        vc = new VariantContextBuilder(callSourceString(), ref.getInterval().getContig(), ref.getInterval().getStart(), ref.getInterval().getStart(), alleles).make();
    }
    if (vc != null && annotationEngine != null) {
        // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
        final ReadPileup pileup = rawContext.getBasePileup();
        vc = annotationEngine.annotateContext(vc, features, ref, null, a -> true);
    }
    return new VariantCallContext(vc, false);
}
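Note the guard at the top: unless forceSiteEmission() is true, the method returns null immediately, so the empty-call path only matters for emit-all-sites style configurations. A hedged sketch of the caller-side pattern; 'writer' is an assumed htsjdk VariantContextWriter, not part of the snippet above:

// Hypothetical caller-side handling of an uncallable site; 'writer' is an assumption.
final VariantCallContext call = emptyCallContext(features, ref, rawContext, header);
if (call != null) {
    writer.add(call); // emit the empty call; a null return means nothing is emitted here
}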
use of org.broadinstitute.hellbender.engine.AlignmentContext in project gatk by broadinstitute.
the class GenotypingEngine method calculateGenotypes.
/**
 * Main entry function, shared across genotypers (namely UG and HC), to calculate the genotypes
 * of a given VC with its corresponding GLs.
 *
 * @param features Features
 * @param refContext Reference context
 * @param rawContext Raw context
 * @param stratifiedContexts Stratified alignment contexts
 * @param vc Input VC
 * @param model GL calculation model
 * @param inheritAttributesFromInputVC Output VC will contain attributes inherited from the input VC
 * @param likelihoods Read-allele likelihoods
 * @param header SAM file header for the reads
 * @return VC with assigned genotypes
 */
protected VariantCallContext calculateGenotypes(final FeatureContext features, final ReferenceContext refContext, final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, final VariantContext vc, final GenotypeLikelihoodsCalculationModel model, final boolean inheritAttributesFromInputVC, final ReadLikelihoods<Allele> likelihoods, final SAMFileHeader header) {
    final boolean limitedContext = features == null || refContext == null || rawContext == null || stratifiedContexts == null;
    // if the input VC can't be genotyped, exit with either a null VCC or, when we need to emit all sites, an empty call
    if (hasTooManyAlternativeAlleles(vc) || vc.getNSamples() == 0) {
        return emptyCallContext(features, refContext, rawContext, header);
    }
    final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
    final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
    VariantContext reducedVC = vc;
    if (maxAltAlleles < vc.getAlternateAlleles().size()) {
        final List<Allele> allelesToKeep = AlleleSubsettingUtils.calculateMostLikelyAlleles(vc, defaultPloidy, maxAltAlleles);
        final GenotypesContext reducedGenotypes = allelesToKeep.size() == 1
                ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy)
                : AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), allelesToKeep, GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
        reducedVC = new VariantContextBuilder(vc).alleles(allelesToKeep).genotypes(reducedGenotypes).make();
    }
    final AFCalculator afCalculator = configuration.genotypeArgs.USE_NEW_AF_CALCULATOR ? newAFCalculator : afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles);
    final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(reducedVC, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc, defaultPloidy, model));
    final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc);
    // posterior probability that at least one alt allele exists in the samples
    final double probOfAtLeastOneAltAllele = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());
    // note: the "+ 0.0" below is necessary because -10 * 0.0 => -0.0, which isn't nice
    final double log10Confidence = !outputAlternativeAlleles.siteIsMonomorphic || configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs
            ? AFresult.getLog10PosteriorOfAFEq0() + 0.0
            : AFresult.getLog10PosteriorOfAFGT0() + 0.0;
    // adding 0.0 removes -0.0 occurrences
    final double phredScaledConfidence = (-10.0 * log10Confidence) + 0.0;
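    // worked example (illustrative values): if log10Confidence == -3.0, i.e. the posterior
    // probability of the disfavored AF hypothesis is 10^-3, then phredScaledConfidence == 30.0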
    // skip this if we are already looking at a vc with NON_REF as the first alt allele, i.e. if we are in GenotypeGVCFs
    if (!passesEmitThreshold(phredScaledConfidence, outputAlternativeAlleles.siteIsMonomorphic) && !forceSiteEmission() && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles)) {
        // technically, at this point our confidence in a reference call isn't accurately estimated
        // because it didn't take into account samples with no data, so let's get a better estimate
        final double[] AFpriors = getAlleleFrequencyPriors(vc, defaultPloidy, model);
        final int INDEX_FOR_AC_EQUALS_1 = 1;
        return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, AFpriors[INDEX_FOR_AC_EQUALS_1], true, probOfAtLeastOneAltAllele);
    }
    // start constructing the resulting VC
    final List<Allele> outputAlleles = outputAlternativeAlleles.outputAlleles(vc.getReference());
    final VariantContextBuilder builder = new VariantContextBuilder(callSourceString(), vc.getContig(), vc.getStart(), vc.getEnd(), outputAlleles);
    builder.log10PError(log10Confidence);
    if (!passesCallThreshold(phredScaledConfidence)) {
        builder.filter(GATKVCFConstants.LOW_QUAL_FILTER_NAME);
    }
    // create the genotypes
    // TODO: omit subsetting if output alleles is not a proper subset of vc.getAlleles
    final GenotypesContext genotypes = outputAlleles.size() == 1
            ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy)
            : AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
    // calculating strand bias involves overwriting data structures, so we do it last
    final Map<String, Object> attributes = composeCallAttributes(inheritAttributesFromInputVC, vc, rawContext, stratifiedContexts, features, refContext, outputAlternativeAlleles.alternativeAlleleMLECounts(), outputAlternativeAlleles.siteIsMonomorphic, AFresult, outputAlternativeAlleles.outputAlleles(vc.getReference()), genotypes, model, likelihoods);
    VariantContext vcCall = builder.genotypes(genotypes).attributes(attributes).make();
    if (annotationEngine != null && !limitedContext) {
        // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
        // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
        final ReadPileup pileup = rawContext.getBasePileup();
        stratifiedContexts = AlignmentContext.splitContextBySampleName(pileup, header);
        vcCall = annotationEngine.annotateContext(vcCall, features, refContext, likelihoods, a -> true);
    }
    // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end);
    // limitedContext callers need to handle allele trimming on their own to keep their alleles in sync
    if (outputAlleles.size() != vc.getAlleles().size() && !limitedContext) {
        vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
    }
    return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, probOfAtLeastOneAltAllele));
}
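A hedged sketch of a call site, assuming every input is already in scope in the enclosing genotyper; only the parameter order comes from the signature above:

// Hypothetical call site; all locals here (features, refContext, rawContext, stratifiedContexts,
// mergedVC, model, readAlleleLikelihoods, header, writer) are assumptions about the caller's scope.
final VariantCallContext vcc = calculateGenotypes(features, refContext, rawContext, stratifiedContexts,
        mergedVC, model, false, readAlleleLikelihoods, header);
if (vcc != null) {
    writer.add(vcc); // in GATK, VariantCallContext extends VariantContext, so it can be written directly
}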
use of org.broadinstitute.hellbender.engine.AlignmentContext in project gatk by broadinstitute.
the class ArtificialReadPileupTestProvider method getAlignmentContextFromAlleles.
public Map<String, AlignmentContext> getAlignmentContextFromAlleles(final int eventLength, final String altBases, final int[] numReadsPerAllele, final boolean addBaseErrors, final int phredScaledBaseErrorRate) {
    final String refChar = new String(new byte[] { referenceContext.getBase() });
    String refAllele, altAllele;
    if (eventLength == 0) {
        // SNP case
        refAllele = refChar;
        altAllele = altBases.substring(0, 1);
    } else if (eventLength > 0) {
        // insertion
        refAllele = refChar;
        altAllele = refChar + altBases;
    } else {
        // deletion
        refAllele = new String(referenceContext.getForwardBases()).substring(0, Math.abs(eventLength) + 1);
        altAllele = refChar;
    }
    final Map<String, AlignmentContext> contexts = new LinkedHashMap<>();
    for (final String sample : sampleNames) {
        final AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc, refAllele, altAllele, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate));
        contexts.put(sample, context);
    }
    return contexts;
}
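For orientation, a hedged example of how a test might exercise this provider; the receiver 'provider', the sample name, and all argument values are illustrative assumptions, not from the GATK test suite:

// Illustrative test usage: eventLength 0 selects the SNP branch; the read counts and
// Q30 error rate are made-up values for the sketch.
final Map<String, AlignmentContext> contexts =
        provider.getAlignmentContextFromAlleles(0, "T", new int[] { 20, 10 }, true, 30);
final AlignmentContext sampleContext = contexts.get("sample1"); // assumed sample name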
use of org.broadinstitute.hellbender.engine.AlignmentContext in project gatk by broadinstitute.
the class LocusIteratorByState method next.
/**
 * Get the next AlignmentContext available from the reads.
 *
 * @return a non-null AlignmentContext of the pileup at the next genomic position covered by
 * at least one read.
 */
@Override
public AlignmentContext next() {
    lazyLoadNextAlignmentContext();
    if (!hasNext()) {
        throw new NoSuchElementException("LocusIteratorByState: out of elements.");
    }
    final AlignmentContext currentAlignmentContext = nextAlignmentContext;
    nextAlignmentContext = null;
    return currentAlignmentContext;
}
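A minimal hedged sketch of the standard iteration pattern over covered loci; 'libs' is assumed to be constructed as in the getPileupsOverReference example near the top:

// Hypothetical iteration over every position covered by at least one read;
// positions with no coverage are simply never returned by the iterator.
while (libs.hasNext()) {
    final AlignmentContext context = libs.next();
    final ReadPileup pileup = context.getBasePileup();
    System.out.printf("%s:%d depth=%d%n",
            context.getLocation().getContig(), context.getLocation().getStart(), pileup.size());
}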