use of org.broadinstitute.hellbender.tools.picard.analysis.artifacts.Transition in project gatk by broadinstitute.
the class OrientationBiasFilterer method annotateVariantContextWithPreprocessingValues.
/** Adds the annotations that can be created for individual variants. The annotations placed on the genotypes here will be used by the filter.
* @param vc the variant to prepare for the orientation bias filter. Never {@code null}
* @param relevantTransitionsWithoutComplement the SNV artifact modes that we want to filter against. Do not include the complement. Never {@code null}
* @param preAdapterQScoreMap mapping from Artifact mode to the preAdapterQ score. Never {@code null}
* @return updated VariantContext with new genotypes already populated.
*/
public static VariantContext annotateVariantContextWithPreprocessingValues(final VariantContext vc, final SortedSet<Transition> relevantTransitionsWithoutComplement, final Map<Transition, Double> preAdapterQScoreMap) {
Utils.nonNull(vc);
Utils.nonNull(relevantTransitionsWithoutComplement);
Utils.nonNull(preAdapterQScoreMap);
final List<Transition> relevantTransitionsComplement = OrientationBiasUtils.createReverseComplementTransitions(relevantTransitionsWithoutComplement);
final VariantContextBuilder vcb = new VariantContextBuilder(vc);
final GenotypesContext genotypesContext = vc.getGenotypes();
final List<Genotype> newGenotypes = new ArrayList<>();
for (Genotype genotype : genotypesContext.iterateInSampleNameOrder()) {
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype);
genotypeBuilder.attribute(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_ARTIFACT_MODE, String.valueOf(false));
genotypeBuilder.attribute(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_RC_ARTIFACT_MODE, String.valueOf(false));
final List<Allele> alleles = genotype.getAlleles();
if (genotype.getPloidy() != 2) {
logger.warn("No action required: This tool will skip non-diploid sites. Saw GT: " + genotype.getGenotypeString() + " at " + vc.toStringWithoutGenotypes());
}
// Get the reference allele as a String and make sure that there is only one ref allele and that it is length
// one, which would indicate that it could be a part of a SNP/SNV
final List<String> refAlleles = alleles.stream().filter(a -> a.isReference()).map(a -> a.getBaseString()).collect(Collectors.toList());
if (((refAlleles.size() == 1) && (refAlleles.get(0).length() == 1))) {
final Character refAllele = (char) refAlleles.get(0).getBytes()[0];
// Since we only look at the first alt allele on a site, we do not need a for loop over all non-ref alleles, e.g. for (int i = 1; i < alleles.size(); i++) {
final Allele allele = genotype.getAllele(1);
if (allele.isCalled() && allele.isNonReference() && !allele.equals(Allele.SPAN_DEL) && allele.getBaseString().length() == 1) {
final Transition genotypeMode = Transition.transitionOf(refAllele, allele.getBaseString().charAt(0));
final boolean isRelevantArtifact = relevantTransitionsWithoutComplement.contains(genotypeMode);
final boolean isRelevantArtifactComplement = relevantTransitionsComplement.contains(genotypeMode);
genotypeBuilder.attribute(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_ARTIFACT_MODE, String.valueOf(isRelevantArtifact));
genotypeBuilder.attribute(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_RC_ARTIFACT_MODE, String.valueOf(isRelevantArtifactComplement));
genotypeBuilder.attribute(OrientationBiasFilterConstants.PRE_ADAPTER_METRIC_FIELD_NAME, preAdapterQScoreMap.getOrDefault(genotypeMode, PRE_ADAPTER_METRIC_NOT_ARTIFACT_SCORE));
genotypeBuilder.attribute(OrientationBiasFilterConstants.PRE_ADAPTER_METRIC_RC_FIELD_NAME, preAdapterQScoreMap.getOrDefault(genotypeMode.complement(), PRE_ADAPTER_METRIC_NOT_ARTIFACT_SCORE));
// FOB is the fraction of alt reads indicating orientation bias error (taking into account artifact mode complement).
if (isRelevantArtifact || isRelevantArtifactComplement) {
final int totalAltAlleleCount = genotype.hasAD() ? genotype.getAD()[1] : 0;
final double fob = calculateFob(genotype, isRelevantArtifact);
genotypeBuilder.attribute(OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, ArtifactStatisticsScorer.calculateArtifactPValue(totalAltAlleleCount, (int) Math.round(fob * totalAltAlleleCount), BIAS_P));
genotypeBuilder.attribute(OrientationBiasFilterConstants.FOB, fob);
} else {
genotypeBuilder.attribute(OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, VCFConstants.EMPTY_ALLELE);
genotypeBuilder.attribute(OrientationBiasFilterConstants.FOB, VCFConstants.EMPTY_ALLELE);
}
}
}
final Genotype newGenotype = genotypeBuilder.make();
newGenotypes.add(newGenotype);
}
vcb.genotypes(newGenotypes);
return vcb.make();
}
use of org.broadinstitute.hellbender.tools.picard.analysis.artifacts.Transition in project gatk by broadinstitute.
the class OrientationBiasFilterer method createTransitionToNumCutPrePreAdapterQ.
private static Map<Transition, Long> createTransitionToNumCutPrePreAdapterQ(double fdrThresh, String sampleName, long unfilteredGenotypeCount, final SortedMap<Genotype, VariantContext> genotypesToConsiderForFiltering, final Map<Transition, Long> transitionCount) {
final long allTransitionCount = transitionCount.values().stream().mapToLong(Long::longValue).sum();
final int totalNumToCut = calculateTotalNumToCut(fdrThresh, unfilteredGenotypeCount, genotypesToConsiderForFiltering);
logger.info(sampleName + ": Cutting (total) pre-preAdapterQ: " + String.valueOf(totalNumToCut));
// Adjust the number to cut based on artifact mode
final Map<Transition, Long> transitionNumToCut = new HashMap<>();
transitionCount.keySet().stream().forEach(transition -> transitionNumToCut.put(transition, 0L));
for (final Transition transition : transitionNumToCut.keySet()) {
transitionNumToCut.put(transition, Long.valueOf(Math.round(totalNumToCut * transitionCount.get(transition) / allTransitionCount)));
logger.info(sampleName + ": Cutting (" + transition + ") pre-preAdapterQ: " + transitionNumToCut.get(transition));
}
return transitionNumToCut;
}
use of org.broadinstitute.hellbender.tools.picard.analysis.artifacts.Transition in project gatk by broadinstitute.
the class OrientationBiasFilterer method annotateVariantContextsWithFilterResults.
/** Filter genotypes with orientation bias filter while trying to keep the false discovery rate for the sample below the threshold specified.
*
* @param fdrThreshold Maximum FDR filter should allow.
* @param relevantTransitionsWithoutComplements Transitions to filter. Do not include complements. Never {@code null}.
* @param preAdapterQAnnotatedVariants Variant contexts that have already been annotated by {@link OrientationBiasFilterer#annotateVariantContextWithPreprocessingValues(VariantContext, SortedSet, Map)} Never {@code null}.
* @param preAdapterQScoreMap Mapping from Transition to the preAdapterQ score. relevantTransitions should be included as keys, but not required. Never {@code null}.
* @return The same variant contexts with the genotypes filter field populated with orientation bias filtering results. If the variant contexts have no samples, then the variant contexts are returned without any annotation.
*/
public static List<VariantContext> annotateVariantContextsWithFilterResults(final double fdrThreshold, final SortedSet<Transition> relevantTransitionsWithoutComplements, final List<VariantContext> preAdapterQAnnotatedVariants, final Map<Transition, Double> preAdapterQScoreMap) {
// This holds the relevantArtifact modes and the complements, since we need to filter both.
final SortedSet<Transition> relevantTransitions = new TreeSet<>();
relevantTransitions.addAll(relevantTransitionsWithoutComplements);
relevantTransitions.addAll(OrientationBiasUtils.createReverseComplementTransitions(relevantTransitionsWithoutComplements));
if (preAdapterQAnnotatedVariants.size() == 0) {
logger.info("No samples found in this file. NO FILTERING BEING DONE.");
return preAdapterQAnnotatedVariants;
}
final List<String> sampleNames = preAdapterQAnnotatedVariants.get(0).getSampleNamesOrderedByName();
final Map<String, SortedMap<Genotype, VariantContext>> sampleNameToVariants = createSampleToGenotypeVariantContextSortedMap(sampleNames, preAdapterQAnnotatedVariants);
// This map will hold all updated genotypes (across samples)
final Map<VariantContext, List<Genotype>> newGenotypes = new HashMap<>();
// For each sample, perform the actual filtering.
for (final String sampleName : sampleNames) {
// Count the total number of unfiltered genotypes for this sample, regardless of artifact mode or not.
// I.e. only remove filtered or ref/ref genotypes and count the rest.
final long unfilteredGenotypeCount = OrientationBiasUtils.calculateUnfilteredNonRefGenotypeCount(preAdapterQAnnotatedVariants, sampleName);
final SortedMap<Genotype, VariantContext> genotypesToConsiderForFiltering = sampleNameToVariants.get(sampleName);
// Save some time, especially for the normal sample
if (genotypesToConsiderForFiltering.keySet().size() == 0) {
logger.info(sampleName + ": Nothing to filter.");
continue;
}
// Populate counts of artifact modes. These are genotypes that we can potentially cut.
final Map<Transition, Long> transitionCount = createTransitionCountMap(relevantTransitions, genotypesToConsiderForFiltering);
// Global number of artifacts to cut (not adjusted for preAdapterQ)
final Map<Transition, Long> transitionNumToCut = createTransitionToNumCutPrePreAdapterQ(fdrThreshold, sampleName, unfilteredGenotypeCount, genotypesToConsiderForFiltering, transitionCount);
// Adjust the artifact mode to cut based on the preAdapterQ score from picard
for (final Transition transition : transitionNumToCut.keySet()) {
final Transition modeOrReverseComplement = relevantTransitionsWithoutComplements.contains(transition) ? transition : transition.complement();
final double suppression = ArtifactStatisticsScorer.calculateSuppressionFactorFromPreAdapterQ(preAdapterQScoreMap.get(modeOrReverseComplement));
transitionNumToCut.put(transition, Math.round(transitionNumToCut.get(transition) * suppression));
logger.info(sampleName + ": Cutting (" + transition + ") post-preAdapterQ: " + transitionNumToCut.get(transition));
}
// Add filtering results to genotypes and store a pair of genotype variant context, so that we can update variant contexts later.
logger.info(sampleName + ": Adding orientation bias filter results to genotypes...");
final Map<Transition, Long> transitionCutSoFar = new HashMap<>();
relevantTransitions.stream().forEach(transition -> transitionCutSoFar.put(transition, 0L));
for (final Genotype genotype : genotypesToConsiderForFiltering.keySet()) {
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype);
// Since we only have the ALT_F2R1 and ALT_F1R2 counts for the first alt allele, we will not consider a site where transition is not artifact mode in the first alt allele.
final Transition transition = Transition.transitionOf(genotype.getAllele(0).getBaseString().charAt(0), genotype.getAllele(1).getBaseString().charAt(0));
if (!transitionNumToCut.keySet().contains(transition)) {
logger.warn("Have to skip genotype: " + genotype + " since it does not have the artifact mode in the first alt allele. Total alleles: " + genotype.getAlleles().size());
} else {
final Double pValue = OrientationBiasUtils.getGenotypeDouble(genotype, OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, 0.0);
final Double fractionOfReadsSupportingOrientationBias = OrientationBiasUtils.getGenotypeDouble(genotype, OrientationBiasFilterConstants.FOB, 0.0);
if (transitionCutSoFar.get(transition) < transitionNumToCut.get(transition)) {
final String updatedFilter = OrientationBiasUtils.addFilterToGenotype(genotype.getFilters(), OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT);
genotypeBuilder.filter(updatedFilter);
transitionCutSoFar.put(transition, transitionCutSoFar.get(transition) + 1);
logger.info("Cutting: " + genotype.getSampleName() + " " + genotype.getAllele(0) + " " + genotype.getAllele(1) + " p=" + pValue + " Fob=" + fractionOfReadsSupportingOrientationBias);
} else {
// No need to do anything for the genotype filter, so just log it.
logger.info("Passing: " + genotype.getSampleName() + " " + genotype.getAllele(0) + " " + genotype.getAllele(1) + " p=" + pValue + " Fob=" + fractionOfReadsSupportingOrientationBias);
}
}
newGenotypes.computeIfAbsent(genotypesToConsiderForFiltering.get(genotype), v -> new ArrayList<>()).add(genotypeBuilder.make());
}
}
// Create the final variants with the modified genotypes
logger.info("Updating genotypes and creating final list of variants...");
final List<VariantContext> finalVariants = new ArrayList<>();
final ProgressMeter resultProgressMeter = new ProgressMeter();
resultProgressMeter.start();
for (final VariantContext vc : preAdapterQAnnotatedVariants) {
if (newGenotypes.containsKey(vc)) {
final GenotypesContext gcc = GenotypesContext.copy(vc.getGenotypes());
final List<Genotype> newGenotypesForThisVariantContext = newGenotypes.get(vc);
newGenotypesForThisVariantContext.forEach(gcc::replace);
final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(vc).genotypes(gcc);
if (newGenotypesForThisVariantContext.stream().anyMatch(g -> (g != null) && (g.getFilters() != null) && (g.getFilters().contains(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT)))) {
variantContextBuilder.filter(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT);
}
final VariantContext updatedVariantContext = variantContextBuilder.make();
finalVariants.add(updatedVariantContext);
} else {
finalVariants.add(vc);
}
resultProgressMeter.update(new SimpleInterval(vc.getContig(), vc.getStart(), vc.getEnd()));
}
resultProgressMeter.stop();
return finalVariants;
}
use of org.broadinstitute.hellbender.tools.picard.analysis.artifacts.Transition in project gatk by broadinstitute.
the class OrientationBiasUtils method toOrientationSampleTransitionSummary.
private static OrientationSampleTransitionSummary toOrientationSampleTransitionSummary(final DataLine dataLine) {
final Transition am = Transition.valueOf(dataLine.get(OrientationBiasFilterSummaryTableColumn.ARTIFACT_MODE.toString()).replace(">", "to"));
final Transition amC = Transition.valueOf(dataLine.get(OrientationBiasFilterSummaryTableColumn.ARTIFACT_MODE_COMPLEMENT.toString()).replace(">", "to"));
return new OrientationSampleTransitionSummary(dataLine.get(OrientationBiasFilterSummaryTableColumn.SAMPLE), am, amC, Double.parseDouble(dataLine.get(OrientationBiasFilterSummaryTableColumn.OBQ)), Integer.parseInt(dataLine.get(OrientationBiasFilterSummaryTableColumn.NUM_OB)), Integer.parseInt(dataLine.get(OrientationBiasFilterSummaryTableColumn.NUM_FILTERED)), Integer.parseInt(dataLine.get(OrientationBiasFilterSummaryTableColumn.NUM_NOT_AM)), Integer.parseInt(dataLine.get(OrientationBiasFilterSummaryTableColumn.NUM_VARIANTS)));
}
use of org.broadinstitute.hellbender.tools.picard.analysis.artifacts.Transition in project gatk by broadinstitute.
the class PreAdapterOrientationScorer method scoreOrientationBiasMetricsOverContext.
/** Scores all modes regardless of whether required in the input.
*
* @param metrics Can be read from a Picard PreAdapterDetail file. Never {@code null}
* @return mapping to score all orientation bias artifact modes to a PreAdapterQ score. This score can be used as a bam-file level score for level of artifacts.
*/
public static Map<Transition, Double> scoreOrientationBiasMetricsOverContext(final List<SequencingArtifactMetrics.PreAdapterDetailMetrics> metrics) {
Utils.nonNull(metrics, "Input metrics cannot be null");
// Artifact mode to a double
final Map<Transition, Double> result = new HashMap<>();
final Map<Transition, RealMatrix> counts = countOrientationBiasMetricsOverContext(metrics);
for (final Transition transition : counts.keySet()) {
final RealMatrix count = counts.get(transition);
final double totalBases = count.getEntry(PRO, REF) + count.getEntry(PRO, ALT) + count.getEntry(CON, REF) + count.getEntry(CON, ALT);
final double rawFractionPro = count.getEntry(PRO, ALT) / totalBases;
final double rawFractionCon = count.getEntry(CON, ALT) / totalBases;
final double baseScore = rawFractionPro - rawFractionCon;
final double score = QualityUtils.phredScaleErrorRate(Math.max(baseScore, MAX_BASE_SCORE));
result.put(transition, score);
}
return result;
}
Aggregations