use of org.broadinstitute.hellbender.engine.ProgressMeter in project gatk by broadinstitute.
the class CompareBaseQualities method doWork.
@Override
protected Object doWork() {
if (roundDown && (staticQuantizationQuals == null || staticQuantizationQuals.isEmpty())) {
throw new CommandLineException.BadArgumentValue("round_down_quantized", "true", "This option can only be used if static_quantized_quals is also used");
}
staticQuantizedMapping = constructStaticQuantizedMapping(staticQuantizationQuals, roundDown);
IOUtil.assertFileIsReadable(samFiles.get(0));
IOUtil.assertFileIsReadable(samFiles.get(1));
final SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(VALIDATION_STRINGENCY);
final SamReader reader1 = factory.referenceSequence(REFERENCE_SEQUENCE).open(samFiles.get(0));
final SamReader reader2 = factory.referenceSequence(REFERENCE_SEQUENCE).open(samFiles.get(1));
final SecondaryOrSupplementarySkippingIterator it1 = new SecondaryOrSupplementarySkippingIterator(reader1.iterator());
final SecondaryOrSupplementarySkippingIterator it2 = new SecondaryOrSupplementarySkippingIterator(reader2.iterator());
final CompareMatrix finalMatrix = new CompareMatrix(staticQuantizedMapping);
final ProgressMeter progressMeter = new ProgressMeter(1.0);
progressMeter.start();
while (it1.hasCurrent() && it2.hasCurrent()) {
final SAMRecord read1 = it1.getCurrent();
final SAMRecord read2 = it2.getCurrent();
progressMeter.update(read1);
if (!read1.getReadName().equals(read2.getReadName())) {
throw new UserException.BadInput("files do not have the same exact order of reads:" + read1.getReadName() + " vs " + read2.getReadName());
}
finalMatrix.add(read1.getBaseQualities(), read2.getBaseQualities());
it1.advance();
it2.advance();
}
progressMeter.stop();
if (it1.hasCurrent() || it2.hasCurrent()) {
throw new UserException.BadInput("files do not have the same exact number of reads");
}
CloserUtil.close(reader1);
CloserUtil.close(reader2);
finalMatrix.printOutResults(outputFilename);
if (throwOnDiff && finalMatrix.hasNonDiagonalElements()) {
throw new UserException("Quality scores from the two BAMs do not match");
}
return finalMatrix.hasNonDiagonalElements() ? 1 : 0;
}
use of org.broadinstitute.hellbender.engine.ProgressMeter in project gatk-protected by broadinstitute.
the class OrientationBiasFilterer method createSampleToGenotypeVariantContextSortedMap.
/** Creates a map that includes the artifact mode complements.
*
* @param sampleNames Names of samples to generate the map.
* @param variants The associated VariantContexts. The given sample names should be included.
* @return a mapping from the sampleNames to the a sorted (by p_artifact score) map that associates genotypes to their enclosing variant context.
*/
public static Map<String, SortedMap<Genotype, VariantContext>> createSampleToGenotypeVariantContextSortedMap(final List<String> sampleNames, final Collection<VariantContext> variants) {
// Sorts in reverse order (highest p_artifact goes first and will not allow anything to be equal
// unless they share the same reference)
// Note the negative sign is to sort in reverse error.
final Comparator<Genotype> genotypePArtifactComparator = Comparator.comparingDouble((Genotype g) -> -OrientationBiasUtils.getGenotypeDouble(g, OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, 0.0)).thenComparingInt(g -> g.hashCode());
final Map<String, SortedMap<Genotype, VariantContext>> sampleNameToVariants = new HashMap<>();
final ProgressMeter customProgressMeter = new ProgressMeter(0.1);
customProgressMeter.start();
// Make sure that the keys are sorted by cumulative probability of being an artifact.
for (final String sampleName : sampleNames) {
final SortedMap<Genotype, VariantContext> genotypesToConsiderForFiltering = new TreeMap<>(genotypePArtifactComparator);
for (final VariantContext vc : variants) {
vc.getGenotypes(sampleName).stream().filter(g -> isFilteringCandidate(g, vc)).forEach(genotype -> genotypesToConsiderForFiltering.put(genotype, vc));
customProgressMeter.update(new SimpleInterval(vc.getContig(), vc.getStart(), vc.getEnd()));
}
sampleNameToVariants.put(sampleName, genotypesToConsiderForFiltering);
}
customProgressMeter.stop();
return sampleNameToVariants;
}
use of org.broadinstitute.hellbender.engine.ProgressMeter in project gatk-protected by broadinstitute.
the class OrientationBiasFilterer method annotateVariantContextsWithFilterResults.
/** Filter genotypes with orientation bias filter while trying to keep the false discovery rate for the sample below the threshold specified.
*
* @param fdrThreshold Maximum FDR filter should allow.
* @param relevantTransitionsWithoutComplements Transitions to filter. Do not include complements. Never {@code null}.
* @param preAdapterQAnnotatedVariants Variant contexts that have already been annotated by {@link OrientationBiasFilterer#annotateVariantContextWithPreprocessingValues(VariantContext, SortedSet, Map)} Never {@code null}.
* @param preAdapterQScoreMap Mapping from Transition to the preAdapterQ score. relevantTransitions should be included as keys, but not required. Never {@code null}.
* @return The same variant contexts with the genotypes filter field populated with orientation bias filtering results. If the variant contexts have no samples, then the variant contexts are returned without any annotation.
*/
public static List<VariantContext> annotateVariantContextsWithFilterResults(final double fdrThreshold, final SortedSet<Transition> relevantTransitionsWithoutComplements, final List<VariantContext> preAdapterQAnnotatedVariants, final Map<Transition, Double> preAdapterQScoreMap) {
// This holds the relevantArtifact modes and the complements, since we need to filter both.
final SortedSet<Transition> relevantTransitions = new TreeSet<>();
relevantTransitions.addAll(relevantTransitionsWithoutComplements);
relevantTransitions.addAll(OrientationBiasUtils.createReverseComplementTransitions(relevantTransitionsWithoutComplements));
if (preAdapterQAnnotatedVariants.size() == 0) {
logger.info("No samples found in this file. NO FILTERING BEING DONE.");
return preAdapterQAnnotatedVariants;
}
final List<String> sampleNames = preAdapterQAnnotatedVariants.get(0).getSampleNamesOrderedByName();
final Map<String, SortedMap<Genotype, VariantContext>> sampleNameToVariants = createSampleToGenotypeVariantContextSortedMap(sampleNames, preAdapterQAnnotatedVariants);
// This map will hold all updated genotypes (across samples)
final Map<VariantContext, List<Genotype>> newGenotypes = new HashMap<>();
// For each sample, perform the actual filtering.
for (final String sampleName : sampleNames) {
// Count the total number of unfiltered genotypes for this sample, regardless of artifact mode or not.
// I.e. only remove filtered or ref/ref genotypes and count the rest.
final long unfilteredGenotypeCount = OrientationBiasUtils.calculateUnfilteredNonRefGenotypeCount(preAdapterQAnnotatedVariants, sampleName);
final SortedMap<Genotype, VariantContext> genotypesToConsiderForFiltering = sampleNameToVariants.get(sampleName);
// Save some time, especially for the normal sample
if (genotypesToConsiderForFiltering.keySet().size() == 0) {
logger.info(sampleName + ": Nothing to filter.");
continue;
}
// Populate counts of artifact modes. These are genotypes that we can potentially cut.
final Map<Transition, Long> transitionCount = createTransitionCountMap(relevantTransitions, genotypesToConsiderForFiltering);
// Global number of artifacts to cut (not adjusted for preAdapterQ)
final Map<Transition, Long> transitionNumToCut = createTransitionToNumCutPrePreAdapterQ(fdrThreshold, sampleName, unfilteredGenotypeCount, genotypesToConsiderForFiltering, transitionCount);
// Adjust the artifact mode to cut based on the preAdapterQ score from picard
for (final Transition transition : transitionNumToCut.keySet()) {
final Transition modeOrReverseComplement = relevantTransitionsWithoutComplements.contains(transition) ? transition : transition.complement();
final double suppression = ArtifactStatisticsScorer.calculateSuppressionFactorFromPreAdapterQ(preAdapterQScoreMap.get(modeOrReverseComplement));
transitionNumToCut.put(transition, Math.round(transitionNumToCut.get(transition) * suppression));
logger.info(sampleName + ": Cutting (" + transition + ") post-preAdapterQ: " + transitionNumToCut.get(transition));
}
// Add filtering results to genotypes and store a pair of genotype variant context, so that we can update variant contexts later.
logger.info(sampleName + ": Adding orientation bias filter results to genotypes...");
final Map<Transition, Long> transitionCutSoFar = new HashMap<>();
relevantTransitions.stream().forEach(transition -> transitionCutSoFar.put(transition, 0L));
for (final Genotype genotype : genotypesToConsiderForFiltering.keySet()) {
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype);
// Since we only have the ALT_F2R1 and ALT_F1R2 counts for the first alt allele, we will not consider a site where transition is not artifact mode in the first alt allele.
final Transition transition = Transition.transitionOf(genotype.getAllele(0).getBaseString().charAt(0), genotype.getAllele(1).getBaseString().charAt(0));
if (!transitionNumToCut.keySet().contains(transition)) {
logger.warn("Have to skip genotype: " + genotype + " since it does not have the artifact mode in the first alt allele. Total alleles: " + genotype.getAlleles().size());
} else {
final Double pValue = OrientationBiasUtils.getGenotypeDouble(genotype, OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, 0.0);
final Double fractionOfReadsSupportingOrientationBias = OrientationBiasUtils.getGenotypeDouble(genotype, OrientationBiasFilterConstants.FOB, 0.0);
if (transitionCutSoFar.get(transition) < transitionNumToCut.get(transition)) {
final String updatedFilter = OrientationBiasUtils.addFilterToGenotype(genotype.getFilters(), OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT);
genotypeBuilder.filter(updatedFilter);
transitionCutSoFar.put(transition, transitionCutSoFar.get(transition) + 1);
logger.info("Cutting: " + genotype.getSampleName() + " " + genotype.getAllele(0) + " " + genotype.getAllele(1) + " p=" + pValue + " Fob=" + fractionOfReadsSupportingOrientationBias);
} else {
// No need to do anything for the genotype filter, so just log it.
logger.info("Passing: " + genotype.getSampleName() + " " + genotype.getAllele(0) + " " + genotype.getAllele(1) + " p=" + pValue + " Fob=" + fractionOfReadsSupportingOrientationBias);
}
}
newGenotypes.computeIfAbsent(genotypesToConsiderForFiltering.get(genotype), v -> new ArrayList<>()).add(genotypeBuilder.make());
}
}
// Create the final variants with the modified genotypes
logger.info("Updating genotypes and creating final list of variants...");
final List<VariantContext> finalVariants = new ArrayList<>();
final ProgressMeter resultProgressMeter = new ProgressMeter();
resultProgressMeter.start();
for (final VariantContext vc : preAdapterQAnnotatedVariants) {
if (newGenotypes.containsKey(vc)) {
final GenotypesContext gcc = GenotypesContext.copy(vc.getGenotypes());
final List<Genotype> newGenotypesForThisVariantContext = newGenotypes.get(vc);
newGenotypesForThisVariantContext.forEach(gcc::replace);
final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(vc).genotypes(gcc);
if (newGenotypesForThisVariantContext.stream().anyMatch(g -> (g != null) && (g.getFilters() != null) && (g.getFilters().contains(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT)))) {
variantContextBuilder.filter(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT);
}
final VariantContext updatedVariantContext = variantContextBuilder.make();
finalVariants.add(updatedVariantContext);
} else {
finalVariants.add(vc);
}
resultProgressMeter.update(new SimpleInterval(vc.getContig(), vc.getStart(), vc.getEnd()));
}
resultProgressMeter.stop();
return finalVariants;
}
use of org.broadinstitute.hellbender.engine.ProgressMeter in project gatk by broadinstitute.
the class OrientationBiasFilterer method annotateVariantContextsWithFilterResults.
/** Filter genotypes with orientation bias filter while trying to keep the false discovery rate for the sample below the threshold specified.
*
* @param fdrThreshold Maximum FDR filter should allow.
* @param relevantTransitionsWithoutComplements Transitions to filter. Do not include complements. Never {@code null}.
* @param preAdapterQAnnotatedVariants Variant contexts that have already been annotated by {@link OrientationBiasFilterer#annotateVariantContextWithPreprocessingValues(VariantContext, SortedSet, Map)} Never {@code null}.
* @param preAdapterQScoreMap Mapping from Transition to the preAdapterQ score. relevantTransitions should be included as keys, but not required. Never {@code null}.
* @return The same variant contexts with the genotypes filter field populated with orientation bias filtering results. If the variant contexts have no samples, then the variant contexts are returned without any annotation.
*/
public static List<VariantContext> annotateVariantContextsWithFilterResults(final double fdrThreshold, final SortedSet<Transition> relevantTransitionsWithoutComplements, final List<VariantContext> preAdapterQAnnotatedVariants, final Map<Transition, Double> preAdapterQScoreMap) {
// This holds the relevantArtifact modes and the complements, since we need to filter both.
final SortedSet<Transition> relevantTransitions = new TreeSet<>();
relevantTransitions.addAll(relevantTransitionsWithoutComplements);
relevantTransitions.addAll(OrientationBiasUtils.createReverseComplementTransitions(relevantTransitionsWithoutComplements));
if (preAdapterQAnnotatedVariants.size() == 0) {
logger.info("No samples found in this file. NO FILTERING BEING DONE.");
return preAdapterQAnnotatedVariants;
}
final List<String> sampleNames = preAdapterQAnnotatedVariants.get(0).getSampleNamesOrderedByName();
final Map<String, SortedMap<Genotype, VariantContext>> sampleNameToVariants = createSampleToGenotypeVariantContextSortedMap(sampleNames, preAdapterQAnnotatedVariants);
// This map will hold all updated genotypes (across samples)
final Map<VariantContext, List<Genotype>> newGenotypes = new HashMap<>();
// For each sample, perform the actual filtering.
for (final String sampleName : sampleNames) {
// Count the total number of unfiltered genotypes for this sample, regardless of artifact mode or not.
// I.e. only remove filtered or ref/ref genotypes and count the rest.
final long unfilteredGenotypeCount = OrientationBiasUtils.calculateUnfilteredNonRefGenotypeCount(preAdapterQAnnotatedVariants, sampleName);
final SortedMap<Genotype, VariantContext> genotypesToConsiderForFiltering = sampleNameToVariants.get(sampleName);
// Save some time, especially for the normal sample
if (genotypesToConsiderForFiltering.keySet().size() == 0) {
logger.info(sampleName + ": Nothing to filter.");
continue;
}
// Populate counts of artifact modes. These are genotypes that we can potentially cut.
final Map<Transition, Long> transitionCount = createTransitionCountMap(relevantTransitions, genotypesToConsiderForFiltering);
// Global number of artifacts to cut (not adjusted for preAdapterQ)
final Map<Transition, Long> transitionNumToCut = createTransitionToNumCutPrePreAdapterQ(fdrThreshold, sampleName, unfilteredGenotypeCount, genotypesToConsiderForFiltering, transitionCount);
// Adjust the artifact mode to cut based on the preAdapterQ score from picard
for (final Transition transition : transitionNumToCut.keySet()) {
final Transition modeOrReverseComplement = relevantTransitionsWithoutComplements.contains(transition) ? transition : transition.complement();
final double suppression = ArtifactStatisticsScorer.calculateSuppressionFactorFromPreAdapterQ(preAdapterQScoreMap.get(modeOrReverseComplement));
transitionNumToCut.put(transition, Math.round(transitionNumToCut.get(transition) * suppression));
logger.info(sampleName + ": Cutting (" + transition + ") post-preAdapterQ: " + transitionNumToCut.get(transition));
}
// Add filtering results to genotypes and store a pair of genotype variant context, so that we can update variant contexts later.
logger.info(sampleName + ": Adding orientation bias filter results to genotypes...");
final Map<Transition, Long> transitionCutSoFar = new HashMap<>();
relevantTransitions.stream().forEach(transition -> transitionCutSoFar.put(transition, 0L));
for (final Genotype genotype : genotypesToConsiderForFiltering.keySet()) {
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype);
// Since we only have the ALT_F2R1 and ALT_F1R2 counts for the first alt allele, we will not consider a site where transition is not artifact mode in the first alt allele.
final Transition transition = Transition.transitionOf(genotype.getAllele(0).getBaseString().charAt(0), genotype.getAllele(1).getBaseString().charAt(0));
if (!transitionNumToCut.keySet().contains(transition)) {
logger.warn("Have to skip genotype: " + genotype + " since it does not have the artifact mode in the first alt allele. Total alleles: " + genotype.getAlleles().size());
} else {
final Double pValue = OrientationBiasUtils.getGenotypeDouble(genotype, OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, 0.0);
final Double fractionOfReadsSupportingOrientationBias = OrientationBiasUtils.getGenotypeDouble(genotype, OrientationBiasFilterConstants.FOB, 0.0);
if (transitionCutSoFar.get(transition) < transitionNumToCut.get(transition)) {
final String updatedFilter = OrientationBiasUtils.addFilterToGenotype(genotype.getFilters(), OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT);
genotypeBuilder.filter(updatedFilter);
transitionCutSoFar.put(transition, transitionCutSoFar.get(transition) + 1);
logger.info("Cutting: " + genotype.getSampleName() + " " + genotype.getAllele(0) + " " + genotype.getAllele(1) + " p=" + pValue + " Fob=" + fractionOfReadsSupportingOrientationBias);
} else {
// No need to do anything for the genotype filter, so just log it.
logger.info("Passing: " + genotype.getSampleName() + " " + genotype.getAllele(0) + " " + genotype.getAllele(1) + " p=" + pValue + " Fob=" + fractionOfReadsSupportingOrientationBias);
}
}
newGenotypes.computeIfAbsent(genotypesToConsiderForFiltering.get(genotype), v -> new ArrayList<>()).add(genotypeBuilder.make());
}
}
// Create the final variants with the modified genotypes
logger.info("Updating genotypes and creating final list of variants...");
final List<VariantContext> finalVariants = new ArrayList<>();
final ProgressMeter resultProgressMeter = new ProgressMeter();
resultProgressMeter.start();
for (final VariantContext vc : preAdapterQAnnotatedVariants) {
if (newGenotypes.containsKey(vc)) {
final GenotypesContext gcc = GenotypesContext.copy(vc.getGenotypes());
final List<Genotype> newGenotypesForThisVariantContext = newGenotypes.get(vc);
newGenotypesForThisVariantContext.forEach(gcc::replace);
final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(vc).genotypes(gcc);
if (newGenotypesForThisVariantContext.stream().anyMatch(g -> (g != null) && (g.getFilters() != null) && (g.getFilters().contains(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT)))) {
variantContextBuilder.filter(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT);
}
final VariantContext updatedVariantContext = variantContextBuilder.make();
finalVariants.add(updatedVariantContext);
} else {
finalVariants.add(vc);
}
resultProgressMeter.update(new SimpleInterval(vc.getContig(), vc.getStart(), vc.getEnd()));
}
resultProgressMeter.stop();
return finalVariants;
}
use of org.broadinstitute.hellbender.engine.ProgressMeter in project gatk by broadinstitute.
the class OrientationBiasFilterer method createSampleToGenotypeVariantContextSortedMap.
/** Creates a map that includes the artifact mode complements.
*
* @param sampleNames Names of samples to generate the map.
* @param variants The associated VariantContexts. The given sample names should be included.
* @return a mapping from the sampleNames to the a sorted (by p_artifact score) map that associates genotypes to their enclosing variant context.
*/
public static Map<String, SortedMap<Genotype, VariantContext>> createSampleToGenotypeVariantContextSortedMap(final List<String> sampleNames, final Collection<VariantContext> variants) {
// Sorts in reverse order (highest p_artifact goes first and will not allow anything to be equal
// unless they share the same reference)
// Note the negative sign is to sort in reverse error.
final Comparator<Genotype> genotypePArtifactComparator = Comparator.comparingDouble((Genotype g) -> -OrientationBiasUtils.getGenotypeDouble(g, OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, 0.0)).thenComparingInt(g -> g.hashCode());
final Map<String, SortedMap<Genotype, VariantContext>> sampleNameToVariants = new HashMap<>();
final ProgressMeter customProgressMeter = new ProgressMeter(0.1);
customProgressMeter.start();
// Make sure that the keys are sorted by cumulative probability of being an artifact.
for (final String sampleName : sampleNames) {
final SortedMap<Genotype, VariantContext> genotypesToConsiderForFiltering = new TreeMap<>(genotypePArtifactComparator);
for (final VariantContext vc : variants) {
vc.getGenotypes(sampleName).stream().filter(g -> isFilteringCandidate(g, vc)).forEach(genotype -> genotypesToConsiderForFiltering.put(genotype, vc));
customProgressMeter.update(new SimpleInterval(vc.getContig(), vc.getStart(), vc.getEnd()));
}
sampleNameToVariants.put(sampleName, genotypesToConsiderForFiltering);
}
customProgressMeter.stop();
return sampleNameToVariants;
}
Aggregations