use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.
the class OrientationBiasFiltererUnitTest method testCreateSampleToGenotypeVCMap.
@Test
public void testCreateSampleToGenotypeVCMap() {
// Setup the test
final FeatureDataSource<VariantContext> featureDataSource = new FeatureDataSource<>(new File(smallM2VcfMore));
SortedSet<Transition> relevantTransitions = new TreeSet<>();
relevantTransitions.add(Transition.transitionOf('G', 'T'));
relevantTransitions.add(Transition.transitionOf('C', 'T'));
final Map<Transition, Double> preAdapterQFakeScoreMap = new HashMap<>();
final double amGTPreAdapterQ = 20.0;
final double amCTPreAdapterQ = 25.0;
// preAdapterQ suppression will do nothing.
preAdapterQFakeScoreMap.put(relevantTransitions.first(), amGTPreAdapterQ);
// preAdapterQ suppression will do nothing.
preAdapterQFakeScoreMap.put(relevantTransitions.last(), amCTPreAdapterQ);
final List<VariantContext> updatedVariants = new ArrayList<>();
for (final VariantContext vc : featureDataSource) {
final VariantContext updatedVariantContext = OrientationBiasFilterer.annotateVariantContextWithPreprocessingValues(vc, relevantTransitions, preAdapterQFakeScoreMap);
updatedVariants.add(updatedVariantContext);
}
final List<String> sampleNames = updatedVariants.get(0).getSampleNamesOrderedByName();
// Do the test
// Create a mapping from sample name to a genotype->variant context map with the second map sorted by p_artifact (i.e. OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME)
final Map<String, SortedMap<Genotype, VariantContext>> sampleNameToVariants = OrientationBiasFilterer.createSampleToGenotypeVariantContextSortedMap(sampleNames, updatedVariants);
Assert.assertEquals(sampleNameToVariants.keySet().size(), 2);
Assert.assertTrue(sampleNameToVariants.keySet().contains("TUMOR"));
Assert.assertTrue(sampleNameToVariants.keySet().contains("NORMAL"));
Assert.assertEquals(sampleNameToVariants.get("TUMOR").keySet().size(), 8);
// None of the normal genotypes should have a pvalue, so cannot/shouldn't be added to the sorted map
Assert.assertEquals(sampleNameToVariants.get("NORMAL").keySet().size(), 0);
// Check that the sorted map is getting smaller (or same) values of p_artifact and not staying put.
double previousPArtifact = Double.POSITIVE_INFINITY;
for (final Genotype genotypeTumor : sampleNameToVariants.get("TUMOR").keySet()) {
final Double pArtifact = OrientationBiasUtils.getGenotypeDouble(genotypeTumor, OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, Double.POSITIVE_INFINITY);
Assert.assertNotNull(pArtifact);
Assert.assertTrue(pArtifact <= previousPArtifact);
Assert.assertNotEquals(pArtifact, Double.POSITIVE_INFINITY);
}
}
use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
the class FilterByOrientationBiasIntegrationTest method testRun.
@Test
public void testRun() throws IOException {
final File outputFile = File.createTempFile("ob_", ".vcf");
final List<String> arguments = new ArrayList<>();
arguments.add("-" + FilterByOrientationBias.PRE_ADAPTER_METRICS_DETAIL_FILE_SHORT_NAME);
arguments.add(preAdapterQFile);
arguments.add("-" + StandardArgumentDefinitions.VARIANT_SHORT_NAME);
arguments.add(smallM2VcfMore);
arguments.add("-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME);
arguments.add(outputFile.getAbsolutePath());
runCommandLine(arguments);
Assert.assertTrue(outputFile.exists());
final File summaryFile = new File(outputFile.getAbsolutePath() + FilterByOrientationBias.SUMMARY_FILE_SUFFIX);
Assert.assertTrue(summaryFile.exists());
final List<VariantContext> variantContexts = new ArrayList<>();
final FeatureDataSource<VariantContext> featureDataSource = new FeatureDataSource<>(outputFile);
for (final VariantContext vc : featureDataSource) {
variantContexts.add(vc);
}
Assert.assertEquals(variantContexts.size(), 11);
Assert.assertTrue(FileUtils.sizeOf(outputFile) > 0);
Assert.assertTrue(FileUtils.sizeOf(summaryFile) > 0);
boolean is_variant_context_tested = false;
// Also, make sure that the variant context has the filter as well. Not just the genotypes.
for (final VariantContext vc : variantContexts) {
final Genotype tumorGenotype = vc.getGenotype("TUMOR");
Assert.assertTrue((tumorGenotype.getFilters() == null) || (tumorGenotype.getFilters().contains(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT)) || !OrientationBiasUtils.isGenotypeInTransitionWithComplement(tumorGenotype, Transition.transitionOf('G', 'T')));
// If we see a filtered genotype, make sure the variant context was filtered as well.
if ((tumorGenotype.getFilters() != null) && (tumorGenotype.getFilters().contains(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT))) {
Assert.assertTrue(vc.getFilters().contains(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT));
is_variant_context_tested = true;
}
final Genotype normalGenotype = vc.getGenotype("NORMAL");
Assert.assertTrue((normalGenotype.getFilters() == null) || normalGenotype.getFilters().equals(VCFConstants.UNFILTERED) || normalGenotype.getFilters().equals(VCFConstants.PASSES_FILTERS_v4));
}
Assert.assertTrue(is_variant_context_tested, "Unit test may be broken. Should have tested that variant context contained filter as well as genotype fields.");
final List<OrientationSampleTransitionSummary> summaries = OrientationBiasUtils.readOrientationBiasSummaryTable(summaryFile);
Assert.assertEquals(summaries.size(), 2);
Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("NORMAL")).count(), 1);
Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("TUMOR")).count(), 1);
Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("NORMAL")).map(s -> s.getArtifactMode()).filter(am -> am.equals(Transition.GtoT)).count(), 1);
Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("NORMAL")).map(s -> s.getArtifactModeComplement()).filter(am -> am.equals(Transition.CtoA)).count(), 1);
Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("TUMOR")).map(s -> s.getArtifactMode()).filter(am -> am.equals(Transition.GtoT)).count(), 1);
Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("TUMOR")).map(s -> s.getArtifactModeComplement()).filter(am -> am.equals(Transition.CtoA)).count(), 1);
Assert.assertEquals(summaries.stream().filter(s -> s.getArtifactModeComplement().equals(s.getArtifactMode().complement())).count(), summaries.size());
Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("TUMOR")).map(s -> s.getArtifactModeComplement()).filter(am -> am.equals(Transition.CtoA)).count(), 1);
}
use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
the class SelectVariants method isConcordant.
/**
* Checks if the two variants have the same genotypes for the selected samples
*
* @param vc the variant rod VariantContext.
* @param compVCs the comparison VariantContext
* @return true if VariantContexts are concordant, false otherwise
*/
private boolean isConcordant(final VariantContext vc, final Collection<VariantContext> compVCs) {
if (vc == null || compVCs == null || compVCs.isEmpty()) {
return false;
}
// if we're not looking for specific samples then the fact that we have both VCs is enough to call it concordant.
if (noSamplesSpecified) {
return true;
}
// make a list of all samples contained in this variant VC that are being tracked by the user command line arguments.
final Set<String> variantSamples = vc.getSampleNames();
variantSamples.retainAll(samples);
// check if we can find all samples from the variant rod in the comp rod.
for (final String sample : variantSamples) {
boolean foundSample = false;
for (final VariantContext compVC : compVCs) {
final Genotype varG = vc.getGenotype(sample);
final Genotype compG = compVC.getGenotype(sample);
if (haveSameGenotypes(varG, compG)) {
foundSample = true;
break;
}
}
// if at least one sample doesn't have the same genotype, we don't have concordance
if (!foundSample) {
return false;
}
}
return true;
}
use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
the class SelectVariants method subsetRecord.
/**
* Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
*
* @param vc the VariantContext record to subset
* @param preserveAlleles should we trim constant sequence from the beginning and/or end of all alleles, or preserve it?
* @param removeUnusedAlternates removes alternate alleles with AC=0
* @return the subsetted VariantContext
*/
private VariantContext subsetRecord(final VariantContext vc, final boolean preserveAlleles, final boolean removeUnusedAlternates) {
//subContextFromSamples() always decodes the vc, which is a fairly expensive operation. Avoid if possible
if (noSamplesSpecified && !removeUnusedAlternates) {
return vc;
}
// strip out the alternate alleles that aren't being used
final VariantContext sub = vc.subContextFromSamples(samples, removeUnusedAlternates);
// If no subsetting happened, exit now
if (sub.getNSamples() == vc.getNSamples() && sub.getNAlleles() == vc.getNAlleles()) {
return vc;
}
// fix the PL and AD values if sub has fewer alleles than original vc and remove a fraction of the genotypes if needed
final GenotypesContext oldGs = sub.getGenotypes();
GenotypesContext newGC = sub.getNAlleles() == vc.getNAlleles() ? oldGs : AlleleSubsettingUtils.subsetAlleles(oldGs, 0, vc.getAlleles(), sub.getAlleles(), GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
if (fractionGenotypes > 0) {
final List<Genotype> genotypes = newGC.stream().map(genotype -> randomGenotypes.nextDouble() > fractionGenotypes ? genotype : new GenotypeBuilder(genotype).alleles(getNoCallAlleles(genotype.getPloidy())).noGQ().make()).collect(Collectors.toList());
newGC = GenotypesContext.create(new ArrayList<>(genotypes));
}
// since the VC has been subset (either by sample or allele), we need to strip out the MLE tags
final VariantContextBuilder builder = new VariantContextBuilder(sub);
builder.rmAttributes(Arrays.asList(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
builder.genotypes(newGC);
addAnnotations(builder, vc, sub.getSampleNames());
final VariantContext subset = builder.make();
return preserveAlleles ? subset : GATKVariantContextUtils.trimAlleles(subset, true, true);
}
use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
the class Mutect2FilteringEngine method applyStrandArtifactFilter.
private void applyStrandArtifactFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final Collection<String> filters) {
Genotype tumorGenotype = vc.getGenotype(tumorSample);
final double[] posteriorProbabilities = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(tumorGenotype, (StrandArtifact.POSTERIOR_PROBABILITIES_KEY), () -> null, -1);
final double[] mapAlleleFractionEstimates = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(tumorGenotype, (StrandArtifact.MAP_ALLELE_FRACTIONS_KEY), () -> null, -1);
if (posteriorProbabilities == null || mapAlleleFractionEstimates == null) {
return;
}
final int maxZIndex = MathUtils.maxElementIndex(posteriorProbabilities);
if (maxZIndex == StrandArtifact.StrandArtifactZ.NO_ARTIFACT.ordinal()) {
return;
}
if (posteriorProbabilities[maxZIndex] > MTFAC.STRAND_ARTIFACT_POSTERIOR_PROB_THRESHOLD && mapAlleleFractionEstimates[maxZIndex] < MTFAC.STRAND_ARTIFACT_ALLELE_FRACTION_THRESHOLD) {
filters.add(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME);
}
}
Aggregations