Search in sources :

Example 91 with Genotype

use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.

the class OrientationBiasFiltererUnitTest method testCreateSampleToGenotypeVCMap.

@Test
public void testCreateSampleToGenotypeVCMap() {
    // Setup the test
    final FeatureDataSource<VariantContext> featureDataSource = new FeatureDataSource<>(new File(smallM2VcfMore));
    SortedSet<Transition> relevantTransitions = new TreeSet<>();
    relevantTransitions.add(Transition.transitionOf('G', 'T'));
    relevantTransitions.add(Transition.transitionOf('C', 'T'));
    final Map<Transition, Double> preAdapterQFakeScoreMap = new HashMap<>();
    final double amGTPreAdapterQ = 20.0;
    final double amCTPreAdapterQ = 25.0;
    // preAdapterQ suppression will do nothing.
    preAdapterQFakeScoreMap.put(relevantTransitions.first(), amGTPreAdapterQ);
    // preAdapterQ suppression will do nothing.
    preAdapterQFakeScoreMap.put(relevantTransitions.last(), amCTPreAdapterQ);
    final List<VariantContext> updatedVariants = new ArrayList<>();
    for (final VariantContext vc : featureDataSource) {
        final VariantContext updatedVariantContext = OrientationBiasFilterer.annotateVariantContextWithPreprocessingValues(vc, relevantTransitions, preAdapterQFakeScoreMap);
        updatedVariants.add(updatedVariantContext);
    }
    final List<String> sampleNames = updatedVariants.get(0).getSampleNamesOrderedByName();
    // Do the test
    // Create a mapping from sample name to a genotype->variant context map with the second map sorted by p_artifact (i.e. OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME)
    final Map<String, SortedMap<Genotype, VariantContext>> sampleNameToVariants = OrientationBiasFilterer.createSampleToGenotypeVariantContextSortedMap(sampleNames, updatedVariants);
    Assert.assertEquals(sampleNameToVariants.keySet().size(), 2);
    Assert.assertTrue(sampleNameToVariants.keySet().contains("TUMOR"));
    Assert.assertTrue(sampleNameToVariants.keySet().contains("NORMAL"));
    Assert.assertEquals(sampleNameToVariants.get("TUMOR").keySet().size(), 8);
    // None of the normal genotypes should have a pvalue, so cannot/shouldn't be added to the sorted map
    Assert.assertEquals(sampleNameToVariants.get("NORMAL").keySet().size(), 0);
    // Check that the sorted map is getting smaller (or same) values of p_artifact and not staying put.
    double previousPArtifact = Double.POSITIVE_INFINITY;
    for (final Genotype genotypeTumor : sampleNameToVariants.get("TUMOR").keySet()) {
        final Double pArtifact = OrientationBiasUtils.getGenotypeDouble(genotypeTumor, OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, Double.POSITIVE_INFINITY);
        Assert.assertNotNull(pArtifact);
        Assert.assertTrue(pArtifact <= previousPArtifact);
        Assert.assertNotEquals(pArtifact, Double.POSITIVE_INFINITY);
    }
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) Transition(org.broadinstitute.hellbender.tools.picard.analysis.artifacts.Transition) File(java.io.File) FeatureDataSource(org.broadinstitute.hellbender.engine.FeatureDataSource) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 92 with Genotype

use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.

the class FilterByOrientationBiasIntegrationTest method testRun.

@Test
public void testRun() throws IOException {
    final File outputFile = File.createTempFile("ob_", ".vcf");
    final List<String> arguments = new ArrayList<>();
    arguments.add("-" + FilterByOrientationBias.PRE_ADAPTER_METRICS_DETAIL_FILE_SHORT_NAME);
    arguments.add(preAdapterQFile);
    arguments.add("-" + StandardArgumentDefinitions.VARIANT_SHORT_NAME);
    arguments.add(smallM2VcfMore);
    arguments.add("-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME);
    arguments.add(outputFile.getAbsolutePath());
    runCommandLine(arguments);
    Assert.assertTrue(outputFile.exists());
    final File summaryFile = new File(outputFile.getAbsolutePath() + FilterByOrientationBias.SUMMARY_FILE_SUFFIX);
    Assert.assertTrue(summaryFile.exists());
    final List<VariantContext> variantContexts = new ArrayList<>();
    final FeatureDataSource<VariantContext> featureDataSource = new FeatureDataSource<>(outputFile);
    for (final VariantContext vc : featureDataSource) {
        variantContexts.add(vc);
    }
    Assert.assertEquals(variantContexts.size(), 11);
    Assert.assertTrue(FileUtils.sizeOf(outputFile) > 0);
    Assert.assertTrue(FileUtils.sizeOf(summaryFile) > 0);
    boolean is_variant_context_tested = false;
    //  Also, make sure that the variant context has the filter as well.  Not just the genotypes.
    for (final VariantContext vc : variantContexts) {
        final Genotype tumorGenotype = vc.getGenotype("TUMOR");
        Assert.assertTrue((tumorGenotype.getFilters() == null) || (tumorGenotype.getFilters().contains(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT)) || !OrientationBiasUtils.isGenotypeInTransitionWithComplement(tumorGenotype, Transition.transitionOf('G', 'T')));
        // If we see a filtered genotype, make sure the variant context was filtered as well.
        if ((tumorGenotype.getFilters() != null) && (tumorGenotype.getFilters().contains(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT))) {
            Assert.assertTrue(vc.getFilters().contains(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT));
            is_variant_context_tested = true;
        }
        final Genotype normalGenotype = vc.getGenotype("NORMAL");
        Assert.assertTrue((normalGenotype.getFilters() == null) || normalGenotype.getFilters().equals(VCFConstants.UNFILTERED) || normalGenotype.getFilters().equals(VCFConstants.PASSES_FILTERS_v4));
    }
    Assert.assertTrue(is_variant_context_tested, "Unit test may be broken.  Should have tested that variant context contained filter as well as genotype fields.");
    final List<OrientationSampleTransitionSummary> summaries = OrientationBiasUtils.readOrientationBiasSummaryTable(summaryFile);
    Assert.assertEquals(summaries.size(), 2);
    Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("NORMAL")).count(), 1);
    Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("TUMOR")).count(), 1);
    Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("NORMAL")).map(s -> s.getArtifactMode()).filter(am -> am.equals(Transition.GtoT)).count(), 1);
    Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("NORMAL")).map(s -> s.getArtifactModeComplement()).filter(am -> am.equals(Transition.CtoA)).count(), 1);
    Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("TUMOR")).map(s -> s.getArtifactMode()).filter(am -> am.equals(Transition.GtoT)).count(), 1);
    Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("TUMOR")).map(s -> s.getArtifactModeComplement()).filter(am -> am.equals(Transition.CtoA)).count(), 1);
    Assert.assertEquals(summaries.stream().filter(s -> s.getArtifactModeComplement().equals(s.getArtifactMode().complement())).count(), summaries.size());
    Assert.assertEquals(summaries.stream().filter(s -> s.getSample().equals("TUMOR")).map(s -> s.getArtifactModeComplement()).filter(am -> am.equals(Transition.CtoA)).count(), 1);
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) FileUtils(org.apache.commons.io.FileUtils) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) Test(org.testng.annotations.Test) IOException(java.io.IOException) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) OrientationBiasFilterConstants(org.broadinstitute.hellbender.tools.exome.orientationbiasvariantfilter.OrientationBiasFilterConstants) File(java.io.File) OrientationSampleTransitionSummary(org.broadinstitute.hellbender.tools.exome.orientationbiasvariantfilter.OrientationSampleTransitionSummary) ArrayList(java.util.ArrayList) List(java.util.List) Assert(org.testng.Assert) FeatureDataSource(org.broadinstitute.hellbender.engine.FeatureDataSource) VariantContext(htsjdk.variant.variantcontext.VariantContext) Transition(org.broadinstitute.hellbender.tools.picard.analysis.artifacts.Transition) OrientationBiasUtils(org.broadinstitute.hellbender.tools.exome.orientationbiasvariantfilter.OrientationBiasUtils) VCFConstants(htsjdk.variant.vcf.VCFConstants) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) OrientationSampleTransitionSummary(org.broadinstitute.hellbender.tools.exome.orientationbiasvariantfilter.OrientationSampleTransitionSummary) File(java.io.File) FeatureDataSource(org.broadinstitute.hellbender.engine.FeatureDataSource) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 93 with Genotype

use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.

the class SelectVariants method isConcordant.

/**
     * Checks if the two variants have the same genotypes for the selected samples
     *
     * @param vc the variant rod VariantContext.
     * @param compVCs the comparison VariantContext
     * @return true if VariantContexts are concordant, false otherwise
     */
private boolean isConcordant(final VariantContext vc, final Collection<VariantContext> compVCs) {
    if (vc == null || compVCs == null || compVCs.isEmpty()) {
        return false;
    }
    // if we're not looking for specific samples then the fact that we have both VCs is enough to call it concordant.
    if (noSamplesSpecified) {
        return true;
    }
    // make a list of all samples contained in this variant VC that are being tracked by the user command line arguments.
    final Set<String> variantSamples = vc.getSampleNames();
    variantSamples.retainAll(samples);
    // check if we can find all samples from the variant rod in the comp rod.
    for (final String sample : variantSamples) {
        boolean foundSample = false;
        for (final VariantContext compVC : compVCs) {
            final Genotype varG = vc.getGenotype(sample);
            final Genotype compG = compVC.getGenotype(sample);
            if (haveSameGenotypes(varG, compG)) {
                foundSample = true;
                break;
            }
        }
        // if at least one sample doesn't have the same genotype, we don't have concordance
        if (!foundSample) {
            return false;
        }
    }
    return true;
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype)

Example 94 with Genotype

use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.

the class SelectVariants method subsetRecord.

/**
     * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
     *
     * @param vc       the VariantContext record to subset
     * @param preserveAlleles should we trim constant sequence from the beginning and/or end of all alleles, or preserve it?
     * @param removeUnusedAlternates removes alternate alleles with AC=0
     * @return the subsetted VariantContext
     */
private VariantContext subsetRecord(final VariantContext vc, final boolean preserveAlleles, final boolean removeUnusedAlternates) {
    //subContextFromSamples() always decodes the vc, which is a fairly expensive operation.  Avoid if possible
    if (noSamplesSpecified && !removeUnusedAlternates) {
        return vc;
    }
    // strip out the alternate alleles that aren't being used
    final VariantContext sub = vc.subContextFromSamples(samples, removeUnusedAlternates);
    // If no subsetting happened, exit now
    if (sub.getNSamples() == vc.getNSamples() && sub.getNAlleles() == vc.getNAlleles()) {
        return vc;
    }
    // fix the PL and AD values if sub has fewer alleles than original vc and remove a fraction of the genotypes if needed
    final GenotypesContext oldGs = sub.getGenotypes();
    GenotypesContext newGC = sub.getNAlleles() == vc.getNAlleles() ? oldGs : AlleleSubsettingUtils.subsetAlleles(oldGs, 0, vc.getAlleles(), sub.getAlleles(), GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
    if (fractionGenotypes > 0) {
        final List<Genotype> genotypes = newGC.stream().map(genotype -> randomGenotypes.nextDouble() > fractionGenotypes ? genotype : new GenotypeBuilder(genotype).alleles(getNoCallAlleles(genotype.getPloidy())).noGQ().make()).collect(Collectors.toList());
        newGC = GenotypesContext.create(new ArrayList<>(genotypes));
    }
    // since the VC has been subset (either by sample or allele), we need to strip out the MLE tags
    final VariantContextBuilder builder = new VariantContextBuilder(sub);
    builder.rmAttributes(Arrays.asList(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
    builder.genotypes(newGC);
    addAnnotations(builder, vc, sub.getSampleNames());
    final VariantContext subset = builder.make();
    return preserveAlleles ? subset : GATKVariantContextUtils.trimAlleles(subset, true, true);
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) Allele(htsjdk.variant.variantcontext.Allele) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) java.util(java.util) GenotypeLikelihoods(htsjdk.variant.variantcontext.GenotypeLikelihoods) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) Argument(org.broadinstitute.barclay.argparser.Argument) GenotypeAssignmentMethod(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) SampleDB(org.broadinstitute.hellbender.utils.samples.SampleDB) VariantProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.VariantProgramGroup) VariantIDsVariantFilter(org.broadinstitute.hellbender.engine.filters.VariantIDsVariantFilter) VariantContextUtils(htsjdk.variant.variantcontext.VariantContextUtils) SampleDBBuilder(org.broadinstitute.hellbender.utils.samples.SampleDBBuilder) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) FeatureContext(org.broadinstitute.hellbender.engine.FeatureContext) VCFConstants(htsjdk.variant.vcf.VCFConstants) VariantWalker(org.broadinstitute.hellbender.engine.VariantWalker) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Predicate(java.util.function.Predicate) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VariantFilterLibrary(org.broadinstitute.hellbender.engine.filters.VariantFilterLibrary) Collectors(java.util.stream.Collectors) VariantFilter(org.broadinstitute.hellbender.engine.filters.VariantFilter) File(java.io.File) GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) UserException(org.broadinstitute.hellbender.exceptions.UserException) AlleleSubsettingUtils(org.broadinstitute.hellbender.tools.walkers.genotyper.AlleleSubsettingUtils) org.broadinstitute.hellbender.utils.variant(org.broadinstitute.hellbender.utils.variant) MendelianViolation(org.broadinstitute.hellbender.utils.samples.MendelianViolation) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantTypesVariantFilter(org.broadinstitute.hellbender.engine.filters.VariantTypesVariantFilter) ChromosomeCounts(org.broadinstitute.hellbender.tools.walkers.annotator.ChromosomeCounts) PedigreeValidationType(org.broadinstitute.hellbender.utils.samples.PedigreeValidationType) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFUtils(htsjdk.variant.vcf.VCFUtils) Utils(org.broadinstitute.hellbender.utils.Utils) Hidden(org.broadinstitute.barclay.argparser.Hidden) FeatureInput(org.broadinstitute.hellbender.engine.FeatureInput) ReadsContext(org.broadinstitute.hellbender.engine.ReadsContext) Pattern(java.util.regex.Pattern) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder)

Example 95 with Genotype

use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.

the class Mutect2FilteringEngine method applyStrandArtifactFilter.

private void applyStrandArtifactFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final Collection<String> filters) {
    Genotype tumorGenotype = vc.getGenotype(tumorSample);
    final double[] posteriorProbabilities = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(tumorGenotype, (StrandArtifact.POSTERIOR_PROBABILITIES_KEY), () -> null, -1);
    final double[] mapAlleleFractionEstimates = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(tumorGenotype, (StrandArtifact.MAP_ALLELE_FRACTIONS_KEY), () -> null, -1);
    if (posteriorProbabilities == null || mapAlleleFractionEstimates == null) {
        return;
    }
    final int maxZIndex = MathUtils.maxElementIndex(posteriorProbabilities);
    if (maxZIndex == StrandArtifact.StrandArtifactZ.NO_ARTIFACT.ordinal()) {
        return;
    }
    if (posteriorProbabilities[maxZIndex] > MTFAC.STRAND_ARTIFACT_POSTERIOR_PROB_THRESHOLD && mapAlleleFractionEstimates[maxZIndex] < MTFAC.STRAND_ARTIFACT_ALLELE_FRACTION_THRESHOLD) {
        filters.add(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype)

Aggregations

Genotype (htsjdk.variant.variantcontext.Genotype)150 VariantContext (htsjdk.variant.variantcontext.VariantContext)97 Allele (htsjdk.variant.variantcontext.Allele)82 ArrayList (java.util.ArrayList)54 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)52 GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)51 File (java.io.File)48 VCFHeader (htsjdk.variant.vcf.VCFHeader)46 IOException (java.io.IOException)45 Collectors (java.util.stream.Collectors)42 Test (org.testng.annotations.Test)37 HashSet (java.util.HashSet)35 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)29 List (java.util.List)29 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)27 VCFFormatHeaderLine (htsjdk.variant.vcf.VCFFormatHeaderLine)25 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)23 HashMap (java.util.HashMap)23 java.util (java.util)22 Set (java.util.Set)22