Search in sources :

Example 1 with VCFHeaderLine

use of htsjdk.variant.vcf.VCFHeaderLine in project gatk by broadinstitute.

the class VariantRecalibrator method onTraversalStart.

//---------------------------------------------------------------------------------------------------------------
//
// onTraversalStart
//
//---------------------------------------------------------------------------------------------------------------
/**
 * Prepares the tool for traversal: optionally aligns the random generator with GATK3,
 * builds the {@code VariantDataManager}, registers and validates the resource inputs,
 * opens the tranches output stream, writes the recalibration VCF header and pre-draws
 * the random values stored in {@code replicate}.
 *
 * <p>Resource validation is performed before the tranches file is opened, so that an
 * invalid command line neither creates/truncates the tranches file nor leaves an open
 * stream behind.
 *
 * @throws CommandLineException if no training set or no truth set was supplied
 * @throws UserException.CouldNotCreateOutputFile if the tranches file cannot be opened for writing
 */
@Override
public void onTraversalStart() {
    if (gatk3Compatibility) {
        // Temporary argument for validation of GATK4 implementation against GATK3 results:
        // Reset the RNG and draw a single int to align the RNG initial state with that used
        // by GATK3 to allow comparison of results with GATK3
        Utils.resetRandomGenerator();
        Utils.getRandomGenerator().nextInt();
    }

    dataManager = new VariantDataManager(new ArrayList<>(USE_ANNOTATIONS), VRAC);

    if (RSCRIPT_FILE != null && !RScriptExecutor.RSCRIPT_EXISTS) {
        Utils.warnUser(logger, String.format("Rscript not found in environment path. %s will be generated but PDF plots will not.", RSCRIPT_FILE));
    }

    if (IGNORE_INPUT_FILTERS != null) {
        ignoreInputFilterSet.addAll(IGNORE_INPUT_FILTERS);
    }

    // Register and validate the resources BEFORE touching any output file: failing here
    // must not leave an empty tranches file or an unclosed PrintStream behind.
    for (FeatureInput<VariantContext> variantFeature : resource) {
        dataManager.addTrainingSet(new TrainingSet(variantFeature));
    }
    if (!dataManager.checkHasTrainingSet()) {
        throw new CommandLineException("No training set found! Please provide sets of known polymorphic loci marked with the training=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
    }
    if (!dataManager.checkHasTruthSet()) {
        throw new CommandLineException("No truth set found! Please provide sets of known polymorphic loci marked with the truth=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
    }

    try {
        tranchesStream = new PrintStream(TRANCHES_FILE);
    } catch (FileNotFoundException e) {
        throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
    }

    // TODO: the header construction below should be refactored/consolidated as part of
    // https://github.com/broadinstitute/gatk/issues/2112,
    // https://github.com/broadinstitute/gatk/issues/121 and
    // https://github.com/broadinstitute/gatk/issues/1116

    // Initialize VCF header lines
    Set<VCFHeaderLine> hInfo = getDefaultToolVCFHeaderLines();
    VariantRecalibrationUtils.addVQSRStandardHeaderLines(hInfo);
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    if (hasReference()) {
        hInfo = VcfUtils.updateHeaderContigLines(hInfo, referenceArguments.getReferenceFile(), sequenceDictionary, true);
    } else if (null != sequenceDictionary) {
        hInfo = VcfUtils.updateHeaderContigLines(hInfo, null, sequenceDictionary, true);
    }

    recalWriter = createVCFWriter(new File(output));
    recalWriter.writeHeader(new VCFHeader(hInfo));

    // Pre-draw 2 * REPLICATE random doubles; drawn last so the RNG sequence is unchanged.
    for (int draw = 0; draw < REPLICATE * 2; draw++) {
        replicate.add(Utils.getRandomGenerator().nextDouble());
    }
}
Also used : PrintStream(java.io.PrintStream) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ExpandingArrayList(org.broadinstitute.hellbender.utils.collections.ExpandingArrayList) FileNotFoundException(java.io.FileNotFoundException) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException) UserException(org.broadinstitute.hellbender.exceptions.UserException) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File)

Example 2 with VCFHeaderLine

use of htsjdk.variant.vcf.VCFHeaderLine in project gatk by broadinstitute.

the class SelectVariants method onTraversalStart.

/**
 * Prepares the tool for traversal: builds the initial VCF header lines, names and compiles
 * the JEXL select expressions, resolves the sample and type inclusion lists, derives the
 * discordance / concordance / Mendelian-violation / random-fraction modes from the arguments,
 * and finally creates the VCF writer and writes its header.
 */
@Override
public void onTraversalStart() {
    final Map<String, VCFHeader> headersByInputName = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants());

    // Initialize VCF header lines
    final Set<VCFHeaderLine> baseLines = createVCFHeaderLineList(headersByInputName);

    // Users are not required to name their JEXL expressions (the expressions are only used
    // to omit records), so a synthetic name is generated for each of them here.
    int expressionIndex = 0;
    while (expressionIndex < selectExpressions.size()) {
        selectNames.add(String.format("select-%d", expressionIndex));
        expressionIndex++;
    }
    jexls = VariantContextUtils.initializeMatchExps(selectNames, selectExpressions);

    // Sample names and variant types consumed by the corresponding filters
    samples = createSampleNameInclusionList(headersByInputName);
    selectedTypes = createSampleTypeInclusionList();

    // Derive the analysis modes from the supplied parameters
    discordanceOnly = (null != discordanceTrack);
    if (discordanceOnly) {
        final String discordanceMessage = "Selecting only variants discordant with the track: " + discordanceTrack.getName();
        logger.info(discordanceMessage);
    }

    concordanceOnly = (null != concordanceTrack);
    if (concordanceOnly) {
        final String concordanceMessage = "Selecting only variants concordant with the track: " + concordanceTrack.getName();
        logger.info(concordanceMessage);
    }

    if (mendelianViolations) {
        sampleDB = initializeSampleDB();
        mv = new MendelianViolation(mendelianViolationQualThreshold, false, true);
    }

    selectRandomFraction = fractionRandom > 0;
    if (selectRandomFraction) {
        final String fractionMessage = "Selecting approximately " + 100.0 * fractionRandom + "% of the variants at random from the variant track";
        logger.info(fractionMessage);
    }

    //TODO: this should be refactored/consolidated as part of
    // https://github.com/broadinstitute/gatk/issues/121 and
    // https://github.com/broadinstitute/gatk/issues/1116
    final Set<VCFHeaderLine> finalLines;
    if (hasReference()) {
        // A reference was supplied: contig lines come from the reference file and its dictionary.
        final File referenceFile = referenceArguments.getReferenceFile();
        final SAMSequenceDictionary referenceDictionary = this.getReferenceDictionary();
        finalLines = VcfUtils.updateHeaderContigLines(baseLines, referenceFile, referenceDictionary, suppressReferencePath);
    } else {
        // No reference: fall back to the dictionary of the input header, if it has one.
        final SAMSequenceDictionary variantsDictionary = getHeaderForVariants().getSequenceDictionary();
        if (variantsDictionary == null) {
            finalLines = baseLines;
        } else {
            finalLines = VcfUtils.updateHeaderContigLines(baseLines, null, variantsDictionary, suppressReferencePath);
        }
    }

    vcfWriter = createVCFWriter(outFile);
    vcfWriter.writeHeader(new VCFHeader(finalLines, samples));
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) MendelianViolation(org.broadinstitute.hellbender.utils.samples.MendelianViolation) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) File(java.io.File)

Example 3 with VCFHeaderLine

use of htsjdk.variant.vcf.VCFHeaderLine in project gatk by broadinstitute.

the class ReferenceConfidenceModelUnitTest method testGetHeaderLines.

/**
 * Verifies that the model reports exactly one VCF header line and that this line equals
 * the expected symbolic-allele definition for the non-ref allele.
 */
@Test
public void testGetHeaderLines() throws Exception {
    final Set<VCFHeaderLine> reportedLines = model.getVCFHeaderLines();

    // Exactly one header line is expected
    Assert.assertEquals(reportedLines.size(), 1);

    // ... and it must be the symbolic allele definition
    final VCFHeaderLine onlyLine = reportedLines.iterator().next();
    final VCFSimpleHeaderLine expectedLine = new VCFSimpleHeaderLine(
            GATKVCFConstants.SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG,
            GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME,
            "Represents any possible alternative allele at this location");
    Assert.assertEquals(onlyLine, expectedLine);
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFSimpleHeaderLine(htsjdk.variant.vcf.VCFSimpleHeaderLine) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 4 with VCFHeaderLine

use of htsjdk.variant.vcf.VCFHeaderLine in project gatk-protected by broadinstitute.

the class ReferenceConfidenceModel method getVCFHeaderLines.

/**
 * Returns the VCF header lines that must accompany reference confidence values emitted via
 * {@link #calculateRefConfidence}.
 *
 * @return a newly created, non-null set holding the single symbolic-allele definition line
 */
public Set<VCFHeaderLine> getVCFHeaderLines() {
    final VCFSimpleHeaderLine nonRefAlleleLine = new VCFSimpleHeaderLine(
            GATKVCFConstants.SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG,
            GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME,
            "Represents any possible alternative allele at this location");

    final Set<VCFHeaderLine> result = new LinkedHashSet<>();
    result.add(nonRefAlleleLine);
    return result;
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFSimpleHeaderLine(htsjdk.variant.vcf.VCFSimpleHeaderLine)

Example 5 with VCFHeaderLine

use of htsjdk.variant.vcf.VCFHeaderLine in project gatk by broadinstitute.

the class FilterMutectCalls method onTraversalStart.

/**
 * Builds the output VCF header from the input header's metadata, the Mutect2 filter lines
 * and the default tool header lines, then creates the VCF writer and writes that header.
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();

    // Start from the metadata already present in the input
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());

    // Filter lines looked up by name
    Mutect2FilteringEngine.M_2_FILTER_NAMES.stream()
            .map(GATKVCFHeaderLines::getFilterLine)
            .forEach(filterLine -> headerLines.add(filterLine));

    // Filter lines declared inline as {name, description} pairs, added in declaration order
    final String[][] inlineFilters = {
            { Mutect2FilteringEngine.ARTIFACT_IN_NORMAL_FILTER_NAME, "artifact_in_normal" },
            { Mutect2FilteringEngine.MEDIAN_BASE_QUALITY_DIFFERENCE_FILTER_NAME, "ref - alt median base quality" },
            { Mutect2FilteringEngine.MEDIAN_MAPPING_QUALITY_DIFFERENCE_FILTER_NAME, "ref - alt median mapping quality" },
            { Mutect2FilteringEngine.MEDIAN_CLIPPING_DIFFERENCE_FILTER_NAME, "ref - alt median clipping" },
            { Mutect2FilteringEngine.MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME, "abs(ref - alt) median fragment length" },
            { Mutect2FilteringEngine.READ_POSITION_FILTER_NAME, "median distance of alt variants from end of reads" },
            { Mutect2FilteringEngine.CONTAMINATION_FILTER_NAME, "contamination" },
    };
    for (final String[] nameAndDescription : inlineFilters) {
        headerLines.add(new VCFFilterHeaderLine(nameAndDescription[0], nameAndDescription[1]));
    }

    headerLines.addAll(getDefaultToolVCFHeaderLines());

    vcfWriter = createVCFWriter(new File(outputVcf));
    final VCFHeader outputHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter.writeHeader(outputHeader);
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File)

Aggregations

VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)19 VCFHeader (htsjdk.variant.vcf.VCFHeader)11 VCFSimpleHeaderLine (htsjdk.variant.vcf.VCFSimpleHeaderLine)5 File (java.io.File)5 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)4 Test (org.testng.annotations.Test)4 VCFContigHeaderLine (htsjdk.variant.vcf.VCFContigHeaderLine)3 HashSet (java.util.HashSet)3 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)2 VCFFilterHeaderLine (htsjdk.variant.vcf.VCFFilterHeaderLine)2 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)2 MutableLong (org.apache.commons.lang.mutable.MutableLong)2 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)1 VariantContext (htsjdk.variant.variantcontext.VariantContext)1 VCFIDHeaderLine (htsjdk.variant.vcf.VCFIDHeaderLine)1 FileNotFoundException (java.io.FileNotFoundException)1 PrintStream (java.io.PrintStream)1 HashMap (java.util.HashMap)1 CommandLineException (org.broadinstitute.barclay.argparser.CommandLineException)1