Use of htsjdk.variant.vcf.VCFHeaderLine in project gatk by broadinstitute.
The class VariantRecalibrator, method onTraversalStart.
//---------------------------------------------------------------------------------------------------------------
//
// onTraversalStart
//
//---------------------------------------------------------------------------------------------------------------
/**
 * Prepares the tool before traversal starts.
 *
 * <p>In order, this method: optionally re-seeds the shared random generator (GATK3 compatibility mode),
 * creates the {@code VariantDataManager}, warns when an Rscript file was requested but Rscript is not
 * available, registers every {@code resource} input as a training set and verifies that at least one
 * training set and one truth set exist, opens the tranches output stream, writes the recalibration VCF
 * header, and finally draws {@code REPLICATE * 2} random doubles into {@code replicate}.
 *
 * <p>The resources are validated <em>before</em> any output file is opened, so a missing training or
 * truth set does not leave an open stream or an empty tranches file behind.
 *
 * @throws CommandLineException if no training set or no truth set was supplied
 * @throws UserException.CouldNotCreateOutputFile if the tranches file cannot be opened for writing
 */
@Override
public void onTraversalStart() {
    if (gatk3Compatibility) {
        // Temporary argument for validation of GATK4 implementation against GATK3 results:
        // Reset the RNG and draw a single int to align the RNG initial state with that used
        // by GATK3 to allow comparison of results with GATK3
        Utils.resetRandomGenerator();
        Utils.getRandomGenerator().nextInt();
    }

    dataManager = new VariantDataManager(new ArrayList<>(USE_ANNOTATIONS), VRAC);

    if (RSCRIPT_FILE != null && !RScriptExecutor.RSCRIPT_EXISTS) {
        Utils.warnUser(logger, String.format("Rscript not found in environment path. %s will be generated but PDF plots will not.", RSCRIPT_FILE));
    }

    if (IGNORE_INPUT_FILTERS != null) {
        ignoreInputFilterSet.addAll(IGNORE_INPUT_FILTERS);
    }

    // Register and validate the resources first: failing here must not leak an already-opened output stream.
    for (FeatureInput<VariantContext> variantFeature : resource) {
        dataManager.addTrainingSet(new TrainingSet(variantFeature));
    }
    if (!dataManager.checkHasTrainingSet()) {
        throw new CommandLineException("No training set found! Please provide sets of known polymorphic loci marked with the training=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
    }
    if (!dataManager.checkHasTruthSet()) {
        throw new CommandLineException("No truth set found! Please provide sets of known polymorphic loci marked with the truth=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
    }

    try {
        tranchesStream = new PrintStream(TRANCHES_FILE);
    } catch (FileNotFoundException e) {
        throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
    }

    // TODO: this should be refactored/consolidated as part of
    // https://github.com/broadinstitute/gatk/issues/2112,
    // https://github.com/broadinstitute/gatk/issues/121 and
    // https://github.com/broadinstitute/gatk/issues/1116

    // Initialize VCF header lines
    Set<VCFHeaderLine> hInfo = getDefaultToolVCFHeaderLines();
    VariantRecalibrationUtils.addVQSRStandardHeaderLines(hInfo);
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    if (hasReference()) {
        hInfo = VcfUtils.updateHeaderContigLines(hInfo, referenceArguments.getReferenceFile(), sequenceDictionary, true);
    } else if (null != sequenceDictionary) {
        // No reference file: contig lines are derived from the sequence dictionary alone.
        hInfo = VcfUtils.updateHeaderContigLines(hInfo, null, sequenceDictionary, true);
    }

    recalWriter = createVCFWriter(new File(output));
    recalWriter.writeHeader(new VCFHeader(hInfo));

    // Pre-draw the random values stored in `replicate`; this stays last so the RNG call order is unchanged.
    for (int iii = 0; iii < REPLICATE * 2; iii++) {
        replicate.add(Utils.getRandomGenerator().nextDouble());
    }
}
Use of htsjdk.variant.vcf.VCFHeaderLine in project gatk by broadinstitute.
The class SelectVariants, method onTraversalStart.
/**
 * Initializes traversal state: builds the output header lines, generates names for the JEXL select
 * expressions and compiles them, computes the sample and variant-type inclusion lists, records which
 * optional analyses (discordance, concordance, Mendelian violations, random fraction) are active,
 * and finally creates the VCF writer and writes its header.
 */
@Override
public void onTraversalStart() {
    final String drivingInputName = getDrivingVariantsFeatureInput().getName();
    final Map<String, VCFHeader> headersByInputName = Collections.singletonMap(drivingInputName, getHeaderForVariants());

    // Header lines before any contig-line adjustment
    final Set<VCFHeaderLine> baseHeaderLines = createVCFHeaderLineList(headersByInputName);

    // Users need not name their JEXL expressions (they are only used to omit records),
    // so synthesize one name per expression.
    for (int exprIndex = 0; exprIndex < selectExpressions.size(); exprIndex++) {
        selectNames.add(String.format("select-%d", exprIndex));
    }
    jexls = VariantContextUtils.initializeMatchExps(selectNames, selectExpressions);

    // Sample names and variant types consumed by the corresponding filters
    samples = createSampleNameInclusionList(headersByInputName);
    selectedTypes = createSampleTypeInclusionList();

    // Work out which analyses were requested
    discordanceOnly = (discordanceTrack != null);
    if (discordanceOnly) {
        logger.info("Selecting only variants discordant with the track: " + discordanceTrack.getName());
    }

    concordanceOnly = (concordanceTrack != null);
    if (concordanceOnly) {
        logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
    }

    if (mendelianViolations) {
        sampleDB = initializeSampleDB();
        mv = new MendelianViolation(mendelianViolationQualThreshold, false, true);
    }

    selectRandomFraction = (fractionRandom > 0);
    if (selectRandomFraction) {
        logger.info("Selecting approximately " + 100.0 * fractionRandom + "% of the variants at random from the variant track");
    }

    //TODO: this should be refactored/consolidated as part of
    // https://github.com/broadinstitute/gatk/issues/121 and
    // https://github.com/broadinstitute/gatk/issues/1116
    final Set<VCFHeaderLine> finalHeaderLines;
    if (hasReference()) {
        // A reference is available: use its file and dictionary for the contig lines.
        final File referenceFile = referenceArguments.getReferenceFile();
        final SAMSequenceDictionary referenceDictionary = this.getReferenceDictionary();
        finalHeaderLines = VcfUtils.updateHeaderContigLines(baseHeaderLines, referenceFile, referenceDictionary, suppressReferencePath);
    } else {
        // No reference: fall back to the dictionary carried by the input header, when it has one.
        final SAMSequenceDictionary variantsDictionary = getHeaderForVariants().getSequenceDictionary();
        finalHeaderLines = (variantsDictionary == null)
                ? baseHeaderLines
                : VcfUtils.updateHeaderContigLines(baseHeaderLines, null, variantsDictionary, suppressReferencePath);
    }

    vcfWriter = createVCFWriter(outFile);
    vcfWriter.writeHeader(new VCFHeader(finalHeaderLines, samples));
}
Use of htsjdk.variant.vcf.VCFHeaderLine in project gatk by broadinstitute.
The class ReferenceConfidenceModelUnitTest, method testGetHeaderLines.
/**
 * Checks that the model reports exactly one header line and that it is the symbolic-allele
 * definition line for the non-ref allele.
 */
@Test
public void testGetHeaderLines() throws Exception {
    final VCFSimpleHeaderLine expectedNonRefLine = new VCFSimpleHeaderLine(
            GATKVCFConstants.SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG,
            GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME,
            "Represents any possible alternative allele at this location");

    final Set<VCFHeaderLine> actualLines = model.getVCFHeaderLines();

    Assert.assertEquals(actualLines.size(), 1);
    Assert.assertEquals(actualLines.iterator().next(), expectedNonRefLine);
}
Use of htsjdk.variant.vcf.VCFHeaderLine in project gatk-protected by broadinstitute.
The class ReferenceConfidenceModel, method getVCFHeaderLines.
/**
 * Builds the VCF header lines that accompany reference confidence values emitted via
 * {@link #calculateRefConfidence}.
 *
 * <p>The returned set holds a single line: the symbolic-allele definition for the non-ref allele.
 *
 * @return a non-null, newly created set of VCFHeaderLines
 */
public Set<VCFHeaderLine> getVCFHeaderLines() {
    final VCFSimpleHeaderLine nonRefAlleleLine = new VCFSimpleHeaderLine(
            GATKVCFConstants.SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG,
            GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME,
            "Represents any possible alternative allele at this location");

    final Set<VCFHeaderLine> result = new LinkedHashSet<>();
    result.add(nonRefAlleleLine);
    return result;
}
Use of htsjdk.variant.vcf.VCFHeaderLine in project gatk by broadinstitute.
The class FilterMutectCalls, method onTraversalStart.
/**
 * Builds the output VCF header and opens the writer.
 *
 * <p>The header starts from the input header's metadata, then gains the filter lines for
 * {@code Mutect2FilteringEngine.M_2_FILTER_NAMES}, the explicitly described filter lines listed below,
 * and the tool's default header lines. The genotype samples are copied from the input header.
 */
@Override
public void onTraversalStart() {
    final VCFHeader sourceHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> outputLines = new HashSet<>(sourceHeader.getMetaDataInSortedOrder());

    // Filter lines whose definitions come from GATKVCFHeaderLines
    Mutect2FilteringEngine.M_2_FILTER_NAMES.stream()
            .forEach(filterName -> outputLines.add(GATKVCFHeaderLines.getFilterLine(filterName)));

    // Filter lines described inline: { filter name, description }, added in this order
    final String[][] describedFilters = {
            {Mutect2FilteringEngine.ARTIFACT_IN_NORMAL_FILTER_NAME, "artifact_in_normal"},
            {Mutect2FilteringEngine.MEDIAN_BASE_QUALITY_DIFFERENCE_FILTER_NAME, "ref - alt median base quality"},
            {Mutect2FilteringEngine.MEDIAN_MAPPING_QUALITY_DIFFERENCE_FILTER_NAME, "ref - alt median mapping quality"},
            {Mutect2FilteringEngine.MEDIAN_CLIPPING_DIFFERENCE_FILTER_NAME, "ref - alt median clipping"},
            {Mutect2FilteringEngine.MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME, "abs(ref - alt) median fragment length"},
            {Mutect2FilteringEngine.READ_POSITION_FILTER_NAME, "median distance of alt variants from end of reads"},
            {Mutect2FilteringEngine.CONTAMINATION_FILTER_NAME, "contamination"},
    };
    for (final String[] nameAndDescription : describedFilters) {
        outputLines.add(new VCFFilterHeaderLine(nameAndDescription[0], nameAndDescription[1]));
    }

    outputLines.addAll(getDefaultToolVCFHeaderLines());

    final VCFHeader outputHeader = new VCFHeader(outputLines, sourceHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(new File(outputVcf));
    vcfWriter.writeHeader(outputHeader);
}
Aggregations