Use of htsjdk.variant.vcf.VCFHeader in the broadinstitute/gatk project:
class VariantRecalibrator, method onTraversalStart.
//---------------------------------------------------------------------------------------------------------------
//
// onTraversalStart
//
//---------------------------------------------------------------------------------------------------------------
@Override
public void onTraversalStart() {
    if (gatk3Compatibility) {
        // Temporary argument for validating the GATK4 implementation against GATK3:
        // reset the RNG and consume a single int so its initial state matches the
        // one GATK3 used, making results directly comparable.
        Utils.resetRandomGenerator();
        Utils.getRandomGenerator().nextInt();
    }

    dataManager = new VariantDataManager(new ArrayList<>(USE_ANNOTATIONS), VRAC);

    if (RSCRIPT_FILE != null && !RScriptExecutor.RSCRIPT_EXISTS) {
        Utils.warnUser(logger, String.format("Rscript not found in environment path. %s will be generated but PDF plots will not.", RSCRIPT_FILE));
    }
    if (IGNORE_INPUT_FILTERS != null) {
        ignoreInputFilterSet.addAll(IGNORE_INPUT_FILTERS);
    }

    // Open the tranches output file up front so a bad path fails fast.
    try {
        tranchesStream = new PrintStream(TRANCHES_FILE);
    } catch (final FileNotFoundException e) {
        throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
    }

    // Register every -resource input as a training set, then require that at
    // least one training set and one truth set were actually tagged.
    for (final FeatureInput<VariantContext> resourceInput : resource) {
        dataManager.addTrainingSet(new TrainingSet(resourceInput));
    }
    if (!dataManager.checkHasTrainingSet()) {
        throw new CommandLineException("No training set found! Please provide sets of known polymorphic loci marked with the training=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
    }
    if (!dataManager.checkHasTruthSet()) {
        throw new CommandLineException("No truth set found! Please provide sets of known polymorphic loci marked with the truth=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
    }

    //TODO: this should be refactored/consolidated as part of
    // https://github.com/broadinstitute/gatk/issues/2112
    // https://github.com/broadinstitute/gatk/issues/121,
    // https://github.com/broadinstitute/gatk/issues/1116 and
    // Initialize VCF header lines
    Set<VCFHeaderLine> headerLines = getDefaultToolVCFHeaderLines();
    VariantRecalibrationUtils.addVQSRStandardHeaderLines(headerLines);
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    if (hasReference()) {
        headerLines = VcfUtils.updateHeaderContigLines(headerLines, referenceArguments.getReferenceFile(), sequenceDictionary, true);
    } else if (sequenceDictionary != null) {
        headerLines = VcfUtils.updateHeaderContigLines(headerLines, null, sequenceDictionary, true);
    }

    recalWriter = createVCFWriter(new File(output));
    recalWriter.writeHeader(new VCFHeader(headerLines));

    // Pre-draw the random values consumed by the replicate sampling.
    for (int i = 0; i < REPLICATE * 2; i++) {
        replicate.add(Utils.getRandomGenerator().nextDouble());
    }
}
Use of htsjdk.variant.vcf.VCFHeader in the broadinstitute/gatk project:
class SelectVariants, method onTraversalStart.
/**
 * Sets up the VCF writer, the sample name/type inclusion lists, the JEXL
 * select-expression matchers, and the discordance/concordance/mendelian
 * analysis modes, then writes the output header.
 */
@Override
public void onTraversalStart() {
    final Map<String, VCFHeader> vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants());

    // Initialize VCF header lines
    final Set<VCFHeaderLine> headerLines = createVCFHeaderLineList(vcfHeaders);

    // The user need not supply select names for the JEXL expressions (they are
    // only needed when omitting records), so synthesize one per expression.
    for (int expressionIndex = 0; expressionIndex < selectExpressions.size(); expressionIndex++) {
        selectNames.add(String.format("select-%d", expressionIndex));
    }
    jexls = VariantContextUtils.initializeMatchExps(selectNames, selectExpressions);

    // Prepare the sample names and types used by the corresponding filters.
    samples = createSampleNameInclusionList(vcfHeaders);
    selectedTypes = createSampleTypeInclusionList();

    // Decide which analysis to perform from the supplied tracks/arguments.
    discordanceOnly = discordanceTrack != null;
    if (discordanceOnly) {
        logger.info("Selecting only variants discordant with the track: " + discordanceTrack.getName());
    }
    concordanceOnly = concordanceTrack != null;
    if (concordanceOnly) {
        logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
    }
    if (mendelianViolations) {
        sampleDB = initializeSampleDB();
        mv = new MendelianViolation(mendelianViolationQualThreshold, false, true);
    }
    selectRandomFraction = fractionRandom > 0;
    if (selectRandomFraction) {
        logger.info("Selecting approximately " + 100.0 * fractionRandom + "% of the variants at random from the variant track");
    }

    //TODO: this should be refactored/consolidated as part of
    // https://github.com/broadinstitute/gatk/issues/121 and
    // https://github.com/broadinstitute/gatk/issues/1116
    // Prefer the reference dictionary for the output contig lines; otherwise
    // fall back to the dictionary embedded in the driving variants header.
    final Set<VCFHeaderLine> actualLines;
    if (hasReference()) {
        final SAMSequenceDictionary sequenceDictionary = this.getReferenceDictionary();
        actualLines = VcfUtils.updateHeaderContigLines(headerLines, referenceArguments.getReferenceFile(), sequenceDictionary, suppressReferencePath);
    } else {
        final SAMSequenceDictionary sequenceDictionary = getHeaderForVariants().getSequenceDictionary();
        actualLines = sequenceDictionary == null
                ? headerLines
                : VcfUtils.updateHeaderContigLines(headerLines, null, sequenceDictionary, suppressReferencePath);
    }

    vcfWriter = createVCFWriter(outFile);
    vcfWriter.writeHeader(new VCFHeader(actualLines, samples));
}
Use of htsjdk.variant.vcf.VCFHeader in the broadinstitute/gatk project:
class UpdateVCFSequenceDictionary, method onTraversalStart.
@Override
public void onTraversalStart() {
    // Build the output header as a copy of the input header's metadata and
    // samples; fall back to an empty header when the input supplies none.
    final VCFHeader inputHeader = getHeaderForVariants();
    final VCFHeader outputHeader = inputHeader == null
            ? new VCFHeader()
            : new VCFHeader(inputHeader.getMetaDataInInputOrder(), inputHeader.getGenotypeSamples());
    getDefaultToolVCFHeaderLines().forEach(outputHeader::addMetaDataLine);

    sourceDictionary = getSequenceDictionaryFromInput(dictionarySource);

    // Warn and require opt-in via -replace if we're about to clobber a valid sequence
    // dictionary. Check the input file directly via the header rather than using the
    // engine, since it might dig one up from an index.
    final SAMSequenceDictionary oldDictionary = inputHeader == null ? null : inputHeader.getSequenceDictionary();
    final boolean inputHasDictionary = oldDictionary != null && !oldDictionary.getSequences().isEmpty();
    if (inputHasDictionary && !replace) {
        throw new CommandLineException.BadArgumentValue(String.format("The input variant file %s already contains a sequence dictionary. " + "Use %s to force the dictionary to be replaced.", getDrivingVariantsFeatureInput().getName(), REPLACE_ARGUMENT_NAME));
    }

    outputHeader.setSequenceDictionary(sourceDictionary);
    vcfWriter = createVCFWriter(new File(outFile));
    vcfWriter.writeHeader(outputHeader);
}
Use of htsjdk.variant.vcf.VCFHeader in the hartwigmedical/hmftools project:
class BreakPointInspectorApplication, method main.
/**
 * Entry point: inspects structural-variant breakpoints from a VCF against a
 * tumor BAM and a reference BAM, prints per-variant TSV rows, and optionally
 * writes an annotated VCF plus sliced BAMs covering the inspected intervals.
 *
 * Fix: the VCFFileReader is now closed together with the two SamReaders; the
 * original leaked it on every run.
 *
 * @param args command-line arguments (see createOptions for the option set)
 * @throws IOException if reading the BAMs or writing the slices fails
 */
public static void main(final String... args) throws IOException {
    final AnalysisBuilder analysisBuilder = new AnalysisBuilder();
    final Options options = createOptions();
    try {
        final CommandLine cmd = createCommandLine(options, args);
        final String refPath = cmd.getOptionValue(REF_PATH);
        final String refSlicePath = cmd.getOptionValue(REF_SLICE);
        final String tumorPath = cmd.getOptionValue(TUMOR_PATH);
        final String tumorSlicePath = cmd.getOptionValue(TUMOR_SLICE);
        final String vcfPath = cmd.getOptionValue(VCF);
        if (cmd.hasOption(PROXIMITY)) {
            analysisBuilder.setRange(Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500")));
        }
        if (cmd.hasOption(CONTAMINATION)) {
            analysisBuilder.setContaminationFraction(Float.parseFloat(cmd.getOptionValue(CONTAMINATION, "0")));
        }
        if (refPath == null || tumorPath == null || vcfPath == null) {
            printHelpAndExit(options);
            return;
        }
        final File tumorBAM = new File(tumorPath);
        final SamReader tumorReader = SamReaderFactory.makeDefault().open(tumorBAM);
        final File refBAM = new File(refPath);
        final SamReader refReader = SamReaderFactory.makeDefault().open(refBAM);
        final File vcfFile = new File(vcfPath);
        final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
        final List<String> samples = vcfReader.getFileHeader().getGenotypeSamples();
        if (samples.size() != 2) {
            // NOTE(review): readers are not closed here; acceptable only because
            // the JVM exits immediately.
            System.err.println("could not determine tumor and sample from VCF");
            System.exit(1);
            return;
        }
        TSVOutput.PrintHeaders();
        final Analysis analysis = analysisBuilder.setRefReader(refReader).setTumorReader(tumorReader).createAnalysis();
        final List<QueryInterval> combinedQueryIntervals = Lists.newArrayList();
        final Map<String, VariantContext> variantMap = new HashMap<>();
        final List<VariantContext> variants = Lists.newArrayList();
        for (VariantContext variant : vcfReader) {
            // Remember every variant by ID so a BND's MATEID can be resolved once
            // both ends have been seen; until then the first end is skipped.
            variantMap.put(variant.getID(), variant);
            final VariantContext mateVariant = variant;
            if (variant.hasAttribute("MATEID")) {
                variant = variantMap.get(variant.getAttributeAsString("MATEID", ""));
                if (variant == null) {
                    continue;
                }
            }
            final String location = variant.getContig() + ":" + Integer.toString(variant.getStart());
            final Location location1 = Location.parseLocationString(location, tumorReader.getFileHeader().getSequenceDictionary());
            final Range uncertainty1 = extractCIPOS(variant);
            final List<Integer> CIEND = variant.getAttributeAsIntList("CIEND", 0);
            Range uncertainty2 = CIEND.size() == 2 ? new Range(CIEND.get(0), CIEND.get(1)) : null;
            final boolean IMPRECISE = variant.hasAttribute("IMPRECISE");
            HMFVariantType svType;
            final Location location2;
            // Derive the second breakend and the HMF variant type from SVTYPE.
            switch(variant.getStructuralVariantType()) {
                case INS:
                    svType = HMFVariantType.INS;
                    location2 = location1.set(variant.getAttributeAsInt("END", 0));
                    break;
                case INV:
                    if (variant.hasAttribute("INV3")) {
                        svType = HMFVariantType.INV3;
                    } else if (variant.hasAttribute("INV5")) {
                        svType = HMFVariantType.INV5;
                    } else {
                        System.err.println(variant.getID() + " : expected either INV3 or INV5 flag");
                        continue;
                    }
                    location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
                    break;
                case DEL:
                    svType = HMFVariantType.DEL;
                    location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
                    break;
                case DUP:
                    svType = HMFVariantType.DUP;
                    location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
                    break;
                case BND:
                    // Process the breakend ALT string; bracket direction plus the
                    // presence of inserted bases determines orientation/type.
                    final String call = variant.getAlternateAllele(0).getDisplayString();
                    final String[] leftSplit = call.split("\\]");
                    final String[] rightSplit = call.split("\\[");
                    if (leftSplit.length >= 2) {
                        location2 = Location.parseLocationString(leftSplit[1], tumorReader.getFileHeader().getSequenceDictionary());
                        if (leftSplit[0].length() > 0) {
                            svType = HMFVariantType.INV3;
                            uncertainty2 = Range.invert(uncertainty1);
                        } else {
                            svType = HMFVariantType.DUP;
                            uncertainty2 = uncertainty1;
                        }
                    } else if (rightSplit.length >= 2) {
                        location2 = Location.parseLocationString(rightSplit[1], tumorReader.getFileHeader().getSequenceDictionary());
                        if (rightSplit[0].length() > 0) {
                            svType = HMFVariantType.DEL;
                            uncertainty2 = uncertainty1;
                        } else {
                            svType = HMFVariantType.INV5;
                            uncertainty2 = Range.invert(uncertainty1);
                        }
                    } else {
                        System.err.println(variant.getID() + " : could not parse breakpoint");
                        continue;
                    }
                    if (IMPRECISE) {
                        uncertainty2 = extractCIPOS(mateVariant);
                    }
                    break;
                default:
                    System.err.println(variant.getID() + " : UNEXPECTED SVTYPE=" + variant.getStructuralVariantType());
                    continue;
            }
            final HMFVariantContext ctx = new HMFVariantContext(variant.getID(), location1, location2, svType, IMPRECISE);
            // Carry over all non-BPI filters; BPI's own filters are recomputed below.
            ctx.Filter.addAll(variant.getFilters().stream().filter(s -> !s.startsWith("BPI")).collect(Collectors.toSet()));
            ctx.Uncertainty1 = uncertainty1;
            ctx.Uncertainty2 = ObjectUtils.firstNonNull(uncertainty2, fixup(uncertainty1, IMPRECISE, svType == HMFVariantType.INV3 || svType == HMFVariantType.INV5));
            ctx.HomologySequence = variant.getAttributeAsString("HOMSEQ", "");
            if (variant.hasAttribute("LEFT_SVINSSEQ") && variant.hasAttribute("RIGHT_SVINSSEQ")) {
                ctx.InsertSequence = variant.getAttributeAsString("LEFT_SVINSSEQ", "") + "..." + variant.getAttributeAsString("RIGHT_SVINSSEQ", "");
            } else {
                ctx.InsertSequence = variant.getAttributeAsString("SVINSSEQ", "");
            }
            ctx.BND = variant.getStructuralVariantType() == StructuralVariantType.BND;
            // Breakend orientations per type: +1 = forward strand, -1 = reverse.
            switch(ctx.Type) {
                case INS:
                case DEL:
                    ctx.OrientationBP1 = 1;
                    ctx.OrientationBP2 = -1;
                    break;
                case INV3:
                    ctx.OrientationBP1 = 1;
                    ctx.OrientationBP2 = 1;
                    break;
                case INV5:
                    ctx.OrientationBP1 = -1;
                    ctx.OrientationBP2 = -1;
                    break;
                case DUP:
                    ctx.OrientationBP1 = -1;
                    ctx.OrientationBP2 = 1;
                    break;
            }
            final StructuralVariantResult result = analysis.processStructuralVariant(ctx);
            combinedQueryIntervals.addAll(asList(result.QueryIntervals));
            TSVOutput.print(variant, ctx, result);
            // Applies the analysis result back onto a VariantContext; `swap` is true
            // for the mate so START/END and allele-frequency order are mirrored.
            final BiConsumer<VariantContext, Boolean> vcfUpdater = (v, swap) -> {
                final Set<String> filters = v.getCommonInfo().getFiltersMaybeNull();
                if (filters != null) {
                    filters.clear();
                }
                // we will map BreakpointError to a flag
                if (result.Filters.contains(Filter.Filters.BreakpointError.toString())) {
                    v.getCommonInfo().putAttribute("BPI_AMBIGUOUS", true, true);
                } else {
                    v.getCommonInfo().addFilters(result.Filters);
                }
                if (result.Filters.isEmpty()) {
                    final List<Double> af = asList(result.AlleleFrequency.getLeft(), result.AlleleFrequency.getRight());
                    v.getCommonInfo().putAttribute(AlleleFrequency.VCF_INFO_TAG, swap ? Lists.reverse(af) : af, true);
                }
                if (result.Breakpoints.getLeft() != null) {
                    v.getCommonInfo().putAttribute(swap ? "BPI_END" : "BPI_START", result.Breakpoints.getLeft().Position, true);
                }
                if (result.Breakpoints.getRight() != null) {
                    v.getCommonInfo().putAttribute(swap ? "BPI_START" : "BPI_END", result.Breakpoints.getRight().Position, true);
                }
                // remove CIPOS / CIEND when we have an insert sequence
                if (!v.hasAttribute("IMPRECISE") && v.hasAttribute("SVINSSEQ")) {
                    v.getCommonInfo().removeAttribute("CIPOS");
                    v.getCommonInfo().removeAttribute("CIEND");
                }
                variants.add(v);
            };
            vcfUpdater.accept(variant, false);
            if (mateVariant != variant) {
                vcfUpdater.accept(mateVariant, true);
            }
        }
        // TODO: update START, END with BPI values and save Manta values in new attributes
        final String vcfOutputPath = cmd.getOptionValue(VCF_OUT);
        if (vcfOutputPath != null) {
            // Extend the input header with the BPI annotations before writing.
            final VCFHeader header = vcfReader.getFileHeader();
            header.addMetaDataLine(new VCFInfoHeaderLine("BPI_START", 1, VCFHeaderLineType.Integer, "BPI adjusted breakend location"));
            header.addMetaDataLine(new VCFInfoHeaderLine("BPI_END", 1, VCFHeaderLineType.Integer, "BPI adjusted breakend location"));
            header.addMetaDataLine(new VCFInfoHeaderLine("BPI_AMBIGUOUS", 0, VCFHeaderLineType.Flag, "BPI could not determine the breakpoints, inspect manually"));
            header.addMetaDataLine(new VCFHeaderLine("bpiVersion", BreakPointInspectorApplication.class.getPackage().getImplementationVersion()));
            Filter.UpdateVCFHeader(header);
            AlleleFrequency.UpdateVCFHeader(header);
            // setup VCF
            final VariantContextWriter writer = new VariantContextWriterBuilder().setReferenceDictionary(header.getSequenceDictionary()).setOutputFile(vcfOutputPath).build();
            writer.writeHeader(header);
            // write variants
            variants.sort(new VariantContextComparator(header.getSequenceDictionary()));
            variants.forEach(writer::add);
            writer.close();
        }
        final QueryInterval[] optimizedIntervals = QueryInterval.optimizeIntervals(combinedQueryIntervals.toArray(new QueryInterval[combinedQueryIntervals.size()]));
        if (tumorSlicePath != null) {
            writeToSlice(tumorSlicePath, tumorReader, optimizedIntervals);
        }
        if (refSlicePath != null) {
            writeToSlice(refSlicePath, refReader, optimizedIntervals);
        }
        // Close the VCF reader along with the BAM readers (previously leaked).
        vcfReader.close();
        refReader.close();
        tumorReader.close();
    } catch (ParseException e) {
        printHelpAndExit(options);
        System.exit(1);
    }
}
Use of htsjdk.variant.vcf.VCFHeader in the hartwigmedical/hmftools project:
class MNVDetectorApplication, method processVariants.
/**
 * Streams variants from {@code filePath}, accumulates candidate MNV regions,
 * and writes each filtered region to {@code outputVcf} and {@code outputBed}.
 * When {@code strelka} is set, the output header is regenerated for the tumor
 * genotype and each variant is first simplified via StrelkaPostProcess.
 *
 * Fix: reader and writers are now managed with try-with-resources so they are
 * closed even if an exception is thrown mid-processing; the original closed
 * them only on the success path.
 *
 * @param filePath  input VCF to scan
 * @param outputVcf path of the VCF to write detected regions to
 * @param outputBed path of the BED file to write region intervals to
 * @param strelka   whether to apply Strelka-specific simplification
 * @throws IOException if the BED file cannot be opened or written
 */
private static void processVariants(@NotNull final String filePath, @NotNull final String outputVcf, @NotNull final String outputBed, boolean strelka) throws IOException {
    try (final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false);
         final BufferedWriter bedWriter = new BufferedWriter(new FileWriter(outputBed, false))) {
        final VCFHeader outputHeader = strelka
                ? generateOutputHeader(vcfReader.getFileHeader(), StrelkaPostProcess.TUMOR_GENOTYPE)
                : vcfReader.getFileHeader();
        try (final VariantContextWriter vcfWriter = new VariantContextWriterBuilder()
                .setOutputFile(outputVcf)
                .setReferenceDictionary(outputHeader.getSequenceDictionary())
                .build()) {
            vcfWriter.writeHeader(outputHeader);
            // Pair state: left = the region currently being extended,
            // right = a just-completed region (if any) ready to be filtered/written.
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
            for (final VariantContext rawVariant : vcfReader) {
                final VariantContext variant = strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;
                final PotentialMNVRegion potentialMNVregion = outputPair.getLeft();
                outputPair = MNVDetector.fitsMNVRegion(potentialMNVregion, variant);
                outputPair.getRight().ifPresent(mnvRegion -> filterMnvRegion(mnvRegion).ifPresent(filteredRegion -> writeMnvRegionToFiles(filteredRegion, vcfWriter, bedWriter, "\n")));
            }
            // Flush the final in-progress region; no trailing newline for the last entry.
            filterMnvRegion(outputPair.getLeft()).ifPresent(mnvRegion -> writeMnvRegionToFiles(mnvRegion, vcfWriter, bedWriter, ""));
        }
    }
    LOGGER.info("Written output variants to {}. Written bed regions to {}.", outputVcf, outputBed);
}
Aggregations