Use of htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder in project gatk by broadinstitute.
From the class SortVcf, method writeSortedOutput.
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(logger, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
    final VariantContextWriter out = new VariantContextWriterBuilder()
            .setReferenceDictionary(outputHeader.getSequenceDictionary())
            .setOptions(options)
            .setOutputFile(OUTPUT)
            .build();
    out.writeHeader(outputHeader);
    for (final VariantContext variantContext : sortedOutput) {
        out.add(variantContext);
        writeProgress.record(variantContext.getContig(), variantContext.getStart());
    }
    out.close();
}
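A minimal, self-contained sketch of the same pattern outside of SortVcf (the class and method names here are hypothetical): an EnumSet of Options toggles on-the-fly indexing, and the writer is closed via try-with-resources instead of an explicit close().

import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFHeader;

import java.io.File;
import java.util.EnumSet;

public final class SortedVcfWriteSketch {
    // Hypothetical helper: writes the given records to 'output', indexing on the fly if requested.
    static void writeAll(final VCFHeader header, final Iterable<VariantContext> records,
                         final File output, final boolean createIndex) {
        final EnumSet<Options> options = createIndex
                ? EnumSet.of(Options.INDEX_ON_THE_FLY)
                : EnumSet.noneOf(Options.class);
        try (final VariantContextWriter out = new VariantContextWriterBuilder()
                .setReferenceDictionary(header.getSequenceDictionary())
                .setOptions(options)
                .setOutputFile(output)
                .build()) {
            out.writeHeader(header);
            for (final VariantContext vc : records) {
                out.add(vc);
            }
        }
    }
}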
Use of htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder in project gatk by broadinstitute.
From the class CalculateGenotypePosteriors, method onTraversalStart.
@Override
public void onTraversalStart() {
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(out)
            .setOutputFileType(VariantContextWriterBuilder.OutputType.VCF);
    if (hasReference()) {
        vcfWriter = builder.setReferenceDictionary(getBestAvailableSequenceDictionary()).setOption(Options.INDEX_ON_THE_FLY).build();
    } else {
        vcfWriter = builder.unsetOption(Options.INDEX_ON_THE_FLY).build();
        logger.info("Can't make an index for output file " + out + " because a reference dictionary is required for creating Tribble indices on the fly");
    }
    sampleDB = initializeSampleDB();
    // Get the list of samples to include in the output
    final Map<String, VCFHeader> vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants());
    final Set<String> vcfSamples = VcfUtils.getSortedSampleSet(vcfHeaders, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
    // Get the trios from the families passed as a PED file
    if (!skipFamilyPriors) {
        final Set<Trio> trios = sampleDB.getTrios();
        if (trios.isEmpty()) {
            logger.info("No PED file passed or no *non-skipped* trios found in PED file. Skipping family priors.");
            skipFamilyPriors = true;
        }
    }
    final VCFHeader header = vcfHeaders.values().iterator().next();
    if (!header.hasGenotypingData()) {
        throw new UserException("VCF has no genotypes");
    }
    if (header.hasInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY)) {
        final VCFInfoHeaderLine mleLine = header.getInfoHeaderLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY);
        if (mleLine.getCountType() != VCFHeaderLineCount.A) {
            throw new UserException("VCF does not have a properly formatted MLEAC field: the count type should be \"A\"");
        }
        if (mleLine.getType() != VCFHeaderLineType.Integer) {
            throw new UserException("VCF does not have a properly formatted MLEAC field: the field type should be \"Integer\"");
        }
    }
    // Initialize the VCF header
    final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfHeaders.values(), true);
    headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.GENOTYPE_PRIOR_KEY));
    if (!skipFamilyPriors) {
        headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.JOINT_LIKELIHOOD_TAG_NAME));
        headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.JOINT_POSTERIOR_TAG_NAME));
    }
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples));
    final Map<String, Set<Sample>> families = sampleDB.getFamilies(vcfSamples);
    famUtils = new FamilyLikelihoods(sampleDB, deNovoPrior, vcfSamples, families);
}
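A minimal sketch of the conditional-indexing pattern above, stripped of the GATK walker machinery; the null check on the dictionary stands in for hasReference(), and all names here are placeholders rather than GATK code.

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;

import java.io.File;

public final class ConditionalIndexingSketch {
    // Hypothetical helper: on-the-fly Tribble indexing is only enabled when a
    // sequence dictionary is available; otherwise the option is explicitly unset.
    static VariantContextWriter openVcfWriter(final File out, final SAMSequenceDictionary dictionary) {
        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setOutputFile(out)
                .setOutputFileType(VariantContextWriterBuilder.OutputType.VCF);
        return dictionary != null
                ? builder.setReferenceDictionary(dictionary).setOption(Options.INDEX_ON_THE_FLY).build()
                : builder.unsetOption(Options.INDEX_ON_THE_FLY).build();
    }
}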
Use of htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder in project gatk by broadinstitute.
From the class GATKVariantContextUtils, method createVCFWriter.
/**
 * Creates a VariantContextWriter whose output file type is based on the extension of the output file name.
 * The default options set by VariantContextWriter are cleared before applying ALLOW_MISSING_FIELDS_IN_HEADER (if
 * <code>lenientProcessing</code> is set), followed by the set of options specified by any <code>options</code> args.
 *
 * @param outFile output File for this writer. May not be null.
 * @param referenceDictionary required if on-the-fly indexing is set, otherwise may be null
 * @param createMD5 true if an MD5 digest file should be created
 * @param options variable-length list of additional Options to be set for this writer
 * @return a VariantContextWriter that must be closed by the caller
 */
public static VariantContextWriter createVCFWriter(final File outFile, final SAMSequenceDictionary referenceDictionary, final boolean createMD5, final Options... options) {
    Utils.nonNull(outFile);
    VariantContextWriterBuilder vcWriterBuilder = new VariantContextWriterBuilder().clearOptions().setOutputFile(outFile);
    if (VariantContextWriterBuilder.OutputType.UNSPECIFIED == getVariantFileTypeFromExtension(outFile)) {
        // The only way the user can specify an output type is by file extension, and htsjdk
        // throws if it can't map the file extension to a known VCF type, so fall back to a
        // default of VCF.
        logger.warn(String.format("Can't determine output variant file format from output file extension \"%s\". Defaulting to VCF.", FilenameUtils.getExtension(outFile.getPath())));
        vcWriterBuilder = vcWriterBuilder.setOutputFileType(VariantContextWriterBuilder.OutputType.VCF);
    }
    if (createMD5) {
        vcWriterBuilder.setCreateMD5();
    }
    if (null != referenceDictionary) {
        vcWriterBuilder = vcWriterBuilder.setReferenceDictionary(referenceDictionary);
    }
    for (final Options opt : options) {
        vcWriterBuilder = vcWriterBuilder.setOption(opt);
    }
    return vcWriterBuilder.build();
}
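A hypothetical caller of createVCFWriter, assuming the GATK4 package org.broadinstitute.hellbender.utils.variant for the import; the output name, header, and dictionary are placeholders. The .vcf.gz extension should select a block-compressed writer, and INDEX_ON_THE_FLY requires a non-null dictionary.

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.io.File;

public final class CreateVcfWriterUsage {
    // Hypothetical caller: the header and dictionary are supplied by the caller,
    // no MD5 file is requested, and on-the-fly indexing is enabled.
    static void writeHeaderOnly(final VCFHeader header, final SAMSequenceDictionary dictionary) {
        try (final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(
                new File("example.vcf.gz"), dictionary, false, Options.INDEX_ON_THE_FLY)) {
            writer.writeHeader(header);
        }
    }
}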
Use of htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder in project gatk-protected by broadinstitute.
From the class EvaluateCopyNumberTriStateCalls, method openVCFWriter.
private VariantContextWriter openVCFWriter(final File outputFile, final Set<String> samples) {
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder();
    builder.setOutputFile(outputFile);
    builder.clearOptions();
    final VariantContextWriter result = builder.build();
    final VCFHeader header = new VCFHeader(Collections.emptySet(), samples);
    CopyNumberTriStateAllele.addHeaderLinesTo(header);
    EvaluationClass.addHeaderLinesTo(header);
    // Format annotations.
    header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.Character, "Called genotype"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALL_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Quality of the call"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of called segments that overlap with the truth"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Called allele count for mixed calls"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, 1, VCFHeaderLineType.Float, "Estimated truth copy fraction"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Truth call quality"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.EVALUATION_CLASS_KEY, 1, VCFHeaderLineType.Character, "The evaluation class for the call or lack of call. It takes the values described by the header key '" + EvaluationClass.VCF_HEADER_KEY + "'"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, 1, VCFHeaderLineType.Character, "The truth genotype"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of targets covered by called segments"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALL_QUALITY_KEY, 1, VCFHeaderLineType.Float, "1 - the probability of the event, in Phred scale (the maximum if there is more than one segment)"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "The quality of the call (the maximum if there is more than one segment)"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Character, "Genotype filters"));
    // Info annotations.
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "The frequency of the alternative alleles in the truth callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of called alleles in the truth callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "The frequency of the alternative alleles in the actual callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of called alleles in the actual callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_TARGET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of targets overlapped by this variant"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position for the variant"));
    // Filter annotations.
    for (final EvaluationFilter filter : EvaluationFilter.values()) {
        header.addMetaDataLine(new VCFFilterHeaderLine(filter.name(), filter.description));
        header.addMetaDataLine(new VCFFilterHeaderLine(filter.acronym, filter.description));
    }
    header.addMetaDataLine(new VCFFilterHeaderLine(EvaluationFilter.PASS, "Indicates that the call passes all filters"));
    result.writeHeader(header);
    return result;
}
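A minimal sketch of the clearOptions() pattern used above: with all default options cleared, neither a reference dictionary nor on-the-fly indexing is required to build the writer. The helper name and the single FORMAT line below are placeholders, not part of the tool.

import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLineType;

import java.io.File;
import java.util.Collections;
import java.util.Set;

public final class PlainVcfWriterSketch {
    // Hypothetical helper: open a VCF writer with no default options, add one
    // placeholder FORMAT line, and write the header before returning the writer.
    static VariantContextWriter open(final File outputFile, final Set<String> samples) {
        final VariantContextWriter writer = new VariantContextWriterBuilder()
                .setOutputFile(outputFile)
                .clearOptions()
                .build();
        final VCFHeader header = new VCFHeader(Collections.emptySet(), samples);
        header.addMetaDataLine(new VCFFormatHeaderLine(
                VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
        writer.writeHeader(header);
        return writer;
    }
}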
Use of htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder in project gatk by broadinstitute.
From the class GatherVcfs, method gatherConventionally.
/** Code for gathering multiple VCFs that works regardless of input format and output format, but can be slow. */
private static void gatherConventionally(final SAMSequenceDictionary sequenceDictionary, final boolean createIndex, final List<Path> inputFiles, final File outputFile, final int cloudPrefetchBuffer) {
    final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
    if (createIndex) {
        options.add(Options.INDEX_ON_THE_FLY);
    } else {
        options.remove(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter out = new VariantContextWriterBuilder()
            .setOutputFile(outputFile)
            .setReferenceDictionary(sequenceDictionary)
            .setOptions(options)
            .build()) {
        final ProgressLogger progress = new ProgressLogger(log, 10000);
        VariantContext lastContext = null;
        Path lastFile = null;
        VCFHeader firstHeader = null;
        VariantContextComparator comparator = null;
        for (final Path f : inputFiles) {
            try {
                log.debug("Gathering from file: ", f.toUri().toString());
                final FeatureReader<VariantContext> variantReader = getReaderFromVCFUri(f, cloudPrefetchBuffer);
                final PeekableIterator<VariantContext> variantIterator = new PeekableIterator<>(variantReader.iterator());
                final VCFHeader header = (VCFHeader) variantReader.getHeader();
                if (firstHeader == null) {
                    firstHeader = header;
                    out.writeHeader(firstHeader);
                    comparator = new VariantContextComparator(firstHeader.getContigLines());
                }
                if (lastContext != null && variantIterator.hasNext()) {
                    final VariantContext vc = variantIterator.peek();
                    if (comparator.compare(vc, lastContext) <= 0) {
                        throw new IllegalStateException("First variant in file " + f.toUri().toString() + " is at " + vc.getSource() + " but last variant in earlier file " + lastFile.toUri().toString() + " is at " + lastContext.getSource());
                    }
                }
                while (variantIterator.hasNext()) {
                    lastContext = variantIterator.next();
                    out.add(lastContext);
                    progress.record(lastContext.getContig(), lastContext.getStart());
                }
                lastFile = f;
                CloserUtil.close(variantIterator);
                CloserUtil.close(variantReader);
            } catch (final IOException e) {
                throw new UserException.CouldNotReadInputFile(f, e.getMessage(), e);
            }
        }
    }
}
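A minimal sketch of the options handling used above: start from the builder's DEFAULT_OPTIONS and explicitly toggle INDEX_ON_THE_FLY before building. The helper name is a placeholder and the dictionary is supplied by a hypothetical caller.

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;

import java.io.File;
import java.util.EnumSet;

public final class GatherWriterSketch {
    // Hypothetical helper: copy the builder's default options, then add or remove
    // on-the-fly indexing before building the writer.
    static VariantContextWriter open(final File outputFile, final SAMSequenceDictionary dictionary,
                                     final boolean createIndex) {
        final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
        if (createIndex) {
            options.add(Options.INDEX_ON_THE_FLY);
        } else {
            options.remove(Options.INDEX_ON_THE_FLY);
        }
        return new VariantContextWriterBuilder()
                .setOutputFile(outputFile)
                .setReferenceDictionary(dictionary)
                .setOptions(options)
                .build();
    }
}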