Search in sources :

Example 6 with VariantContextWriterBuilder

use of htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder in project gatk by broadinstitute.

the class SortVcf method writeSortedOutput.

private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(logger, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
    final VariantContextWriter out = new VariantContextWriterBuilder().setReferenceDictionary(outputHeader.getSequenceDictionary()).setOptions(options).setOutputFile(OUTPUT).build();
    out.writeHeader(outputHeader);
    for (final VariantContext variantContext : sortedOutput) {
        out.add(variantContext);
        writeProgress.record(variantContext.getContig(), variantContext.getStart());
    }
    out.close();
}
Also used : Options(htsjdk.variant.variantcontext.writer.Options) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter)

Example 7 with VariantContextWriterBuilder

use of htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder in project gatk by broadinstitute.

the class CalculateGenotypePosteriors method onTraversalStart.

@Override
public void onTraversalStart() {
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(out).setOutputFileType(VariantContextWriterBuilder.OutputType.VCF);
    if (hasReference()) {
        vcfWriter = builder.setReferenceDictionary(getBestAvailableSequenceDictionary()).setOption(Options.INDEX_ON_THE_FLY).build();
    } else {
        vcfWriter = builder.unsetOption(Options.INDEX_ON_THE_FLY).build();
        logger.info("Can't make an index for output file " + out + " because a reference dictionary is required for creating Tribble indices on the fly");
    }
    sampleDB = initializeSampleDB();
    // Get list of samples to include in the output
    final Map<String, VCFHeader> vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants());
    final Set<String> vcfSamples = VcfUtils.getSortedSampleSet(vcfHeaders, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
    //Get the trios from the families passed as ped
    if (!skipFamilyPriors) {
        final Set<Trio> trios = sampleDB.getTrios();
        if (trios.isEmpty()) {
            logger.info("No PED file passed or no *non-skipped* trios found in PED file. Skipping family priors.");
            skipFamilyPriors = true;
        }
    }
    final VCFHeader header = vcfHeaders.values().iterator().next();
    if (!header.hasGenotypingData()) {
        throw new UserException("VCF has no genotypes");
    }
    if (header.hasInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY)) {
        final VCFInfoHeaderLine mleLine = header.getInfoHeaderLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY);
        if (mleLine.getCountType() != VCFHeaderLineCount.A) {
            throw new UserException("VCF does not have a properly formatted MLEAC field: the count type should be \"A\"");
        }
        if (mleLine.getType() != VCFHeaderLineType.Integer) {
            throw new UserException("VCF does not have a properly formatted MLEAC field: the field type should be \"Integer\"");
        }
    }
    // Initialize VCF header
    final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfHeaders.values(), true);
    headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.GENOTYPE_PRIOR_KEY));
    if (!skipFamilyPriors) {
        headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.JOINT_LIKELIHOOD_TAG_NAME));
        headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.JOINT_POSTERIOR_TAG_NAME));
    }
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples));
    final Map<String, Set<Sample>> families = sampleDB.getFamilies(vcfSamples);
    famUtils = new FamilyLikelihoods(sampleDB, deNovoPrior, vcfSamples, families);
}
Also used : VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 8 with VariantContextWriterBuilder

use of htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder in project gatk by broadinstitute.

the class GATKVariantContextUtils method createVCFWriter.

/**
     * Creates a VariantContextWriter whose outputFile type is based on the extension of the output file name.
     * The default options set by VariantContextWriter are cleared before applying ALLOW_MISSING_FIELDS_IN_HEADER (if
     * <code>lenientProcessing</code> is set), followed by the set of options specified by any <code>options</code> args.
     *
     * @param outFile output File for this writer. May not be null.
     * @param referenceDictionary required if on the fly indexing is set, otherwise can be null
     * @param createMD5 true if an md5 file should be created
     * @param options variable length list of additional Options to be set for this writer
     * @returns VariantContextWriter must be closed by the caller
     */
public static VariantContextWriter createVCFWriter(final File outFile, final SAMSequenceDictionary referenceDictionary, final boolean createMD5, final Options... options) {
    Utils.nonNull(outFile);
    VariantContextWriterBuilder vcWriterBuilder = new VariantContextWriterBuilder().clearOptions().setOutputFile(outFile);
    if (VariantContextWriterBuilder.OutputType.UNSPECIFIED == getVariantFileTypeFromExtension(outFile)) {
        // the only way the user has to specify an output type is by file extension, and htsjdk
        // throws if it can't map the file extension to a known vcf type, so fallback to a default
        // of VCF
        logger.warn(String.format("Can't determine output variant file format from output file extension \"%s\". Defaulting to VCF.", FilenameUtils.getExtension(outFile.getPath())));
        vcWriterBuilder = vcWriterBuilder.setOutputFileType(VariantContextWriterBuilder.OutputType.VCF);
    }
    if (createMD5) {
        vcWriterBuilder.setCreateMD5();
    }
    if (null != referenceDictionary) {
        vcWriterBuilder = vcWriterBuilder.setReferenceDictionary(referenceDictionary);
    }
    for (Options opt : options) {
        vcWriterBuilder = vcWriterBuilder.setOption(opt);
    }
    return vcWriterBuilder.build();
}
Also used : Options(htsjdk.variant.variantcontext.writer.Options) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder)

Example 9 with VariantContextWriterBuilder

use of htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder in project gatk-protected by broadinstitute.

the class EvaluateCopyNumberTriStateCalls method openVCFWriter.

private VariantContextWriter openVCFWriter(final File outputFile, final Set<String> samples) {
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder();
    builder.setOutputFile(outputFile);
    builder.clearOptions();
    final VariantContextWriter result = builder.build();
    final VCFHeader header = new VCFHeader(Collections.emptySet(), samples);
    CopyNumberTriStateAllele.addHeaderLinesTo(header);
    EvaluationClass.addHeaderLinesTo(header);
    // Format annotations.
    header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.Character, "Called genotype"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALL_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Quality of the call"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of called segments that overlap with the truth"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Called allele count for mixed calls"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, 1, VCFHeaderLineType.Float, "Truth copy fraction estimated"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Truth call quality"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.EVALUATION_CLASS_KEY, 1, VCFHeaderLineType.Character, "The evaluation class for the call or lack of call. It the values of the header key '" + EvaluationClass.VCF_HEADER_KEY + "'"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, 1, VCFHeaderLineType.Character, "The truth genotype"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of targets covered by called segments"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALL_QUALITY_KEY, 1, VCFHeaderLineType.Float, "1 - The probability of th event in Phred scale (the maximum if ther are more than one segment"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "The quality of the call (the maximum if there are more than one segment"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Character, "Genotype filters"));
    // Info annotations.
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "The frequency of the alternative alleles in the truth callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of called alleles in the truth callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "The frequency of the alternative alleles in the actual callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of called alleles in the actual callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_TARGET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of targets overlapped by this variant"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position for the variant"));
    // Filter annotations.
    for (final EvaluationFilter filter : EvaluationFilter.values()) {
        header.addMetaDataLine(new VCFFilterHeaderLine(filter.name(), filter.description));
        header.addMetaDataLine(new VCFFilterHeaderLine(filter.acronym, filter.description));
    }
    header.addMetaDataLine(new VCFFilterHeaderLine(EvaluationFilter.PASS, "Indicates that it passes all filters"));
    result.writeHeader(header);
    return result;
}
Also used : VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter)

Example 10 with VariantContextWriterBuilder

use of htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder in project gatk by broadinstitute.

the class GatherVcfs method gatherConventionally.

/** Code for gathering multiple VCFs that works regardless of input format and output format, but can be slow. */
private static void gatherConventionally(final SAMSequenceDictionary sequenceDictionary, final boolean createIndex, final List<Path> inputFiles, final File outputFile, final int cloudPrefetchBuffer) {
    final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
    if (createIndex)
        options.add(Options.INDEX_ON_THE_FLY);
    else
        options.remove(Options.INDEX_ON_THE_FLY);
    try (final VariantContextWriter out = new VariantContextWriterBuilder().setOutputFile(outputFile).setReferenceDictionary(sequenceDictionary).setOptions(options).build()) {
        final ProgressLogger progress = new ProgressLogger(log, 10000);
        VariantContext lastContext = null;
        Path lastFile = null;
        VCFHeader firstHeader = null;
        VariantContextComparator comparator = null;
        for (final Path f : inputFiles) {
            try {
                log.debug("Gathering from file: ", f.toUri().toString());
                final FeatureReader<VariantContext> variantReader = getReaderFromVCFUri(f, cloudPrefetchBuffer);
                final PeekableIterator<VariantContext> variantIterator;
                variantIterator = new PeekableIterator<>(variantReader.iterator());
                final VCFHeader header = (VCFHeader) variantReader.getHeader();
                if (firstHeader == null) {
                    firstHeader = header;
                    out.writeHeader(firstHeader);
                    comparator = new VariantContextComparator(firstHeader.getContigLines());
                }
                if (lastContext != null && variantIterator.hasNext()) {
                    final VariantContext vc = variantIterator.peek();
                    if (comparator.compare(vc, lastContext) <= 0) {
                        throw new IllegalStateException("First variant in file " + f.toUri().toString() + " is at " + vc.getSource() + " but last variant in earlier file " + lastFile.toUri().toString() + " is at " + lastContext.getSource());
                    }
                }
                while (variantIterator.hasNext()) {
                    lastContext = variantIterator.next();
                    out.add(lastContext);
                    progress.record(lastContext.getContig(), lastContext.getStart());
                }
                lastFile = f;
                CloserUtil.close(variantIterator);
                CloserUtil.close(variantReader);
            } catch (IOException e) {
                throw new UserException.CouldNotReadInputFile(f, e.getMessage(), e);
            }
        }
    }
}
Also used : Path(java.nio.file.Path) Options(htsjdk.variant.variantcontext.writer.Options) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) UserException(org.broadinstitute.hellbender.exceptions.UserException) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Aggregations

VariantContextWriterBuilder (htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder)14 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)10 VariantContext (htsjdk.variant.variantcontext.VariantContext)7 VCFHeader (htsjdk.variant.vcf.VCFHeader)7 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)6 UserException (org.broadinstitute.hellbender.exceptions.UserException)6 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)6 Options (htsjdk.variant.variantcontext.writer.Options)5 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)3 VariantContextComparator (htsjdk.variant.variantcontext.VariantContextComparator)2 VCFFilterHeaderLine (htsjdk.variant.vcf.VCFFilterHeaderLine)2 ArrayList (java.util.ArrayList)2 PipelineOptions (com.google.cloud.dataflow.sdk.options.PipelineOptions)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 LiftOver (htsjdk.samtools.liftover.LiftOver)1 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)1 CloseableIterator (htsjdk.samtools.util.CloseableIterator)1 MergingIterator (htsjdk.samtools.util.MergingIterator)1 RuntimeIOException (htsjdk.samtools.util.RuntimeIOException)1 Allele (htsjdk.variant.variantcontext.Allele)1