Search in sources :

Example 11 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute.

the class FilterVcf method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final List<VariantFilter> variantFilters = CollectionUtil.makeList(new AlleleBalanceFilter(MIN_AB), new FisherStrandFilter(MAX_FS), new QdFilter(MIN_QD));
    final List<GenotypeFilter> genotypeFilters = CollectionUtil.makeList(new GenotypeQualityFilter(MIN_GQ), new DepthFilter(MIN_DP));
    try (final VCFFileReader in = new VCFFileReader(INPUT, false)) {
        final FilterApplyingVariantIterator iterator = new FilterApplyingVariantIterator(in.iterator(), variantFilters, genotypeFilters);
        try (final VariantContextWriter out = new VariantContextWriterBuilder().setOutputFile(OUTPUT).build()) {
            final VCFHeader header = in.getFileHeader();
            header.addMetaDataLine(new VCFFilterHeaderLine("AllGtsFiltered", "Site filtered out because all genotypes are filtered out."));
            header.addMetaDataLine(new VCFFormatHeaderLine("FT", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype filters."));
            for (final VariantFilter filter : variantFilters) {
                for (final VCFFilterHeaderLine line : filter.headerLines()) {
                    header.addMetaDataLine(line);
                }
            }
            out.writeHeader(in.getFileHeader());
            while (iterator.hasNext()) {
                out.add(iterator.next());
            }
        }
    }
    return null;
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine)

Example 12 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute.

the class MakeSitesOnlyVcf method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final VCFFileReader reader = new VCFFileReader(INPUT, false);
    final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
    final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    // Setup the site-only file writer
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    try (final VariantContextWriter writer = builder.build()) {
        final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
        writer.writeHeader(header);
        // Go through the input, strip the records and write them to the output
        final CloseableIterator<VariantContext> iterator = reader.iterator();
        while (iterator.hasNext()) {
            final VariantContext full = iterator.next();
            final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
            writer.add(site);
            progress.record(site.getContig(), site.getStart());
        }
        CloserUtil.close(iterator);
        CloserUtil.close(reader);
    }
    return null;
}
Also used : VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 13 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute.

the class MergeVcfs method doWork.

@Override
protected Object doWork() {
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final List<String> sampleList = new ArrayList<>();
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;
    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }
    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException("The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }
        if (sequenceDictionary == null)
            sequenceDictionary = fileHeader.getSequenceDictionary();
        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }
        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary).clearOptions();
    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter writer = builder.build()) {
        writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));
        final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(variantContextComparator, iteratorCollection);
        while (mergingIterator.hasNext()) {
            final VariantContext context = mergingIterator.next();
            writer.add(context);
            progress.record(context.getContig(), context.getStart());
        }
        CloserUtil.close(mergingIterator);
    }
    return null;
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) ArrayList(java.util.ArrayList) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) MergingIterator(htsjdk.samtools.util.MergingIterator) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 14 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute.

the class RenameSampleInVcf method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    try (final VCFFileReader in = new VCFFileReader(INPUT)) {
        final VCFHeader header = in.getFileHeader();
        Utils.validateArg(header.getGenotypeSamples().size() == 1, "Input VCF must be single-sample.");
        Utils.validateArg(OLD_SAMPLE_NAME == null || OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0)), () -> "Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0));
        final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
        if (CREATE_INDEX)
            options.add(Options.INDEX_ON_THE_FLY);
        else
            options.remove(Options.INDEX_ON_THE_FLY);
        final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME));
        try (final VariantContextWriter out = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(outHeader.getSequenceDictionary()).setOptions(options).build()) {
            out.writeHeader(outHeader);
            for (final VariantContext ctx : in) {
                out.add(ctx);
            }
        }
    }
    return null;
}
Also used : Options(htsjdk.variant.variantcontext.writer.Options) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 15 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute.

the class SortVcf method collectFileReadersAndHeaders.

private void collectFileReadersAndHeaders(final List<String> sampleList, SAMSequenceDictionary samSequenceDictionary) {
    for (final File input : INPUT) {
        final VCFFileReader in = new VCFFileReader(input, false);
        final VCFHeader header = in.getFileHeader();
        final SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
        if (dict == null || dict.isEmpty()) {
            if (null == samSequenceDictionary) {
                throw new IllegalArgumentException("Sequence dictionary was missing or empty for the VCF: " + input.getAbsolutePath() + " Please add a sequence dictionary to this VCF or specify SEQUENCE_DICTIONARY.");
            }
            header.setSequenceDictionary(samSequenceDictionary);
        } else {
            if (null == samSequenceDictionary) {
                samSequenceDictionary = dict;
            } else {
                try {
                    samSequenceDictionary.assertSameDictionary(dict);
                } catch (final AssertionError e) {
                    throw new IllegalArgumentException(e);
                }
            }
        }
        if (sampleList.isEmpty()) {
            sampleList.addAll(header.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(header.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + input.getAbsolutePath() + " has sample names that don't match the other files.");
            }
        }
        inputReaders.add(in);
        inputHeaders.add(header);
    }
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Aggregations

VCFHeader (htsjdk.variant.vcf.VCFHeader)182 VariantContext (htsjdk.variant.variantcontext.VariantContext)113 File (java.io.File)93 ArrayList (java.util.ArrayList)79 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)73 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)64 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)63 HashSet (java.util.HashSet)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)58 IOException (java.io.IOException)55 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)52 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)49 Genotype (htsjdk.variant.variantcontext.Genotype)48 Allele (htsjdk.variant.variantcontext.Allele)47 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)47 List (java.util.List)44 Set (java.util.Set)38 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Collectors (java.util.stream.Collectors)34