Search in sources :

Example 16 with VariantContext

use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.

the class MultiVariantDataSource method validateAllSequenceDictionaries.

/**
     * GATKTool only validates individual feature dictionaries against the reference dictionary, so cross-validate
     * all of the dictionaries against each other here by ensuring that each contig found in any dictionary has the
     * same length (and md5, when a value is present for that contig in both dictionaries) in every other dictionary
     * in which its present.
     */
private void validateAllSequenceDictionaries() {
    final Map<String, FeatureDataSource<VariantContext>> contigMap = new HashMap<>();
    featureDataSources.forEach(ds -> {
        final SAMSequenceDictionary dictionary = ds.getSequenceDictionary();
        if (dictionary == null) {
            logger.warn("A sequence dictionary is required for each input when using multiple inputs, and one could" + " not be obtained for feature input: " + ds.getName() + ". The input may not exist or may not have a valid header");
        } else {
            dictionary.getSequences().forEach(sourceSequence -> {
                final String sourceSequenceName = sourceSequence.getSequenceName();
                final FeatureDataSource<VariantContext> previousDataSource = contigMap.getOrDefault(sourceSequenceName, null);
                if (previousDataSource != null) {
                    final SAMSequenceDictionary previousDictionary = previousDataSource.getSequenceDictionary();
                    final SAMSequenceRecord previousSequence = previousDictionary.getSequence(sourceSequenceName);
                    validateSequenceDictionaryRecords(ds.getName(), dictionary, sourceSequence, previousDataSource.getName(), previousDictionary, previousSequence);
                } else {
                    contigMap.put(sourceSequenceName, ds);
                }
            });
        }
    });
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 17 with VariantContext

use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.

the class GatherVcfs method getReaderFromVCFUri.

private static FeatureReader<VariantContext> getReaderFromVCFUri(final Path variantPath, final int cloudPrefetchBuffer) {
    final String variantURI = variantPath.toUri().toString();
    final Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper = (cloudPrefetchBuffer > 0 ? is -> SeekableByteChannelPrefetcher.addPrefetcher(cloudPrefetchBuffer, is) : Function.identity());
    return AbstractFeatureReader.getFeatureReader(variantURI, null, new VCFCodec(), false, cloudWrapper, Function.identity());
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) java.util(java.util) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) IOUtil(htsjdk.samtools.util.IOUtil) VCFHeader(htsjdk.variant.vcf.VCFHeader) Argument(org.broadinstitute.barclay.argparser.Argument) FeatureReader(htsjdk.tribble.FeatureReader) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) Function(java.util.function.Function) VariantProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.VariantProgramGroup) AbstractFeatureReader(htsjdk.tribble.AbstractFeatureReader) BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) CollectionUtil(htsjdk.samtools.util.CollectionUtil) VCFCodec(htsjdk.variant.vcf.VCFCodec) PeekableIterator(htsjdk.samtools.util.PeekableIterator) PicardCommandLineProgram(org.broadinstitute.hellbender.cmdline.PicardCommandLineProgram) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) BlockCompressedStreamConstants(htsjdk.samtools.util.BlockCompressedStreamConstants) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) BlockCompressedInputStream(htsjdk.samtools.util.BlockCompressedInputStream) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Collectors(java.util.stream.Collectors) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) SeekableByteChannel(java.nio.channels.SeekableByteChannel) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) Options(htsjdk.variant.variantcontext.writer.Options) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) SeekableByteChannelPrefetcher(org.broadinstitute.hellbender.utils.nio.SeekableByteChannelPrefetcher) VariantContext(htsjdk.variant.variantcontext.VariantContext) Utils(org.broadinstitute.hellbender.utils.Utils) VisibleForTesting(com.google.common.annotations.VisibleForTesting) LogManager(org.apache.logging.log4j.LogManager) SeekableByteChannel(java.nio.channels.SeekableByteChannel) VCFCodec(htsjdk.variant.vcf.VCFCodec)

Example 18 with VariantContext

use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.

the class FilterApplyingVariantIterator method next.

/**
     * Provides the next record from the underlying iterator after applying filter strings generated
     * by the set of filters in use by the iterator.
     */
@Override
public VariantContext next() {
    final VariantContext ctx = this.iterator.next();
    final Set<String> filterStrings = new HashSet<>();
    // Collect variant level filters
    for (final VariantFilter filter : this.filters) {
        final String val = filter.filter(ctx);
        if (val != null)
            filterStrings.add(val);
    }
    // Collect genotype level filters in a Map of Sample -> List<filter string>
    final ListMap<String, String> gtFilterStrings = new ListMap<>();
    final Set<String> variantSamples = new HashSet<>();
    for (final Genotype gt : ctx.getGenotypes()) {
        if (gt.isCalled() && !gt.isHomRef())
            variantSamples.add(gt.getSampleName());
        for (final GenotypeFilter filter : gtFilters) {
            final String filterString = filter.filter(ctx, gt);
            if (filterString != null)
                gtFilterStrings.add(gt.getSampleName(), filterString);
        }
    }
    // If all genotypes are filtered apply a site level filter
    if (gtFilterStrings.keySet().containsAll(variantSamples)) {
        filterStrings.add(ALL_GTS_FILTERED);
    }
    // Make a builder and set the site level filter appropriately
    final VariantContextBuilder builder = new VariantContextBuilder(ctx);
    if (filterStrings.isEmpty()) {
        builder.passFilters();
    } else {
        builder.filters(filterStrings);
    }
    // Apply filters to the necessary genotypes
    builder.noGenotypes();
    final List<Genotype> newGenotypes = new ArrayList<>(ctx.getNSamples());
    for (final Genotype gt : ctx.getGenotypes()) {
        final GenotypeBuilder gtBuilder = new GenotypeBuilder(gt);
        final List<String> filters = gtFilterStrings.get(gt.getSampleName());
        if (filters == null || filters.isEmpty()) {
            gtBuilder.filter(PASS_FILTER);
        } else {
            gtBuilder.filters(filters);
        }
        newGenotypes.add(gtBuilder.make());
    }
    builder.genotypes(newGenotypes);
    return builder.make();
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) ListMap(htsjdk.samtools.util.ListMap) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder)

Example 19 with VariantContext

use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.

the class MakeSitesOnlyVcf method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final VCFFileReader reader = new VCFFileReader(INPUT, false);
    final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
    final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    // Setup the site-only file writer
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    try (final VariantContextWriter writer = builder.build()) {
        final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
        writer.writeHeader(header);
        // Go through the input, strip the records and write them to the output
        final CloseableIterator<VariantContext> iterator = reader.iterator();
        while (iterator.hasNext()) {
            final VariantContext full = iterator.next();
            final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
            writer.add(site);
            progress.record(site.getContig(), site.getStart());
        }
        CloserUtil.close(iterator);
        CloserUtil.close(reader);
    }
    return null;
}
Also used : VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 20 with VariantContext

use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.

the class MergeVcfs method doWork.

@Override
protected Object doWork() {
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final List<String> sampleList = new ArrayList<>();
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;
    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }
    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException("The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }
        if (sequenceDictionary == null)
            sequenceDictionary = fileHeader.getSequenceDictionary();
        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }
        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary).clearOptions();
    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter writer = builder.build()) {
        writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));
        final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(variantContextComparator, iteratorCollection);
        while (mergingIterator.hasNext()) {
            final VariantContext context = mergingIterator.next();
            writer.add(context);
            progress.record(context.getContig(), context.getStart());
        }
        CloserUtil.close(mergingIterator);
    }
    return null;
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) ArrayList(java.util.ArrayList) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) MergingIterator(htsjdk.samtools.util.MergingIterator) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Aggregations

VariantContext (htsjdk.variant.variantcontext.VariantContext)237 Test (org.testng.annotations.Test)119 File (java.io.File)98 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)68 Allele (htsjdk.variant.variantcontext.Allele)55 Genotype (htsjdk.variant.variantcontext.Genotype)49 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)43 Collectors (java.util.stream.Collectors)43 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)43 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)41 FeatureDataSource (org.broadinstitute.hellbender.engine.FeatureDataSource)36 StandardArgumentDefinitions (org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions)35 java.util (java.util)33 IOException (java.io.IOException)25 Assert (org.testng.Assert)25 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)24 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)22 UserException (org.broadinstitute.hellbender.exceptions.UserException)22 Target (org.broadinstitute.hellbender.tools.exome.Target)22 DataProvider (org.testng.annotations.DataProvider)22