Search in sources :

Example 6 with VCFFileReader

use of htsjdk.variant.vcf.VCFFileReader in project gatk by broadinstitute.

the class SortVcf method sortInputs.

/**
 * Merge the inputs and sort them by adding each input's content to a single SortingCollection.
 * <p/>
 * NB: It would be better to have a merging iterator as in MergeSamFiles, as this would perform better for pre-sorted inputs.
 * Here, we are assuming inputs are unsorted, and so adding their VariantContexts iteratively is fine for now.
 * MergeVcfs exists for simple merging of presorted inputs.
 * <p/>
 * Every reader is closed once this method has finished with it, including when reading from it fails.
 *
 * @param readers      - a list of VCFFileReaders, one for each input VCF
 * @param outputHeader - The merged header whose information we intend to use in the final output file
 * @return the SortingCollection to which every VariantContext of every reader has been added, ordered by
 *         the record comparator of {@code outputHeader}
 */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(logger, 25000, "read", "records");
    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords
    // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time.
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(outputHeader), outputHeader.getVCFRecordComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
    // 1-based position of the current reader, used only for the log message.
    int readerCount = 1;
    for (final VCFFileReader reader : readers) {
        logger.info("Reading entries from input file " + readerCount);
        try {
            for (final VariantContext variantContext : reader) {
                sorter.add(variantContext);
                readProgress.record(variantContext.getContig(), variantContext.getStart());
            }
        } finally {
            // Release the reader even when iterating or adding a record throws.
            reader.close();
        }
        readerCount++;
    }
    return sorter;
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VCFRecordCodec(htsjdk.variant.vcf.VCFRecordCodec)

Example 7 with VCFFileReader

use of htsjdk.variant.vcf.VCFFileReader in project gatk by broadinstitute.

the class SortVcf method collectFileReadersAndHeaders.

/**
 * Opens a VCFFileReader for every file in INPUT, validates its header and registers the reader and the
 * header in {@code inputReaders} and {@code inputHeaders}.
 * <p/>
 * Validation performed per input:
 * <ul>
 *   <li>If the VCF has no (or an empty) sequence dictionary, {@code samSequenceDictionary} is installed on its
 *       header; if that is also null the input is rejected.</li>
 *   <li>Otherwise the VCF's dictionary becomes the reference dictionary when none is known yet, or must pass
 *       {@code assertSameDictionary} against the one already known.</li>
 *   <li>The sample names (in order) must equal those of the first input.</li>
 * </ul>
 * A reader that fails validation is closed before the exception propagates, since it is never registered
 * and nobody else could close it.
 *
 * @param sampleList            list that receives the sample names of the first input (when passed in empty) and
 *                              against which every later input is compared
 * @param samSequenceDictionary dictionary to validate against / fall back to, may be null. NOTE: the parameter is
 *                              re-assigned locally when the first input dictionary is adopted; that re-assignment
 *                              is not visible to the caller.
 * @throws IllegalArgumentException if an input lacks a dictionary and none was supplied, if dictionaries differ,
 *                                  or if the sample names do not match
 */
private void collectFileReadersAndHeaders(final List<String> sampleList, SAMSequenceDictionary samSequenceDictionary) {
    for (final File input : INPUT) {
        final VCFFileReader in = new VCFFileReader(input, false);
        final VCFHeader header;
        try {
            header = in.getFileHeader();
            final SAMSequenceDictionary dict = header.getSequenceDictionary();
            if (dict == null || dict.isEmpty()) {
                if (null == samSequenceDictionary) {
                    throw new IllegalArgumentException("Sequence dictionary was missing or empty for the VCF: " + input.getAbsolutePath() + " Please add a sequence dictionary to this VCF or specify SEQUENCE_DICTIONARY.");
                }
                header.setSequenceDictionary(samSequenceDictionary);
            } else {
                if (null == samSequenceDictionary) {
                    samSequenceDictionary = dict;
                } else {
                    try {
                        samSequenceDictionary.assertSameDictionary(dict);
                    } catch (final AssertionError e) {
                        // Surface the mismatch as an argument problem while keeping the original cause.
                        throw new IllegalArgumentException(e);
                    }
                }
            }
            if (sampleList.isEmpty()) {
                sampleList.addAll(header.getSampleNamesInOrder());
            } else {
                if (!sampleList.equals(header.getSampleNamesInOrder())) {
                    throw new IllegalArgumentException("Input file " + input.getAbsolutePath() + " has sample names that don't match the other files.");
                }
            }
        } catch (final RuntimeException e) {
            // This reader was not registered yet, so it has to be released here.
            try {
                in.close();
            } catch (final RuntimeException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        inputReaders.add(in);
        inputHeaders.add(header);
    }
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 8 with VCFFileReader

use of htsjdk.variant.vcf.VCFFileReader in project gatk by broadinstitute.

the class DbSnpBitSetUtil method loadVcf.

/**
 * Private helper method to read through the VCF and create one or more bit sets.
 * <p/>
 * For every variant in {@code dbSnpFile} and every entry of {@code bitSetsToVariantTypes}, the positions
 * {@code [start, end]} of the variant are set in the per-contig BitSet of that entry's DbSnpBitSetUtil when
 * the entry's type set is empty or contains a type matching the variant. A missing per-contig BitSet is created
 * on first use, regardless of whether the variant ends up being selected.
 * <p/>
 * The iterator and the reader are closed when the method exits, whether normally or through an exception.
 *
 * @param dbSnpFile             the VCF to read
 * @param sequenceDictionary    optional dictionary used to size new BitSets to the contig length + 1; when null the
 *                              BitSet is initially sized from the end of the first variant seen on that contig
 * @param bitSetsToVariantTypes the DbSnpBitSetUtil instances to fill, each mapped to the variant types it accepts
 */
private static void loadVcf(final File dbSnpFile, final SAMSequenceDictionary sequenceDictionary, final Map<DbSnpBitSetUtil, Set<DbSnpVariantType>> bitSetsToVariantTypes) {
    final VCFFileReader variantReader = new VCFFileReader(dbSnpFile);
    try {
        final CloseableIterator<VariantContext> variantIterator = variantReader.iterator();
        try {
            while (variantIterator.hasNext()) {
                final VariantContext kv = variantIterator.next();
                for (final Map.Entry<DbSnpBitSetUtil, Set<DbSnpVariantType>> tuple : bitSetsToVariantTypes.entrySet()) {
                    final DbSnpBitSetUtil bitset = tuple.getKey();
                    final Set<DbSnpVariantType> variantsToMatch = tuple.getValue();
                    BitSet bits = bitset.sequenceToBitSet.get(kv.getContig());
                    if (bits == null) {
                        final int nBits;
                        if (sequenceDictionary == null) {
                            nBits = kv.getEnd() + 1;
                        } else {
                            // NOTE(review): presumably getSequence yields null for a contig that is absent from the
                            // dictionary, which would make this line throw a NullPointerException - confirm.
                            nBits = sequenceDictionary.getSequence(kv.getContig()).getSequenceLength() + 1;
                        }
                        bits = new BitSet(nBits);
                        bitset.sequenceToBitSet.put(kv.getContig(), bits);
                    }
                    // An empty type set selects every variant. Insertions and deletions are both matched via isIndel().
                    if (variantsToMatch.isEmpty() || (kv.isSNP() && variantsToMatch.contains(DbSnpVariantType.SNP)) || (kv.isIndel() && variantsToMatch.contains(DbSnpVariantType.insertion)) || (kv.isIndel() && variantsToMatch.contains(DbSnpVariantType.deletion))) {
                        for (int i = kv.getStart(); i <= kv.getEnd(); i++) {
                            bits.set(i, true);
                        }
                    }
                }
            }
        } finally {
            CloserUtil.close(variantIterator);
        }
    } finally {
        CloserUtil.close(variantReader);
    }
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext)

Example 9 with VCFFileReader

use of htsjdk.variant.vcf.VCFFileReader in project gatk by broadinstitute.

the class StructuralVariationDiscoveryPipelineSparkIntegrationTest method svDiscoveryVCFEquivalenceTest.

/**
 * Reads all variants of the generated VCF and of the expected VCF and checks them against each other with
 * {@code VariantContextTestUtils.assertVariantContextsAreEqual}, passing along {@code attributesToIgnore}.
 *
 * @param generatedVCFPath   path of the VCF produced by the run under test
 * @param expectedVCFPath    path of the local VCF holding the expected calls
 * @param attributesToIgnore attributes handed to the variant comparison as the ones to ignore
 * @param onHDFS             when true, {@code generatedVCFPath} is first copied to a local temp file via
 *                           {@code BucketUtils.copyFile} and read from there
 */
public static void svDiscoveryVCFEquivalenceTest(final String generatedVCFPath, final String expectedVCFPath, final List<String> attributesToIgnore, final boolean onHDFS) throws Exception {
    // Work out which local file holds the generated calls.
    final File generatedVcfFile;
    if (!onHDFS) {
        generatedVcfFile = new File(generatedVCFPath);
    } else {
        generatedVcfFile = BaseTest.createTempFile("variants", "vcf");
        generatedVcfFile.deleteOnExit();
        BucketUtils.copyFile(generatedVCFPath, generatedVcfFile.getAbsolutePath());
    }

    // Load the generated calls.
    final VCFFileReader generatedReader = new VCFFileReader(generatedVcfFile, false);
    final CloseableIterator<VariantContext> generatedRecords = generatedReader.iterator();
    final List<VariantContext> actualVcs = Utils.stream(generatedRecords).collect(Collectors.toList());
    CloserUtil.close(generatedRecords);
    CloserUtil.close(generatedReader);

    // Load the expected calls.
    final VCFFileReader expectedReader = new VCFFileReader(new File(expectedVCFPath), false);
    final CloseableIterator<VariantContext> expectedRecords = expectedReader.iterator();
    final List<VariantContext> expectedVcs = Utils.stream(expectedRecords).collect(Collectors.toList());
    CloserUtil.close(expectedRecords);
    CloserUtil.close(expectedReader);

    BaseTest.assertCondition(actualVcs, expectedVcs, (actual, expected) -> VariantContextTestUtils.assertVariantContextsAreEqual(actual, expected, attributesToIgnore));
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) File(java.io.File)

Example 10 with VCFFileReader

use of htsjdk.variant.vcf.VCFFileReader in project gatk by broadinstitute.

the class CreateSomaticPanelOfNormals method doWork.

/**
 * Merges the input VCFs in {@code vcfs} in coordinate order and hands every group of variants that share a
 * position to {@code processVariantsAtSamePosition}, which receives the output writer.
 * <p/>
 * The sequence dictionary and the record comparator are taken from the header of the first input; every
 * input's contig lines must be compatible with that comparator. The output header is the smart-merge of all
 * input headers.
 * <p/>
 * All readers and the writer are closed before the method returns, also when it fails.
 *
 * @return the string {@code "SUCCESS"}
 */
public Object doWork() {
    final List<File> inputVcfs = new ArrayList<>(vcfs);

    // Only the header of the first input is needed here, so release that reader right away.
    final VCFHeader headerOfFirstVcf;
    final VCFFileReader firstReader = new VCFFileReader(inputVcfs.get(0), false);
    try {
        headerOfFirstVcf = firstReader.getFileHeader();
    } finally {
        firstReader.close();
    }
    final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
    final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();

    // Readers are tracked so that they can be closed once merging is done (or has failed).
    final List<VCFFileReader> readers = new ArrayList<>(inputVcfs.size());
    try {
        final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
        final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());
        for (final File vcf : inputVcfs) {
            final VCFFileReader reader = new VCFFileReader(vcf, false);
            readers.add(reader);
            iterators.add(reader.iterator());
            final VCFHeader header = reader.getFileHeader();
            Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
            headers.add(header);
        }

        final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY);
        try {
            writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));
            final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators);
            // Placeholder interval that no real variant overlaps, so the first variant always starts a new group.
            SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
            final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
            while (mergingIterator.hasNext()) {
                final VariantContext vc = mergingIterator.next();
                if (!currentPosition.overlaps(vc)) {
                    // A new position begins: emit what was gathered for the previous one.
                    processVariantsAtSamePosition(variantsAtThisPosition, writer);
                    variantsAtThisPosition.clear();
                    currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
                }
                variantsAtThisPosition.add(vc);
            }
            // The loop only flushes a group when the next position shows up, so the group gathered for the
            // last position still has to be processed here; otherwise the final site would be dropped.
            if (!variantsAtThisPosition.isEmpty()) {
                processVariantsAtSamePosition(variantsAtThisPosition, writer);
            }
            mergingIterator.close();
        } finally {
            writer.close();
        }
    } finally {
        for (final VCFFileReader reader : readers) {
            reader.close();
        }
    }
    return "SUCCESS";
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) MergingIterator(htsjdk.samtools.util.MergingIterator) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File)

Aggregations

VCFFileReader (htsjdk.variant.vcf.VCFFileReader)23 VariantContext (htsjdk.variant.variantcontext.VariantContext)16 File (java.io.File)11 VCFHeader (htsjdk.variant.vcf.VCFHeader)10 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)8 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)8 VariantContextComparator (htsjdk.variant.variantcontext.VariantContextComparator)6 VariantContextWriterBuilder (htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder)6 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)5 ArrayList (java.util.ArrayList)4 UserException (org.broadinstitute.hellbender.exceptions.UserException)4 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)4 Test (org.testng.annotations.Test)4 CloseableIterator (htsjdk.samtools.util.CloseableIterator)3 MergingIterator (htsjdk.samtools.util.MergingIterator)3 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)3 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)2 Genotype (htsjdk.variant.variantcontext.Genotype)2 VCFFilterHeaderLine (htsjdk.variant.vcf.VCFFilterHeaderLine)2 VCFRecordCodec (htsjdk.variant.vcf.VCFRecordCodec)2