Search in sources :

Example 6 with VariantContextComparator

use of htsjdk.variant.variantcontext.VariantContextComparator in project gatk by broadinstitute.

In class GatherVcfs, method gatherConventionally:

/**
 * Gathers multiple VCFs into one output by reading every record and re-writing it.
 * Works regardless of input format and output format, but can be slow.
 *
 * <p>The header of the first input is written as the output header, and its contig lines define
 * the ordering used to verify that each file starts strictly after the last record already written.
 *
 * @param sequenceDictionary  reference dictionary handed to the output writer builder
 * @param createIndex         if true {@code Options.INDEX_ON_THE_FLY} is enabled, otherwise it is removed
 * @param inputFiles          input VCFs, in the order in which they are concatenated
 * @param outputFile          file the gathered records are written to
 * @param cloudPrefetchBuffer passed through unchanged to {@code getReaderFromVCFUri}
 * @throws IllegalStateException if the first variant of a file does not sort after the last variant of the preceding input
 * @throws UserException.CouldNotReadInputFile if an {@code IOException} is raised while reading an input
 */
private static void gatherConventionally(final SAMSequenceDictionary sequenceDictionary, final boolean createIndex, final List<Path> inputFiles, final File outputFile, final int cloudPrefetchBuffer) {
    final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
    if (createIndex) {
        options.add(Options.INDEX_ON_THE_FLY);
    } else {
        options.remove(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter out = new VariantContextWriterBuilder().setOutputFile(outputFile).setReferenceDictionary(sequenceDictionary).setOptions(options).build()) {
        final ProgressLogger progress = new ProgressLogger(log, 10000);
        VariantContext lastContext = null;
        Path lastFile = null;
        VCFHeader firstHeader = null;
        VariantContextComparator comparator = null;
        for (final Path f : inputFiles) {
            // Declared outside the try so the finally block can release them on every exit path.
            FeatureReader<VariantContext> variantReader = null;
            PeekableIterator<VariantContext> variantIterator = null;
            try {
                // The "{}" placeholder is required; without it the URI argument is silently dropped.
                log.debug("Gathering from file: {}", f.toUri().toString());
                variantReader = getReaderFromVCFUri(f, cloudPrefetchBuffer);
                variantIterator = new PeekableIterator<>(variantReader.iterator());
                final VCFHeader header = (VCFHeader) variantReader.getHeader();
                if (firstHeader == null) {
                    // Only the first file's header is emitted; it also fixes the contig ordering.
                    firstHeader = header;
                    out.writeHeader(firstHeader);
                    comparator = new VariantContextComparator(firstHeader.getContigLines());
                }
                // Peek (do not consume) the first record to check it sorts after everything written so far.
                if (lastContext != null && variantIterator.hasNext()) {
                    final VariantContext vc = variantIterator.peek();
                    if (comparator.compare(vc, lastContext) <= 0) {
                        // NOTE(review): the message says "is at" but prints getSource(), not a position - confirm intent.
                        throw new IllegalStateException("First variant in file " + f.toUri().toString() + " is at " + vc.getSource() + " but last variant in earlier file " + lastFile.toUri().toString() + " is at " + lastContext.getSource());
                    }
                }
                while (variantIterator.hasNext()) {
                    lastContext = variantIterator.next();
                    out.add(lastContext);
                    progress.record(lastContext.getContig(), lastContext.getStart());
                }
                lastFile = f;
            } catch (IOException e) {
                throw new UserException.CouldNotReadInputFile(f, e.getMessage(), e);
            } finally {
                // Previously these were closed only on the success path, leaking the reader on any exception.
                CloserUtil.close(variantIterator);
                CloserUtil.close(variantReader);
            }
        }
    }
}
Also used : Path(java.nio.file.Path) Options(htsjdk.variant.variantcontext.writer.Options) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) UserException(org.broadinstitute.hellbender.exceptions.UserException) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 7 with VariantContextComparator

use of htsjdk.variant.variantcontext.VariantContextComparator in project gatk by broadinstitute.

In class GatherVcfs, method assertSameSamplesAndValidOrdering:

/**
 * Validates that all headers contain the same set of genotyped samples and that files are in
 * order by position of first record.
 *
 * <p>The first input file supplies the reference sample list and sequence dictionary; every
 * file (including the first) is then checked against them. Files with no records are skipped
 * for the ordering check.
 *
 * @param inputFiles the VCFs to validate, in the order they will be gathered
 * @throws IllegalArgumentException if a file's sample list differs from the first file's, or if the
 *         first record of a file does not sort strictly after the first record of the previous non-empty file
 * @throws UserException.CouldNotReadInputFile if an {@code IOException} is raised while reading records
 */
private static void assertSameSamplesAndValidOrdering(final List<Path> inputFiles) {
    final VCFHeader firstHeader = getHeader(inputFiles.get(0));
    final SAMSequenceDictionary dict = firstHeader.getSequenceDictionary();
    final VariantContextComparator comparator = new VariantContextComparator(dict);
    final List<String> samples = firstHeader.getGenotypeSamples();
    Path lastFile = null;
    VariantContext lastContext = null;
    for (final Path f : inputFiles) {
        final FeatureReader<VariantContext> in = getReaderFromVCFUri(f, 0);
        try {
            final VCFHeader header = (VCFHeader) in.getHeader();
            dict.assertSameDictionary(header.getSequenceDictionary());
            final List<String> theseSamples = header.getGenotypeSamples();
            if (!samples.equals(theseSamples)) {
                // Report the symmetric difference so the user can see which samples diverge.
                final SortedSet<String> s1 = new TreeSet<>(samples);
                final SortedSet<String> s2 = new TreeSet<>(theseSamples);
                s1.removeAll(theseSamples);
                s2.removeAll(samples);
                throw new IllegalArgumentException("VCFs do not have identical sample lists." + " Samples unique to first file: " + s1 + ". Samples unique to " + f.toUri().toString() + ": " + s2 + ".");
            }
            try (final CloseableIterator<VariantContext> variantIterator = in.iterator()) {
                if (variantIterator.hasNext()) {
                    final VariantContext currentContext = variantIterator.next();
                    if (lastContext != null && comparator.compare(lastContext, currentContext) >= 0) {
                        throw new IllegalArgumentException("First record in file " + f.toUri().toString() + " is not after first record in " + "previous file " + lastFile.toUri().toString());
                    }
                    lastContext = currentContext;
                    lastFile = f;
                }
            } catch (IOException e) {
                throw new UserException.CouldNotReadInputFile(f, e.getMessage(), e);
            }
        } finally {
            // Close on every exit path; the validation failures above used to leak the reader.
            CloserUtil.close(in);
        }
    }
}
Also used : Path(java.nio.file.Path) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) UserException(org.broadinstitute.hellbender.exceptions.UserException) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 8 with VariantContextComparator

use of htsjdk.variant.variantcontext.VariantContextComparator in project gatk by broadinstitute.

In class AbstractConcordanceWalker, method onStartup:

// ********** The basic traversal structure of GATKTool
/**
 * Prepares the walker for traversal: runs the superclass startup, initializes the truth
 * variants if needed, opens the eval variants as a feature data source, restricts both
 * sources to the traversal intervals when intervals are present, and builds the
 * dictionary-based variant comparator.
 */
@Override
protected final void onStartup() {
    super.onStartup();
    initializeTruthVariantsIfNecessary();

    final FeatureInput<VariantContext> evalInput = new FeatureInput<>(evalVariantsFile, "eval");
    evalVariants = new FeatureDataSource<>(evalInput, CACHE_LOOKAHEAD, VariantContext.class);

    // Apply the same interval restriction to both sources.
    final boolean restrictToIntervals = hasIntervals();
    if (restrictToIntervals) {
        truthVariants.setIntervalsForTraversal(intervalsForTraversal);
        evalVariants.setIntervalsForTraversal(intervalsForTraversal);
    }

    dict = getBestAvailableSequenceDictionary();
    variantContextComparator = new VariantContextComparator(dict);
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator)

Example 9 with VariantContextComparator

use of htsjdk.variant.variantcontext.VariantContextComparator in project gatk by broadinstitute.

In class SortVcfTest, method validateSortingResults:

/**
     * Checks the ordering and total number of variant context entries in the specified output VCF file.
     * Does NOT check explicitly that the VC genomic positions match exactly those from the inputs. We assume this behavior from other tests.
     *
     * Ordering is checked with the comparator obtained from the output file's own header; consecutive
     * records that compare as equal are accepted.
     *
     * @param output VCF file representing the output of SortVCF
     * @param expectedVariantContextCount the total number of variant context entries from all input files that were merged/sorted
     */
private void validateSortingResults(final File output, final int expectedVariantContextCount) {
    int variantContextCount = 0;
    // The reader is now a managed resource; previously it was never closed.
    try (final VCFFileReader outputReader = new VCFFileReader(output, false)) {
        final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
        VariantContext last = null;
        try (final CloseableIterator<VariantContext> iterator = outputReader.iterator()) {
            while (iterator.hasNext()) {
                final VariantContext outputContext = iterator.next();
                if (last != null) {
                    Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
                }
                last = outputContext;
                variantContextCount++;
            }
        }
    }
    Assert.assertEquals(variantContextCount, expectedVariantContextCount);
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator)

Example 10 with VariantContextComparator

use of htsjdk.variant.variantcontext.VariantContextComparator in project gatk by broadinstitute.

In class AbstractVcfMergingClpTester, method validateResultsForMultipleInputs:

/**
 * Checks that the records of the output VCF are in non-decreasing order and that every expected
 * position was seen.
 *
 * For each output record, the head of the first queue (in list order) that equals the record's
 * position string is removed. After the whole file has been read, every queue must be empty.
 *
 * @param output         VCF file to validate
 * @param positionQueues per-input queues of expected position strings, in the format produced by
 *                       {@code getContigPosition}; they are consumed (mutated) by this method
 */
private void validateResultsForMultipleInputs(final File output, final List<Queue<String>> positionQueues) {
    // The reader is now a managed resource; previously it was never closed.
    try (final VCFFileReader outputReader = new VCFFileReader(output, false)) {
        final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
        VariantContext last = null;
        try (final CloseableIterator<VariantContext> iterator = outputReader.iterator()) {
            while (iterator.hasNext()) {
                final VariantContext outputContext = iterator.next();
                final String position = getContigPosition(outputContext);
                // Consume the position from at most one queue: the first whose head matches.
                for (final Queue<String> positionQueue : positionQueues) {
                    if (position.equals(positionQueue.peek())) {
                        positionQueue.poll();
                        break;
                    }
                }
                if (last != null) {
                    Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
                }
                last = outputContext;
            }
        }
    }
    for (final Queue<String> positionQueue : positionQueues) {
        Assert.assertEquals(positionQueue.size(), 0);
    }
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator)

Aggregations

VariantContextComparator (htsjdk.variant.variantcontext.VariantContextComparator): 12; VariantContext (htsjdk.variant.variantcontext.VariantContext): 11; VCFFileReader (htsjdk.variant.vcf.VCFFileReader): 7; VCFHeader (htsjdk.variant.vcf.VCFHeader): 6; VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter): 5; SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 4; CloseableIterator (htsjdk.samtools.util.CloseableIterator): 4; File (java.io.File): 4; MergingIterator (htsjdk.samtools.util.MergingIterator): 3; VariantContextWriterBuilder (htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder): 3; UserException (org.broadinstitute.hellbender.exceptions.UserException): 3; RuntimeIOException (htsjdk.samtools.util.RuntimeIOException): 2; Path (java.nio.file.Path): 2; Lists (com.google.common.collect.Lists): 1; QueryInterval (htsjdk.samtools.QueryInterval): 1; SAMFileWriter (htsjdk.samtools.SAMFileWriter): 1; SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory): 1; SAMRecordIterator (htsjdk.samtools.SAMRecordIterator): 1; SamReader (htsjdk.samtools.SamReader): 1; SamReaderFactory (htsjdk.samtools.SamReaderFactory): 1