Use of htsjdk.variant.variantcontext.VariantContextComparator in the gatk project by broadinstitute.
Class GatherVcfs, method gatherConventionally.
/**
 * Code for gathering multiple VCFs that works regardless of input format and output format, but can be slow.
 *
 * <p>Every record of every input is read and re-written through a {@link VariantContextWriter}.
 * The header of the first input file is written as the output header. Before the records of each
 * later file are copied, its first record is compared with the last record written so far.
 *
 * @param sequenceDictionary  reference dictionary handed to the output writer builder
 * @param createIndex         whether {@code Options.INDEX_ON_THE_FLY} is enabled on the writer
 * @param inputFiles          the VCFs to gather, in the order they are to be concatenated
 * @param outputFile          the file the gathered records are written to
 * @param cloudPrefetchBuffer value passed through to {@code getReaderFromVCFUri} for each input
 * @throws IllegalStateException if the first record of a file does not sort strictly after the
 *                               last record written from the preceding files
 * @throws UserException.CouldNotReadInputFile if reading (or closing) an input raises an {@link IOException}
 */
private static void gatherConventionally(final SAMSequenceDictionary sequenceDictionary, final boolean createIndex, final List<Path> inputFiles, final File outputFile, final int cloudPrefetchBuffer) {
    final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
    if (createIndex) {
        options.add(Options.INDEX_ON_THE_FLY);
    } else {
        options.remove(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter out = new VariantContextWriterBuilder().setOutputFile(outputFile).setReferenceDictionary(sequenceDictionary).setOptions(options).build()) {
        final ProgressLogger progress = new ProgressLogger(log, 10000);
        VariantContext lastContext = null;
        Path lastFile = null;
        VCFHeader firstHeader = null;
        VariantContextComparator comparator = null;
        for (final Path f : inputFiles) {
            // NOTE(review): the message has no placeholder; if `log` is a placeholder-style logger the
            // URI argument would not be rendered. Confirm against the logger type declared in the class.
            log.debug("Gathering from file: ", f.toUri().toString());
            // try-with-resources guarantees the iterator and the reader are released even when the
            // ordering check below throws or an I/O error interrupts the copy.
            try (final FeatureReader<VariantContext> variantReader = getReaderFromVCFUri(f, cloudPrefetchBuffer);
                 final PeekableIterator<VariantContext> variantIterator = new PeekableIterator<>(variantReader.iterator())) {
                final VCFHeader header = (VCFHeader) variantReader.getHeader();
                if (firstHeader == null) {
                    // The first file's header defines both the output header and the contig ordering.
                    firstHeader = header;
                    out.writeHeader(firstHeader);
                    comparator = new VariantContextComparator(firstHeader.getContigLines());
                }
                if (lastContext != null && variantIterator.hasNext()) {
                    // Peek (do not consume) so the record is still copied by the loop below.
                    final VariantContext vc = variantIterator.peek();
                    if (comparator.compare(vc, lastContext) <= 0) {
                        // Report genomic positions: "is at" followed by getSource() did not say where the records are.
                        throw new IllegalStateException("First variant in file " + f.toUri().toString() + " is at " + vc.getContig() + ":" + vc.getStart() + " but last variant in earlier file " + lastFile.toUri().toString() + " is at " + lastContext.getContig() + ":" + lastContext.getStart());
                    }
                }
                while (variantIterator.hasNext()) {
                    lastContext = variantIterator.next();
                    out.add(lastContext);
                    progress.record(lastContext.getContig(), lastContext.getStart());
                }
                lastFile = f;
            } catch (IOException e) {
                throw new UserException.CouldNotReadInputFile(f, e.getMessage(), e);
            }
        }
    }
}
Use of htsjdk.variant.variantcontext.VariantContextComparator in the gatk project by broadinstitute.
Class GatherVcfs, method assertSameSamplesAndValidOrdering.
/**
 * Validates that all headers contain the same set of genotyped samples and that files are in order by position of first record.
 *
 * <p>The first file's header supplies the reference sample list, the sequence dictionary and the
 * comparator. Each file (including the first) is then opened, its dictionary and sample list are
 * checked against those of the first file, and its first record (if any) is compared with the
 * first record of the most recent earlier file that had one.
 *
 * @param inputFiles the VCFs to validate, in the order they are intended to be gathered
 * @throws IllegalArgumentException if a file's genotype sample list differs from the first file's,
 *                                  or if a file's first record does not sort strictly after the
 *                                  first record of the previous non-empty file
 * @throws UserException.CouldNotReadInputFile if reading (or closing) an input raises an {@link IOException}
 */
private static void assertSameSamplesAndValidOrdering(final List<Path> inputFiles) {
    final VCFHeader firstHeader = getHeader(inputFiles.get(0));
    final SAMSequenceDictionary dict = firstHeader.getSequenceDictionary();
    final VariantContextComparator comparator = new VariantContextComparator(dict);
    final List<String> samples = firstHeader.getGenotypeSamples();
    Path lastFile = null;
    VariantContext lastContext = null;
    for (final Path f : inputFiles) {
        // try-with-resources releases the reader even when one of the validation checks throws.
        try (final FeatureReader<VariantContext> in = getReaderFromVCFUri(f, 0)) {
            final VCFHeader header = (VCFHeader) in.getHeader();
            dict.assertSameDictionary(header.getSequenceDictionary());
            final List<String> theseSamples = header.getGenotypeSamples();
            if (!samples.equals(theseSamples)) {
                // Compute the two one-sided differences purely to make the error message actionable.
                final SortedSet<String> s1 = new TreeSet<>(samples);
                final SortedSet<String> s2 = new TreeSet<>(theseSamples);
                s1.removeAll(theseSamples);
                s2.removeAll(samples);
                throw new IllegalArgumentException("VCFs do not have identical sample lists." + " Samples unique to first file: " + s1 + ". Samples unique to " + f.toUri().toString() + ": " + s2 + ".");
            }
            try (final CloseableIterator<VariantContext> variantIterator = in.iterator()) {
                // Files without records are skipped for ordering purposes: lastContext/lastFile keep
                // pointing at the most recent file that actually had a record.
                if (variantIterator.hasNext()) {
                    final VariantContext currentContext = variantIterator.next();
                    if (lastContext != null && comparator.compare(lastContext, currentContext) >= 0) {
                        throw new IllegalArgumentException("First record in file " + f.toUri().toString() + " is not after first record in " + "previous file " + lastFile.toUri().toString());
                    }
                    lastContext = currentContext;
                    lastFile = f;
                }
            }
        } catch (IOException e) {
            throw new UserException.CouldNotReadInputFile(f, e.getMessage(), e);
        }
    }
}
Use of htsjdk.variant.variantcontext.VariantContextComparator in the gatk project by broadinstitute.
Class AbstractConcordanceWalker, method onStartup.
// ********** The basic traversal structure of GATKTool
/**
 * Prepares the walker's inputs before traversal: runs the superclass startup, initializes the
 * truth variants if necessary, opens the evaluation variants as a feature data source named
 * "eval", restricts both sources to the traversal intervals when intervals were supplied, and
 * builds the variant comparator from the best available sequence dictionary.
 */
@Override
protected final void onStartup() {
    super.onStartup();
    initializeTruthVariantsIfNecessary();

    final FeatureInput<VariantContext> evalInput = new FeatureInput<>(evalVariantsFile, "eval");
    evalVariants = new FeatureDataSource<>(evalInput, CACHE_LOOKAHEAD, VariantContext.class);

    // Only narrow the traversal when the user actually supplied intervals.
    if (hasIntervals()) {
        truthVariants.setIntervalsForTraversal(intervalsForTraversal);
        evalVariants.setIntervalsForTraversal(intervalsForTraversal);
    }

    // The comparator orders records by the same dictionary stored in `dict`.
    dict = getBestAvailableSequenceDictionary();
    variantContextComparator = new VariantContextComparator(dict);
}
Use of htsjdk.variant.variantcontext.VariantContextComparator in the gatk project by broadinstitute.
Class SortVcfTest, method validateSortingResults.
/**
 * Checks the ordering and total number of variant context entries in the specified output VCF file.
 * Does NOT check explicitly that the VC genomic positions match exactly those from the inputs. We assume this behavior from other tests.
 *
 * <p>Ordering is judged with the record comparator obtained from the output file's own header;
 * consecutive records that compare equal are accepted.
 *
 * @param output VCF file representing the output of SortVCF
 * @param expectedVariantContextCount the total number of variant context entries from all input files that were merged/sorted
 */
private void validateSortingResults(final File output, final int expectedVariantContextCount) {
    int variantContextCount = 0;
    // The reader is opened in try-with-resources so its file handle is released even if an assertion fails.
    try (final VCFFileReader outputReader = new VCFFileReader(output, false)) {
        final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
        VariantContext last = null;
        try (final CloseableIterator<VariantContext> iterator = outputReader.iterator()) {
            while (iterator.hasNext()) {
                final VariantContext outputContext = iterator.next();
                if (last != null) {
                    Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
                }
                last = outputContext;
                variantContextCount++;
            }
        }
    }
    Assert.assertEquals(variantContextCount, expectedVariantContextCount);
}
Use of htsjdk.variant.variantcontext.VariantContextComparator in the gatk project by broadinstitute.
Class AbstractVcfMergingClpTester, method validateResultsForMultipleInputs.
/**
 * Checks that the records of the output VCF are in non-decreasing order and that every expected
 * position has been seen.
 *
 * <p>For each output record, the first queue whose head equals the record's position string (as
 * produced by {@code getContigPosition}) has that head removed; at most one queue is advanced per
 * record. After all records are read, every queue must be empty.
 *
 * @param output         the VCF file to validate
 * @param positionQueues one queue of expected position strings per input; the queues are consumed
 *                       (mutated) by this method
 */
private void validateResultsForMultipleInputs(final File output, final List<Queue<String>> positionQueues) {
    // The reader is opened in try-with-resources so its file handle is released even if an assertion fails.
    try (final VCFFileReader outputReader = new VCFFileReader(output, false)) {
        final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
        VariantContext last = null;
        try (final CloseableIterator<VariantContext> iterator = outputReader.iterator()) {
            while (iterator.hasNext()) {
                final VariantContext outputContext = iterator.next();
                final String position = getContigPosition(outputContext);
                for (final Queue<String> positionQueue : positionQueues) {
                    if (position.equals(positionQueue.peek())) {
                        positionQueue.poll();
                        // Consume the position from a single queue only.
                        break;
                    }
                }
                if (last != null) {
                    Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
                }
                last = outputContext;
            }
        }
    }
    for (final Queue<String> positionQueue : positionQueues) {
        Assert.assertEquals(positionQueue.size(), 0);
    }
}
Aggregations