use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class MultiVariantDataSource method validateAllSequenceDictionaries.
/**
 * Cross-validates the sequence dictionaries of all feature data sources against each other.
 *
 * GATKTool only validates each individual feature dictionary against the reference dictionary, so this
 * method compares the dictionaries with one another: the first data source in which a contig name is seen
 * is remembered, and every later data source that also contains that contig has its record checked against
 * the remembered one via {@code validateSequenceDictionaryRecords}.
 *
 * A data source for which no dictionary can be obtained is reported with a warning and otherwise skipped.
 */
private void validateAllSequenceDictionaries() {
    // Contig name -> the first data source whose dictionary contained that contig.
    final Map<String, FeatureDataSource<VariantContext>> firstSourceByContig = new HashMap<>();

    featureDataSources.forEach(currentSource -> {
        final SAMSequenceDictionary currentDictionary = currentSource.getSequenceDictionary();
        if (currentDictionary == null) {
            logger.warn("A sequence dictionary is required for each input when using multiple inputs, and one could"
                    + " not be obtained for feature input: "
                    + currentSource.getName()
                    + ". The input may not exist or may not have a valid header");
            return;
        }

        for (final SAMSequenceRecord currentRecord : currentDictionary.getSequences()) {
            final String contigName = currentRecord.getSequenceName();
            final FeatureDataSource<VariantContext> firstSource = firstSourceByContig.get(contigName);

            if (firstSource == null) {
                // First time this contig is seen: remember where it came from.
                firstSourceByContig.put(contigName, currentSource);
                continue;
            }

            // Contig already seen elsewhere: compare this record with the one recorded earlier.
            final SAMSequenceDictionary firstDictionary = firstSource.getSequenceDictionary();
            final SAMSequenceRecord firstRecord = firstDictionary.getSequence(contigName);
            validateSequenceDictionaryRecords(
                    currentSource.getName(), currentDictionary, currentRecord,
                    firstSource.getName(), firstDictionary, firstRecord);
        }
    });
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class GatherVcfs method getReaderFromVCFUri.
/**
 * Opens a VCF feature reader for the given path.
 *
 * @param variantPath         path of the VCF to read; it is converted to a URI string before being opened
 * @param cloudPrefetchBuffer prefetch buffer size passed to {@code SeekableByteChannelPrefetcher}; when it is
 *                            not greater than zero the data channel is left unwrapped
 * @return the reader returned by {@code AbstractFeatureReader.getFeatureReader} for the path
 */
private static FeatureReader<VariantContext> getReaderFromVCFUri(final Path variantPath, final int cloudPrefetchBuffer) {
    // Wrapper applied to the data channel; the index channel wrapper is always the identity.
    final Function<SeekableByteChannel, SeekableByteChannel> dataChannelWrapper;
    if (cloudPrefetchBuffer > 0) {
        dataChannelWrapper = channel -> SeekableByteChannelPrefetcher.addPrefetcher(cloudPrefetchBuffer, channel);
    } else {
        dataChannelWrapper = Function.identity();
    }

    final String uriString = variantPath.toUri().toString();
    return AbstractFeatureReader.getFeatureReader(
            uriString, null, new VCFCodec(), false, dataChannelWrapper, Function.identity());
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class FilterApplyingVariantIterator method next.
/**
 * Returns the next record from the wrapped iterator, rebuilt with the site-level and genotype-level
 * filter strings produced by the filters configured on this iterator.
 *
 * @return a new {@link VariantContext} carrying the computed site filters and per-genotype filters
 */
@Override
public VariantContext next() {
    final VariantContext original = this.iterator.next();

    // Site-level filters: every non-null string returned by a variant filter is recorded.
    final Set<String> siteFilters = new HashSet<>();
    for (final VariantFilter variantFilter : this.filters) {
        final String result = variantFilter.filter(original);
        if (result != null) {
            siteFilters.add(result);
        }
    }

    // Genotype-level filters, keyed by sample name, plus the set of samples that are
    // called and not hom-ref at this site.
    final ListMap<String, String> filtersBySample = new ListMap<>();
    final Set<String> samplesWithVariant = new HashSet<>();
    for (final Genotype genotype : original.getGenotypes()) {
        final boolean carriesVariant = genotype.isCalled() && !genotype.isHomRef();
        if (carriesVariant) {
            samplesWithVariant.add(genotype.getSampleName());
        }
        for (final GenotypeFilter genotypeFilter : gtFilters) {
            final String result = genotypeFilter.filter(original, genotype);
            if (result != null) {
                filtersBySample.add(genotype.getSampleName(), result);
            }
        }
    }

    // When every variant-carrying sample has at least one genotype filter, flag the whole site.
    // NOTE: containsAll is vacuously true when samplesWithVariant is empty, so sites with no
    // called non-hom-ref genotype also receive this filter.
    final boolean everyVariantSampleFiltered = filtersBySample.keySet().containsAll(samplesWithVariant);
    if (everyVariantSampleFiltered) {
        siteFilters.add(ALL_GTS_FILTERED);
    }

    // Start from the original record and set the site-level filter state.
    final VariantContextBuilder siteBuilder = new VariantContextBuilder(original);
    if (!siteFilters.isEmpty()) {
        siteBuilder.filters(siteFilters);
    } else {
        siteBuilder.passFilters();
    }

    // Rebuild each genotype with its own filters, or PASS_FILTER when it has none.
    siteBuilder.noGenotypes();
    final List<Genotype> rebuiltGenotypes = new ArrayList<>(original.getNSamples());
    for (final Genotype genotype : original.getGenotypes()) {
        final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype);
        final List<String> sampleFilters = filtersBySample.get(genotype.getSampleName());
        final boolean hasFilters = sampleFilters != null && !sampleFilters.isEmpty();
        if (hasFilters) {
            genotypeBuilder.filters(sampleFilters);
        } else {
            genotypeBuilder.filter(PASS_FILTER);
        }
        rebuiltGenotypes.add(genotypeBuilder.make());
    }
    siteBuilder.genotypes(rebuiltGenotypes);

    return siteBuilder.make();
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class MakeSitesOnlyVcf method doWork.
/**
 * Reads every record from INPUT, subsets it to the samples in SAMPLE via
 * {@code subsetToSamplesWithOriginalAnnotations}, and writes the result to OUTPUT.
 *
 * The input reader, the record iterator and the output writer are all managed by
 * try-with-resources so that they are released even when the dictionary check fails or an
 * exception is thrown while records are being written.
 *
 * @return always {@code null}
 * @throws UserException if CREATE_INDEX is set but the input header provides no sequence dictionary
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    try (final VCFFileReader reader = new VCFFileReader(INPUT, false)) {
        final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
        final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();
        if (CREATE_INDEX && sequenceDictionary == null) {
            throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
        }

        final ProgressLogger progress = new ProgressLogger(logger, 10000);

        // Set up the writer for the subsetted output, indexing on the fly only when requested.
        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setOutputFile(OUTPUT)
                .setReferenceDictionary(sequenceDictionary);
        if (CREATE_INDEX) {
            builder.setOption(Options.INDEX_ON_THE_FLY);
        } else {
            builder.unsetOption(Options.INDEX_ON_THE_FLY);
        }

        try (final VariantContextWriter writer = builder.build()) {
            // The output header keeps the input metadata but lists only the samples in SAMPLE.
            final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
            writer.writeHeader(header);

            // Go through the input, strip the records and write them to the output.
            try (final CloseableIterator<VariantContext> iterator = reader.iterator()) {
                while (iterator.hasNext()) {
                    final VariantContext full = iterator.next();
                    final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
                    writer.add(site);
                    progress.record(site.getContig(), site.getStart());
                }
            }
        }
    }
    return null;
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class MergeVcfs method doWork.
/**
 * Merges the VCF files listed in INPUT into a single file at OUTPUT.
 *
 * Each input must have contig lines compatible with the first input's record comparator and the
 * same ordered sample list as the first input; otherwise an {@link IllegalArgumentException} is
 * thrown. The sequence dictionary is taken from SEQUENCE_DICTIONARY when set, otherwise from the
 * first input header that is examined while no dictionary has been found yet.
 *
 * All input readers opened here are tracked and closed in a {@code finally} block, and the merging
 * iterator is closed even when writing fails, so file handles are not leaked on error paths.
 *
 * @return always {@code null}
 * @throws IllegalArgumentException if an input's contigs or samples do not match the other inputs
 * @throws UserException if CREATE_INDEX is set but no sequence dictionary is available
 */
@Override
protected Object doWork() {
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final List<String> sampleList = new ArrayList<>();
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<>(INPUT.size());
    // Every reader opened below, so that all of them can be closed however this method exits.
    final List<VCFFileReader> openReaders = new ArrayList<>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;
    if (SEQUENCE_DICTIONARY != null) {
        // NOTE(review): the reader opened here is not closed by this method - confirm whether that matters.
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }

    try {
        for (final File file : INPUT) {
            IOUtil.assertFileIsReadable(file);
            final VCFFileReader fileReader = new VCFFileReader(file, false);
            openReaders.add(fileReader);
            final VCFHeader fileHeader = fileReader.getFileHeader();

            // The first input defines the comparator; later inputs must be compatible with it.
            if (variantContextComparator == null) {
                variantContextComparator = fileHeader.getVCFRecordComparator();
            } else if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException("The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }

            if (sequenceDictionary == null) {
                sequenceDictionary = fileHeader.getSequenceDictionary();
            }

            // The first input defines the sample list; later inputs must match it exactly, in order.
            if (sampleList.isEmpty()) {
                sampleList.addAll(fileHeader.getSampleNamesInOrder());
            } else if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }

            headers.add(fileHeader);
            iteratorCollection.add(fileReader.iterator());
        }

        if (CREATE_INDEX && sequenceDictionary == null) {
            throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
        }

        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary).clearOptions();
        if (CREATE_INDEX) {
            builder.setOption(Options.INDEX_ON_THE_FLY);
        }

        try (final VariantContextWriter writer = builder.build()) {
            writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));
            final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(variantContextComparator, iteratorCollection);
            try {
                while (mergingIterator.hasNext()) {
                    final VariantContext context = mergingIterator.next();
                    writer.add(context);
                    progress.record(context.getContig(), context.getStart());
                }
            } finally {
                CloserUtil.close(mergingIterator);
            }
        }
    } finally {
        // Release every input reader, including those opened before a validation failure.
        for (final VCFFileReader openReader : openReaders) {
            CloserUtil.close(openReader);
        }
    }
    return null;
}
Aggregations