use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class MergeVcfs method doWork.
@Override
protected Object doWork() {
final ProgressLogger progress = new ProgressLogger(logger, 10000);
final List<String> sampleList = new ArrayList<>();
final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<>(INPUT.size());
final Collection<VCFHeader> headers = new HashSet<>(INPUT.size());
VariantContextComparator variantContextComparator = null;
SAMSequenceDictionary sequenceDictionary = null;
if (SEQUENCE_DICTIONARY != null) {
sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
}
for (final File file : INPUT) {
IOUtil.assertFileIsReadable(file);
final VCFFileReader fileReader = new VCFFileReader(file, false);
final VCFHeader fileHeader = fileReader.getFileHeader();
if (variantContextComparator == null) {
variantContextComparator = fileHeader.getVCFRecordComparator();
} else {
if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
throw new IllegalArgumentException("The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
}
}
if (sequenceDictionary == null)
sequenceDictionary = fileHeader.getSequenceDictionary();
if (sampleList.isEmpty()) {
sampleList.addAll(fileHeader.getSampleNamesInOrder());
} else {
if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
}
}
headers.add(fileHeader);
iteratorCollection.add(fileReader.iterator());
}
if (CREATE_INDEX && sequenceDictionary == null) {
throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
}
final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary).clearOptions();
if (CREATE_INDEX) {
builder.setOption(Options.INDEX_ON_THE_FLY);
}
try (final VariantContextWriter writer = builder.build()) {
writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));
final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(variantContextComparator, iteratorCollection);
while (mergingIterator.hasNext()) {
final VariantContext context = mergingIterator.next();
writer.add(context);
progress.record(context.getContig(), context.getStart());
}
CloserUtil.close(mergingIterator);
}
return null;
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class SortVcf method collectFileReadersAndHeaders.
private void collectFileReadersAndHeaders(final List<String> sampleList, SAMSequenceDictionary samSequenceDictionary) {
for (final File input : INPUT) {
final VCFFileReader in = new VCFFileReader(input, false);
final VCFHeader header = in.getFileHeader();
final SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
if (dict == null || dict.isEmpty()) {
if (null == samSequenceDictionary) {
throw new IllegalArgumentException("Sequence dictionary was missing or empty for the VCF: " + input.getAbsolutePath() + " Please add a sequence dictionary to this VCF or specify SEQUENCE_DICTIONARY.");
}
header.setSequenceDictionary(samSequenceDictionary);
} else {
if (null == samSequenceDictionary) {
samSequenceDictionary = dict;
} else {
try {
samSequenceDictionary.assertSameDictionary(dict);
} catch (final AssertionError e) {
throw new IllegalArgumentException(e);
}
}
}
if (sampleList.isEmpty()) {
sampleList.addAll(header.getSampleNamesInOrder());
} else {
if (!sampleList.equals(header.getSampleNamesInOrder())) {
throw new IllegalArgumentException("Input file " + input.getAbsolutePath() + " has sample names that don't match the other files.");
}
}
inputReaders.add(in);
inputHeaders.add(header);
}
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class VariantRecalibrator method onTraversalStart.
//---------------------------------------------------------------------------------------------------------------
//
// onTraversalStart
//
//---------------------------------------------------------------------------------------------------------------
@Override
public void onTraversalStart() {
if (gatk3Compatibility) {
// Temporary argument for validation of GATK4 implementation against GATK3 results:
// Reset the RNG and draw a single int to align the RNG initial state with that used
// by GATK3 to allow comparison of results with GATK3
Utils.resetRandomGenerator();
Utils.getRandomGenerator().nextInt();
}
dataManager = new VariantDataManager(new ArrayList<>(USE_ANNOTATIONS), VRAC);
if (RSCRIPT_FILE != null && !RScriptExecutor.RSCRIPT_EXISTS)
Utils.warnUser(logger, String.format("Rscript not found in environment path. %s will be generated but PDF plots will not.", RSCRIPT_FILE));
if (IGNORE_INPUT_FILTERS != null) {
ignoreInputFilterSet.addAll(IGNORE_INPUT_FILTERS);
}
try {
tranchesStream = new PrintStream(TRANCHES_FILE);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
}
for (FeatureInput<VariantContext> variantFeature : resource) {
dataManager.addTrainingSet(new TrainingSet(variantFeature));
}
if (!dataManager.checkHasTrainingSet()) {
throw new CommandLineException("No training set found! Please provide sets of known polymorphic loci marked with the training=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
}
if (!dataManager.checkHasTruthSet()) {
throw new CommandLineException("No truth set found! Please provide sets of known polymorphic loci marked with the truth=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
}
//TODO: this should be refactored/consolidated as part of
// https://github.com/broadinstitute/gatk/issues/2112
// https://github.com/broadinstitute/gatk/issues/121,
// https://github.com/broadinstitute/gatk/issues/1116 and
// Initialize VCF header lines
Set<VCFHeaderLine> hInfo = getDefaultToolVCFHeaderLines();
VariantRecalibrationUtils.addVQSRStandardHeaderLines(hInfo);
SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
if (hasReference()) {
hInfo = VcfUtils.updateHeaderContigLines(hInfo, referenceArguments.getReferenceFile(), sequenceDictionary, true);
} else if (null != sequenceDictionary) {
hInfo = VcfUtils.updateHeaderContigLines(hInfo, null, sequenceDictionary, true);
}
recalWriter = createVCFWriter(new File(output));
recalWriter.writeHeader(new VCFHeader(hInfo));
for (int iii = 0; iii < REPLICATE * 2; iii++) {
replicate.add(Utils.getRandomGenerator().nextDouble());
}
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class SelectVariants method onTraversalStart.
/**
* Set up the VCF writer, the sample expressions and regexs, filters inputs, and the JEXL matcher
*
*/
@Override
public void onTraversalStart() {
final Map<String, VCFHeader> vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants());
// Initialize VCF header lines
final Set<VCFHeaderLine> headerLines = createVCFHeaderLineList(vcfHeaders);
for (int i = 0; i < selectExpressions.size(); i++) {
// It's not necessary that the user supply select names for the JEXL expressions, since those
// expressions will only be needed for omitting records. Make up the select names here.
selectNames.add(String.format("select-%d", i));
}
jexls = VariantContextUtils.initializeMatchExps(selectNames, selectExpressions);
// Prepare the sample names and types to be used by the corresponding filters
samples = createSampleNameInclusionList(vcfHeaders);
selectedTypes = createSampleTypeInclusionList();
// Look at the parameters to decide which analysis to perform
discordanceOnly = discordanceTrack != null;
if (discordanceOnly) {
logger.info("Selecting only variants discordant with the track: " + discordanceTrack.getName());
}
concordanceOnly = concordanceTrack != null;
if (concordanceOnly) {
logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
}
if (mendelianViolations) {
sampleDB = initializeSampleDB();
mv = new MendelianViolation(mendelianViolationQualThreshold, false, true);
}
selectRandomFraction = fractionRandom > 0;
if (selectRandomFraction) {
logger.info("Selecting approximately " + 100.0 * fractionRandom + "% of the variants at random from the variant track");
}
//TODO: this should be refactored/consolidated as part of
// https://github.com/broadinstitute/gatk/issues/121 and
// https://github.com/broadinstitute/gatk/issues/1116
Set<VCFHeaderLine> actualLines = null;
SAMSequenceDictionary sequenceDictionary = null;
if (hasReference()) {
File refFile = referenceArguments.getReferenceFile();
sequenceDictionary = this.getReferenceDictionary();
actualLines = VcfUtils.updateHeaderContigLines(headerLines, refFile, sequenceDictionary, suppressReferencePath);
} else {
sequenceDictionary = getHeaderForVariants().getSequenceDictionary();
if (null != sequenceDictionary) {
actualLines = VcfUtils.updateHeaderContigLines(headerLines, null, sequenceDictionary, suppressReferencePath);
} else {
actualLines = headerLines;
}
}
vcfWriter = createVCFWriter(outFile);
vcfWriter.writeHeader(new VCFHeader(actualLines, samples));
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class UpdateVCFSequenceDictionary method onTraversalStart.
@Override
public void onTraversalStart() {
VCFHeader inputHeader = getHeaderForVariants();
VCFHeader outputHeader = inputHeader == null ? new VCFHeader() : new VCFHeader(inputHeader.getMetaDataInInputOrder(), inputHeader.getGenotypeSamples());
getDefaultToolVCFHeaderLines().forEach(line -> outputHeader.addMetaDataLine(line));
sourceDictionary = getSequenceDictionaryFromInput(dictionarySource);
// Warn and require opt-in via -replace if we're about to clobber a valid sequence
// dictionary. Check the input file directly via the header rather than using the
// engine, since it might dig one up from an index.
SAMSequenceDictionary oldDictionary = inputHeader == null ? null : inputHeader.getSequenceDictionary();
if ((oldDictionary != null && !oldDictionary.getSequences().isEmpty()) && !replace) {
throw new CommandLineException.BadArgumentValue(String.format("The input variant file %s already contains a sequence dictionary. " + "Use %s to force the dictionary to be replaced.", getDrivingVariantsFeatureInput().getName(), REPLACE_ARGUMENT_NAME));
}
outputHeader.setSequenceDictionary(sourceDictionary);
vcfWriter = createVCFWriter(new File(outFile));
vcfWriter.writeHeader(outputHeader);
}
Aggregations