use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class RenameSampleInVcf method doWork.
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    try (final VCFFileReader in = new VCFFileReader(INPUT)) {
        final VCFHeader header = in.getFileHeader();
        Utils.validateArg(header.getGenotypeSamples().size() == 1, "Input VCF must be single-sample.");
        Utils.validateArg(OLD_SAMPLE_NAME == null || OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0)),
                () -> "Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0));
        final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
        if (CREATE_INDEX) {
            options.add(Options.INDEX_ON_THE_FLY);
        } else {
            options.remove(Options.INDEX_ON_THE_FLY);
        }
        final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME));
        try (final VariantContextWriter out = new VariantContextWriterBuilder()
                .setOutputFile(OUTPUT)
                .setReferenceDictionary(outHeader.getSequenceDictionary())
                .setOptions(options)
                .build()) {
            out.writeHeader(outHeader);
            for (final VariantContext ctx : in) {
                out.add(ctx);
            }
        }
    }
    return null;
}
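The tool rewrites only the sample column of the header; each VariantContext is copied through unchanged, so the genotype data itself is untouched. Below is a minimal standalone sketch of the same pattern in plain htsjdk, outside the GATK tool framework. The class and method names, the requireIndex=false flag, and the choice to skip on-the-fly indexing (so no sequence dictionary is needed) are illustrative assumptions, not part of the tool above.

import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;

import java.io.File;
import java.util.Collections;

final class RenameSampleSketch {
    // Copies every record verbatim; only the sample name in the header line changes.
    static void renameSingleSample(final File inputVcf, final File outputVcf, final String newSampleName) {
        try (final VCFFileReader in = new VCFFileReader(inputVcf, false)) {   // false: do not require an index
            final VCFHeader oldHeader = in.getFileHeader();
            final VCFHeader newHeader = new VCFHeader(oldHeader.getMetaDataInInputOrder(),
                    Collections.singletonList(newSampleName));
            try (final VariantContextWriter out = new VariantContextWriterBuilder()
                    .setOutputFile(outputVcf)
                    .clearOptions()   // no on-the-fly indexing, so no sequence dictionary is required
                    .build()) {
                out.writeHeader(newHeader);
                for (final VariantContext ctx : in) {
                    out.add(ctx);
                }
            }
        }
    }
}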
use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class SortVcf method sortInputs.
/**
* Merge the inputs and sort them by adding each input's content to a single SortingCollection.
* <p/>
* NB: It would be better to have a merging iterator as in MergeSamFiles, as this would perform better for pre-sorted inputs.
* Here, we are assuming inputs are unsorted, and so adding their VariantContexts iteratively is fine for now.
* MergeVcfs exists for simple merging of presorted inputs.
*
* @param readers - a list of VCFFileReaders, one for each input VCF
* @param outputHeader - The merged header whose information we intend to use in the final output file
*/
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(logger, 25000, "read", "records");
    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords
    // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time.
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(
            VariantContext.class,
            new VCFRecordCodec(outputHeader),
            outputHeader.getVCFRecordComparator(),
            MAX_RECORDS_IN_RAM,
            TMP_DIR);
    int readerCount = 1;
    for (final VCFFileReader reader : readers) {
        logger.info("Reading entries from input file " + readerCount);
        for (final VariantContext variantContext : reader) {
            sorter.add(variantContext);
            readProgress.record(variantContext.getContig(), variantContext.getStart());
        }
        reader.close();
        readerCount++;
    }
    return sorter;
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class SortVcf method writeSortedOutput.
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(logger, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
    final VariantContextWriter out = new VariantContextWriterBuilder()
            .setReferenceDictionary(outputHeader.getSequenceDictionary())
            .setOptions(options)
            .setOutputFile(OUTPUT)
            .build();
    out.writeHeader(outputHeader);
    for (final VariantContext variantContext : sortedOutput) {
        out.add(variantContext);
        writeProgress.record(variantContext.getContig(), variantContext.getStart());
    }
    out.close();
}
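Taken together, sortInputs fills the SortingCollection (spilling sorted chunks to TMP_DIR once MAX_RECORDS_IN_RAM is exceeded) and writeSortedOutput streams the merged result back out in sorted order. Below is a hedged sketch of how a caller could wire the two methods together, assuming the readers and the merged outputHeader have already been prepared; the tool's actual doWork performs additional setup and validation that is omitted here.

// Caller-side sketch only: reader setup and header merging/validation are omitted.
final SortingCollection<VariantContext> sorter = sortInputs(readers, outputHeader);
sorter.doneAdding();                      // flush the last in-memory chunk before reading back
writeSortedOutput(outputHeader, sorter);  // iterates the collection in sorted order
sorter.cleanup();                         // delete any temporary spill files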
use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class PairedVariantSubContextIterator method next.
@Override
public VcTuple next() {
    Utils.validate(hasNext(), "next() called while hasNext() is false.");
    final VariantContext truthVariantContext = this.truthIterator.hasNext() ? this.truthIterator.peek() : null;
    final VariantContext callVariantContext = this.callIterator.hasNext() ? this.callIterator.peek() : null;
    // If one or the other is null because there is no next, just return a one-sided tuple
    if (truthVariantContext == null) {
        return new VcTuple(null, this.callIterator.next().subContextFromSample(callSample));
    } else if (callVariantContext == null) {
        return new VcTuple(this.truthIterator.next().subContextFromSample(truthSample), null);
    }
    // Otherwise check the ordering and do the right thing
    final int ordering = this.comparator.compare(truthVariantContext, callVariantContext);
    if (ordering == 0) {
        return new VcTuple(this.truthIterator.next().subContextFromSample(truthSample),
                this.callIterator.next().subContextFromSample(callSample));
    } else if (ordering < 0) {
        return new VcTuple(this.truthIterator.next().subContextFromSample(truthSample), null);
    } else {
        return new VcTuple(null, this.callIterator.next().subContextFromSample(callSample));
    }
}
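Each call to next() yields a VcTuple of per-sample sub-contexts: both sides are populated when the truth and call records compare equal, and one side is null when the record exists in only one input or the loci differ. A hedged consumer sketch follows; the pairedIterator variable and the truthVariantContext/callVariantContext field names on VcTuple are assumptions for illustration and may differ from the actual class.

// Sketch only: tuple field names and iterator construction are assumed, not taken from the class above.
int matchedSites = 0;
int truthOnlySites = 0;
int callOnlySites = 0;
while (pairedIterator.hasNext()) {
    final VcTuple tuple = pairedIterator.next();
    if (tuple.truthVariantContext != null && tuple.callVariantContext != null) {
        matchedSites++;       // truth and call both have a record at this position
    } else if (tuple.truthVariantContext != null) {
        truthOnlySites++;     // record present only in the truth VCF
    } else {
        callOnlySites++;      // record present only in the call VCF
    }
}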
use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class VariantRecalibrator method onTraversalStart.
//---------------------------------------------------------------------------------------------------------------
//
// onTraversalStart
//
//---------------------------------------------------------------------------------------------------------------
@Override
public void onTraversalStart() {
    if (gatk3Compatibility) {
        // Temporary argument for validation of GATK4 implementation against GATK3 results:
        // Reset the RNG and draw a single int to align the RNG initial state with that used
        // by GATK3 to allow comparison of results with GATK3
        Utils.resetRandomGenerator();
        Utils.getRandomGenerator().nextInt();
    }
    dataManager = new VariantDataManager(new ArrayList<>(USE_ANNOTATIONS), VRAC);
    if (RSCRIPT_FILE != null && !RScriptExecutor.RSCRIPT_EXISTS) {
        Utils.warnUser(logger, String.format("Rscript not found in environment path. %s will be generated but PDF plots will not.", RSCRIPT_FILE));
    }
    if (IGNORE_INPUT_FILTERS != null) {
        ignoreInputFilterSet.addAll(IGNORE_INPUT_FILTERS);
    }
    try {
        tranchesStream = new PrintStream(TRANCHES_FILE);
    } catch (FileNotFoundException e) {
        throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
    }
    for (FeatureInput<VariantContext> variantFeature : resource) {
        dataManager.addTrainingSet(new TrainingSet(variantFeature));
    }
    if (!dataManager.checkHasTrainingSet()) {
        throw new CommandLineException("No training set found! Please provide sets of known polymorphic loci marked with the training=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
    }
    if (!dataManager.checkHasTruthSet()) {
        throw new CommandLineException("No truth set found! Please provide sets of known polymorphic loci marked with the truth=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
    }
    //TODO: this should be refactored/consolidated as part of
    // https://github.com/broadinstitute/gatk/issues/2112
    // https://github.com/broadinstitute/gatk/issues/121,
    // https://github.com/broadinstitute/gatk/issues/1116 and
    // Initialize VCF header lines
    Set<VCFHeaderLine> hInfo = getDefaultToolVCFHeaderLines();
    VariantRecalibrationUtils.addVQSRStandardHeaderLines(hInfo);
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    if (hasReference()) {
        hInfo = VcfUtils.updateHeaderContigLines(hInfo, referenceArguments.getReferenceFile(), sequenceDictionary, true);
    } else if (null != sequenceDictionary) {
        hInfo = VcfUtils.updateHeaderContigLines(hInfo, null, sequenceDictionary, true);
    }
    recalWriter = createVCFWriter(new File(output));
    recalWriter.writeHeader(new VCFHeader(hInfo));
    for (int iii = 0; iii < REPLICATE * 2; iii++) {
        replicate.add(Utils.getRandomGenerator().nextDouble());
    }
}