Use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
From the class ReorderSam, method doWork:
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    SAMSequenceDictionary refDict = reference.getSequenceDictionary();
    if (refDict == null) {
        CloserUtil.close(in);
        throw new UserException("No reference sequence dictionary found. Aborting. " +
                "You can create a sequence dictionary for the reference fasta using CreateSequenceDictionary.jar.");
    }
    printDictionary("SAM/BAM file", in.getFileHeader().getSequenceDictionary());
    printDictionary("Reference", refDict);
    Map<Integer, Integer> newOrder = buildSequenceDictionaryMap(refDict, in.getFileHeader().getSequenceDictionary());
    // has to be after we create the newOrder
    SAMFileHeader outHeader = ReadUtils.cloneSAMFileHeader(in.getFileHeader());
    outHeader.setSequenceDictionary(refDict);
    logger.info("Writing reads...");
    if (in.hasIndex()) {
        try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, true)) {
            // write the reads in contig order
            for (final SAMSequenceRecord contig : refDict.getSequences()) {
                final SAMRecordIterator it = in.query(contig.getSequenceName(), 0, 0, false);
                writeReads(out, it, newOrder, contig.getSequenceName());
            }
            // don't forget the unmapped reads
            writeReads(out, in.queryUnmapped(), newOrder, "unmapped");
        }
    } else {
        try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, false)) {
            writeReads(out, in.iterator(), newOrder, "All reads");
        }
    }
    // cleanup
    CloserUtil.close(in);
    return null;
}
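The pivot of this reordering is the contig-index map built by buildSequenceDictionaryMap from the two dictionaries. A minimal, self-contained sketch of that idea, with hypothetical hand-built dictionaries standing in for the BAM header dictionary and the reference .dict, might look like this:

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class ContigIndexRemapSketch {
    public static void main(String[] args) {
        // Hypothetical dictionaries; the tool reads these from the BAM header and the reference .dict
        final SAMSequenceDictionary readsDict = new SAMSequenceDictionary(Arrays.asList(
                new SAMSequenceRecord("chr2", 1000),
                new SAMSequenceRecord("chr1", 2000)));
        final SAMSequenceDictionary refDict = new SAMSequenceDictionary(Arrays.asList(
                new SAMSequenceRecord("chr1", 2000),
                new SAMSequenceRecord("chr2", 1000)));

        // Map each contig's index in the reads dictionary to its index in the reference dictionary
        final Map<Integer, Integer> newOrder = new HashMap<>();
        for (final SAMSequenceRecord rec : readsDict.getSequences()) {
            final int refIndex = refDict.getSequenceIndex(rec.getSequenceName());
            if (refIndex < 0) {
                throw new IllegalStateException("Contig missing from reference dictionary: " + rec.getSequenceName());
            }
            newOrder.put(readsDict.getSequenceIndex(rec.getSequenceName()), refIndex);
        }
        // Prints 0 -> 1 and 1 -> 0 for the dictionaries above
        newOrder.forEach((oldIdx, newIdx) -> System.out.println(oldIdx + " -> " + newIdx));
    }
}

Looking up each index by contig name on both sides keeps the sketch independent of how the records were numbered when the dictionaries were built.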
Use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
From the class MakeSitesOnlyVcf, method doWork:
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final VCFFileReader reader = new VCFFileReader(INPUT, false);
    final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
    final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    // Setup the site-only file writer
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    try (final VariantContextWriter writer = builder.build()) {
        final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
        writer.writeHeader(header);
        // Go through the input, strip the records and write them to the output
        final CloseableIterator<VariantContext> iterator = reader.iterator();
        while (iterator.hasNext()) {
            final VariantContext full = iterator.next();
            final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
            writer.add(site);
            progress.record(site.getContig(), site.getStart());
        }
        CloserUtil.close(iterator);
        CloserUtil.close(reader);
    }
    return null;
}
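The up-front dictionary check matters because INDEX_ON_THE_FLY needs a reference dictionary to build the index as records are written. A stripped-down sketch of the same check and writer setup, with hypothetical file names and without the sample-subsetting step, could be:

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;

import java.io.File;

public class SitesOnlyWriterSketch {
    public static void main(String[] args) {
        // Hypothetical paths for illustration only
        final File inputVcf = new File("input.vcf.gz");
        final File outputVcf = new File("sites_only.vcf.gz");

        try (final VCFFileReader reader = new VCFFileReader(inputVcf, false)) {
            final VCFHeader header = reader.getFileHeader();
            final SAMSequenceDictionary dict = header.getSequenceDictionary();
            if (dict == null) {
                throw new IllegalStateException("Index-on-the-fly writing requires a sequence dictionary in the header.");
            }
            final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                    .setOutputFile(outputVcf)
                    .setReferenceDictionary(dict)
                    .setOption(Options.INDEX_ON_THE_FLY);
            try (final VariantContextWriter writer = builder.build()) {
                writer.writeHeader(header);
                // Records would be added here; the real tool first reduces each record
                // to the requested SAMPLE set via subsetToSamplesWithOriginalAnnotations
            }
        }
    }
}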
Use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
From the class MergeVcfs, method doWork:
@Override
protected Object doWork() {
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final List<String> sampleList = new ArrayList<>();
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;
    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }
    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException("The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }
        if (sequenceDictionary == null)
            sequenceDictionary = fileHeader.getSequenceDictionary();
        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }
        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary).clearOptions();
    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter writer = builder.build()) {
        writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));
        final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(variantContextComparator, iteratorCollection);
        while (mergingIterator.hasNext()) {
            final VariantContext context = mergingIterator.next();
            writer.add(context);
            progress.record(context.getContig(), context.getStart());
        }
        CloserUtil.close(mergingIterator);
    }
    return null;
}
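Each additional input's contig lines must be compatible with the comparator built from the first header, otherwise the merged stream cannot be ordered consistently. A hedged two-file sketch of that compatibility check, using hypothetical paths, follows:

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.VariantContextComparator;
import htsjdk.variant.vcf.VCFFileReader;

import java.io.File;

public class MergeCompatibilitySketch {
    public static void main(String[] args) {
        // Hypothetical inputs for illustration only
        final File firstVcf = new File("a.vcf.gz");
        final File secondVcf = new File("b.vcf.gz");

        try (final VCFFileReader first = new VCFFileReader(firstVcf, false);
             final VCFFileReader second = new VCFFileReader(secondVcf, false)) {
            // Build the record comparator from the first header's contig lines,
            // then verify the second file's contigs sort compatibly with it
            final VariantContextComparator comparator = first.getFileHeader().getVCFRecordComparator();
            if (!comparator.isCompatible(second.getFileHeader().getContigLines())) {
                throw new IllegalArgumentException("Contig lines of " + secondVcf + " are not compatible with " + firstVcf);
            }
            // The first usable dictionary then serves as the reference dictionary for the output writer
            final SAMSequenceDictionary dict = first.getFileHeader().getSequenceDictionary();
            if (dict != null) {
                dict.getSequences().forEach(rec ->
                        System.out.println(rec.getSequenceName() + "\t" + rec.getSequenceLength()));
            }
        }
    }
}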
Use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
From the class SortVcf, method collectFileReadersAndHeaders:
private void collectFileReadersAndHeaders(final List<String> sampleList, SAMSequenceDictionary samSequenceDictionary) {
    for (final File input : INPUT) {
        final VCFFileReader in = new VCFFileReader(input, false);
        final VCFHeader header = in.getFileHeader();
        final SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
        if (dict == null || dict.isEmpty()) {
            if (null == samSequenceDictionary) {
                throw new IllegalArgumentException("Sequence dictionary was missing or empty for the VCF: " + input.getAbsolutePath() +
                        " Please add a sequence dictionary to this VCF or specify SEQUENCE_DICTIONARY.");
            }
            header.setSequenceDictionary(samSequenceDictionary);
        } else {
            if (null == samSequenceDictionary) {
                samSequenceDictionary = dict;
            } else {
                try {
                    samSequenceDictionary.assertSameDictionary(dict);
                } catch (final AssertionError e) {
                    throw new IllegalArgumentException(e);
                }
            }
        }
        if (sampleList.isEmpty()) {
            sampleList.addAll(header.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(header.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + input.getAbsolutePath() + " has sample names that don't match the other files.");
            }
        }
        inputReaders.add(in);
        inputHeaders.add(header);
    }
}
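Note that assertSameDictionary signals a mismatch with an AssertionError rather than an exception, which is why the code above catches and rewraps it. A small standalone sketch with hand-built, hypothetical dictionaries:

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;

import java.util.Arrays;

public class DictionaryConsistencySketch {
    public static void main(String[] args) {
        // Hypothetical dictionaries standing in for two input VCF headers
        final SAMSequenceDictionary first = new SAMSequenceDictionary(Arrays.asList(
                new SAMSequenceRecord("chr1", 1000),
                new SAMSequenceRecord("chr2", 500)));
        final SAMSequenceDictionary second = new SAMSequenceDictionary(Arrays.asList(
                new SAMSequenceRecord("chr1", 1000),
                new SAMSequenceRecord("chr2", 500)));
        try {
            // Throws AssertionError when names, order, or lengths differ
            first.assertSameDictionary(second);
            System.out.println("Dictionaries match.");
        } catch (final AssertionError e) {
            throw new IllegalArgumentException("Input sequence dictionaries differ", e);
        }
    }
}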
Use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
From the class VariantRecalibrator, method onTraversalStart:
//---------------------------------------------------------------------------------------------------------------
//
// onTraversalStart
//
//---------------------------------------------------------------------------------------------------------------
@Override
public void onTraversalStart() {
    if (gatk3Compatibility) {
        // Temporary argument for validation of GATK4 implementation against GATK3 results:
        // Reset the RNG and draw a single int to align the RNG initial state with that used
        // by GATK3 to allow comparison of results with GATK3
        Utils.resetRandomGenerator();
        Utils.getRandomGenerator().nextInt();
    }
    dataManager = new VariantDataManager(new ArrayList<>(USE_ANNOTATIONS), VRAC);
    if (RSCRIPT_FILE != null && !RScriptExecutor.RSCRIPT_EXISTS)
        Utils.warnUser(logger, String.format("Rscript not found in environment path. %s will be generated but PDF plots will not.", RSCRIPT_FILE));
    if (IGNORE_INPUT_FILTERS != null) {
        ignoreInputFilterSet.addAll(IGNORE_INPUT_FILTERS);
    }
    try {
        tranchesStream = new PrintStream(TRANCHES_FILE);
    } catch (FileNotFoundException e) {
        throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
    }
    for (FeatureInput<VariantContext> variantFeature : resource) {
        dataManager.addTrainingSet(new TrainingSet(variantFeature));
    }
    if (!dataManager.checkHasTrainingSet()) {
        throw new CommandLineException("No training set found! Please provide sets of known polymorphic loci marked with the training=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
    }
    if (!dataManager.checkHasTruthSet()) {
        throw new CommandLineException("No truth set found! Please provide sets of known polymorphic loci marked with the truth=true feature input tag. For example, -resource hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf");
    }
    //TODO: this should be refactored/consolidated as part of
    // https://github.com/broadinstitute/gatk/issues/2112,
    // https://github.com/broadinstitute/gatk/issues/121 and
    // https://github.com/broadinstitute/gatk/issues/1116
    // Initialize VCF header lines
    Set<VCFHeaderLine> hInfo = getDefaultToolVCFHeaderLines();
    VariantRecalibrationUtils.addVQSRStandardHeaderLines(hInfo);
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    if (hasReference()) {
        hInfo = VcfUtils.updateHeaderContigLines(hInfo, referenceArguments.getReferenceFile(), sequenceDictionary, true);
    } else if (null != sequenceDictionary) {
        hInfo = VcfUtils.updateHeaderContigLines(hInfo, null, sequenceDictionary, true);
    }
    recalWriter = createVCFWriter(new File(output));
    recalWriter.writeHeader(new VCFHeader(hInfo));
    for (int iii = 0; iii < REPLICATE * 2; iii++) {
        replicate.add(Utils.getRandomGenerator().nextDouble());
    }
}
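Here the contig lines for the recal VCF header come from the best available sequence dictionary via the GATK helper VcfUtils.updateHeaderContigLines. Plain htsjdk can also derive ##contig lines from a SAMSequenceDictionary, as this minimal, hypothetical single-contig sketch shows; it is a rough stand-in for the idea, not the tool's actual helper:

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.variant.vcf.VCFHeader;

import java.util.Collections;

public class ContigHeaderLinesSketch {
    public static void main(String[] args) {
        // Hypothetical single-contig dictionary standing in for getBestAvailableSequenceDictionary()
        final SAMSequenceDictionary dict = new SAMSequenceDictionary(
                Collections.singletonList(new SAMSequenceRecord("chr1", 248956422)));
        final VCFHeader header = new VCFHeader();
        // setSequenceDictionary adds ##contig= lines derived from the dictionary
        header.setSequenceDictionary(dict);
        header.getContigLines().forEach(System.out::println);
    }
}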