use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class VariantWalkerIntegrationTest method testBestSequenceDictionary_FromVariantReference.
@Test
public void testBestSequenceDictionary_FromVariantReference() throws Exception {
final GATKTool tool = new TestGATKToolWithFeatures();
final CommandLineParser clp = new CommandLineArgumentParser(tool);
final File vcfFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/example_variants_noSequenceDict.vcf");
final String[] args = { "-V", vcfFile.getCanonicalPath(), "-R", hg19MiniReference };
clp.parseArguments(System.out, args);
tool.onStartup();
// make sure we DON'T get the seq dictionary from the VCF index, and instead get the one from
// the reference when its better
final SAMSequenceDictionary toolDict = tool.getBestAvailableSequenceDictionary();
Assert.assertFalse(toolDict.getSequences().stream().allMatch(seqRec -> seqRec.getSequenceLength() == 0));
SAMSequenceDictionary refDict = new ReferenceFileSource(new File(hg19MiniReference)).getSequenceDictionary();
toolDict.assertSameDictionary(refDict);
refDict.assertSameDictionary(toolDict);
Assert.assertEquals(toolDict, refDict);
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class ReferenceMemorySourceTest method init.
@BeforeTest
public void init() {
List<SAMSequenceRecord> l = new ArrayList<>();
l.add(new SAMSequenceRecord("chr1", 1000000));
SAMSequenceDictionary seqDir = new SAMSequenceDictionary(l);
memorySource = new ReferenceMemorySource(ref1, seqDir);
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class GenomicsDBImportIntegrationTest method testPreserveContigOrderingInHeader.
@Test
public void testPreserveContigOrderingInHeader() throws IOException {
final String workspace = createTempDir("testPreserveContigOrderingInHeader-").getAbsolutePath() + "/workspace";
writeToGenomicsDB(Arrays.asList(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting2.g.vcf"), new SimpleInterval("chr20", 17959479, 17959479), workspace, 0, false, 0);
try (final GenomicsDBFeatureReader<VariantContext, PositionalBufferedStream> genomicsDBFeatureReader = new GenomicsDBFeatureReader<>(new File(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME).getAbsolutePath(), new File(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME).getAbsolutePath(), workspace, GenomicsDBConstants.DEFAULT_ARRAY_NAME, b38_reference_20_21, null, new BCF2Codec());
final AbstractFeatureReader<VariantContext, LineIterator> inputGVCFReader = AbstractFeatureReader.getFeatureReader(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", new VCFCodec(), true)) {
final SAMSequenceDictionary dictionaryFromGenomicsDB = ((VCFHeader) genomicsDBFeatureReader.getHeader()).getSequenceDictionary();
final SAMSequenceDictionary dictionaryFromInputGVCF = ((VCFHeader) inputGVCFReader.getHeader()).getSequenceDictionary();
Assert.assertEquals(dictionaryFromGenomicsDB, dictionaryFromInputGVCF, "Sequence dictionary from GenomicsDB does not match original sequence dictionary from input GVCF");
}
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk-protected by broadinstitute.
the class CreateSomaticPanelOfNormals method doWork.
public Object doWork() {
final List<File> inputVcfs = new ArrayList<>(vcfs);
final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());
final VCFHeader headerOfFirstVcf = new VCFFileReader(inputVcfs.get(0), false).getFileHeader();
final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();
for (final File vcf : inputVcfs) {
final VCFFileReader reader = new VCFFileReader(vcf, false);
iterators.add(reader.iterator());
final VCFHeader header = reader.getFileHeader();
Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
headers.add(header);
}
final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY);
writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));
final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators);
SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
while (mergingIterator.hasNext()) {
final VariantContext vc = mergingIterator.next();
if (!currentPosition.overlaps(vc)) {
processVariantsAtSamePosition(variantsAtThisPosition, writer);
variantsAtThisPosition.clear();
currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
}
variantsAtThisPosition.add(vc);
}
mergingIterator.close();
writer.close();
return "SUCCESS";
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class MergeSamFiles method doWork.
/** Combines multiple SAM/BAM files into one. */
@Override
protected Object doWork() {
boolean matchedSortOrders = true;
// Open the files for reading and writing
final List<SamReader> readers = new ArrayList<>();
final List<SAMFileHeader> headers = new ArrayList<>();
{
// Used to try and reduce redundant SDs in memory
SAMSequenceDictionary dict = null;
for (final File inFile : INPUT) {
IOUtil.assertFileIsReadable(inFile);
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
readers.add(in);
headers.add(in.getFileHeader());
// replace the duplicate copies with a single dictionary to reduce the memory footprint.
if (dict == null) {
dict = in.getFileHeader().getSequenceDictionary();
} else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
in.getFileHeader().setSequenceDictionary(dict);
}
matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
}
}
// If all the input sort orders match the output sort order then just merge them and
// write on the fly, otherwise setup to merge and sort before writing out the final file
IOUtil.assertFileIsWritable(OUTPUT);
final boolean presorted;
final SAMFileHeader.SortOrder headerMergerSortOrder;
final boolean mergingSamRecordIteratorAssumeSorted;
if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
logger.info("Input files are in same order as output so sorting to temp directory is not needed.");
headerMergerSortOrder = SORT_ORDER;
mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
presorted = true;
} else {
logger.info("Sorting input files using temp directory " + TMP_DIR);
headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
mergingSamRecordIteratorAssumeSorted = false;
presorted = false;
}
final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
final SAMFileHeader header = headerMerger.getMergedHeader();
for (final String comment : COMMENT) {
header.addComment(comment);
}
header.setSortOrder(SORT_ORDER);
final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
if (USE_THREADING) {
samFileWriterFactory.setUseAsyncIo(true);
}
try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, presorted)) {
// Lastly loop through and write out the records
final ProgressLogger progress = new ProgressLogger(logger, PROGRESS_INTERVAL);
while (iterator.hasNext()) {
final SAMRecord record = iterator.next();
out.addAlignment(record);
progress.record(record);
}
logger.info("Finished reading inputs.");
CloserUtil.close(readers);
}
return null;
}
Aggregations