Search in sources :

Example 61 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class VariantWalkerIntegrationTest method testBestSequenceDictionary_FromVariantReference.

@Test
public void testBestSequenceDictionary_FromVariantReference() throws Exception {
    final GATKTool tool = new TestGATKToolWithFeatures();
    final CommandLineParser clp = new CommandLineArgumentParser(tool);
    final File vcfFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/example_variants_noSequenceDict.vcf");
    final String[] args = { "-V", vcfFile.getCanonicalPath(), "-R", hg19MiniReference };
    clp.parseArguments(System.out, args);
    tool.onStartup();
    // make sure we DON'T get the seq dictionary from the VCF index, and instead get the one from
    // the reference when its better
    final SAMSequenceDictionary toolDict = tool.getBestAvailableSequenceDictionary();
    Assert.assertFalse(toolDict.getSequences().stream().allMatch(seqRec -> seqRec.getSequenceLength() == 0));
    SAMSequenceDictionary refDict = new ReferenceFileSource(new File(hg19MiniReference)).getSequenceDictionary();
    toolDict.assertSameDictionary(refDict);
    refDict.assertSameDictionary(toolDict);
    Assert.assertEquals(toolDict, refDict);
}
Also used : CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) Iterator(java.util.Iterator) CommandLineArgumentParser(org.broadinstitute.barclay.argparser.CommandLineArgumentParser) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) TestProgramGroup(org.broadinstitute.hellbender.cmdline.TestProgramGroup) ExampleVariantWalker(org.broadinstitute.hellbender.tools.examples.ExampleVariantWalker) CommandLineParser(org.broadinstitute.barclay.argparser.CommandLineParser) ArgumentsBuilder(org.broadinstitute.hellbender.utils.test.ArgumentsBuilder) Test(org.testng.annotations.Test) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) File(java.io.File) ArrayList(java.util.ArrayList) List(java.util.List) UserException(org.broadinstitute.hellbender.exceptions.UserException) Assert(org.testng.Assert) VariantContext(htsjdk.variant.variantcontext.VariantContext) CommandLineArgumentParser(org.broadinstitute.barclay.argparser.CommandLineArgumentParser) CommandLineParser(org.broadinstitute.barclay.argparser.CommandLineParser) File(java.io.File) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 62 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class ReferenceMemorySourceTest method init.

@BeforeTest
public void init() {
    List<SAMSequenceRecord> l = new ArrayList<>();
    l.add(new SAMSequenceRecord("chr1", 1000000));
    SAMSequenceDictionary seqDir = new SAMSequenceDictionary(l);
    memorySource = new ReferenceMemorySource(ref1, seqDir);
}
Also used : ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BeforeTest(org.testng.annotations.BeforeTest)

Example 63 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class GenomicsDBImportIntegrationTest method testPreserveContigOrderingInHeader.

@Test
public void testPreserveContigOrderingInHeader() throws IOException {
    final String workspace = createTempDir("testPreserveContigOrderingInHeader-").getAbsolutePath() + "/workspace";
    writeToGenomicsDB(Arrays.asList(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting2.g.vcf"), new SimpleInterval("chr20", 17959479, 17959479), workspace, 0, false, 0);
    try (final GenomicsDBFeatureReader<VariantContext, PositionalBufferedStream> genomicsDBFeatureReader = new GenomicsDBFeatureReader<>(new File(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME).getAbsolutePath(), new File(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME).getAbsolutePath(), workspace, GenomicsDBConstants.DEFAULT_ARRAY_NAME, b38_reference_20_21, null, new BCF2Codec());
        final AbstractFeatureReader<VariantContext, LineIterator> inputGVCFReader = AbstractFeatureReader.getFeatureReader(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", new VCFCodec(), true)) {
        final SAMSequenceDictionary dictionaryFromGenomicsDB = ((VCFHeader) genomicsDBFeatureReader.getHeader()).getSequenceDictionary();
        final SAMSequenceDictionary dictionaryFromInputGVCF = ((VCFHeader) inputGVCFReader.getHeader()).getSequenceDictionary();
        Assert.assertEquals(dictionaryFromGenomicsDB, dictionaryFromInputGVCF, "Sequence dictionary from GenomicsDB does not match original sequence dictionary from input GVCF");
    }
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) VariantContext(htsjdk.variant.variantcontext.VariantContext) LineIterator(htsjdk.tribble.readers.LineIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) GenomicsDBFeatureReader(com.intel.genomicsdb.GenomicsDBFeatureReader) PositionalBufferedStream(htsjdk.tribble.readers.PositionalBufferedStream) BCF2Codec(htsjdk.variant.bcf2.BCF2Codec) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 64 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk-protected by broadinstitute.

the class CreateSomaticPanelOfNormals method doWork.

public Object doWork() {
    final List<File> inputVcfs = new ArrayList<>(vcfs);
    final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
    final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());
    final VCFHeader headerOfFirstVcf = new VCFFileReader(inputVcfs.get(0), false).getFileHeader();
    final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
    final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();
    for (final File vcf : inputVcfs) {
        final VCFFileReader reader = new VCFFileReader(vcf, false);
        iterators.add(reader.iterator());
        final VCFHeader header = reader.getFileHeader();
        Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
        headers.add(header);
    }
    final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY);
    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));
    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators);
    SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
    final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
    while (mergingIterator.hasNext()) {
        final VariantContext vc = mergingIterator.next();
        if (!currentPosition.overlaps(vc)) {
            processVariantsAtSamePosition(variantsAtThisPosition, writer);
            variantsAtThisPosition.clear();
            currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
        }
        variantsAtThisPosition.add(vc);
    }
    mergingIterator.close();
    writer.close();
    return "SUCCESS";
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) MergingIterator(htsjdk.samtools.util.MergingIterator) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File)

Example 65 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class MergeSamFiles method doWork.

/** Combines multiple SAM/BAM files into one. */
@Override
protected Object doWork() {
    boolean matchedSortOrders = true;
    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<>();
    final List<SAMFileHeader> headers = new ArrayList<>();
    {
        // Used to try and reduce redundant SDs in memory
        SAMSequenceDictionary dict = null;
        for (final File inFile : INPUT) {
            IOUtil.assertFileIsReadable(inFile);
            final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
            readers.add(in);
            headers.add(in.getFileHeader());
            // replace the duplicate copies with a single dictionary to reduce the memory footprint.
            if (dict == null) {
                dict = in.getFileHeader().getSequenceDictionary();
            } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
                in.getFileHeader().setSequenceDictionary(dict);
            }
            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
        }
    }
    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;
    if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
        logger.info("Input files are in same order as output so sorting to temp directory is not needed.");
        headerMergerSortOrder = SORT_ORDER;
        mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
        presorted = true;
    } else {
        logger.info("Sorting input files using temp directory " + TMP_DIR);
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        mergingSamRecordIteratorAssumeSorted = false;
        presorted = false;
    }
    final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
        header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);
    final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    if (USE_THREADING) {
        samFileWriterFactory.setUseAsyncIo(true);
    }
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, presorted)) {
        // Lastly loop through and write out the records
        final ProgressLogger progress = new ProgressLogger(logger, PROGRESS_INTERVAL);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        logger.info("Finished reading inputs.");
        CloserUtil.close(readers);
    }
    return null;
}
Also used : SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Aggregations

SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)110 Test (org.testng.annotations.Test)41 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)37 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)37 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)35 File (java.io.File)31 UserException (org.broadinstitute.hellbender.exceptions.UserException)24 VariantContext (htsjdk.variant.variantcontext.VariantContext)23 Argument (org.broadinstitute.barclay.argparser.Argument)21 Collectors (java.util.stream.Collectors)20 ReferenceMultiSource (org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource)20 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)18 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)17 VCFHeader (htsjdk.variant.vcf.VCFHeader)16 IntervalUtils (org.broadinstitute.hellbender.utils.IntervalUtils)16 SAMFileHeader (htsjdk.samtools.SAMFileHeader)14 List (java.util.List)14 JavaRDD (org.apache.spark.api.java.JavaRDD)14 Broadcast (org.apache.spark.broadcast.Broadcast)12 StreamSupport (java.util.stream.StreamSupport)11