Search in sources :

Example 91 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class SplitVcfs method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final VCFFileReader fileReader = new VCFFileReader(INPUT);
    final VCFHeader fileHeader = fileReader.getFileHeader();
    final SAMSequenceDictionary sequenceDictionary = SEQUENCE_DICTIONARY != null ? SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY).getSequenceDictionary() : fileHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setReferenceDictionary(sequenceDictionary).clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    try (final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
        final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build()) {
        snpWriter.writeHeader(fileHeader);
        indelWriter.writeHeader(fileHeader);
        int incorrectVariantCount = 0;
        final CloseableIterator<VariantContext> iterator = fileReader.iterator();
        while (iterator.hasNext()) {
            final VariantContext context = iterator.next();
            if (context.isIndel())
                indelWriter.add(context);
            else if (context.isSNP())
                snpWriter.add(context);
            else {
                if (STRICT)
                    throw new IllegalStateException("Found a record with type " + context.getType().name());
                else
                    incorrectVariantCount++;
            }
            progress.record(context.getContig(), context.getStart());
        }
        if (incorrectVariantCount > 0) {
            logger.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
        }
        CloserUtil.close(iterator);
        CloserUtil.close(fileReader);
    }
    return null;
}
Also used : VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 92 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class PathSeqKmerSpark method runTool.

/** Get the list of distinct kmers in the reference, and write them to a file as a HopScotchSet. */
@Override
protected void runTool(final JavaSparkContext ctx) {
    final SAMFileHeader hdr = getHeaderForReads();
    SAMSequenceDictionary dict = null;
    if (hdr != null)
        dict = hdr.getSequenceDictionary();
    final PipelineOptions options = getAuthenticatedGCSOptions();
    final ReferenceMultiSource referenceMultiSource = getReference();
    final List<SVKmer> kmerList = findKmers(ctx, KMER_SIZE, referenceMultiSource, options, dict);
    final HopscotchSet<SVKmer> kmerSet = new HopscotchSet<>(kmerList);
    final Output output = new Output(BucketUtils.createFile(OUTPUT_FILE));
    final Kryo kryo = new Kryo();
    kryo.setReferences(false);
    kryo.writeClassAndObject(output, kmerSet);
    output.close();
}
Also used : HopscotchSet(org.broadinstitute.hellbender.tools.spark.utils.HopscotchSet) ReferenceMultiSource(org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) PipelineOptions(com.google.cloud.dataflow.sdk.options.PipelineOptions) Output(com.esotericsoftware.kryo.io.Output) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Kryo(com.esotericsoftware.kryo.Kryo)

Example 93 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class GATKToolUnitTest method testBestSequenceDictionary_fromVariants.

@Test
public void testBestSequenceDictionary_fromVariants() throws Exception {
    final GATKTool tool = new TestGATKToolWithFeatures();
    final CommandLineParser clp = new CommandLineArgumentParser(tool);
    final File vcfFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/feature_data_source_test_withSequenceDict.vcf");
    final String[] args = { "--mask", vcfFile.getCanonicalPath() };
    clp.parseArguments(System.out, args);
    tool.onStartup();
    //read the dict back in and compare to vcf dict
    final SAMSequenceDictionary toolDict = tool.getBestAvailableSequenceDictionary();
    try (final VCFFileReader reader = new VCFFileReader(vcfFile)) {
        final SAMSequenceDictionary vcfDict = reader.getFileHeader().getSequenceDictionary();
        toolDict.assertSameDictionary(vcfDict);
        vcfDict.assertSameDictionary(toolDict);
        Assert.assertEquals(toolDict, vcfDict);
    }
}
Also used : CommandLineArgumentParser(org.broadinstitute.barclay.argparser.CommandLineArgumentParser) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) CommandLineParser(org.broadinstitute.barclay.argparser.CommandLineParser) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 94 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class GATKToolUnitTest method testBestSequenceDictionary_fromNothing.

@Test
public void testBestSequenceDictionary_fromNothing() throws Exception {
    final GATKTool tool = new TestGATKToolWithNothing();
    final CommandLineParser clp = new CommandLineArgumentParser(tool);
    final String[] args = {};
    clp.parseArguments(System.out, args);
    tool.onStartup();
    //read the dict back in and assert that it's null
    final SAMSequenceDictionary toolDict = tool.getBestAvailableSequenceDictionary();
    Assert.assertNull(toolDict);
}
Also used : CommandLineArgumentParser(org.broadinstitute.barclay.argparser.CommandLineArgumentParser) CommandLineParser(org.broadinstitute.barclay.argparser.CommandLineParser) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 95 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class GATKToolUnitTest method testBestSequenceDictionary_fromReadsAndReference.

@Test
public void testBestSequenceDictionary_fromReadsAndReference() throws Exception {
    final GATKTool tool = new TestGATKToolWithReads();
    final CommandLineParser clp = new CommandLineArgumentParser(tool);
    final File bamFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam");
    final String fastaFile = hg19MiniReference;
    final String[] args = { "-I", bamFile.getCanonicalPath(), "-R", fastaFile };
    clp.parseArguments(System.out, args);
    tool.onStartup();
    //read the dict back in and compare to reference dict
    final SAMSequenceDictionary toolDict = tool.getBestAvailableSequenceDictionary();
    final SAMSequenceDictionary fastaDict = new IndexedFastaSequenceFile(new File(fastaFile)).getSequenceDictionary();
    toolDict.assertSameDictionary(fastaDict);
    fastaDict.assertSameDictionary(toolDict);
    Assert.assertEquals(toolDict, fastaDict);
}
Also used : CommandLineArgumentParser(org.broadinstitute.barclay.argparser.CommandLineArgumentParser) CommandLineParser(org.broadinstitute.barclay.argparser.CommandLineParser) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)110 Test (org.testng.annotations.Test)41 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)37 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)37 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)35 File (java.io.File)31 UserException (org.broadinstitute.hellbender.exceptions.UserException)24 VariantContext (htsjdk.variant.variantcontext.VariantContext)23 Argument (org.broadinstitute.barclay.argparser.Argument)21 Collectors (java.util.stream.Collectors)20 ReferenceMultiSource (org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource)20 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)18 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)17 VCFHeader (htsjdk.variant.vcf.VCFHeader)16 IntervalUtils (org.broadinstitute.hellbender.utils.IntervalUtils)16 SAMFileHeader (htsjdk.samtools.SAMFileHeader)14 List (java.util.List)14 JavaRDD (org.apache.spark.api.java.JavaRDD)14 Broadcast (org.apache.spark.broadcast.Broadcast)12 StreamSupport (java.util.stream.StreamSupport)11