Search in sources :

Example 81 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class HaplotypeCaller method onTraversalStart.

/**
 * Prepares the caller engine and the output writer before traversal starts.
 *
 * Builds the {@code HaplotypeCallerEngine} from the tool arguments, the reads header and
 * the reference reader, then asks the engine for a VCF writer and writes the header to it.
 */
@Override
public void onTraversalStart() {
    final ReferenceSequenceFile reference = getReferenceReader(referenceArguments);
    hcEngine = new HaplotypeCallerEngine(hcArgs, getHeaderForReads(), reference);

    // The engine decides which flavour of writer (VCF or GVCF) is appropriate,
    // so both the writer and its header are produced through it.
    final SAMSequenceDictionary readsDictionary = getHeaderForReads().getSequenceDictionary();
    vcfWriter = hcEngine.makeVCFWriter(outputVCF, readsDictionary);
    hcEngine.writeHeader(vcfWriter, readsDictionary, getDefaultToolVCFHeaderLines());
}
Also used : ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 82 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class SparkSharderUnitTest method testPartitionReadExtents.

/**
 * Checks {@code SparkSharder.computePartitionReadExtents} on six fixed reads (three on
 * contig "1", three on contig "2") parallelized into 1, 2 and 3 partitions.
 *
 * The same three partition counts are asserted against three sequence dictionaries:
 * the {@code sequenceDictionary} defined outside this method, a dictionary with an extra
 * contig "3" after contig "2", and a dictionary with an extra contig "X" between contigs
 * "1" and "2". Each assertion compares the returned extents with an explicit list of
 * {@code PartitionLocatable} (partition index, interval) pairs.
 *
 * NOTE(review): where a {@code //} comment trails the end of a line inside an expected
 * list, it appears to describe the element on the following line - confirm.
 */
@Test
public void testPartitionReadExtents() throws IOException {
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    // Consider the following reads.
    // This test checks the partition read extents when the reads are divided into
    // different numbers of partitions (1, 2, or 3), and with different sequence dictionaries.
    //                      1                   2
    //    1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7
    // ---------------------------------------------------------
    // Reads
    //   [-----] chr 1
    //           [-----] chr 1
    //               [-----] chr 1
    //                       [-----] chr 2
    //                         [-----] chr 2
    //                           [-----] chr 2
    // ---------------------------------------------------------
    //    1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7
    ImmutableList<TestRead> reads = ImmutableList.of(new TestRead("1", 1, 3), new TestRead("1", 5, 7), new TestRead("1", 7, 9), new TestRead("2", 11, 13), new TestRead("2", 12, 14), new TestRead("2", 13, 15));
    // Case 1: the sequenceDictionary declared outside this method (presumably contig "1" of
    // length 100 and contig "2" of length 50, judging by the expected interval ends - TODO confirm).
    // Reads in 1, then 2, then 3 partitions.
    assertEquals(SparkSharder.computePartitionReadExtents(ctx.parallelize(reads, 1), sequenceDictionary, STANDARD_READ_LENGTH), ImmutableList.of(new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("1", 1, 100)), new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("2", 1, 50))));
    assertEquals(SparkSharder.computePartitionReadExtents(ctx.parallelize(reads, 2), sequenceDictionary, STANDARD_READ_LENGTH), ImmutableList.of(new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("1", 1, 100)), // since last read of partition 0 _could_ end at start of first read in partition 1 + max read length
    new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("2", 1, 14)), new SparkSharder.PartitionLocatable<>(1, new SimpleInterval("2", 11, 50))));
    assertEquals(SparkSharder.computePartitionReadExtents(ctx.parallelize(reads, 3), sequenceDictionary, STANDARD_READ_LENGTH), ImmutableList.of(new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("1", 1, 10)), new SparkSharder.PartitionLocatable<>(1, new SimpleInterval("1", 7, 100)), new SparkSharder.PartitionLocatable<>(1, new SimpleInterval("2", 1, 15)), new SparkSharder.PartitionLocatable<>(2, new SimpleInterval("2", 12, 50))));
    // Case 2: the last partition's expected extents additionally cover all of contig 3.
    // Use a different dictionary with contig 3 at the end
    SAMSequenceDictionary sequenceDictionary123 = new SAMSequenceDictionary(ImmutableList.of(new SAMSequenceRecord("1", 100), new SAMSequenceRecord("2", 50), new SAMSequenceRecord("3", 25)));
    assertEquals(SparkSharder.computePartitionReadExtents(ctx.parallelize(reads, 1), sequenceDictionary123, STANDARD_READ_LENGTH), ImmutableList.of(new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("1", 1, 100)), new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("2", 1, 50)), // partition could contain contig 3 reads
    new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("3", 1, 25))));
    assertEquals(SparkSharder.computePartitionReadExtents(ctx.parallelize(reads, 2), sequenceDictionary123, STANDARD_READ_LENGTH), ImmutableList.of(new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("1", 1, 100)), // since last read of partition 0 _could_ end at start of first read in partition 1 + max read length
    new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("2", 1, 14)), new SparkSharder.PartitionLocatable<>(1, new SimpleInterval("2", 11, 50)), // partition could contain contig 3 reads
    new SparkSharder.PartitionLocatable<>(1, new SimpleInterval("3", 1, 25))));
    assertEquals(SparkSharder.computePartitionReadExtents(ctx.parallelize(reads, 3), sequenceDictionary123, STANDARD_READ_LENGTH), ImmutableList.of(new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("1", 1, 10)), new SparkSharder.PartitionLocatable<>(1, new SimpleInterval("1", 7, 100)), new SparkSharder.PartitionLocatable<>(1, new SimpleInterval("2", 1, 15)), new SparkSharder.PartitionLocatable<>(2, new SimpleInterval("2", 12, 50)), // partition could contain contig 3 reads
    new SparkSharder.PartitionLocatable<>(2, new SimpleInterval("3", 1, 25))));
    // Case 3: the partition that spans contigs 1 and 2 is additionally expected to cover all of contig X.
    // Use a different dictionary with contig X between contigs 1 and 2
    SAMSequenceDictionary sequenceDictionary1X2 = new SAMSequenceDictionary(ImmutableList.of(new SAMSequenceRecord("1", 100), new SAMSequenceRecord("X", 75), new SAMSequenceRecord("2", 50)));
    assertEquals(SparkSharder.computePartitionReadExtents(ctx.parallelize(reads, 1), sequenceDictionary1X2, STANDARD_READ_LENGTH), ImmutableList.of(new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("1", 1, 100)), // partition could contain contig X reads
    new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("X", 1, 75)), new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("2", 1, 50))));
    assertEquals(SparkSharder.computePartitionReadExtents(ctx.parallelize(reads, 2), sequenceDictionary1X2, STANDARD_READ_LENGTH), ImmutableList.of(new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("1", 1, 100)), // partition could contain contig X reads
    new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("X", 1, 75)), // since last read of partition 0 _could_ end at start of first read in partition 1 + max read length
    new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("2", 1, 14)), new SparkSharder.PartitionLocatable<>(1, new SimpleInterval("2", 11, 50))));
    assertEquals(SparkSharder.computePartitionReadExtents(ctx.parallelize(reads, 3), sequenceDictionary1X2, STANDARD_READ_LENGTH), ImmutableList.of(new SparkSharder.PartitionLocatable<>(0, new SimpleInterval("1", 1, 10)), new SparkSharder.PartitionLocatable<>(1, new SimpleInterval("1", 7, 100)), // partition could contain contig X reads
    new SparkSharder.PartitionLocatable<>(1, new SimpleInterval("X", 1, 75)), new SparkSharder.PartitionLocatable<>(1, new SimpleInterval("2", 1, 15)), new SparkSharder.PartitionLocatable<>(2, new SimpleInterval("2", 12, 50))));
}
Also used : SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 83 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class GATKVariantContextUtilsUnitTest method makeSimpleSequenceDictionary.

/**
 * Builds a minimal sequence dictionary for tests.
 *
 * @return a new dictionary holding a single record named "chr1" created with the value 10
 */
private SAMSequenceDictionary makeSimpleSequenceDictionary() {
    // The only contig in the dictionary.
    final SAMSequenceRecord onlyContig = new SAMSequenceRecord("chr1", 10);

    final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
    dictionary.addSequence(onlyContig);
    return dictionary;
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 84 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class VcfUtilsUnitTest method createSequenceDictonary.

/**
 * Builds a two-record sequence dictionary for tests.
 *
 * @return a new dictionary built from the records "contig1" (100) and "contig2" (200),
 *         in that order
 */
private SAMSequenceDictionary createSequenceDictonary() {
    final SAMSequenceRecord firstContig = new SAMSequenceRecord("contig1", 100);
    final SAMSequenceRecord secondContig = new SAMSequenceRecord("contig2", 200);

    // A mutable, pre-sized list is handed to the dictionary constructor.
    final List<SAMSequenceRecord> contigs = new ArrayList<>(2);
    contigs.add(firstContig);
    contigs.add(secondContig);

    return new SAMSequenceDictionary(contigs);
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 85 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk-protected by broadinstitute.

the class AnnotateTargetsIntegrationTest method createTargetFile.

/**
 * Creates the target file and its linear index before the tests in this class run.
 *
 * Random intervals are generated over the resolved reference dictionary, converted to
 * {@code Target}s and written to {@code TARGET_FILE}; an index for that file is then
 * created and written to {@code TARGET_FILE_IDX}.
 *
 * @throws IOException if writing the target file or the index file fails
 */
@BeforeClass
public void createTargetFile() throws IOException {
    final SAMSequenceDictionary referenceDictionary = resolveReferenceDictionary();
    final List<SimpleInterval> targetIntervals = createRandomIntervals(referenceDictionary, NUMBER_OF_TARGETS, MIN_TARGET_SIZE, MAX_TARGET_SIZE, MEAN_TARGET_SIZE, TARGET_SIZE_STDEV);
    final List<Target> targets = targetIntervals.stream().map(Target::new).collect(Collectors.toList());
    TargetWriter.writeTargetsToFile(TARGET_FILE, targets);
    final Index index = IndexFactory.createIndex(TARGET_FILE, new TargetCodec(), IndexFactory.IndexType.LINEAR);
    // try-with-resources guarantees the index stream (and the underlying file handle)
    // is closed even when index.write throws.
    try (final LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(TARGET_FILE_IDX))) {
        index.write(stream);
    }
}
Also used : LittleEndianOutputStream(htsjdk.tribble.util.LittleEndianOutputStream) TargetCodec(org.broadinstitute.hellbender.utils.codecs.TargetCodec) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Index(htsjdk.tribble.index.Index) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BeforeClass(org.testng.annotations.BeforeClass)

Aggregations

SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 110; Test (org.testng.annotations.Test): 41; SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord): 37; BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest): 37; SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval): 35; File (java.io.File): 31; UserException (org.broadinstitute.hellbender.exceptions.UserException): 24; VariantContext (htsjdk.variant.variantcontext.VariantContext): 23; Argument (org.broadinstitute.barclay.argparser.Argument): 21; Collectors (java.util.stream.Collectors): 20; ReferenceMultiSource (org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource): 20; JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 18; GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead): 17; VCFHeader (htsjdk.variant.vcf.VCFHeader): 16; IntervalUtils (org.broadinstitute.hellbender.utils.IntervalUtils): 16; SAMFileHeader (htsjdk.samtools.SAMFileHeader): 14; List (java.util.List): 14; JavaRDD (org.apache.spark.api.java.JavaRDD): 14; Broadcast (org.apache.spark.broadcast.Broadcast): 12; StreamSupport (java.util.stream.StreamSupport): 11