Search in sources :

Example 11 with ReferenceBases

use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.

From the class AddContextDataToReadSparkUnitTest, the method addContextDataTest:

/**
 * Feeds the supplied reads and variants, together with a mocked reference source, through
 * {@code AddContextDataToReadSpark.add} and compares the collected result with the expected
 * per-read context data.
 *
 * @param reads                   reads that are parallelized into the input RDD
 * @param variantList             variants that are parallelized into the input RDD
 * @param expectedReadContextData expected (read, context data) pairs
 * @param joinStrategy            strategy handed unchanged to {@code AddContextDataToReadSpark.add}
 * @throws IOException declared by the test signature
 */
@Test(dataProvider = "bases", groups = "spark")
public void addContextDataTest(List<GATKRead> reads, List<GATKVariant> variantList, List<KV<GATKRead, ReadContextData>> expectedReadContextData, JoinStrategy joinStrategy) throws IOException {
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();

    // Inputs for the code under test.
    JavaRDD<GATKRead> readsRdd = ctx.parallelize(reads);
    JavaRDD<GATKVariant> variantsRdd = ctx.parallelize(variantList);

    // Mocked reference source, created with Mockito's serializable() setting.
    ReferenceMultiSource referenceMock = mock(ReferenceMultiSource.class, withSettings().serializable());
    when(referenceMock.getReferenceBases(any(PipelineOptions.class), any())).then(new ReferenceBasesAnswer());
    when(referenceMock.getReferenceWindowFunction()).thenReturn(ReferenceWindowFunctions.IDENTITY_FUNCTION);

    // Two contigs, "1" and "2", each 100000 bases long.
    SAMSequenceDictionary dictionary = new SAMSequenceDictionary(
            Lists.newArrayList(new SAMSequenceRecord("1", 100000), new SAMSequenceRecord("2", 100000)));
    when(referenceMock.getReferenceSequenceDictionary(null)).thenReturn(dictionary);

    JavaPairRDD<GATKRead, ReadContextData> resultRdd =
            AddContextDataToReadSpark.add(ctx, readsRdd, referenceMock, variantsRdd, joinStrategy, dictionary, 10000, 1000);
    Map<GATKRead, ReadContextData> actualByRead = resultRdd.collectAsMap();

    Assert.assertEquals(actualByRead.size(), expectedReadContextData.size());

    for (KV<GATKRead, ReadContextData> expectedEntry : expectedReadContextData) {
        ReadContextData expectedContext = expectedEntry.getValue();
        ReadContextData actualContext = actualByRead.get(expectedEntry.getKey());
        Assert.assertNotNull(actualContext);

        // Variants are compared as collections via CollectionUtils.isEqualCollection.
        boolean sameVariants = CollectionUtils.isEqualCollection(
                Lists.newArrayList(actualContext.getOverlappingVariants()),
                Lists.newArrayList(expectedContext.getOverlappingVariants()));
        Assert.assertTrue(sameVariants);

        // Only the part of the actual bases covering the expected interval is compared.
        ReferenceBases expectedBases = expectedContext.getOverlappingReferenceBases();
        SimpleInterval expectedInterval = expectedBases.getInterval();
        ReferenceBases actualSubset = actualContext.getOverlappingReferenceBases().getSubset(expectedInterval);
        Assert.assertEquals(actualSubset, expectedBases);
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) GATKVariant(org.broadinstitute.hellbender.utils.variant.GATKVariant) ReferenceMultiSource(org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ReadContextData(org.broadinstitute.hellbender.engine.ReadContextData) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) PipelineOptions(com.google.cloud.dataflow.sdk.options.PipelineOptions) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 12 with ReferenceBases

use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.

From the class ReferenceTwoBitSourceUnitTest, the method testQueryPastContigEnd:

/**
 * Queries the reference source with an interval that runs past the end of its contig and
 * checks that the returned bases and interval were cropped at the contig end.
 *
 * @param refSource           reference source to query
 * @param outOfBoundsInterval interval passed to the query
 * @param expectedNumBases    expected number of bases (and expected interval size) in the result
 * @param contigEnd           expected end position of the returned interval
 * @throws IOException declared by the test signature
 */
@Test(dataProvider = "outOfBoundsIntervals")
public void testQueryPastContigEnd(final ReferenceTwoBitSource refSource, final SimpleInterval outOfBoundsInterval, final int expectedNumBases, final int contigEnd) throws IOException {
    final ReferenceBases result = refSource.getReferenceBases(null, outOfBoundsInterval);
    final SimpleInterval returnedInterval = result.getInterval();

    // The interval must stop at the contig end ...
    Assert.assertEquals(returnedInterval.getEnd(), contigEnd, "Interval was not cropped at contig end");
    // ... and both the base array and the interval must have the expected length.
    Assert.assertEquals(result.getBases().length, expectedNumBases, "Wrong number of bases returned from query");
    Assert.assertEquals(returnedInterval.size(), expectedNumBases, "Wrong interval in ReferenceBases object returned from query");
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 13 with ReferenceBases

use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.

From the class ReferenceAPISourceUnitTest, the method testReferenceSourceMultiPageQuery:

/**
 * Issues two large, overlapping reference queries whose start positions differ by 25 bases
 * and checks that both return the expected number of bases and agree on a shared region
 * near position 50000 + 1,000,000.
 */
@Test(groups = "cloud")
public void testReferenceSourceMultiPageQuery() {
    final int oneMillion = 1_000_000;
    final int span = oneMillion + 50;
    final int firstStart = 50000;
    final int secondStart = 50025;

    final ReferenceBases firstQuery = queryReferenceAPI(HS37D5_REF_ID, new SimpleInterval("1", firstStart, firstStart + span));
    final ReferenceBases secondQuery = queryReferenceAPI(HS37D5_REF_ID, new SimpleInterval("1", secondStart, secondStart + span));

    Assert.assertNotNull(firstQuery);
    Assert.assertNotNull(firstQuery.getBases());
    Assert.assertNotNull(secondQuery);
    Assert.assertNotNull(secondQuery.getBases());

    // SimpleInterval is closed at both ends, so a query of width `span` yields span + 1 bases.
    final int expectedLength = span + 1;
    Assert.assertEquals(firstQuery.getBases().length, expectedLength, "Wrong number of bases returned");
    Assert.assertEquals(secondQuery.getBases().length, expectedLength, "Wrong number of bases returned");

    // Take the same window around the seam from each result; the two extracts must match.
    final SimpleInterval seamWindow = new SimpleInterval("1", firstStart + oneMillion - 100, firstStart + oneMillion + 50);
    ReferenceBases seamFromFirst = firstQuery.getSubset(seamWindow);
    ReferenceBases seamFromSecond = secondQuery.getSubset(seamWindow);
    Assert.assertEquals(seamFromFirst.getBases(), seamFromSecond.getBases(), "seam doesn't match (paging bug?)");
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 14 with ReferenceBases

use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.

From the class ReferenceAPISourceUnitTest, the method testReferenceSourceQuery:

/**
 * Queries ten bases (positions 50000-50009 of contig "1") from the reference API and checks
 * both the number of bases and their exact sequence.
 */
@Test(groups = "cloud")
public void testReferenceSourceQuery() {
    final SimpleInterval tenBaseWindow = new SimpleInterval("1", 50000, 50009);
    final ReferenceBases result = queryReferenceAPI(HS37D5_REF_ID, tenBaseWindow);
    Assert.assertNotNull(result);

    final byte[] returnedBases = result.getBases();
    Assert.assertNotNull(returnedBases);
    Assert.assertEquals(returnedBases.length, 10, "Wrong number of bases returned");

    final String sequence = new String(returnedBases);
    Assert.assertEquals(sequence, "TAAACAGGTT", "Wrong bases returned");
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 15 with ReferenceBases

use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.

From the class ReferenceAPISourceUnitTest, the method testDummy:

/**
 * Builds a pipeline with explicit GenomicsOptions, fetches a short stretch of reference bases
 * through a ReferenceAPISource created for that pipeline, checks the returned sequence, and
 * finally runs the pipeline.
 */
@Test(groups = "cloud")
public void testDummy() {
    final String expectedSequence = "AAACAGGTTA";
    final int start = 50001;
    // Closed interval: the end position is start + length - 1.
    final int end = start + expectedSequence.length() - 1;
    SimpleInterval window = new SimpleInterval("1", start, end);

    // NOTE(review): this logger is never used in the method — confirm whether it can be dropped.
    Logger logger = LogManager.getLogger(ReferenceAPISourceUnitTest.class);

    GenomicsOptions genomicsOptions = PipelineOptionsFactory.create().as(GenomicsOptions.class);
    genomicsOptions.setApiKey(getGCPTestApiKey());
    genomicsOptions.setProject(getGCPTestProject());

    // GATKTestPipeline is not used here because the test needs these specific options.
    final Pipeline pipeline = TestPipeline.create(genomicsOptions);
    ReferenceAPISource source = makeReferenceAPISource(HS37D5_REF_ID, pipeline);

    ReferenceBases fetched = source.getReferenceBases(pipeline.getOptions(), window);
    final String actualSequence = new String(fetched.getBases());
    Assert.assertEquals(actualSequence, expectedSequence, "Wrong bases returned");

    pipeline.run();
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) ReferenceAPISource(org.broadinstitute.hellbender.engine.datasources.ReferenceAPISource) GenomicsOptions(com.google.cloud.genomics.dataflow.utils.GenomicsOptions) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Logger(org.apache.logging.log4j.Logger) TestPipeline(com.google.cloud.dataflow.sdk.testing.TestPipeline) Pipeline(com.google.cloud.dataflow.sdk.Pipeline) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

ReferenceBases (org.broadinstitute.hellbender.utils.reference.ReferenceBases)29 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)24 Test (org.testng.annotations.Test)15 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)14 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)10 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)6 ReferenceMultiSource (org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource)6 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)5 ReferenceContext (org.broadinstitute.hellbender.engine.ReferenceContext)5 ReferenceDataSource (org.broadinstitute.hellbender.engine.ReferenceDataSource)5 ReferenceMemorySource (org.broadinstitute.hellbender.engine.ReferenceMemorySource)5 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)4 ReadContextData (org.broadinstitute.hellbender.engine.ReadContextData)4 GATKVariant (org.broadinstitute.hellbender.utils.variant.GATKVariant)4 PipelineOptions (com.google.cloud.dataflow.sdk.options.PipelineOptions)3 Allele (htsjdk.variant.variantcontext.Allele)3 VariantContext (htsjdk.variant.variantcontext.VariantContext)3 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)3 ArrayList (java.util.ArrayList)3 List (java.util.List)3