Search in sources :

Example 1 with ReferenceBases

use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.

the class ReferenceAPISourceUnitTest method testReferenceSourceMultiPageQuery.

/**
 * Queries two large windows on contig "1" whose start positions differ by 25 bases,
 * checks that each returns the expected number of bases, and then verifies that both
 * results agree on the bases around the end of the first window (the "seam").
 */
@Test(groups = "cloud")
public void testReferenceSourceMultiPageQuery() {
    final int million = 1_000_000;
    final int firstStart = 50000;
    final int secondStart = 50025;
    // Both windows span the same number of positions past their respective starts.
    final int span = million + 50;

    final ReferenceBases firstWindow = queryReferenceAPI(HS37D5_REF_ID, new SimpleInterval("1", firstStart, firstStart + span));
    final ReferenceBases secondWindow = queryReferenceAPI(HS37D5_REF_ID, new SimpleInterval("1", secondStart, secondStart + span));

    Assert.assertNotNull(firstWindow);
    Assert.assertNotNull(firstWindow.getBases());
    Assert.assertNotNull(secondWindow);
    Assert.assertNotNull(secondWindow.getBases());

    // SimpleInterval is end-inclusive, so the base count is one more than the span.
    final int expectedLength = span + 1;
    Assert.assertEquals(firstWindow.getBases().length, expectedLength, "Wrong number of bases returned");
    Assert.assertEquals(secondWindow.getBases().length, expectedLength, "Wrong number of bases returned");

    // Pull the same stretch of bases near the seam out of both results and compare them.
    final SimpleInterval seam = new SimpleInterval("1", firstStart + million - 100, firstStart + span);
    final ReferenceBases seamFromFirst = firstWindow.getSubset(seam);
    final ReferenceBases seamFromSecond = secondWindow.getSubset(seam);
    Assert.assertEquals(seamFromFirst.getBases(), seamFromSecond.getBases(), "seam doesn't match (paging bug?)");
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 2 with ReferenceBases

use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.

the class ReferenceAPISourceUnitTest method testReferenceSourceQuery.

/**
 * Queries a ten-base, end-inclusive window on contig "1" and checks both the number
 * of bases returned and their exact sequence.
 */
@Test(groups = "cloud")
public void testReferenceSourceQuery() {
    // Positions 50000..50009 inclusive: ten bases.
    final SimpleInterval window = new SimpleInterval("1", 50000, 50009);
    final ReferenceBases result = queryReferenceAPI(HS37D5_REF_ID, window);

    Assert.assertNotNull(result);
    Assert.assertNotNull(result.getBases());
    Assert.assertEquals(result.getBases().length, 10, "Wrong number of bases returned");

    final String sequence = new String(result.getBases());
    Assert.assertEquals(sequence, "TAAACAGGTT", "Wrong bases returned");
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 3 with ReferenceBases

use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.

the class ReferenceAPISourceUnitTest method testDummy.

/**
 * Builds a test pipeline with explicit GCP credentials, fetches a short window of
 * reference bases through a {@code ReferenceAPISource}, and checks the returned
 * sequence against the expected string before running the pipeline.
 */
@Test(groups = "cloud")
public void testDummy() {
    final String referenceName = HS37D5_REF_ID;
    final String expected = "AAACAGGTTA";
    // -1 because we're using closed intervals
    final SimpleInterval interval = new SimpleInterval("1", 50001, 50001 + expected.length() - 1);
    final GenomicsOptions options = PipelineOptionsFactory.create().as(GenomicsOptions.class);
    options.setApiKey(getGCPTestApiKey());
    options.setProject(getGCPTestProject());
    // We don't use GATKTestPipeline because we need specific options.
    final Pipeline p = TestPipeline.create(options);
    final ReferenceAPISource refAPISource = makeReferenceAPISource(referenceName, p);
    final ReferenceBases bases = refAPISource.getReferenceBases(p.getOptions(), interval);
    final String actual = new String(bases.getBases());
    Assert.assertEquals(actual, expected, "Wrong bases returned");
    p.run();
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) ReferenceAPISource(org.broadinstitute.hellbender.engine.datasources.ReferenceAPISource) GenomicsOptions(com.google.cloud.genomics.dataflow.utils.GenomicsOptions) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Logger(org.apache.logging.log4j.Logger) TestPipeline(com.google.cloud.dataflow.sdk.testing.TestPipeline) Pipeline(com.google.cloud.dataflow.sdk.Pipeline) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 4 with ReferenceBases

use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.

the class BaseRecalibratorSparkFn method apply.

/**
 * Builds a recalibration report from reads paired with their context data.
 *
 * Each partition runs its own {@code BaseRecalibrationEngine} over its reads and emits
 * one {@code RecalibrationTables}; the per-partition tables are then combined with a
 * tree aggregation, finalized, quantized, and turned into a report.
 *
 * @param readsWithContext    reads keyed to the variants and reference bases overlapping them
 * @param header              header passed to the recalibration engine and covariate list
 * @param referenceDictionary sequence dictionary used to wrap each read's reference bases
 * @param recalArgs           recalibration arguments
 * @return the recalibration report built from the combined tables
 */
public static RecalibrationReport apply(final JavaPairRDD<GATKRead, ReadContextData> readsWithContext, final SAMFileHeader header, final SAMSequenceDictionary referenceDictionary, final RecalibrationArgumentCollection recalArgs) {
    // One engine per partition; each partition yields a single-element iterator with its tables.
    final JavaRDD<RecalibrationTables> perPartitionTables = readsWithContext.mapPartitions(partitionReads -> {
        final BaseRecalibrationEngine engine = new BaseRecalibrationEngine(recalArgs, header);
        engine.logCovariatesUsed();
        while (partitionReads.hasNext()) {
            final Tuple2<GATKRead, ReadContextData> pair = partitionReads.next();
            final ReadContextData context = pair._2();
            final Iterable<GATKVariant> overlappingVariants = context.getOverlappingVariants();
            final ReferenceBases overlappingBases = context.getOverlappingReferenceBases();
            final ReferenceDataSource reference = new ReferenceMemorySource(overlappingBases, referenceDictionary);
            engine.processRead(pair._1(), reference, overlappingVariants);
        }
        return Arrays.asList(engine.getRecalibrationTables()).iterator();
    });

    final RecalibrationTables zeroTables = new RecalibrationTables(new StandardCovariateList(recalArgs, header));

    // Aggregation depth is floor(log2(number of partitions)), but never less than 1.
    final int partitionCount = perPartitionTables.partitions().size();
    final int treeDepth = Math.max(1, (int) (Math.log(partitionCount) / Math.log(2)));
    final RecalibrationTables mergedTables = perPartitionTables.treeAggregate(
            zeroTables,
            RecalibrationTables::inPlaceCombine,
            RecalibrationTables::inPlaceCombine,
            treeDepth);

    BaseRecalibrationEngine.finalizeRecalibrationTables(mergedTables);

    final QuantizationInfo quantization = new QuantizationInfo(mergedTables, recalArgs.QUANTIZING_LEVELS);
    final StandardCovariateList covariateList = new StandardCovariateList(recalArgs, header);
    return RecalUtils.createRecalibrationReport(
            recalArgs.generateReportTable(covariateList.covariateNames()),
            quantization.generateReportTable(),
            RecalUtils.generateReportTables(mergedTables, covariateList));
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) GATKVariant(org.broadinstitute.hellbender.utils.variant.GATKVariant) ReferenceDataSource(org.broadinstitute.hellbender.engine.ReferenceDataSource) ReferenceMemorySource(org.broadinstitute.hellbender.engine.ReferenceMemorySource) StandardCovariateList(org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList) ReadContextData(org.broadinstitute.hellbender.engine.ReadContextData) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases)

Example 5 with ReferenceBases

use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.

the class BaseRecalibratorEngineSparkWrapper method apply.

/**
 * Runs a freshly created recalibration engine over every read in the supplied shards.
 *
 * The header and sequence dictionary are read from their broadcast variables first.
 * Each read is then processed together with the variants and reference bases from the
 * context entry at the same index in its shard.
 *
 * @param shards the shards whose reads should be processed
 * @return an iterator over a single-element list holding the engine's recalibration tables
 * @throws Exception declared by the signature; not thrown directly by this body
 */
public Iterator<RecalibrationTables> apply(Iterator<ContextShard> shards) throws Exception {
    this.header = headerBcast.value();
    this.referenceSequenceDictionary = referenceSequenceDictionaryBcast.value();
    this.recalibrationEngine = new BaseRecalibrationEngine(recalArgs, header);

    while (shards.hasNext()) {
        final ContextShard currentShard = shards.next();
        // reads and readContext are walked in lockstep: entry idx of one pairs with entry idx of the other.
        for (int idx = 0; idx < currentShard.reads.size(); ++idx) {
            final GATKRead currentRead = currentShard.reads.get(idx);
            // Reads are shipped without the header -- put it back in
            ReadUtils.restoreHeaderIfNecessary(currentRead, header);

            final ReadContextData context = currentShard.readContext.get(idx);
            final Iterable<GATKVariant> overlappingVariants = context.getOverlappingVariants();
            final ReferenceBases overlappingBases = context.getOverlappingReferenceBases();
            final ReferenceDataSource reference = new ReferenceMemorySource(overlappingBases, referenceSequenceDictionary);
            recalibrationEngine.processRead(currentRead, reference, overlappingVariants);
        }
    }

    final ArrayList<RecalibrationTables> result = new ArrayList<>();
    result.add(recalibrationEngine.getRecalibrationTables());
    return result.iterator();
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) GATKVariant(org.broadinstitute.hellbender.utils.variant.GATKVariant) ArrayList(java.util.ArrayList) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases)

Aggregations

ReferenceBases (org.broadinstitute.hellbender.utils.reference.ReferenceBases): 29, SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval): 24, Test (org.testng.annotations.Test): 15, BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest): 14, GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead): 10, SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 6, ReferenceMultiSource (org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource): 6, SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord): 5, ReferenceContext (org.broadinstitute.hellbender.engine.ReferenceContext): 5, ReferenceDataSource (org.broadinstitute.hellbender.engine.ReferenceDataSource): 5, ReferenceMemorySource (org.broadinstitute.hellbender.engine.ReferenceMemorySource): 5, JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 4, ReadContextData (org.broadinstitute.hellbender.engine.ReadContextData): 4, GATKVariant (org.broadinstitute.hellbender.utils.variant.GATKVariant): 4, PipelineOptions (com.google.cloud.dataflow.sdk.options.PipelineOptions): 3, Allele (htsjdk.variant.variantcontext.Allele): 3, VariantContext (htsjdk.variant.variantcontext.VariantContext): 3, VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder): 3, ArrayList (java.util.ArrayList): 3, List (java.util.List): 3