use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.
the class ReferenceAPISourceUnitTest method testReferenceSourceMultiPageQuery.
/**
 * Queries two overlapping intervals on contig "1", each slightly longer than one million bases,
 * and verifies that both results have the expected length and agree on the region they share
 * near the end of the first interval (presumably where a page boundary of the underlying API
 * would fall -- the failure message hints at paging).
 */
@Test(groups = "cloud")
public void testReferenceSourceMultiPageQuery() {
    final int million = 1_000_000;
    final int firstStart = 50000;
    final int secondStart = 50025;
    final int span = million + 50;

    final ReferenceBases first = queryReferenceAPI(HS37D5_REF_ID, new SimpleInterval("1", firstStart, firstStart + span));
    final ReferenceBases second = queryReferenceAPI(HS37D5_REF_ID, new SimpleInterval("1", secondStart, secondStart + span));

    Assert.assertNotNull(first);
    Assert.assertNotNull(first.getBases());
    Assert.assertNotNull(second);
    Assert.assertNotNull(second.getBases());

    // SimpleInterval is closed on both ends, so an interval of width `span` holds span + 1 bases.
    final int expectedLength = span + 1;
    Assert.assertEquals(first.getBases().length, expectedLength, "Wrong number of bases returned");
    Assert.assertEquals(second.getBases().length, expectedLength, "Wrong number of bases returned");

    // Compare the same window, taken from each result, around the one-million-base mark.
    final SimpleInterval seamWindow = new SimpleInterval("1", firstStart + million - 100, firstStart + span);
    final ReferenceBases seamFromFirst = first.getSubset(seamWindow);
    final ReferenceBases seamFromSecond = second.getSubset(seamWindow);
    Assert.assertEquals(seamFromFirst.getBases(), seamFromSecond.getBases(), "seam doesn't match (paging bug?)");
}
use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.
the class ReferenceAPISourceUnitTest method testReferenceSourceQuery.
/**
 * Fetches the closed interval 1:50000-50009 (ten bases) and checks both the number of bases
 * returned and their exact sequence.
 */
@Test(groups = "cloud")
public void testReferenceSourceQuery() {
    final SimpleInterval window = new SimpleInterval("1", 50000, 50009);
    final String expectedSequence = "TAAACAGGTT";

    final ReferenceBases result = queryReferenceAPI(HS37D5_REF_ID, window);

    // Guard against a missing result before inspecting its contents.
    Assert.assertNotNull(result);
    Assert.assertNotNull(result.getBases());

    Assert.assertEquals(result.getBases().length, 10, "Wrong number of bases returned");
    Assert.assertEquals(new String(result.getBases()), expectedSequence, "Wrong bases returned");
}
use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.
the class ReferenceAPISourceUnitTest method testDummy.
/**
 * Builds a pipeline from explicitly configured GenomicsOptions (API key from
 * getGCPTestApiKey(), project from getGCPTestProject()), fetches a short closed interval
 * through a ReferenceAPISource, checks the returned bases against the expected sequence,
 * and finally runs the pipeline.
 */
@Test(groups = "cloud")
public void testDummy() {
    final String referenceName = HS37D5_REF_ID;
    final String expected = "AAACAGGTTA";
    // -1 because we're using closed intervals
    final SimpleInterval interval = new SimpleInterval("1", 50001, 50001 + expected.length() - 1);

    final GenomicsOptions options = PipelineOptionsFactory.create().as(GenomicsOptions.class);
    options.setApiKey(getGCPTestApiKey());
    options.setProject(getGCPTestProject());

    // We don't use GATKTestPipeline because we need specific options.
    final Pipeline p = TestPipeline.create(options);
    final ReferenceAPISource refAPISource = makeReferenceAPISource(referenceName, p);
    final ReferenceBases bases = refAPISource.getReferenceBases(p.getOptions(), interval);

    final String actual = new String(bases.getBases());
    Assert.assertEquals(actual, expected, "Wrong bases returned");
    p.run();
}
use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.
the class BaseRecalibratorSparkFn method apply.
/**
 * Runs base recalibration over reads paired with their context data and assembles the
 * resulting recalibration report.
 *
 * Each partition is processed by its own BaseRecalibrationEngine, which yields one
 * RecalibrationTables per partition; those tables are then combined with treeAggregate,
 * finalized, quantized and turned into a report.
 *
 * @param readsWithContext    reads, each paired with its overlapping variants and reference bases
 * @param header              header passed to the recalibration engine and covariate lists
 * @param referenceDictionary sequence dictionary used to wrap each read's reference bases
 * @param recalArgs           recalibration arguments (including QUANTIZING_LEVELS)
 * @return the recalibration report built from the combined tables
 */
public static RecalibrationReport apply(final JavaPairRDD<GATKRead, ReadContextData> readsWithContext, final SAMFileHeader header, final SAMSequenceDictionary referenceDictionary, final RecalibrationArgumentCollection recalArgs) {
    // Stage 1: build one set of tables per partition.
    final JavaRDD<RecalibrationTables> perPartitionTables = readsWithContext.mapPartitions(partition -> {
        final BaseRecalibrationEngine engine = new BaseRecalibrationEngine(recalArgs, header);
        engine.logCovariatesUsed();
        while (partition.hasNext()) {
            final Tuple2<GATKRead, ReadContextData> entry = partition.next();
            final Iterable<GATKVariant> overlappingVariants = entry._2().getOverlappingVariants();
            final ReferenceBases overlappingBases = entry._2().getOverlappingReferenceBases();
            // Wrap the per-read bases so the engine can query them as a reference source.
            final ReferenceDataSource inMemoryReference = new ReferenceMemorySource(overlappingBases, referenceDictionary);
            engine.processRead(entry._1(), inMemoryReference, overlappingVariants);
        }
        return Arrays.asList(engine.getRecalibrationTables()).iterator();
    });

    // Stage 2: merge the per-partition tables, starting from an empty table.
    final RecalibrationTables zeroTables = new RecalibrationTables(new StandardCovariateList(recalArgs, header));
    // Aggregation depth is floor(log2(#partitions)), but never less than 1.
    final int log2Partitions = (int) (Math.log(perPartitionTables.partitions().size()) / Math.log(2));
    final int aggregationDepth = Math.max(1, log2Partitions);
    final RecalibrationTables mergedTables = perPartitionTables.treeAggregate(
            zeroTables,
            RecalibrationTables::inPlaceCombine,
            RecalibrationTables::inPlaceCombine,
            aggregationDepth);
    BaseRecalibrationEngine.finalizeRecalibrationTables(mergedTables);

    // Stage 3: quantize and build the report.
    final QuantizationInfo quantization = new QuantizationInfo(mergedTables, recalArgs.QUANTIZING_LEVELS);
    final StandardCovariateList reportCovariates = new StandardCovariateList(recalArgs, header);
    return RecalUtils.createRecalibrationReport(
            recalArgs.generateReportTable(reportCovariates.covariateNames()),
            quantization.generateReportTable(),
            RecalUtils.generateReportTables(mergedTables, reportCovariates));
}
use of org.broadinstitute.hellbender.utils.reference.ReferenceBases in project gatk by broadinstitute.
the class BaseRecalibratorEngineSparkWrapper method apply.
/**
 * Feeds every read of every incoming shard, together with its overlapping variants and
 * reference bases, to a freshly created BaseRecalibrationEngine.
 *
 * @param shards shards whose {@code reads} and {@code readContext} lists are read in parallel by index
 * @return an iterator over a single element: the engine's recalibration tables
 */
public Iterator<RecalibrationTables> apply(Iterator<ContextShard> shards) throws Exception {
    // Pull the header and sequence dictionary out of their holders, then set up the engine.
    header = headerBcast.value();
    referenceSequenceDictionary = referenceSequenceDictionaryBcast.value();
    recalibrationEngine = new BaseRecalibrationEngine(recalArgs, header);

    while (shards.hasNext()) {
        final ContextShard currentShard = shards.next();
        // reads.get(idx) and readContext.get(idx) belong together, hence the index loop.
        for (int idx = 0; idx < currentShard.reads.size(); idx++) {
            final GATKRead currentRead = currentShard.reads.get(idx);
            // Reads are shipped without the header -- put it back in
            ReadUtils.restoreHeaderIfNecessary(currentRead, header);

            final ReadContextData context = currentShard.readContext.get(idx);
            final Iterable<GATKVariant> overlappingVariants = context.getOverlappingVariants();
            final ReferenceBases overlappingBases = context.getOverlappingReferenceBases();
            final ReferenceDataSource inMemoryReference = new ReferenceMemorySource(overlappingBases, referenceSequenceDictionary);

            recalibrationEngine.processRead(currentRead, inMemoryReference, overlappingVariants);
        }
    }

    final ArrayList<RecalibrationTables> singleResult = new ArrayList<>();
    singleResult.add(recalibrationEngine.getRecalibrationTables());
    return singleResult.iterator();
}
Aggregations