use of org.broadinstitute.hellbender.engine.ReferenceDataSource in project gatk by broadinstitute.
the class BaseRecalibratorSparkFn method apply.
/**
 * Runs base quality score recalibration over a pair RDD of reads and their context data
 * and assembles the result into a {@link RecalibrationReport}.
 *
 * <p>Each partition is processed by its own {@link BaseRecalibrationEngine}; the resulting
 * per-partition tables are merged with a tree aggregation, finalized, and then turned into
 * the report tables.
 *
 * @param readsWithContext    reads paired with their overlapping reference bases and variants
 * @param header              header handed to the recalibration engine and the covariate lists
 * @param referenceDictionary sequence dictionary used for the per-read in-memory reference
 * @param recalArgs           recalibration arguments (covariates, quantizing levels, ...)
 * @return the recalibration report built from the merged tables
 */
public static RecalibrationReport apply(final JavaPairRDD<GATKRead, ReadContextData> readsWithContext, final SAMFileHeader header, final SAMSequenceDictionary referenceDictionary, final RecalibrationArgumentCollection recalArgs) {
    // One engine per partition; every partition contributes exactly one RecalibrationTables element.
    final JavaRDD<RecalibrationTables> perPartitionTables = readsWithContext.mapPartitions(partitionReads -> {
        final BaseRecalibrationEngine engine = new BaseRecalibrationEngine(recalArgs, header);
        engine.logCovariatesUsed();
        while (partitionReads.hasNext()) {
            final Tuple2<GATKRead, ReadContextData> entry = partitionReads.next();
            final GATKRead read = entry._1();
            final ReadContextData context = entry._2();
            final Iterable<GATKVariant> overlappingVariants = context.getOverlappingVariants();
            // Wrap just the bases overlapping this read so the engine can query them as a reference.
            final ReferenceDataSource inMemoryReference = new ReferenceMemorySource(context.getOverlappingReferenceBases(), referenceDictionary);
            engine.processRead(read, inMemoryReference, overlappingVariants);
        }
        return Arrays.asList(engine.getRecalibrationTables()).iterator();
    });

    // Zero value for the aggregation: tables built over a fresh covariate list.
    final RecalibrationTables zeroTables = new RecalibrationTables(new StandardCovariateList(recalArgs, header));

    // Aggregation depth is the truncated base-2 logarithm of the partition count, but never below 1.
    final int log2OfPartitionCount = (int) (Math.log(perPartitionTables.partitions().size()) / Math.log(2));
    final int aggregationDepth = Math.max(1, log2OfPartitionCount);
    final RecalibrationTables mergedTables = perPartitionTables.treeAggregate(zeroTables, RecalibrationTables::inPlaceCombine, RecalibrationTables::inPlaceCombine, aggregationDepth);
    BaseRecalibrationEngine.finalizeRecalibrationTables(mergedTables);

    final QuantizationInfo quantization = new QuantizationInfo(mergedTables, recalArgs.QUANTIZING_LEVELS);
    final StandardCovariateList reportCovariates = new StandardCovariateList(recalArgs, header);
    return RecalUtils.createRecalibrationReport(
            recalArgs.generateReportTable(reportCovariates.covariateNames()),
            quantization.generateReportTable(),
            RecalUtils.generateReportTables(mergedTables, reportCovariates));
}
use of org.broadinstitute.hellbender.engine.ReferenceDataSource in project gatk by broadinstitute.
the class BaseRecalibrationEngineUnitTest method testCalculateIsIndel.
/**
 * Checks the insertion or deletion flags that
 * {@code BaseRecalibrationEngine.calculateIsSNPOrIndel} fills in for an artificial read.
 *
 * @param cigar     CIGAR string used to build the artificial read
 * @param negStrand whether the read is marked as reverse-strand
 * @param mode      {@code BASE_INSERTION} selects the insertion array; any other value selects the deletion array
 * @param expected  expected per-base flags for the selected array
 */
@Test(dataProvider = "CalculateIsIndelData")
public void testCalculateIsIndel(final String cigar, final boolean negStrand, final EventType mode, final int[] expected) {
    final GATKRead read = ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode(cigar));
    read.setIsReverseStrand(negStrand);

    // Fake reference data, since the indel calculation does not use the reference at all.
    final byte[] placeholderBases = Utils.repeatBytes((byte) 'A', read.getEnd() - read.getStart() + 1);
    final ReferenceBases fakeReferenceBases = new ReferenceBases(placeholderBases, new SimpleInterval(read));
    final ReferenceDataSource refSource = new ReferenceMemorySource(fakeReferenceBases, ArtificialReadUtils.createArtificialSamHeader().getSequenceDictionary());

    // One output slot per read base for each event type.
    final int readLength = read.getLength();
    final int[] snpFlags = new int[readLength];
    final int[] insertionFlags = new int[readLength];
    final int[] deletionFlags = new int[readLength];
    BaseRecalibrationEngine.calculateIsSNPOrIndel(read, refSource, snpFlags, insertionFlags, deletionFlags);

    final int[] actual;
    if (mode == EventType.BASE_INSERTION) {
        actual = insertionFlags;
    } else {
        actual = deletionFlags;
    }

    final String failureMessage = "calculateIsSNPOrIndel() failed with " + mode + " and cigar " + cigar + " Expected " + Arrays.toString(expected) + " but got " + Arrays.toString(actual);
    Assert.assertEquals(actual, expected, failureMessage);
}
use of org.broadinstitute.hellbender.engine.ReferenceDataSource in project gatk by broadinstitute.
the class HaplotypeCallerGenotypingEngine method makeAnnotatedCall.
/**
 * Annotates {@code call} using a reference context backed by the supplied in-memory bases.
 *
 * <p>The annotated result is returned as-is when {@code call} has the same number of alleles
 * as {@code mergedVC}; otherwise it is passed through
 * {@code GATKVariantContextUtils.reverseTrimAlleles} first.
 *
 * @param ref                   reference bases covering {@code refLoc}
 * @param refLoc                interval the {@code ref} bases correspond to
 * @param tracker               feature context forwarded to the annotation engine
 * @param header                header supplying the sequence dictionary for the in-memory reference
 * @param mergedVC              merged variant context; provides the locus and the allele count to compare against
 * @param readAlleleLikelihoods read likelihoods forwarded to the annotation engine
 * @param call                  the call to annotate
 * @return the annotated call, reverse-trimmed when its allele count differs from {@code mergedVC}
 */
protected VariantContext makeAnnotatedCall(byte[] ref, SimpleInterval refLoc, FeatureContext tracker, SAMFileHeader header, VariantContext mergedVC, ReadLikelihoods<Allele> readAlleleLikelihoods, VariantContext call) {
    final SimpleInterval variantSpan = new SimpleInterval(mergedVC.getContig(), mergedVC.getStart(), mergedVC.getEnd());
    final SimpleInterval referenceWindow = new SimpleInterval(refLoc);

    // Serve the caller-provided bases through the ReferenceDataSource interface.
    final ReferenceBases windowBases = new ReferenceBases(ref, referenceWindow);
    final ReferenceDataSource inMemoryReference = new ReferenceMemorySource(windowBases, header.getSequenceDictionary());
    final ReferenceContext referenceContext = new ReferenceContext(inMemoryReference, variantSpan, referenceWindow);

    // The always-true predicate is passed straight through to annotateContext.
    final VariantContext annotated = annotationEngine.annotateContext(call, tracker, referenceContext, readAlleleLikelihoods, a -> true);

    final boolean alleleCountUnchanged = call.getAlleles().size() == mergedVC.getAlleles().size();
    if (alleleCountUnchanged) {
        return annotated;
    }
    return GATKVariantContextUtils.reverseTrimAlleles(annotated);
}
use of org.broadinstitute.hellbender.engine.ReferenceDataSource in project gatk by broadinstitute.
the class BAQUnitTest method testBAQOverwritesExistingTagWithNull.
/**
 * Verifies that BAQ-ing a read that cannot be BAQed removes a pre-existing "BQ" tag
 * when asked to RECALCULATE and ADD_TAG.
 */
@Test
public void testBAQOverwritesExistingTagWithNull() {
    final File referenceFile = new File(hg19_chr1_1M_Reference);
    // The file-backed reference is closed on exit, even if the assertion below fails.
    try (final ReferenceDataSource rds = new ReferenceFileSource(referenceFile)) {
        // create a read with a single base off the end of the contig, which cannot be BAQed
        final GATKRead read = ArtificialReadUtils.createArtificialRead(createHeader(), "foo", 0, rds.getSequenceDictionary().getSequence("1").getSequenceLength() + 1, 1);
        read.setBases(new byte[] { (byte) 'A' });
        read.setBaseQualities(new byte[] { (byte) 20 });
        read.setCigar("1M");
        // Pre-populate the tag so we can observe whether baqRead clears it.
        read.setAttribute("BQ", "A");
        // try to BAQ and tell it to RECALCULATE AND ADD_TAG
        final BAQ baq = new BAQ(1.0e-3, 0.1, 7, (byte) 4);
        baq.baqRead(read, rds, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.ADD_TAG);
        // did we remove the existing tag?
        Assert.assertFalse(read.hasAttribute("BQ"));
    }
}
use of org.broadinstitute.hellbender.engine.ReferenceDataSource in project gatk by broadinstitute.
the class ReferenceBasesUnitTest method test.
/**
 * Verifies the REFERENCE_BASES_KEY annotation produced by {@code ReferenceBases} for a
 * single-base variant at 20:10,000,100, using a reference context over 20:10,000,000-10,000,200.
 */
@Test
public void test() {
    final File refFasta = new File(b37_reference_20_21);
    // The file-backed reference is closed on exit, even if the assertion below fails.
    try (final ReferenceDataSource refDataSource = new ReferenceFileSource(refFasta)) {
        final ReferenceContext ref = new ReferenceContext(refDataSource, new SimpleInterval("20", 10_000_000, 10_000_200));
        final VariantContext vc = new VariantContextBuilder("source", "20", 10_000_100, 10_000_100, Collections.singleton(Allele.create((byte) 'A', true))).make();
        // No third argument is supplied to annotate() in this test.
        final String refBases = (String) new ReferenceBases().annotate(ref, vc, null).get(ReferenceBases.REFERENCE_BASES_KEY);
        Assert.assertEquals(refBases, "ACTGCATCCCTTGCATTTCC");
    }
}
Aggregations