Search in sources :

Example 1 with ReferenceMemorySource

use of org.broadinstitute.hellbender.engine.ReferenceMemorySource in project gatk by broadinstitute.

the class BaseRecalibratorSparkFn method apply.

public static RecalibrationReport apply(final JavaPairRDD<GATKRead, ReadContextData> readsWithContext, final SAMFileHeader header, final SAMSequenceDictionary referenceDictionary, final RecalibrationArgumentCollection recalArgs) {
    JavaRDD<RecalibrationTables> unmergedTables = readsWithContext.mapPartitions(readWithContextIterator -> {
        final BaseRecalibrationEngine bqsr = new BaseRecalibrationEngine(recalArgs, header);
        bqsr.logCovariatesUsed();
        while (readWithContextIterator.hasNext()) {
            final Tuple2<GATKRead, ReadContextData> readWithData = readWithContextIterator.next();
            Iterable<GATKVariant> variants = readWithData._2().getOverlappingVariants();
            final ReferenceBases refBases = readWithData._2().getOverlappingReferenceBases();
            ReferenceDataSource refDS = new ReferenceMemorySource(refBases, referenceDictionary);
            bqsr.processRead(readWithData._1(), refDS, variants);
        }
        return Arrays.asList(bqsr.getRecalibrationTables()).iterator();
    });
    final RecalibrationTables emptyRecalibrationTable = new RecalibrationTables(new StandardCovariateList(recalArgs, header));
    final RecalibrationTables combinedTables = unmergedTables.treeAggregate(emptyRecalibrationTable, RecalibrationTables::inPlaceCombine, RecalibrationTables::inPlaceCombine, Math.max(1, (int) (Math.log(unmergedTables.partitions().size()) / Math.log(2))));
    BaseRecalibrationEngine.finalizeRecalibrationTables(combinedTables);
    final QuantizationInfo quantizationInfo = new QuantizationInfo(combinedTables, recalArgs.QUANTIZING_LEVELS);
    final StandardCovariateList covariates = new StandardCovariateList(recalArgs, header);
    return RecalUtils.createRecalibrationReport(recalArgs.generateReportTable(covariates.covariateNames()), quantizationInfo.generateReportTable(), RecalUtils.generateReportTables(combinedTables, covariates));
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) GATKVariant(org.broadinstitute.hellbender.utils.variant.GATKVariant) ReferenceDataSource(org.broadinstitute.hellbender.engine.ReferenceDataSource) ReferenceMemorySource(org.broadinstitute.hellbender.engine.ReferenceMemorySource) StandardCovariateList(org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList) ReadContextData(org.broadinstitute.hellbender.engine.ReadContextData) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases)

Example 2 with ReferenceMemorySource

use of org.broadinstitute.hellbender.engine.ReferenceMemorySource in project gatk by broadinstitute.

the class BaseRecalibrationEngineUnitTest method testCalculateIsIndel.

@Test(dataProvider = "CalculateIsIndelData")
public void testCalculateIsIndel(final String cigar, final boolean negStrand, final EventType mode, final int[] expected) {
    final GATKRead read = ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode(cigar));
    read.setIsReverseStrand(negStrand);
    // Fake reference data, since the indel calculation does not use the reference at all.
    final ReferenceDataSource refSource = new ReferenceMemorySource(new ReferenceBases(Utils.repeatBytes((byte) 'A', read.getEnd() - read.getStart() + 1), new SimpleInterval(read)), ArtificialReadUtils.createArtificialSamHeader().getSequenceDictionary());
    int[] isSNP = new int[read.getLength()];
    int[] isInsertion = new int[isSNP.length];
    int[] isDeletion = new int[isSNP.length];
    BaseRecalibrationEngine.calculateIsSNPOrIndel(read, refSource, isSNP, isInsertion, isDeletion);
    final int[] actual = (mode == EventType.BASE_INSERTION ? isInsertion : isDeletion);
    Assert.assertEquals(actual, expected, "calculateIsSNPOrIndel() failed with " + mode + " and cigar " + cigar + " Expected " + Arrays.toString(expected) + " but got " + Arrays.toString(actual));
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) ReferenceDataSource(org.broadinstitute.hellbender.engine.ReferenceDataSource) ReferenceMemorySource(org.broadinstitute.hellbender.engine.ReferenceMemorySource) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Example 3 with ReferenceMemorySource

use of org.broadinstitute.hellbender.engine.ReferenceMemorySource in project gatk by broadinstitute.

the class HaplotypeCallerGenotypingEngine method makeAnnotatedCall.

protected VariantContext makeAnnotatedCall(byte[] ref, SimpleInterval refLoc, FeatureContext tracker, SAMFileHeader header, VariantContext mergedVC, ReadLikelihoods<Allele> readAlleleLikelihoods, VariantContext call) {
    final SimpleInterval locus = new SimpleInterval(mergedVC.getContig(), mergedVC.getStart(), mergedVC.getEnd());
    final SimpleInterval refLocInterval = new SimpleInterval(refLoc);
    final ReferenceDataSource refData = new ReferenceMemorySource(new ReferenceBases(ref, refLocInterval), header.getSequenceDictionary());
    final ReferenceContext referenceContext = new ReferenceContext(refData, locus, refLocInterval);
    final VariantContext untrimmedResult = annotationEngine.annotateContext(call, tracker, referenceContext, readAlleleLikelihoods, a -> true);
    return call.getAlleles().size() == mergedVC.getAlleles().size() ? untrimmedResult : GATKVariantContextUtils.reverseTrimAlleles(untrimmedResult);
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) ReferenceDataSource(org.broadinstitute.hellbender.engine.ReferenceDataSource) ReferenceMemorySource(org.broadinstitute.hellbender.engine.ReferenceMemorySource) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 4 with ReferenceMemorySource

use of org.broadinstitute.hellbender.engine.ReferenceMemorySource in project gatk-protected by broadinstitute.

the class HaplotypeCallerGenotypingEngine method makeAnnotatedCall.

protected VariantContext makeAnnotatedCall(byte[] ref, SimpleInterval refLoc, FeatureContext tracker, SAMFileHeader header, VariantContext mergedVC, ReadLikelihoods<Allele> readAlleleLikelihoods, VariantContext call) {
    final SimpleInterval locus = new SimpleInterval(mergedVC.getContig(), mergedVC.getStart(), mergedVC.getEnd());
    final SimpleInterval refLocInterval = new SimpleInterval(refLoc);
    final ReferenceDataSource refData = new ReferenceMemorySource(new ReferenceBases(ref, refLocInterval), header.getSequenceDictionary());
    final ReferenceContext referenceContext = new ReferenceContext(refData, locus, refLocInterval);
    final VariantContext untrimmedResult = annotationEngine.annotateContext(call, tracker, referenceContext, readAlleleLikelihoods, a -> true);
    return call.getAlleles().size() == mergedVC.getAlleles().size() ? untrimmedResult : GATKVariantContextUtils.reverseTrimAlleles(untrimmedResult);
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) ReferenceDataSource(org.broadinstitute.hellbender.engine.ReferenceDataSource) ReferenceMemorySource(org.broadinstitute.hellbender.engine.ReferenceMemorySource) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 5 with ReferenceMemorySource

use of org.broadinstitute.hellbender.engine.ReferenceMemorySource in project gatk by broadinstitute.

the class BAQUnitTest method createData1.

@DataProvider(name = "data")
public Object[][] createData1() {
    List<BAQTest> params = new ArrayList<>();
    SAMSequenceDictionary dict = new SAMSequenceDictionary();
    dict.addSequence(new SAMSequenceRecord("1", Integer.MAX_VALUE));
    params.add(new BAQTest("GCTGCTCCTGGTACTGCTGGATGAGGGCCTCGATGAAGCTAAGCTTTTTCTCCTGCTCCTGCGTGATCCGCTGCAG", "GCTGCTCCTGGTACTGCTGGATGAGGGCCTCGATGAAGCTAAGCTTTTCCTCCTGCTCCTGCGTGATCCGCTGCAG", "?BACCBDDDFFBCFFHHFIHFEIFHIGHHGHBFEIFGIIGEGIIHGGGIHHIIHIIHIIHGICCIGEII@IGIHCG", "?BACCBDDDFFBCFFHHFIHFEIFHIGHHGHBFEIFGIIGEGII410..0HIIHIIHIIHGICCIGEII@IGIHCE"));
    params.add(new BAQTest("GCTTTTTCTCCTCCTG", "GCTTTTCCTCCTCCTG", "IIHGGGIHHIIHHIIH", "EI410..0HIIHHIIE"));
    final String refString1 = "AAATTCAAGATTTCAAAGGCTCTTAACTGCTCAAGATAATTTTTTTTTTTTGAGACAGAGTCTTGCTGTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCACCCACCACCACGCCTGGCCAATTTTTTTGTATTTTTAGTAGAGATAG";
    final ReferenceDataSource rds1 = new ReferenceMemorySource(new ReferenceBases(refString1.getBytes(), new SimpleInterval("1", 9999807, 10000032)), dict);
    // big and complex, also does a cap from 3 to 4!
    params.add(new BAQTest(-3, 9999810L, "49M1I126M1I20M1I25M", refString1, "TTCAAGATTTCAAAGGCTCTTAACTGCTCAAGATAATTTTTTTTTTTTGTAGACAGAGTCTTGCTGTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCCACCCACCACCACGCCTGGCCTAATTTTTTTGTATTTTTAGTAGAGA", ">IHFECEBDBBCBCABABAADBD?AABBACEABABC?>?B>@A@@>A?B3BBC?CBDBAABBBBBAABAABBABDACCCBCDAACBCBABBB:ABDBACBBDCCCCABCDCCBCC@@;?<B@BC;CBBBAB=;A>ACBABBBABBCA@@<?>>AAA<CA@AABBABCC?BB8@<@C<>5;<A5=A;>=64>???B>=6497<<;;<;>2?>BA@??A6<<A59", ">EHFECEBDBBCBCABABAADBD?AABBACEABABC?>?B>@A@@>A?838BC?CBDBAABBBBBAABAABBABDACCCBCDAACBCBABBB:ABDBACBBDCCCCABCDCCBCC@@;?<B@BC;CBBBAB=;A>ACBABBBABBCA@@<?>>AAA<CA@AABBABCC?BB8@<@%<>5;<A5=A;>=64>???B;86497<<;;<;>2?>BA@??A6<<A59", rds1));
    final String refString2 = "CCGAGTAGCTGGGACTACAGGCACCCACCACCACGCCTGGCC";
    final ReferenceDataSource rds2 = new ReferenceMemorySource(new ReferenceBases(refString2.getBytes(), new SimpleInterval("1", 9999963, 10000004)), dict);
    // now changes
    params.add(new BAQTest(-3, 9999966L, "36M", refString2, "AGTAGCTGGGACTACAGGCACCCACCACCACGCCTG", "A?>>@>AA?@@>A?>A@?>@>>?=>?'>?=>7=?A9", "A?>>@>AA?@@>A?>A@?>@>>?=>?'>?=>7=?A9", rds2));
    final String refString3 = "CCACCACGCCTGGCCAATTTTTTTGTATTTTTAGTAGAGATA";
    final ReferenceDataSource rds3 = new ReferenceMemorySource(new ReferenceBases(refString3.getBytes(), new SimpleInterval("1", 9999990, 10000031)), dict);
    // raw base qualities are low -- but they shouldn't be capped
    params.add(new BAQTest(-3, 9999993L, "4=13X2=3X1=4X2=4X1=2X", refString3, "CCACGCTTGGCAAAGTTTTCCGTACGTTTAGCCGAG", "33'/(7+270&4),(&&-)$&,%7$',-/61(,6?8", "33'/(7+270&4),(&&-)$&,%7$',-/61(,6?8", rds3));
    List<Object[]> params2 = new ArrayList<>();
    for (BAQTest x : params) params2.add(new Object[] { x });
    return params2.toArray(new Object[][] {});
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) ReferenceDataSource(org.broadinstitute.hellbender.engine.ReferenceDataSource) ReferenceMemorySource(org.broadinstitute.hellbender.engine.ReferenceMemorySource) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DataProvider(org.testng.annotations.DataProvider)

Aggregations

ReferenceDataSource (org.broadinstitute.hellbender.engine.ReferenceDataSource)5 ReferenceMemorySource (org.broadinstitute.hellbender.engine.ReferenceMemorySource)5 ReferenceBases (org.broadinstitute.hellbender.utils.reference.ReferenceBases)5 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)4 ReferenceContext (org.broadinstitute.hellbender.engine.ReferenceContext)2 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)2 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 ArrayList (java.util.ArrayList)1 ReadContextData (org.broadinstitute.hellbender.engine.ReadContextData)1 StandardCovariateList (org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList)1 GATKVariant (org.broadinstitute.hellbender.utils.variant.GATKVariant)1 DataProvider (org.testng.annotations.DataProvider)1 Test (org.testng.annotations.Test)1