Search in sources :

Example 61 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.

the class ReadLikelihoodsUnitTest method testBestAlleleMap.

@Test(dataProvider = "dataSets")
public void testBestAlleleMap(final String[] samples, final Allele[] alleles, final Map<String, List<GATKRead>> reads) {
    final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
    fillWithRandomLikelihoods(samples, alleles, original);
    final Map<Allele, List<GATKRead>> expected = new LinkedHashMap<>(alleles.length);
    for (final Allele allele : alleles) expected.put(allele, new ArrayList<>());
    final int numberOfAlleles = alleles.length;
    for (int s = 0; s < samples.length; s++) {
        final int sampleReadCount = original.sampleReadCount(s);
        final LikelihoodMatrix<Allele> sampleMatrix = original.sampleMatrix(s);
        for (int r = 0; r < sampleReadCount; r++) {
            int bestindexOfAllele = -1;
            double bestAlleleLk = Double.NEGATIVE_INFINITY;
            double secondBestAlleleLk = Double.NEGATIVE_INFINITY;
            for (int a = 0; a < numberOfAlleles; a++) {
                final double lk = sampleMatrix.get(a, r);
                if (lk > bestAlleleLk) {
                    secondBestAlleleLk = bestAlleleLk;
                    bestAlleleLk = lk;
                    bestindexOfAllele = a;
                } else if (lk > secondBestAlleleLk) {
                    secondBestAlleleLk = lk;
                }
            }
            if ((bestAlleleLk - secondBestAlleleLk) > ReadLikelihoods.BestAllele.INFORMATIVE_THRESHOLD)
                expected.get(alleles[bestindexOfAllele]).add(sampleMatrix.getRead(r));
        }
    }
    final Map<Allele, List<GATKRead>> actual = original.readsByBestAlleleMap();
    Assert.assertEquals(actual.size(), alleles.length);
    for (final Allele allele : alleles) {
        final List<GATKRead> expectedList = expected.get(allele);
        final List<GATKRead> actualList = actual.get(allele);
        final Set<GATKRead> expectedSet = new LinkedHashSet<>(expectedList);
        final Set<GATKRead> actualSet = new LinkedHashSet<>(actualList);
        Assert.assertEquals(actualSet, expectedSet);
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Allele(htsjdk.variant.variantcontext.Allele) Test(org.testng.annotations.Test)

Example 62 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.

the class ReadClipperUnitTest method testHardClipByReferenceCoordinatesLeftTail.

@Test
public void testHardClipByReferenceCoordinatesLeftTail() {
    for (Cigar cigar : cigarList) {
        GATKRead read = ReadClipperTestUtils.makeReadFromCigar(cigar);
        int alnStart = read.getStart();
        int alnEnd = read.getEnd();
        if (getSoftStart(read) == alnStart) {
            // we can't test left clipping if the read has hanging soft clips on the left side
            for (int i = alnStart; i <= alnEnd; i++) {
                GATKRead clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
                if (!clipLeft.isEmpty()) {
                    Assert.assertTrue(clipLeft.getStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getStart(), i + 1, read.getCigar().toString(), clipLeft.getCigar().toString()));
                    assertUnclippedLimits(read, clipLeft);
                }
            }
        }
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Cigar(htsjdk.samtools.Cigar) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) SplitNCigarReadsUnitTest(org.broadinstitute.hellbender.tools.walkers.rnaseq.SplitNCigarReadsUnitTest)

Example 63 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.

the class ReadClipperUnitTest method testHardClipByReferenceCoordinates.

@Test
public void testHardClipByReferenceCoordinates() {
    for (Cigar cigar : cigarList) {
        GATKRead read = ReadClipperTestUtils.makeReadFromCigar(cigar);
        int start = getSoftStart(read);
        int stop = getSoftEnd(read);
        for (int i = start; i <= stop; i++) {
            GATKRead clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i);
            if (!clipLeft.isEmpty()) {
                Assert.assertTrue(clipLeft.getStart() >= Math.min(read.getEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getStart(), i + 1, read.getCigar().toString(), clipLeft.getCigar().toString()));
                assertUnclippedLimits(read, clipLeft);
            }
            GATKRead clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1);
            if (!clipRight.isEmpty() && clipRight.getStart() <= clipRight.getEnd()) {
                // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
                Assert.assertTrue(clipRight.getEnd() <= Math.max(read.getStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getEnd(), i - 1, read.getCigar().toString(), clipRight.getCigar().toString()));
                assertUnclippedLimits(read, clipRight);
            }
        }
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Cigar(htsjdk.samtools.Cigar) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) SplitNCigarReadsUnitTest(org.broadinstitute.hellbender.tools.walkers.rnaseq.SplitNCigarReadsUnitTest)

Example 64 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.

the class ReadClipperUnitTest method testHardClipLowQualEnds.

@Test
public void testHardClipLowQualEnds() {
    final byte LOW_QUAL = 2;
    final byte HIGH_QUAL = 30;
    /** create a read for every cigar permutation */
    for (Cigar cigar : cigarList) {
        GATKRead read = ReadClipperTestUtils.makeReadFromCigar(cigar);
        int readLength = read.getLength();
        byte[] quals = new byte[readLength];
        for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
            /**  create a read with nLowQualBases in the left tail */
            Arrays.fill(quals, HIGH_QUAL);
            for (int addLeft = 0; addLeft < nLowQualBases; addLeft++) quals[addLeft] = LOW_QUAL;
            read.setBaseQualities(quals);
            GATKRead clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
            checkClippedReadsForLowQualEnds(read, clipLeft, LOW_QUAL, nLowQualBases);
            /** create a read with nLowQualBases in the right tail */
            Arrays.fill(quals, HIGH_QUAL);
            for (int addRight = 0; addRight < nLowQualBases; addRight++) quals[readLength - addRight - 1] = LOW_QUAL;
            read.setBaseQualities(quals);
            GATKRead clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
            checkClippedReadsForLowQualEnds(read, clipRight, LOW_QUAL, nLowQualBases);
            /** create a read with nLowQualBases on both tails */
            if (nLowQualBases <= readLength / 2) {
                Arrays.fill(quals, HIGH_QUAL);
                for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
                    quals[addBoth] = LOW_QUAL;
                    quals[readLength - addBoth - 1] = LOW_QUAL;
                }
                read.setBaseQualities(quals);
                GATKRead clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
                checkClippedReadsForLowQualEnds(read, clipBoth, LOW_QUAL, 2 * nLowQualBases);
            }
        }
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Cigar(htsjdk.samtools.Cigar) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) SplitNCigarReadsUnitTest(org.broadinstitute.hellbender.tools.walkers.rnaseq.SplitNCigarReadsUnitTest)

Example 65 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk-protected by broadinstitute.

the class HaplotypeCallerSpark method createReadShards.

/**
     * Create an RDD of {@link Shard} from an RDD of {@link GATKRead}
     * @param shardBoundariesBroadcast  broadcast of an {@link OverlapDetector} loaded with the intervals that should be used for creating ReadShards
     * @param reads Rdd of {@link GATKRead}
     * @return a Rdd of reads grouped into potentially overlapping shards
     */
private static JavaRDD<Shard<GATKRead>> createReadShards(final Broadcast<OverlapDetector<ShardBoundary>> shardBoundariesBroadcast, final JavaRDD<GATKRead> reads) {
    final JavaPairRDD<ShardBoundary, GATKRead> paired = reads.flatMapToPair(read -> {
        final Collection<ShardBoundary> overlappingShards = shardBoundariesBroadcast.value().getOverlaps(read);
        return overlappingShards.stream().map(key -> new Tuple2<>(key, read)).iterator();
    });
    final JavaPairRDD<ShardBoundary, Iterable<GATKRead>> shardsWithReads = paired.groupByKey();
    return shardsWithReads.map(shard -> new SparkReadShard(shard._1(), shard._2()));
}
Also used : CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) SparkProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.SparkProgramGroup) ReferenceMultiSource(org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) Argument(org.broadinstitute.barclay.argparser.Argument) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Advanced(org.broadinstitute.barclay.argparser.Advanced) org.broadinstitute.hellbender.cmdline(org.broadinstitute.hellbender.cmdline) ArgumentCollection(org.broadinstitute.barclay.argparser.ArgumentCollection) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) Function(java.util.function.Function) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) IntervalUtils(org.broadinstitute.hellbender.utils.IntervalUtils) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) HaplotypeCallerArgumentCollection(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerArgumentCollection) SparkReadShard(org.broadinstitute.hellbender.engine.spark.SparkReadShard) StreamSupport(java.util.stream.StreamSupport) JavaRDD(org.apache.spark.api.java.JavaRDD) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) org.broadinstitute.barclay.argparser(org.broadinstitute.barclay.argparser) Broadcast(org.apache.spark.broadcast.Broadcast) OverlapDetector(htsjdk.samtools.util.OverlapDetector) Iterator(java.util.Iterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Collection(java.util.Collection) GATKSparkTool(org.broadinstitute.hellbender.engine.spark.GATKSparkTool) IOException(java.io.IOException) ReadFilter(org.broadinstitute.hellbender.engine.filters.ReadFilter) Tuple2(scala.Tuple2) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) HaplotypeCaller(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCaller) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) HaplotypeCallerEngine(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerEngine) Serializable(java.io.Serializable) org.broadinstitute.hellbender.engine(org.broadinstitute.hellbender.engine) List(java.util.List) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) Utils(org.broadinstitute.hellbender.utils.Utils) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SparkReadShard(org.broadinstitute.hellbender.engine.spark.SparkReadShard) Tuple2(scala.Tuple2)

Aggregations

GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)457 Test (org.testng.annotations.Test)286 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)163 SAMFileHeader (htsjdk.samtools.SAMFileHeader)87 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)59 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)40 ArrayList (java.util.ArrayList)34 Collectors (java.util.stream.Collectors)34 List (java.util.List)30 Cigar (htsjdk.samtools.Cigar)29 File (java.io.File)28 java.util (java.util)28 DataProvider (org.testng.annotations.DataProvider)28 JavaRDD (org.apache.spark.api.java.JavaRDD)26 Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)26 Assert (org.testng.Assert)25 ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup)24 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)22 Argument (org.broadinstitute.barclay.argparser.Argument)18 UserException (org.broadinstitute.hellbender.exceptions.UserException)18