Search in sources:

Example 1 with DownsamplingMethod

use of org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod in project gatk by broadinstitute.

the class LIBSDownsamplingInfoUnitTest method testToLibsInfoNone.

/**
 * Converts {@code DownsamplingMethod.NONE} to a {@code LIBSDownsamplingInfo} and checks that
 * the result reports downsampling as disabled with a target coverage of 0.
 */
@Test
public void testToLibsInfoNone() throws Exception {
    final LIBSDownsamplingInfo info = LIBSDownsamplingInfo.toDownsamplingInfo(DownsamplingMethod.NONE);

    // no downsampling requested => flag off, coverage reported as zero
    Assert.assertFalse(info.isPerformDownsampling());
    Assert.assertEquals(info.getToCoverage(), 0);
}
Also used : DownsamplingMethod(org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 2 with DownsamplingMethod

use of org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod in project gatk by broadinstitute.

the class LIBSDownsamplingInfoUnitTest method testToLibsInfoBySampleToCoverage.

/**
 * Builds a {@code DownsamplingMethod} of the default type with a target coverage (and no
 * fraction), converts it to a {@code LIBSDownsamplingInfo}, and checks that downsampling is
 * reported as enabled with that same coverage.
 */
@Test
public void testToLibsInfoBySampleToCoverage() throws Exception {
    final int targetCoverage = 100;
    final DownsamplingMethod toCoverageMethod =
            new DownsamplingMethod(DownsamplingMethod.DEFAULT_DOWNSAMPLING_TYPE, targetCoverage, null);

    final LIBSDownsamplingInfo info = LIBSDownsamplingInfo.toDownsamplingInfo(toCoverageMethod);

    Assert.assertTrue(info.isPerformDownsampling());
    Assert.assertEquals(info.getToCoverage(), targetCoverage);
}
Also used : DownsamplingMethod(org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 3 with DownsamplingMethod

use of org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod in project gatk by broadinstitute.

the class LIBSDownsamplingInfoUnitTest method testToLibsInfoBySampleToFraction.

/**
 * Builds a {@code DownsamplingMethod} of the default type with a fraction (and no target
 * coverage), converts it to a {@code LIBSDownsamplingInfo}, and checks that downsampling is
 * reported as disabled with a coverage of 0.
 */
@Test
public void testToLibsInfoBySampleToFraction() throws Exception {
    final double fraction = 0.1;
    final DownsamplingMethod toFractionMethod =
            new DownsamplingMethod(DownsamplingMethod.DEFAULT_DOWNSAMPLING_TYPE, null, fraction);

    final LIBSDownsamplingInfo info = LIBSDownsamplingInfo.toDownsamplingInfo(toFractionMethod);

    Assert.assertFalse(info.isPerformDownsampling());
    Assert.assertEquals(info.getToCoverage(), 0);
}
Also used : DownsamplingMethod(org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 4 with DownsamplingMethod

use of org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod in project gatk by broadinstitute.

the class LocusIteratorByStateUnitTest method testLIBS_ComplexPileupTests.

/**
 * Runs a {@code LocusIteratorByState} over reads produced by an {@code ArtificialBAMBuilder}
 * and checks, locus by locus and again at the end of the traversal, that the pileups and the
 * reads handed back by the iterator are consistent with the inputs.
 *
 * @param nReadsPerLocus          number of reads per locus passed to the BAM builder
 * @param nLoci                   number of loci passed to the BAM builder
 * @param nSamples                number of samples in the artificial header
 * @param keepReads               whether the iterator is asked to keep the reads it was given
 * @param grabReadsAfterEachCycle when keeping reads, whether they are transferred out after
 *                                every locus (true) or once after the traversal (false)
 * @param downsampleTo            per-sample target coverage; -1 disables downsampling
 */
@Test(enabled = true, dataProvider = "LIBS_ComplexPileupTests")
public void testLIBS_ComplexPileupTests(final int nReadsPerLocus, final int nLoci, final int nSamples, final boolean keepReads, final boolean grabReadsAfterEachCycle, final int downsampleTo) {
    final int readLength = 10;
    // -1 is the sentinel for "do not downsample"
    final boolean downsample = downsampleTo != -1;
    final DownsamplingMethod downsampler = downsample ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null) : new DownsamplingMethod(DownsampleType.NONE, null, null);
    final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(header.getSequenceDictionary(), nReadsPerLocus, nLoci);
    bamBuilder.createAndSetHeader(nSamples).setReadLength(readLength).setAlignmentStart(1);
    final List<GATKRead> reads = bamBuilder.makeReads();
    final LocusIteratorByState li;
    li = new LocusIteratorByState(new FakeCloseableIterator<>(reads.iterator()), downsampler, keepReads, bamBuilder.getSamples(), bamBuilder.getHeader(), true);
    // both sets are insertion-ordered so that iteration replays the order of arrival
    final Set<GATKRead> seenSoFar = new LinkedHashSet<>();
    final Set<GATKRead> keptReads = new LinkedHashSet<>();
    int bpVisited = 0;
    while (li.hasNext()) {
        bpVisited++;
        final AlignmentContext alignmentContext = li.next();
        final ReadPileup p = alignmentContext.getBasePileup();
        AssertWellOrderedPileup(p);
        // With downsampling no bound on the pileup size is asserted here: an upper bound of
        // (max coverage * nSamples) is just not a safe test.
        if (!downsample) {
            final int minPileupSize = nReadsPerLocus * nSamples;
            Assert.assertTrue(p.size() >= minPileupSize);
        }
        // the number of reads starting here
        int nReadsStartingHere = 0;
        for (final GATKRead read : p.getReads()) {
            if (read.getStart() == alignmentContext.getPosition()) {
                nReadsStartingHere++;
            }
        }
        // we can have no more than maxDownsampledCoverage per sample
        final int maxCoveragePerLocus = downsample ? downsampleTo : nReadsPerLocus;
        Assert.assertTrue(nReadsStartingHere <= maxCoveragePerLocus * nSamples);
        seenSoFar.addAll(p.getReads());
        if (keepReads && grabReadsAfterEachCycle) {
            final List<GATKRead> locusReads = li.transferReadsFromAllPreviousPileups();
            if (downsample) {
                // with downsampling we might have some reads here that were downsampled away
                // in the pileup.  We want to ensure that no more than the max coverage per sample is added
                Assert.assertTrue(locusReads.size() >= nReadsStartingHere);
                Assert.assertTrue(locusReads.size() <= maxCoveragePerLocus * nSamples);
            } else {
                Assert.assertEquals(locusReads.size(), nReadsStartingHere);
            }
            keptReads.addAll(locusReads);
            // check that all reads we've seen so far are in our keptReads
            for (final GATKRead read : seenSoFar) {
                Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
            }
        }
        if (!keepReads) {
            Assert.assertTrue(li.getReadsFromAllPreviousPileups().isEmpty(), "Not keeping reads but the underlying list of reads isn't empty");
        }
    }
    if (keepReads && !grabReadsAfterEachCycle) {
        keptReads.addAll(li.transferReadsFromAllPreviousPileups());
    }
    if (!downsample) {
        // downsampling may drop loci
        final int expectedBpToVisit = nLoci + readLength - 1;
        Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
    }
    if (keepReads) {
        // check we have the right number of reads
        final int totalReads = nLoci * nReadsPerLocus * nSamples;
        if (!downsample) {
            // downsampling may drop reads
            Assert.assertEquals(keptReads.size(), totalReads, "LIBS didn't keep the right number of reads during the traversal");
            // check that the order of reads is the same as in our read list.
            // The kept side must come from keptReads (iterated in insertion order); reading
            // both sides from the input list would make this assertion a tautology.
            int position = 0;
            for (final GATKRead keptRead : keptReads) {
                final GATKRead inputRead = reads.get(position);
                Assert.assertSame(keptRead, inputRead, "Input reads and kept reads differ at position " + position);
                position++;
            }
        } else {
            Assert.assertTrue(keptReads.size() <= totalReads, "LIBS didn't keep the right number of reads during the traversal");
        }
        // check uniqueness
        final Set<String> readNames = new LinkedHashSet<>();
        for (final GATKRead read : keptReads) {
            Assert.assertFalse(readNames.contains(read.getName()), "Found duplicate reads in the kept reads");
            readNames.add(read.getName());
        }
        // check that all reads we've seen are in our keptReads
        for (final GATKRead read : seenSoFar) {
            Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
        }
        if (!downsample) {
            // check that every read in the list of keep reads occurred at least once in one of the pileups
            for (final GATKRead keptRead : keptReads) {
                Assert.assertTrue(seenSoFar.contains(keptRead), "There's a read " + keptRead + " in our keptReads list that never appeared in any pileup");
            }
        }
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) ArtificialBAMBuilder(org.broadinstitute.hellbender.utils.read.ArtificialBAMBuilder) AlignmentContext(org.broadinstitute.hellbender.engine.AlignmentContext) DownsamplingMethod(org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod) Test(org.testng.annotations.Test)

Example 5 with DownsamplingMethod

use of org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod in project gatk by broadinstitute.

the class LocusIteratorByStateUnitTest method testLIBS_NotHoldingTooManyReads.

/**
 * Runs a {@code LocusIteratorByState} (not keeping reads) over a
 * {@code WeakReadTrackingIterator} for a single sample and checks that, when downsampling is
 * enabled, no pileup holds more than {@code downsampleTo} elements.
 *
 * @param nReadsPerLocus number of reads per locus passed to the read iterator
 * @param downsampleTo   per-sample target coverage; -1 disables downsampling
 * @param payloadInBytes payload size passed to the read iterator
 */
@Test(enabled = true, dataProvider = "LIBS_NotHoldingTooManyReads")
public void testLIBS_NotHoldingTooManyReads(final int nReadsPerLocus, final int downsampleTo, final int payloadInBytes) {
    logger.warn(String.format("testLIBS_NotHoldingTooManyReads %d %d %d", nReadsPerLocus, downsampleTo, payloadInBytes));
    final int readLength = 10;
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 100000);
    final int nSamples = 1;
    final List<String> samples = new ArrayList<>(nSamples);
    // register one read group per sample in the header
    for (int i = 0; i < nSamples; i++) {
        final SAMReadGroupRecord rg = new SAMReadGroupRecord("rg" + i);
        final String sample = "sample" + i;
        samples.add(sample);
        rg.setSample(sample);
        rg.setPlatform(NGSPlatform.ILLUMINA.getDefaultPlatform());
        header.addReadGroup(rg);
    }
    // -1 is the sentinel for "do not downsample"
    final boolean downsample = downsampleTo != -1;
    final DownsamplingMethod downsampler = downsample ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null) : new DownsamplingMethod(DownsampleType.NONE, null, null);
    final WeakReadTrackingIterator iterator = new WeakReadTrackingIterator(nReadsPerLocus, readLength, payloadInBytes, header);
    final LocusIteratorByState li;
    li = new LocusIteratorByState(iterator, downsampler, false, samples, header, true);
    while (li.hasNext()) {
        final AlignmentContext next = li.next();
        // The coverage bound only exists when downsampling; with the -1 sentinel the
        // comparison "size <= -1" could never hold, so it is skipped in that case.
        if (downsample) {
            Assert.assertTrue(next.getBasePileup().size() <= downsampleTo, "Too many elements in pileup " + next);
        }
        // TODO -- assert that there are <= X reads in memory after GC for some X
    }
}
Also used : AlignmentContext(org.broadinstitute.hellbender.engine.AlignmentContext) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) DownsamplingMethod(org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Test(org.testng.annotations.Test)

Aggregations

DownsamplingMethod (org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod): 5, Test (org.testng.annotations.Test): 5, BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest): 3, AlignmentContext (org.broadinstitute.hellbender.engine.AlignmentContext): 2, SAMFileHeader (htsjdk.samtools.SAMFileHeader): 1, SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord): 1, ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup): 1, ArtificialBAMBuilder (org.broadinstitute.hellbender.utils.read.ArtificialBAMBuilder): 1, GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead): 1