Search in sources :

Example 1 with ReadsDataSource

use of org.broadinstitute.hellbender.engine.ReadsDataSource in project gatk by broadinstitute.

the class IntervalAlignmentContextIteratorUnitTest method getAlignmentContexts.

/**
 * Collects every {@link AlignmentContext} produced by an {@link IntervalAlignmentContextIterator}
 * over the reads of the given BAM file.
 *
 * Reads are restricted to those passing {@link WellformedReadFilter} and
 * {@link ReadFilterLibrary.MappedReadFilter}; no downsampling is applied
 * ({@code LocusIteratorByState.NO_DOWNSAMPLING}). The sample name is fixed to "NA12878".
 *
 * @param locusIntervals intervals to traverse; when {@code null}, all intervals of the
 *                       BAM's sequence dictionary are used instead
 * @param bamPath path of the BAM file to read
 * @return the alignment contexts, in iteration order
 */
private List<AlignmentContext> getAlignmentContexts(final List<SimpleInterval> locusIntervals, final String bamPath) {
    final List<String> sampleNames = Collections.singletonList("NA12878");
    // The data source is closed once the iterator chain has been fully drained into the result list;
    // everything that lazily pulls reads from it must therefore stay inside this try block.
    try (final ReadsDataSource gatkReads = new ReadsDataSource(IOUtils.getPath(bamPath))) {
        final SAMFileHeader header = gatkReads.getHeader();
        final Stream<GATKRead> filteredReads = Utils.stream(gatkReads).filter(new WellformedReadFilter(header).and(new ReadFilterLibrary.MappedReadFilter()));
        final SAMSequenceDictionary dictionary = header.getSequenceDictionary();
        final LocusIteratorByState locusIteratorByState = new LocusIteratorByState(filteredReads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, sampleNames, header, true);
        // Fall back to the whole reference when the caller did not restrict the traversal.
        final List<SimpleInterval> relevantIntervals = (locusIntervals != null)
                ? locusIntervals
                : IntervalUtils.getAllIntervalsForReference(dictionary);
        final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(relevantIntervals.iterator());
        final IntervalAlignmentContextIterator intervalAlignmentContextIterator = new IntervalAlignmentContextIterator(locusIteratorByState, intervalLocusIterator, dictionary);
        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(intervalAlignmentContextIterator, Spliterator.ORDERED), false).collect(Collectors.toList());
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) WellformedReadFilter(org.broadinstitute.hellbender.engine.filters.WellformedReadFilter) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) LocusIteratorByState(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) ReadsDataSource(org.broadinstitute.hellbender.engine.ReadsDataSource) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 2 with ReadsDataSource

use of org.broadinstitute.hellbender.engine.ReadsDataSource in project gatk by broadinstitute.

the class PrintReadsIntegrationTest method testUnmappedReadInclusion.

/**
 * Runs the command line with the input, an output BAM, one {@code -L} per interval string and,
 * when given, a reference; then checks that the output contains exactly the expected reads,
 * by name and in order.
 *
 * @param input reads file passed as {@code -I}
 * @param reference reference passed as {@code -R}, or {@code null} to omit the argument
 * @param intervalStrings intervals, each passed as its own {@code -L} argument
 * @param expectedReadNames names of the reads expected in the output, in output order
 */
@Test(dataProvider = "UnmappedReadInclusionTestData")
public void testUnmappedReadInclusion(final File input, final String reference, final List<String> intervalStrings, final List<String> expectedReadNames) {
    final File outputBam = createTempFile("testUnmappedReadInclusion", ".bam");

    final ArgumentsBuilder commandLine = new ArgumentsBuilder();
    commandLine.add("-I");
    commandLine.add(input.getAbsolutePath());
    commandLine.add("-O");
    commandLine.add(outputBam.getAbsolutePath());
    intervalStrings.forEach(interval -> {
        commandLine.add("-L");
        commandLine.add(interval);
    });
    if (reference != null) {
        commandLine.add("-R");
        commandLine.add(reference);
    }
    runCommandLine(commandLine);

    try (final ReadsDataSource writtenReads = new ReadsDataSource(outputBam.toPath())) {
        // Materialize the output so the count can be verified before comparing names one by one.
        final List<GATKRead> observed = new ArrayList<>();
        writtenReads.forEach(observed::add);

        Assert.assertEquals(observed.size(), expectedReadNames.size(), "Wrong number of reads output");

        int position = 0;
        for (final GATKRead observedRead : observed) {
            Assert.assertEquals(observedRead.getName(), expectedReadNames.get(position), "Unexpected read name");
            position++;
        }
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ArgumentsBuilder(org.broadinstitute.hellbender.utils.test.ArgumentsBuilder) ReadsDataSource(org.broadinstitute.hellbender.engine.ReadsDataSource) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 3 with ReadsDataSource

use of org.broadinstitute.hellbender.engine.ReadsDataSource in project gatk by broadinstitute.

the class ReadsSparkSourceUnitTest method getSerialReads.

/**
 * Loads reads serially through a {@link ReadsDataSource} backed by a {@link SamReaderFactory},
 * then distributes them by calling {@code ctx.parallelize}.
 *
 * @param ctx Spark context used to create the RDD
 * @param bam file to load
 * @param referencePath path of the reference handed to the reader factory, or {@code null}
 *                      to build the factory without a reference
 * @param validationStringency validation stringency applied to the reader factory
 * @return RDD of (SAMRecord-backed) GATKReads from the file.
 */
public JavaRDD<GATKRead> getSerialReads(final JavaSparkContext ctx, final String bam, final String referencePath, final ValidationStringency validationStringency) {
    // NOTE(review): the header returned here is never used below. The call is retained because
    // it may be relied on to fail early on an unreadable input -- confirm before removing.
    final SAMFileHeader readsHeader = new ReadsSparkSource(ctx, validationStringency).getHeader(bam, referencePath);
    final SamReaderFactory samReaderFactory;
    if (referencePath != null) {
        final File reference = new File(referencePath);
        samReaderFactory = SamReaderFactory.makeDefault().validationStringency(validationStringency).referenceSequence(reference);
    } else {
        samReaderFactory = SamReaderFactory.makeDefault().validationStringency(validationStringency);
    }
    // Close the data source when done instead of leaking its underlying readers.
    try (final ReadsDataSource bam2 = new ReadsDataSource(IOUtils.getPath(bam), samReaderFactory)) {
        final List<GATKRead> records = Lists.newArrayList();
        for (final GATKRead read : bam2) {
            records.add(read);
        }
        return ctx.parallelize(records);
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadsDataSource(org.broadinstitute.hellbender.engine.ReadsDataSource) File(java.io.File)

Example 4 with ReadsDataSource

use of org.broadinstitute.hellbender.engine.ReadsDataSource in project gatk by broadinstitute.

the class SparkUtilsUnitTest method testConvertHeaderlessHadoopBamShardToBam.

/**
 * Checks that a headerless BAM shard, which cannot be read on its own, becomes a readable BAM
 * with the expected number of reads after
 * {@code SparkUtils.convertHeaderlessHadoopBamShardToBam} combines it with a header taken
 * from another BAM file.
 */
@Test
public void testConvertHeaderlessHadoopBamShardToBam() throws Exception {
    final File headerlessShard = new File(publicTestDir + "org/broadinstitute/hellbender/utils/spark/reads_data_source_test1.bam.headerless.part-r-00000");
    final File convertedBam = createTempFile("testConvertHadoopBamShardToBam", ".bam");
    final File bamWithHeader = new File(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam");
    final int expectedReadCount = 11;

    // Step 1: reading the raw shard is expected to fail with a SAMFormatException.
    boolean shardRejected = false;
    try (final ReadsDataSource shardReads = new ReadsDataSource(headerlessShard.toPath())) {
        for (final GATKRead ignored : shardReads) {
            // Iterating is only done to trigger the format error; the reads themselves are not needed.
        }
    } catch (SAMFormatException e) {
        shardRejected = true;
    }
    Assert.assertTrue(shardRejected, "Input shard should not be a valid BAM");

    // Step 2: borrow the header from the complete BAM.
    final SAMFileHeader header;
    try (final SamReader headerReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamWithHeader)) {
        header = headerReader.getFileHeader();
    } catch (IOException e) {
        throw new UserException("Error reading header from " + bamWithHeader.getAbsolutePath(), e);
    }

    // Step 3: convert, then count the reads in the resulting BAM.
    SparkUtils.convertHeaderlessHadoopBamShardToBam(headerlessShard, header, convertedBam);
    int observedReadCount = 0;
    try (final ReadsDataSource convertedReads = new ReadsDataSource(convertedBam.toPath())) {
        for (final GATKRead ignored : convertedReads) {
            observedReadCount++;
        }
    }
    Assert.assertEquals(observedReadCount, expectedReadCount, "Wrong number of reads in final BAM file");
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadsDataSource(org.broadinstitute.hellbender.engine.ReadsDataSource) IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 5 with ReadsDataSource

use of org.broadinstitute.hellbender.engine.ReadsDataSource in project gatk-protected by broadinstitute.

the class HaplotypeCallerIntegrationTest method testBamoutProducesReasonablySizedOutput.

/**
 * Sanity-checks the size of the {@code -bamout} output.
 *
 * Running with -bamout over the test interval should yield a bam whose read count is within
 * 10% of what GATK3.5 produces with -bamout over the same interval. The goal is only to catch
 * a likely problem with -bamout itself (eg., an empty or truncated bam), not to validate the
 * haplotypes, etc.
 */
@Test
public void testBamoutProducesReasonablySizedOutput() {
    Utils.resetRandomGenerator();

    final String testInterval = "20:10000000-10010000";
    // Number of reads GATK3.5 writes to its bamout for the same interval.
    final int gatk3BamoutNumReads = 5170;

    final File vcfFile = createTempFile("testBamoutProducesReasonablySizedOutput", ".vcf");
    final File bamoutFile = createTempFile("testBamoutProducesReasonablySizedOutput", ".bam");

    final String[] commandLine = {
            "-I", NA12878_20_21_WGS_bam,
            "-R", b37_reference_20_21,
            "-L", testInterval,
            "-O", vcfFile.getAbsolutePath(),
            "-bamout", bamoutFile.getAbsolutePath(),
            "-pairHMM", "AVX_LOGLESS_CACHING",
            "-stand_call_conf", "30.0"
    };
    runCommandLine(commandLine);

    try (final ReadsDataSource bamoutReads = new ReadsDataSource(bamoutFile.toPath())) {
        int observedNumReads = 0;
        for (final GATKRead ignored : bamoutReads) {
            observedNumReads++;
        }
        // Relative deviation from the GATK3.5 count, in either direction.
        final double relativeDifference = Math.abs(observedNumReads - gatk3BamoutNumReads) / (double) gatk3BamoutNumReads;
        Assert.assertTrue(relativeDifference < 0.10, "-bamout produced a bam with over 10% fewer/more reads than expected");
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadsDataSource(org.broadinstitute.hellbender.engine.ReadsDataSource) File(java.io.File) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Aggregations

ReadsDataSource (org.broadinstitute.hellbender.engine.ReadsDataSource)10 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)8 File (java.io.File)7 Test (org.testng.annotations.Test)7 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)4 IOException (java.io.IOException)3 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)3 UserException (org.broadinstitute.hellbender.exceptions.UserException)2 ArgumentsBuilder (org.broadinstitute.hellbender.utils.test.ArgumentsBuilder)2 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 SAMFileHeader (htsjdk.samtools.SAMFileHeader)1 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)1 MetricsFile (htsjdk.samtools.metrics.MetricsFile)1 FileNotFoundException (java.io.FileNotFoundException)1 FileReader (java.io.FileReader)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Map (java.util.Map)1 Collectors (java.util.stream.Collectors)1