Search in sources:

Example 36 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class AssemblyRegionUnitTest method makeBadReadsTest.

// -----------------------------------------------------------------------------------------------
//
// Make sure bad inputs are properly detected
//
// -----------------------------------------------------------------------------------------------
/**
 * Data provider "BadReadsTest": each row is {header, read1, read2}, where both reads are
 * artificial 10-base reads built against the same header.
 *
 * In every row read2 starts at position 9 while read1 starts at position 10; the rows differ
 * only in the third argument passed to createArtificialRead for each read (0/0, 0/1, 1/0).
 * NOTE(review): that third argument is presumably the reference (contig) index -- confirm
 * against ArtificialReadUtils.
 *
 * @return three rows of {SAMFileHeader, read1, read2}
 */
@DataProvider(name = "BadReadsTest")
public Object[][] makeBadReadsTest() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(seq.getSequenceDictionary());
    return new Object[][] {
        // both reads use index 0; read2 starts one base before read1
        { header, ArtificialReadUtils.createArtificialRead(header, "read1", 0, 10, 10), ArtificialReadUtils.createArtificialRead(header, "read2", 0, 9, 10) },
        // read1 uses index 0, read2 uses index 1
        { header, ArtificialReadUtils.createArtificialRead(header, "read1", 0, 10, 10), ArtificialReadUtils.createArtificialRead(header, "read2", 1, 9, 10) },
        // read1 uses index 1, read2 uses index 0
        { header, ArtificialReadUtils.createArtificialRead(header, "read1", 1, 10, 10), ArtificialReadUtils.createArtificialRead(header, "read2", 0, 9, 10) }
    };
}
Also used : SAMFileHeader(htsjdk.samtools.SAMFileHeader) DataProvider(org.testng.annotations.DataProvider)

Example 37 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class AssemblyRegionUnitTest method makeAssemblyRegionReads.

/**
 * Data provider "AssemblyRegionReads": builds {interval, read} pairs in which the artificial
 * read overlaps the interval.
 *
 * Candidate intervals span [start, start + 10], trimmed to the contig; candidate reads are
 * derived from a start offset and a size, clamped to [1, contigLength]. Only pairs whose read
 * overlaps the interval are collected.
 *
 * NOTE: only the third collected case is actually returned (see the HACK at the end).
 *
 * @return a single-row array holding the third generated {SimpleInterval, GATKRead} pair
 */
@DataProvider(name = "AssemblyRegionReads")
public Object[][] makeAssemblyRegionReads() {
    final List<Object[]> cases = new ArrayList<>();
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(seq.getSequenceDictionary());
    final int[] regionStarts = { 1, 10, 100, contigLength - 10, contigLength - 1 };
    final int[] startOffsets = { -100, -10, 0, 10, 100 };
    final int[] readSizes = { 10, 100, 1000 };

    for (final int regionStart : regionStarts) {
        for (final int startOffset : startOffsets) {
            for (final int size : readSizes) {
                final SimpleInterval region = IntervalUtils.trimIntervalToContig(contig, regionStart, regionStart + 10, header.getSequence(contig).getSequenceLength());
                // clamp the read to the contig: never before position 1, never past contigLength
                final int firstBase = Math.max(regionStart + startOffset, 1);
                final int lastBase = Math.min(firstBase + size, contigLength);
                final int length = lastBase - firstBase + 1;
                if (length <= 0) {
                    continue;
                }
                final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "read", 0, firstBase, length);
                if (!new SimpleInterval(read).overlaps(region)) {
                    continue;
                }
                cases.add(new Object[] { region, read });
            }
        }
    }
    // HACK! Only the case at index 2 is handed to the test; all other generated cases are dropped.
    return cases.subList(2, 3).toArray(new Object[][] {});
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMFileHeader(htsjdk.samtools.SAMFileHeader) DataProvider(org.testng.annotations.DataProvider)

Example 38 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadsSparkSinkUnitTest method readsSinkADAMTest.

/**
 * Round-trip test (currently disabled): loads the mapped reads of a BAM, writes them out in
 * ADAM format, reads them back, and compares a subset of fields read-by-read after sorting
 * both sides with the same coordinate comparator.
 *
 * @param inputBam            path of the BAM to load
 * @param outputDirectoryName name of the (not yet existing) ADAM output directory
 * @throws IOException declared by the test signature
 */
@Test(enabled = false, dataProvider = "loadReadsADAM", groups = "spark")
public void readsSinkADAMTest(String inputBam, String outputDirectoryName) throws IOException {
    // The output directory itself must not exist before the write, so a temporary parent
    // directory is created instead (and marked for deletion on exit); the output lives inside
    // it. This also avoids name collisions between concurrent runs of the test suite.
    final File parentDir = createTempDir(outputDirectoryName + "_parent");
    final String adamOutputPath = new File(parentDir, outputDirectoryName).getAbsolutePath();

    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final ReadsSparkSource readSource = new ReadsSparkSource(ctx);
    // filter out unmapped reads (see the round-trip comment in the comparison loop below)
    final JavaRDD<GATKRead> mappedReads = readSource.getParallelReads(inputBam, null).filter(read -> !read.isUnmapped());
    final SAMFileHeader header = readSource.getHeader(inputBam, null);

    ReadsSparkSink.writeReads(ctx, adamOutputPath, null, mappedReads, header, ReadsWriteFormat.ADAM);
    final JavaRDD<GATKRead> adamReads = readSource.getADAMReads(adamOutputPath, null, header);
    Assert.assertEquals(mappedReads.count(), adamReads.count());

    // Test the round trip: collect() results are copied into fresh lists so they can be sorted.
    final List<GATKRead> expectedReads = new ArrayList<>(mappedReads.collect());
    final List<GATKRead> observedReads = new ArrayList<>(adamReads.collect());
    final Comparator<GATKRead> byCoordinate = new ReadCoordinateComparator(header);
    expectedReads.sort(byCoordinate);
    observedReads.sort(byCoordinate);

    int index = 0;
    for (final GATKRead expectedRead : expectedReads) {
        final SAMRecord expected = expectedRead.convertToSAMRecord(header);
        final SAMRecord observed = observedReads.get(index).convertToSAMRecord(header);
        index++;
        // Only selected fields are compared, because a full BAM -> ADAM -> BAM round trip is
        // not lossless; see https://github.com/bigdatagenomics/adam/issues/823
        Assert.assertEquals(observed.getReadName(), expected.getReadName(), "readname");
        Assert.assertEquals(observed.getAlignmentStart(), expected.getAlignmentStart(), "getAlignmentStart");
        Assert.assertEquals(observed.getAlignmentEnd(), expected.getAlignmentEnd(), "getAlignmentEnd");
        Assert.assertEquals(observed.getFlags(), expected.getFlags(), "getFlags");
        Assert.assertEquals(observed.getMappingQuality(), expected.getMappingQuality(), "getMappingQuality");
        Assert.assertEquals(observed.getMateAlignmentStart(), expected.getMateAlignmentStart(), "getMateAlignmentStart");
        Assert.assertEquals(observed.getCigar(), expected.getCigar(), "getCigar");
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) ReadCoordinateComparator(org.broadinstitute.hellbender.utils.read.ReadCoordinateComparator) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 39 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadsSparkSinkUnitTest method readsSinkShardedTest.

/**
 * Writes reads in SHARDED format from an RDD with two partitions and checks that exactly two
 * shard files are produced, that no local ".crc" files are left behind, and that reading the
 * output back yields the same number of reads.
 *
 * @param inputBam            path of the reads file to load
 * @param outputFileName      name prefix for the temporary output
 * @param referenceFile       reference passed through to the reads source and sink
 * @param outputFileExtension extension for the temporary output
 * @throws IOException declared by the test signature
 */
@Test(dataProvider = "loadReadsBAM", groups = "spark")
public void readsSinkShardedTest(String inputBam, String outputFileName, String referenceFile, String outputFileExtension) throws IOException {
    final File shardedOutput = createTempFile(outputFileName, outputFileExtension);
    final String shardedOutputPath = shardedOutput.getAbsolutePath();

    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final ReadsSparkSource readSource = new ReadsSparkSource(ctx);
    // repartition to two so that the output ends up in two shards
    final JavaRDD<GATKRead> inputReads = readSource.getParallelReads(inputBam, referenceFile).repartition(2);
    final SAMFileHeader header = readSource.getHeader(inputBam, referenceFile);
    ReadsSparkSink.writeReads(ctx, shardedOutputPath, referenceFile, inputReads, header, ReadsWriteFormat.SHARDED);

    // shard files are the entries whose names start with neither "." nor "_"
    final int shardCount = shardedOutput.listFiles((dir, name) -> !(name.startsWith(".") || name.startsWith("_"))).length;
    Assert.assertEquals(shardCount, 2);

    // check that no local .crc files are created
    final int crcCount = shardedOutput.listFiles((dir, name) -> name.startsWith(".") && name.endsWith(".crc")).length;
    Assert.assertEquals(crcCount, 0);

    // reads are not globally sorted across shards, so only the counts are compared
    final JavaRDD<GATKRead> rereadReads = readSource.getParallelReads(shardedOutputPath, referenceFile);
    Assert.assertEquals(inputReads.count(), rereadReads.count());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Arrays(java.util.Arrays) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) FileSystem(org.apache.hadoop.fs.FileSystem) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Test(org.testng.annotations.Test) FileStatus(org.apache.hadoop.fs.FileStatus) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadsWriteFormat(org.broadinstitute.hellbender.utils.read.ReadsWriteFormat) SAMFileHeader(htsjdk.samtools.SAMFileHeader) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) ArrayList(java.util.ArrayList) BucketUtils(org.broadinstitute.hellbender.utils.gcs.BucketUtils) Assert(org.testng.Assert) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) JavaRDD(org.apache.spark.api.java.JavaRDD) AfterClass(org.testng.annotations.AfterClass) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) Files(java.nio.file.Files) BeforeClass(org.testng.annotations.BeforeClass) ReadCoordinateComparator(org.broadinstitute.hellbender.utils.read.ReadCoordinateComparator) IOException(java.io.IOException) SplittingBAMIndexer(org.seqdoop.hadoop_bam.SplittingBAMIndexer) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) List(java.util.List) SAMRecordCoordinateComparator(htsjdk.samtools.SAMRecordCoordinateComparator) SparkContextFactory(org.broadinstitute.hellbender.engine.spark.SparkContextFactory) Comparator(java.util.Comparator) MiniClusterUtils(org.broadinstitute.hellbender.utils.test.MiniClusterUtils) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 40 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadFilterPluginUnitTest method testReadLengthFilter.

/**
 * Configures a ReadLengthReadFilter through the command-line parser with
 * --minReadLength 10 and --maxReadLength 20, then checks that reads of 5 and 25 bases are
 * rejected and a read of 15 bases is accepted.
 */
@Test
public void testReadLengthFilter() {
    final SAMFileHeader header = createHeaderWithReadGroups();
    final GATKRead read = simpleGoodRead(header);

    final CommandLineParser parser = new CommandLineArgumentParser(new Object(), Collections.singletonList(new GATKReadFilterPluginDescriptor(null)));
    parser.parseArguments(nullMessageStream, new String[] { "--readFilter", ReadLengthReadFilter.class.getSimpleName(), "--minReadLength", "10", "--maxReadLength", "20" });
    final ReadFilter lengthFilter = instantiateFilter(parser, header);

    // shorter than the configured minimum of 10
    read.setBases(new byte[5]);
    Assert.assertFalse(lengthFilter.test(read));

    // longer than the configured maximum of 20
    read.setBases(new byte[25]);
    Assert.assertFalse(lengthFilter.test(read));

    // between the configured minimum and maximum
    read.setBases(new byte[15]);
    Assert.assertTrue(lengthFilter.test(read));
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) GATKReadFilterPluginDescriptor(org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKReadFilterPluginDescriptor) CommandLineArgumentParser(org.broadinstitute.barclay.argparser.CommandLineArgumentParser) CommandLineParser(org.broadinstitute.barclay.argparser.CommandLineParser) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Test(org.testng.annotations.Test)

Aggregations

SAMFileHeader (htsjdk.samtools.SAMFileHeader) 148, Test (org.testng.annotations.Test) 89, GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead) 85, BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest) 71, File (java.io.File) 23, SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord) 22, SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval) 17, DataProvider (org.testng.annotations.DataProvider) 17, java.util (java.util) 15, UserException (org.broadinstitute.hellbender.exceptions.UserException) 15, ArrayList (java.util.ArrayList) 14, List (java.util.List) 12, Collectors (java.util.stream.Collectors) 12, JavaSparkContext (org.apache.spark.api.java.JavaSparkContext) 12, SAMRecord (htsjdk.samtools.SAMRecord) 11, Locatable (htsjdk.samtools.util.Locatable) 11, BeforeClass (org.testng.annotations.BeforeClass) 11, SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary) 10, SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord) 10, ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup) 10