Use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
From class AssemblyRegionUnitTest, method makeBadReadsTest:
// -----------------------------------------------------------------------------------------------
//
// Make sure bad inputs are properly detected
//
// -----------------------------------------------------------------------------------------------
@DataProvider(name = "BadReadsTest")
public Object[][] makeBadReadsTest() {
    final List<Object[]> tests = new ArrayList<>();
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(seq.getSequenceDictionary());
    // read2 starts before read1 on the same contig (out of coordinate order)
    tests.add(new Object[]{ header,
            ArtificialReadUtils.createArtificialRead(header, "read1", 0, 10, 10),
            ArtificialReadUtils.createArtificialRead(header, "read2", 0, 9, 10) });
    // read2 lies on a later contig than read1
    tests.add(new Object[]{ header,
            ArtificialReadUtils.createArtificialRead(header, "read1", 0, 10, 10),
            ArtificialReadUtils.createArtificialRead(header, "read2", 1, 9, 10) });
    // read2 lies on an earlier contig than read1
    tests.add(new Object[]{ header,
            ArtificialReadUtils.createArtificialRead(header, "read1", 1, 10, 10),
            ArtificialReadUtils.createArtificialRead(header, "read2", 0, 9, 10) });
    return tests.toArray(new Object[][]{});
}
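The assertion side of this provider is not shown in the snippet. As a sketch of one plausible consumer: TestNG feeds each Object[] row into the test parameters, and the point of the "bad" pairs is that adding them in order should be rejected. Note that the AssemblyRegion constructor signature and the IllegalArgumentException expectation below are assumptions, not taken from this page.

@Test(dataProvider = "BadReadsTest", expectedExceptions = IllegalArgumentException.class)
public void testBadReads(final SAMFileHeader header, final GATKRead read1, final GATKRead read2) {
    // Assumed constructor signature: (interval, extension, header).
    final AssemblyRegion region = new AssemblyRegion(new SimpleInterval(read1), 0, header);
    region.add(read1);
    // read2 either starts before read1 or sits on a different contig,
    // so this second add is expected to throw.
    region.add(read2);
}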
Use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
From class AssemblyRegionUnitTest, method makeAssemblyRegionReads:
@DataProvider(name = "AssemblyRegionReads")
public Object[][] makeAssemblyRegionReads() {
    final List<Object[]> tests = new ArrayList<>();
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(seq.getSequenceDictionary());
    for (final int start : Arrays.asList(1, 10, 100, contigLength - 10, contigLength - 1)) {
        for (final int readStartOffset : Arrays.asList(-100, -10, 0, 10, 100)) {
            for (final int readSize : Arrays.asList(10, 100, 1000)) {
                final SimpleInterval loc = IntervalUtils.trimIntervalToContig(contig, start, start + 10,
                        header.getSequence(contig).getSequenceLength());
                final int readStart = Math.max(start + readStartOffset, 1);
                final int readStop = Math.min(readStart + readSize, contigLength);
                final int readLength = readStop - readStart + 1;
                if (readLength > 0) {
                    final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "read", 0, readStart, readLength);
                    final SimpleInterval readLoc = new SimpleInterval(read);
                    if (readLoc.overlaps(loc)) {
                        tests.add(new Object[]{ loc, read });
                    }
                }
            }
        }
    }
    // HACK! Only the single case at index 2 is returned, rather than the full list.
    return tests.subList(2, 3).toArray(new Object[][]{});
}
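A side note on the interval arithmetic above: IntervalUtils.trimIntervalToContig is what keeps loc in bounds when start sits near contigLength, by clamping the requested coordinates to the contig's valid range. A minimal illustration, with concrete numbers invented for the example:

// A span requested past both ends of a 1,000,000 bp contig "1" is expected
// to come back clamped to [1, 1000000]; a request entirely outside the
// contig is expected to yield null.
final SimpleInterval trimmed = IntervalUtils.trimIntervalToContig("1", -5, 2_000_000, 1_000_000);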
Use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
From class ReadsSparkSinkUnitTest, method readsSinkADAMTest:
@Test(enabled = false, dataProvider = "loadReadsADAM", groups = "spark")
public void readsSinkADAMTest(String inputBam, String outputDirectoryName) throws IOException {
    // Since the test requires that we not create the actual output directory in advance,
    // we instead create its parent directory and mark it for deletion on exit. This protects
    // us from naming collisions across multiple instances of the test suite.
    final File outputParentDirectory = createTempDir(outputDirectoryName + "_parent");
    final File outputDirectory = new File(outputParentDirectory, outputDirectoryName);
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    ReadsSparkSource readSource = new ReadsSparkSource(ctx);
    // filter out unmapped reads (see comment below)
    JavaRDD<GATKRead> rddParallelReads = readSource.getParallelReads(inputBam, null)
            .filter(r -> !r.isUnmapped());
    SAMFileHeader header = readSource.getHeader(inputBam, null);
    ReadsSparkSink.writeReads(ctx, outputDirectory.getAbsolutePath(), null, rddParallelReads, header, ReadsWriteFormat.ADAM);
    JavaRDD<GATKRead> rddParallelReads2 = readSource.getADAMReads(outputDirectory.getAbsolutePath(), null, header);
    Assert.assertEquals(rddParallelReads.count(), rddParallelReads2.count());
    // Test the round trip: make mutable copies of both sides so they can be sorted
    List<GATKRead> samList = new ArrayList<>(rddParallelReads.collect());
    List<GATKRead> adamList = new ArrayList<>(rddParallelReads2.collect());
    Comparator<GATKRead> comparator = new ReadCoordinateComparator(header);
    samList.sort(comparator);
    adamList.sort(comparator);
    for (int i = 0; i < samList.size(); i++) {
        SAMRecord expected = samList.get(i).convertToSAMRecord(header);
        SAMRecord observed = adamList.get(i).convertToSAMRecord(header);
        // manually test equality of some fields, as there are issues with roundtrip BAM -> ADAM -> BAM
        // see https://github.com/bigdatagenomics/adam/issues/823
        Assert.assertEquals(observed.getReadName(), expected.getReadName(), "readname");
        Assert.assertEquals(observed.getAlignmentStart(), expected.getAlignmentStart(), "getAlignmentStart");
        Assert.assertEquals(observed.getAlignmentEnd(), expected.getAlignmentEnd(), "getAlignmentEnd");
        Assert.assertEquals(observed.getFlags(), expected.getFlags(), "getFlags");
        Assert.assertEquals(observed.getMappingQuality(), expected.getMappingQuality(), "getMappingQuality");
        Assert.assertEquals(observed.getMateAlignmentStart(), expected.getMateAlignmentStart(), "getMateAlignmentStart");
        Assert.assertEquals(observed.getCigar(), expected.getCigar(), "getCigar");
    }
}
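The field-by-field comparison is a workaround for the linked ADAM issue. If round-tripping BAM -> ADAM -> BAM were lossless, the loop body could in principle collapse to one whole-record check, for example via htsjdk's SAMRecord.getSAMString(). A sketch, not part of the original test:

Assert.assertEquals(observed.getSAMString(), expected.getSAMString(), "SAM record");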
Use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
From class ReadsSparkSinkUnitTest, method readsSinkShardedTest:
@Test(dataProvider = "loadReadsBAM", groups = "spark")
public void readsSinkShardedTest(String inputBam, String outputFileName, String referenceFile, String outputFileExtension) throws IOException {
    final File outputFile = createTempFile(outputFileName, outputFileExtension);
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    ReadsSparkSource readSource = new ReadsSparkSource(ctx);
    JavaRDD<GATKRead> rddParallelReads = readSource.getParallelReads(inputBam, referenceFile);
    // ensure that the output is in two shards
    rddParallelReads = rddParallelReads.repartition(2);
    SAMFileHeader header = readSource.getHeader(inputBam, referenceFile);
    ReadsSparkSink.writeReads(ctx, outputFile.getAbsolutePath(), referenceFile, rddParallelReads, header, ReadsWriteFormat.SHARDED);
    int shards = outputFile.listFiles((dir, name) -> !name.startsWith(".") && !name.startsWith("_")).length;
    Assert.assertEquals(shards, 2);
    // check that no local .crc files are created
    int crcs = outputFile.listFiles((dir, name) -> name.startsWith(".") && name.endsWith(".crc")).length;
    Assert.assertEquals(crcs, 0);
    JavaRDD<GATKRead> rddParallelReads2 = readSource.getParallelReads(outputFile.getAbsolutePath(), referenceFile);
    // reads are not globally sorted, so don't test that
    Assert.assertEquals(rddParallelReads.count(), rddParallelReads2.count());
}
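One caveat in the shard counting above: File.listFiles returns null when the path does not denote a directory, and the test dereferences the result directly. A defensive standalone variant of the same filter, with the helper name chosen here for illustration:

// Counts output shards the way the test does: skip hidden files (names
// starting with ".", such as local .crc checksums) and Hadoop markers
// (names starting with "_", such as "_SUCCESS"), while tolerating a path
// that is not a directory.
static int countShards(final java.io.File dir) {
    final java.io.File[] shards = dir.listFiles((d, name) -> !name.startsWith(".") && !name.startsWith("_"));
    return shards == null ? 0 : shards.length;
}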
Use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
From class ReadFilterPluginUnitTest, method testReadLengthFilter:
@Test
public void testReadLengthFilter() {
    final SAMFileHeader header = createHeaderWithReadGroups();
    final GATKRead read = simpleGoodRead(header);
    CommandLineParser clp = new CommandLineArgumentParser(new Object(),
            Collections.singletonList(new GATKReadFilterPluginDescriptor(null)));
    String[] args = { "--readFilter", ReadLengthReadFilter.class.getSimpleName(),
            "--minReadLength", "10", "--maxReadLength", "20" };
    clp.parseArguments(nullMessageStream, args);
    ReadFilter rf = instantiateFilter(clp, header);
    // length 5 is below the minimum of 10
    read.setBases(new byte[5]);
    Assert.assertFalse(rf.test(read));
    // length 25 is above the maximum of 20
    read.setBases(new byte[25]);
    Assert.assertFalse(rf.test(read));
    // length 15 falls within [10, 20]
    read.setBases(new byte[15]);
    Assert.assertTrue(rf.test(read));
}
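For comparison, the same filter can be configured without the command-line plugin machinery by setting its argument fields directly. The field names below are assumed to mirror the --minReadLength/--maxReadLength arguments (in GATK read filters these are typically public @Argument fields); verify against the actual class before relying on this sketch.

final ReadLengthReadFilter directFilter = new ReadLengthReadFilter();
directFilter.minReadLength = 10;  // assumed @Argument-backed public field
directFilter.maxReadLength = 20;  // assumed @Argument-backed public field
read.setBases(new byte[15]);
Assert.assertTrue(directFilter.test(read));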