use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gatk by broadinstitute.
the class GATKToolUnitTest method testBestSequenceDictionary_fromReadsAndReference.
@Test
public void testBestSequenceDictionary_fromReadsAndReference() throws Exception {
final GATKTool tool = new TestGATKToolWithReads();
final CommandLineParser clp = new CommandLineArgumentParser(tool);
final File bamFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam");
final String fastaFile = hg19MiniReference;
final String[] args = { "-I", bamFile.getCanonicalPath(), "-R", fastaFile };
clp.parseArguments(System.out, args);
tool.onStartup();
//read the dict back in and compare to reference dict
final SAMSequenceDictionary toolDict = tool.getBestAvailableSequenceDictionary();
final SAMSequenceDictionary fastaDict = new IndexedFastaSequenceFile(new File(fastaFile)).getSequenceDictionary();
toolDict.assertSameDictionary(fastaDict);
fastaDict.assertSameDictionary(toolDict);
Assert.assertEquals(toolDict, fastaDict);
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gatk by broadinstitute.
the class CachingIndexedFastaSequenceFileUnitTest method testSequential.
private void testSequential(final CachingIndexedFastaSequenceFile caching, final File fasta, final int querySize) throws FileNotFoundException {
Assert.assertTrue(caching.isPreservingCase(), "testSequential only works for case preserving CachingIndexedFastaSequenceFile readers");
final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
for (int i = 0; i < contig.getSequenceLength(); i += STEP_SIZE) {
int start = i;
int stop = start + querySize;
if (stop <= contig.getSequenceLength()) {
ReferenceSequence cachedVal = caching.getSubsequenceAt(contig.getSequenceName(), start, stop);
ReferenceSequence uncachedVal = uncached.getSubsequenceAt(contig.getSequenceName(), start, stop);
Assert.assertEquals(cachedVal.getName(), uncachedVal.getName());
Assert.assertEquals(cachedVal.getContigIndex(), uncachedVal.getContigIndex());
Assert.assertEquals(cachedVal.getBases(), uncachedVal.getBases());
}
}
// asserts for efficiency. We are going to make contig.length / STEP_SIZE queries
// at each of range: start -> start + querySize against a cache with size of X.
// we expect to hit the cache each time range falls within X. We expect a hit
// on the cache if range is within X. Which should happen at least (X - query_size * 2) / STEP_SIZE
// times.
final int minExpectedHits = (int) Math.floor((Math.min(caching.getCacheSize(), contig.getSequenceLength()) - querySize * 2.0) / STEP_SIZE);
caching.printEfficiency(Level.WARN);
Assert.assertTrue(caching.getCacheHits() >= minExpectedHits, "Expected at least " + minExpectedHits + " cache hits but only got " + caching.getCacheHits());
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gatk by broadinstitute.
the class IntervalUtilsUnitTest method invalidIntervalDataProvider.
@DataProvider(name = "invalidIntervalTestData")
public Object[][] invalidIntervalDataProvider() throws Exception {
File fastaFile = new File(publicTestDir + "exampleFASTA.fasta");
GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));
return new Object[][] { new Object[] { genomeLocParser, "1", 10000000, 20000000 }, new Object[] { genomeLocParser, "2", 1, 2 }, new Object[] { genomeLocParser, "1", -1, 50 } };
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gatk by broadinstitute.
the class IntervalUtilsUnitTest method negativeOneLengthIntervalDataProvider.
// TODO - remove once a corrected version of the exome interval list is released.
@DataProvider(name = "negativeOneLengthIntervalTestData")
public Object[][] negativeOneLengthIntervalDataProvider() throws Exception {
File fastaFile = new File(publicTestDir + "exampleFASTA.fasta");
GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));
return new Object[][] { new Object[] { genomeLocParser, "1", 2, 1 }, new Object[] { genomeLocParser, "1", 500, 499 }, new Object[] { genomeLocParser, "1", 1000, 999 } };
}
Aggregations