use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class CachingIndexedFastaSequenceFileUnitTest method testSequential.
private void testSequential(final CachingIndexedFastaSequenceFile caching, final File fasta, final int querySize) throws FileNotFoundException {
Assert.assertTrue(caching.isPreservingCase(), "testSequential only works for case preserving CachingIndexedFastaSequenceFile readers");
final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
for (int i = 0; i < contig.getSequenceLength(); i += STEP_SIZE) {
int start = i;
int stop = start + querySize;
if (stop <= contig.getSequenceLength()) {
ReferenceSequence cachedVal = caching.getSubsequenceAt(contig.getSequenceName(), start, stop);
ReferenceSequence uncachedVal = uncached.getSubsequenceAt(contig.getSequenceName(), start, stop);
Assert.assertEquals(cachedVal.getName(), uncachedVal.getName());
Assert.assertEquals(cachedVal.getContigIndex(), uncachedVal.getContigIndex());
Assert.assertEquals(cachedVal.getBases(), uncachedVal.getBases());
}
}
// asserts for efficiency. We are going to make contig.length / STEP_SIZE queries
// at each of range: start -> start + querySize against a cache with size of X.
// we expect to hit the cache each time range falls within X. We expect a hit
// on the cache if range is within X. Which should happen at least (X - query_size * 2) / STEP_SIZE
// times.
final int minExpectedHits = (int) Math.floor((Math.min(caching.getCacheSize(), contig.getSequenceLength()) - querySize * 2.0) / STEP_SIZE);
caching.printEfficiency(Level.WARN);
Assert.assertTrue(caching.getCacheHits() >= minExpectedHits, "Expected at least " + minExpectedHits + " cache hits but only got " + caching.getCacheHits());
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class CachingIndexedFastaSequenceFileUnitTest method testCachingIndexedFastaReaderSequential1.
@Test(dataProvider = "fastas", enabled = !DEBUG)
public void testCachingIndexedFastaReaderSequential1(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true, false);
SAMSequenceRecord contig = caching.getSequenceDictionary().getSequence(0);
logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d", contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
testSequential(caching, fasta, querySize);
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class ArtificialReadQueryIterator method initialize.
/**
* initialize the query iterator
*
* @param contig the contig
* @param start the start position
* @param stop the stop postition
*/
private void initialize(String contig, int start, int stop) {
// throw away data from the previous invocation, if one exists.
ensureUntouched();
reset();
finalPos = stop;
startPos = start;
if (finalPos < 0) {
finalPos = Integer.MAX_VALUE;
}
// sanity check that we have the contig
contigIndex = -1;
List<SAMSequenceRecord> list = header.getSequenceDictionary().getSequences();
for (SAMSequenceRecord rec : list) {
if (rec.getSequenceName().equals(contig)) {
contigIndex = rec.getSequenceIndex();
}
}
if (contigIndex < 0) {
throw new IllegalArgumentException("ArtificialContig" + contig + " doesn't exist");
}
while (super.hasNext() && ReadUtils.getReferenceIndex(this.peek(), header) < contigIndex) {
super.next();
}
if (!super.hasNext()) {
throw new GATKException("Unable to find the target chromosome");
}
while (super.hasNext() && this.peek().getStart() < start) {
super.next();
}
// sanity check that we have an actual matching read next
GATKRead rec = this.peek();
if (!matches(rec)) {
throw new GATKException("The next read doesn't match");
}
// set the seeked variable to true
seeked = true;
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class AlignedAssemblyOrExcuse method writeIntervalFile.
/**
* write a file describing each interval
*/
public static void writeIntervalFile(final String intervalFile, final SAMFileHeader header, final List<SVInterval> intervals, final List<AlignedAssemblyOrExcuse> intervalDispositions) {
final Map<Integer, AlignedAssemblyOrExcuse> resultsMap = new HashMap<>();
intervalDispositions.forEach(alignedAssemblyOrExcuse -> resultsMap.put(alignedAssemblyOrExcuse.getAssemblyId(), alignedAssemblyOrExcuse));
try (final OutputStreamWriter writer = new OutputStreamWriter(new BufferedOutputStream(BucketUtils.createFile(intervalFile)))) {
final List<SAMSequenceRecord> contigs = header.getSequenceDictionary().getSequences();
final int nIntervals = intervals.size();
for (int intervalId = 0; intervalId != nIntervals; ++intervalId) {
final SVInterval interval = intervals.get(intervalId);
final String seqName = contigs.get(interval.getContig()).getSequenceName();
final AlignedAssemblyOrExcuse alignedAssemblyOrExcuse = resultsMap.get(intervalId);
final String disposition;
if (alignedAssemblyOrExcuse == null) {
disposition = "unknown";
} else if (alignedAssemblyOrExcuse.getErrorMessage() != null) {
disposition = alignedAssemblyOrExcuse.getErrorMessage();
} else {
disposition = "produced " + alignedAssemblyOrExcuse.getAssembly().getNContigs() + " contigs";
}
writer.write(intervalId + "\t" + seqName + ":" + interval.getStart() + "-" + interval.getEnd() + "\t" + disposition + "\n");
}
} catch (final IOException ioe) {
throw new GATKException("Can't write intervals file " + intervalFile, ioe);
}
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class LocalReadShardUnitTest method divideIntervalIntoShardsInvalidTestData.
@DataProvider(name = "DivideIntervalIntoShardsInvalidTestData")
public Object[][] divideIntervalIntoShardsInvalidTestData() {
final ReadsDataSource readsSource = new ReadsDataSource(IOUtils.getPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam"));
final SAMSequenceDictionary dictionary = new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord("1", 16000)));
return new Object[][] { // contig not in dictionary
{ new SimpleInterval("2", 1, 10), 1, 1, 0, readsSource, dictionary }, // interval not within contig bounds
{ new SimpleInterval("1", 1, 17000), 1, 1, 0, readsSource, dictionary }, // shardSize == 0
{ new SimpleInterval("1", 1, 10), 0, 1, 0, readsSource, dictionary }, // shardSize < 0
{ new SimpleInterval("1", 1, 10), -1, 1, 0, readsSource, dictionary }, // shardStep == 0
{ new SimpleInterval("1", 1, 10), 1, 0, 0, readsSource, dictionary }, // shardStep < 0
{ new SimpleInterval("1", 1, 10), 1, -1, 0, readsSource, dictionary }, // shardPadding < 0
{ new SimpleInterval("1", 1, 10), 1, 1, -1, readsSource, dictionary }, // null interval
{ null, 1, 1, 0, readsSource, dictionary }, // null data source
{ new SimpleInterval("1", 1, 10), 1, 1, 0, null, dictionary }, // null dictionary
{ new SimpleInterval("1", 1, 10), 1, 1, 0, readsSource, null } };
}
Aggregations