Search in sources :

Example 66 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class CachingIndexedFastaSequenceFileUnitTest method testSequential.

private void testSequential(final CachingIndexedFastaSequenceFile caching, final File fasta, final int querySize) throws FileNotFoundException {
    Assert.assertTrue(caching.isPreservingCase(), "testSequential only works for case preserving CachingIndexedFastaSequenceFile readers");
    final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
    SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
    for (int i = 0; i < contig.getSequenceLength(); i += STEP_SIZE) {
        int start = i;
        int stop = start + querySize;
        if (stop <= contig.getSequenceLength()) {
            ReferenceSequence cachedVal = caching.getSubsequenceAt(contig.getSequenceName(), start, stop);
            ReferenceSequence uncachedVal = uncached.getSubsequenceAt(contig.getSequenceName(), start, stop);
            Assert.assertEquals(cachedVal.getName(), uncachedVal.getName());
            Assert.assertEquals(cachedVal.getContigIndex(), uncachedVal.getContigIndex());
            Assert.assertEquals(cachedVal.getBases(), uncachedVal.getBases());
        }
    }
    // asserts for efficiency.  We are going to make contig.length / STEP_SIZE queries
    // at each of range: start -> start + querySize against a cache with size of X.
    // we expect to hit the cache each time range falls within X.  We expect a hit
    // on the cache if range is within X.  Which should happen at least (X - query_size * 2) / STEP_SIZE
    // times.
    final int minExpectedHits = (int) Math.floor((Math.min(caching.getCacheSize(), contig.getSequenceLength()) - querySize * 2.0) / STEP_SIZE);
    caching.printEfficiency(Level.WARN);
    Assert.assertTrue(caching.getCacheHits() >= minExpectedHits, "Expected at least " + minExpectedHits + " cache hits but only got " + caching.getCacheHits());
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 67 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class CachingIndexedFastaSequenceFileUnitTest method testCachingIndexedFastaReaderSequential1.

@Test(dataProvider = "fastas", enabled = !DEBUG)
public void testCachingIndexedFastaReaderSequential1(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
    final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true, false);
    SAMSequenceRecord contig = caching.getSequenceDictionary().getSequence(0);
    logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d", contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
    testSequential(caching, fasta, querySize);
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 68 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class ArtificialReadQueryIterator method initialize.

/**
     * initialize the query iterator
     *
     * @param contig the contig
     * @param start  the start position
     * @param stop   the stop postition
     */
private void initialize(String contig, int start, int stop) {
    // throw away data from the previous invocation, if one exists.
    ensureUntouched();
    reset();
    finalPos = stop;
    startPos = start;
    if (finalPos < 0) {
        finalPos = Integer.MAX_VALUE;
    }
    // sanity check that we have the contig
    contigIndex = -1;
    List<SAMSequenceRecord> list = header.getSequenceDictionary().getSequences();
    for (SAMSequenceRecord rec : list) {
        if (rec.getSequenceName().equals(contig)) {
            contigIndex = rec.getSequenceIndex();
        }
    }
    if (contigIndex < 0) {
        throw new IllegalArgumentException("ArtificialContig" + contig + " doesn't exist");
    }
    while (super.hasNext() && ReadUtils.getReferenceIndex(this.peek(), header) < contigIndex) {
        super.next();
    }
    if (!super.hasNext()) {
        throw new GATKException("Unable to find the target chromosome");
    }
    while (super.hasNext() && this.peek().getStart() < start) {
        super.next();
    }
    // sanity check that we have an actual matching read next
    GATKRead rec = this.peek();
    if (!matches(rec)) {
        throw new GATKException("The next read doesn't match");
    }
    // set the seeked variable to true
    seeked = true;
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Example 69 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class AlignedAssemblyOrExcuse method writeIntervalFile.

/**
     * write a file describing each interval
     */
public static void writeIntervalFile(final String intervalFile, final SAMFileHeader header, final List<SVInterval> intervals, final List<AlignedAssemblyOrExcuse> intervalDispositions) {
    final Map<Integer, AlignedAssemblyOrExcuse> resultsMap = new HashMap<>();
    intervalDispositions.forEach(alignedAssemblyOrExcuse -> resultsMap.put(alignedAssemblyOrExcuse.getAssemblyId(), alignedAssemblyOrExcuse));
    try (final OutputStreamWriter writer = new OutputStreamWriter(new BufferedOutputStream(BucketUtils.createFile(intervalFile)))) {
        final List<SAMSequenceRecord> contigs = header.getSequenceDictionary().getSequences();
        final int nIntervals = intervals.size();
        for (int intervalId = 0; intervalId != nIntervals; ++intervalId) {
            final SVInterval interval = intervals.get(intervalId);
            final String seqName = contigs.get(interval.getContig()).getSequenceName();
            final AlignedAssemblyOrExcuse alignedAssemblyOrExcuse = resultsMap.get(intervalId);
            final String disposition;
            if (alignedAssemblyOrExcuse == null) {
                disposition = "unknown";
            } else if (alignedAssemblyOrExcuse.getErrorMessage() != null) {
                disposition = alignedAssemblyOrExcuse.getErrorMessage();
            } else {
                disposition = "produced " + alignedAssemblyOrExcuse.getAssembly().getNContigs() + " contigs";
            }
            writer.write(intervalId + "\t" + seqName + ":" + interval.getStart() + "-" + interval.getEnd() + "\t" + disposition + "\n");
        }
    } catch (final IOException ioe) {
        throw new GATKException("Can't write intervals file " + intervalFile, ioe);
    }
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) OutputStreamWriter(java.io.OutputStreamWriter) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) BufferedOutputStream(java.io.BufferedOutputStream)

Example 70 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class LocalReadShardUnitTest method divideIntervalIntoShardsInvalidTestData.

@DataProvider(name = "DivideIntervalIntoShardsInvalidTestData")
public Object[][] divideIntervalIntoShardsInvalidTestData() {
    final ReadsDataSource readsSource = new ReadsDataSource(IOUtils.getPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam"));
    final SAMSequenceDictionary dictionary = new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord("1", 16000)));
    return new Object[][] { // contig not in dictionary
    { new SimpleInterval("2", 1, 10), 1, 1, 0, readsSource, dictionary }, // interval not within contig bounds
    { new SimpleInterval("1", 1, 17000), 1, 1, 0, readsSource, dictionary }, // shardSize == 0
    { new SimpleInterval("1", 1, 10), 0, 1, 0, readsSource, dictionary }, // shardSize < 0
    { new SimpleInterval("1", 1, 10), -1, 1, 0, readsSource, dictionary }, // shardStep == 0
    { new SimpleInterval("1", 1, 10), 1, 0, 0, readsSource, dictionary }, // shardStep < 0
    { new SimpleInterval("1", 1, 10), 1, -1, 0, readsSource, dictionary }, // shardPadding < 0
    { new SimpleInterval("1", 1, 10), 1, 1, -1, readsSource, dictionary }, // null interval
    { null, 1, 1, 0, readsSource, dictionary }, // null data source
    { new SimpleInterval("1", 1, 10), 1, 1, 0, null, dictionary }, // null dictionary
    { new SimpleInterval("1", 1, 10), 1, 1, 0, readsSource, null } };
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DataProvider(org.testng.annotations.DataProvider)

Aggregations

SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)72 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)35 Test (org.testng.annotations.Test)26 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)24 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)13 File (java.io.File)10 SAMFileHeader (htsjdk.samtools.SAMFileHeader)9 UserException (org.broadinstitute.hellbender.exceptions.UserException)8 DataProvider (org.testng.annotations.DataProvider)8 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)7 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)7 IOException (java.io.IOException)6 ArrayList (java.util.ArrayList)6 QueryInterval (htsjdk.samtools.QueryInterval)5 Allele (htsjdk.variant.variantcontext.Allele)4 VariantContext (htsjdk.variant.variantcontext.VariantContext)4 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)4 java.util (java.util)4 Collectors (java.util.stream.Collectors)4 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)4