Search in sources :

Example 26 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class SequenceDictionaryUtils method getDictionaryAsString.

/**
 * Builds a compact, single-line rendering of the given sequence dictionary.
 *
 * The returned String has the form:
 * [ contig1Name(length:contig1Length) contig2Name(length:contig2Length) ... ]
 * Every entry is followed by a single space, so a dictionary without sequences renders as "[ ]".
 *
 * @param dict the SAMSequenceDictionary to render; checked with Utils.nonNull before use
 * @return a String listing the name and length of every record returned by dict.getSequences(),
 *         in iteration order
 */
public static String getDictionaryAsString(final SAMSequenceDictionary dict) {
    Utils.nonNull(dict, "Sequence dictionary must be non-null");
    final StringBuilder rendered = new StringBuilder();
    rendered.append("[ ");
    for (final SAMSequenceRecord contig : dict.getSequences()) {
        // One "name(length:N) " token per contig; the trailing space separates entries
        rendered.append(contig.getSequenceName())
                .append("(length:")
                .append(contig.getSequenceLength())
                .append(") ");
    }
    return rendered.append("]").toString();
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord)

Example 27 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class SequenceDictionaryUtils method commonContigsAreAtSameIndices.

/**
 * Determines whether every contig shared by two sequence dictionaries sits at the same
 * sequence index in each of them.
 *
 * Each name in commonContigs is looked up in both dictionaries; callers are expected to pass
 * only names present in both (a name missing from either would yield a null record here).
 *
 * @param commonContigs Set of names of the contigs that occur in both dictionaries
 * @param dict1 first sequence dictionary
 * @param dict2 second sequence dictionary
 * @return true if all common contigs have equal sequence indices in dict1 and dict2 (including
 *         when commonContigs is empty), otherwise false
 */
private static boolean commonContigsAreAtSameIndices(final Set<String> commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2) {
    // allMatch stops at the first contig whose indices differ
    return commonContigs.stream().allMatch(contigName -> {
        final SAMSequenceRecord recordInFirst = dict1.getSequence(contigName);
        final SAMSequenceRecord recordInSecond = dict2.getSequence(contigName);
        return recordInFirst.getSequenceIndex() == recordInSecond.getSequenceIndex();
    });
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord)

Example 28 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class CachingIndexedFastaSequenceFileUnitTest method testIupacChanges.

/**
 * Compares the number of 'N' bases read from iupacFASTA.fasta by two differently configured
 * readers and asserts that the default reader yields exactly 4 more than the other.
 */
@Test
public void testIupacChanges() throws FileNotFoundException, InterruptedException {
    final File iupacFasta = new File(publicTestDir + "iupacFASTA.fasta");
    // NOTE(review): the trailing (false, true) flags presumably mean "don't preserve case, do preserve IUPAC codes" — confirm against the constructor
    final CachingIndexedFastaSequenceFile iupacPreserving = new CachingIndexedFastaSequenceFile(iupacFasta, CachingIndexedFastaSequenceFile.DEFAULT_CACHE_SIZE, false, true);
    final CachingIndexedFastaSequenceFile makeNs = new CachingIndexedFastaSequenceFile(iupacFasta);

    int nCountWhenPreserving = 0;
    int nCountWhenConverting = 0;
    for (final SAMSequenceRecord contig : iupacPreserving.getSequenceDictionary().getSequences()) {
        final String contigName = contig.getSequenceName();

        final String preservedBases = fetchBaseString(iupacPreserving, contigName, 0, 15000);
        nCountWhenPreserving += StringUtils.countMatches(preservedBases, "N");

        final String convertedBases = fetchBaseString(makeNs, contigName, 0, 15000);
        nCountWhenConverting += StringUtils.countMatches(convertedBases, "N");
    }

    Assert.assertEquals(nCountWhenConverting, nCountWhenPreserving + 4);
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 29 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class CachingIndexedFastaSequenceFileUnitTest method testFailOnBadBase.

/**
 * Reads every contig of problematicFASTA.fasta through a default CachingIndexedFastaSequenceFile;
 * the test passes only if a UserException is thrown along the way.
 */
@Test(expectedExceptions = { UserException.class })
public void testFailOnBadBase() throws FileNotFoundException, InterruptedException {
    final File problematicFasta = new File(publicTestDir + "problematicFASTA.fasta");
    final CachingIndexedFastaSequenceFile reader = new CachingIndexedFastaSequenceFile(problematicFasta);
    for (final SAMSequenceRecord record : reader.getSequenceDictionary().getSequences()) {
        final String contigName = record.getSequenceName();
        // NOTE(review): start/stop of -1 presumably request the whole contig — confirm in fetchBaseString
        fetchBaseString(reader, contigName, -1, -1);
    }
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 30 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class CachingIndexedFastaSequenceFileUnitTest method testMixedCasesInExample.

// Checks that the example FASTA really contains mixed-case sequence, comparing a plain
// IndexedFastaSequenceFile against two CachingIndexedFastaSequenceFile readers of the same file.
@Test
public void testMixedCasesInExample() throws FileNotFoundException, InterruptedException {
    final File fastaFile = new File(exampleFASTA);
    final IndexedFastaSequenceFile original = new IndexedFastaSequenceFile(fastaFile);
    // NOTE(review): the boolean argument presumably enables case preservation — confirm against the constructor
    final CachingIndexedFastaSequenceFile casePreserving = new CachingIndexedFastaSequenceFile(fastaFile, true);
    final CachingIndexedFastaSequenceFile allUpper = new CachingIndexedFastaSequenceFile(fastaFile);

    final int windowSize = 100;
    int mixedCaseCount = 0;
    for (final SAMSequenceRecord contig : original.getSequenceDictionary().getSequences()) {
        final String contigName = contig.getSequenceName();

        // Only the (-1, -1) query contributes to the mixed-case tally
        mixedCaseCount += testCases(original, casePreserving, allUpper, contigName, -1, -1);

        // Then walk the contig in consecutive 100-base windows that end strictly before the contig length
        for (int windowStart = 0; windowStart + windowSize < contig.getSequenceLength(); windowStart += windowSize) {
            testCases(original, casePreserving, allUpper, contigName, windowStart, windowStart + windowSize);
        }
    }

    Assert.assertTrue(mixedCaseCount > 0, "No mixed cases sequences found in file.  Unexpected test state");
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)72 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)35 Test (org.testng.annotations.Test)26 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)24 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)13 File (java.io.File)10 SAMFileHeader (htsjdk.samtools.SAMFileHeader)9 UserException (org.broadinstitute.hellbender.exceptions.UserException)8 DataProvider (org.testng.annotations.DataProvider)8 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)7 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)7 IOException (java.io.IOException)6 ArrayList (java.util.ArrayList)6 QueryInterval (htsjdk.samtools.QueryInterval)5 Allele (htsjdk.variant.variantcontext.Allele)4 VariantContext (htsjdk.variant.variantcontext.VariantContext)4 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)4 java.util (java.util)4 Collectors (java.util.stream.Collectors)4 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)4