Search in sources :

Example 66 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class SVVCFWriterUnitTest method testSortVariantsByCoordinate.

/**
 * Verifies the ordering produced by {@code SVVCFWriter.sortVariantsByCoordinate}.
 *
 * Four variants on contig "21" are handed to the sorter out of order:
 * one starting 50 bases before {@code pos}, two inversions sharing the same
 * start/stop that differ only in their inserted-sequence attribute
 * ("AAA" vs "AAC"), and one starting 20 bases after {@code pos}.
 * The test expects: upstream, inversion with "AAA", inversion with "AAC", downstream.
 */
@Test
public void testSortVariantsByCoordinate() {
    // Inserted sequences attached to the variants; the two inversions differ only by these.
    final String insOne = "AAA";
    final String insTwo = "AAC";

    final String contig = "21";
    final int pos = 100001;
    final int end = 100501;

    final VariantContext inversionOne = new VariantContextBuilder()
            .chr(contig).start(pos).stop(end)
            .alleles("G", SVConstants.DiscoveryStepConstants.VCF_ALT_ALLELE_STRING_INV)
            .attribute(GATKSVVCFHeaderLines.INSERTED_SEQUENCE, insOne)
            .make();
    final VariantContext inversionTwo = new VariantContextBuilder()
            .chr(contig).start(pos).stop(end)
            .alleles("G", SVConstants.DiscoveryStepConstants.VCF_ALT_ALLELE_STRING_INV)
            .attribute(GATKSVVCFHeaderLines.INSERTED_SEQUENCE, insTwo)
            .make();
    final VariantContext upstreamVariant = new VariantContextBuilder()
            .chr(contig).start(pos - 50).stop(end)
            .alleles("T", SVConstants.DiscoveryStepConstants.VCF_ALT_ALLELE_STRING_DUP)
            .attribute(GATKSVVCFHeaderLines.INSERTED_SEQUENCE, insOne)
            .make();
    final VariantContext downstreamVariant = new VariantContextBuilder()
            .chr(contig).start(pos + 20).stop(end + 20)
            .alleles("C", SVConstants.DiscoveryStepConstants.VCF_ALT_ALLELE_STRING_INS)
            .attribute(GATKSVVCFHeaderLines.INSERTED_SEQUENCE, insOne)
            .make();

    // The sorter needs a sequence dictionary; load the one that accompanies the test reference.
    final File referenceDictionaryFile = new File(ReferenceUtils.getFastaDictionaryFileName(b37_reference_20_21));
    final SAMSequenceDictionary dictionary = ReferenceUtils.loadFastaDictionary(referenceDictionaryFile);

    // Input order is deliberately scrambled relative to the expected output.
    final List<VariantContext> sortedVariants = SVVCFWriter.sortVariantsByCoordinate(
            Arrays.asList(downstreamVariant, inversionTwo, inversionOne, upstreamVariant), dictionary);

    Assert.assertEquals(sortedVariants.size(), 4);
    Assert.assertEquals(sortedVariants.get(0), upstreamVariant);
    Assert.assertEquals(sortedVariants.get(1), inversionOne);
    Assert.assertEquals(sortedVariants.get(2), inversionTwo);
    Assert.assertEquals(sortedVariants.get(3), downstreamVariant);
}
Also used : VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) File(java.io.File) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 67 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class SequenceDictionaryUtilsUnitTest method testSequenceDictionaryComparison.

/**
 * Builds two sequence dictionaries from the supplied contig lists, compares them with
 * {@code SequenceDictionaryUtils.compareDictionaries}, and asserts that the reported
 * compatibility is the one supplied by the data provider.
 *
 * {@code expectedExceptionUponValidation} and {@code requireSuperset} are part of the
 * data-provider row but are not read by this test.
 */
@Test(dataProvider = "SequenceDictionaryDataProvider")
public void testSequenceDictionaryComparison(final List<SAMSequenceRecord> firstDictionaryContigs, final List<SAMSequenceRecord> secondDictionaryContigs, final SequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility, final Class<? extends UserException> expectedExceptionUponValidation, final boolean requireSuperset, final boolean checkContigOrdering) {
    final SAMSequenceDictionary first = createSequenceDictionary(firstDictionaryContigs);
    final SAMSequenceDictionary second = createSequenceDictionary(secondDictionaryContigs);

    // Human-readable rendering of both dictionaries, appended to the failure message.
    final String context = String.format(
            "First dictionary: %s  Second dictionary: %s",
            SequenceDictionaryUtils.getDictionaryAsString(first),
            SequenceDictionaryUtils.getDictionaryAsString(second));

    final SequenceDictionaryUtils.SequenceDictionaryCompatibility actual =
            SequenceDictionaryUtils.compareDictionaries(first, second, checkContigOrdering);

    final String failureMessage = String.format(
            "Dictionary comparison should have returned %s but instead returned %s. %s",
            dictionaryCompatibility, actual, context);
    Assert.assertTrue(actual == dictionaryCompatibility, failureMessage);
}
Also used : SequenceDictionaryUtils(org.broadinstitute.hellbender.utils.SequenceDictionaryUtils) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SequenceDictionaryCompatibility(org.broadinstitute.hellbender.utils.SequenceDictionaryUtils.SequenceDictionaryCompatibility) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 68 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class SequenceDictionaryUtilsUnitTest method testCRAMValidationDoesAcceptSuperset.

/**
 * Runs CRAM-vs-reference dictionary validation on the "SupersetData" inputs.
 * The test passes as long as the validation call returns without throwing.
 */
@Test(dataProvider = "SupersetData")
public void testCRAMValidationDoesAcceptSuperset(final List<SAMSequenceRecord> refDictionaryContigs, final List<SAMSequenceRecord> cramDictionaryContigs) {
    // For these inputs the CRAM contigs are a subset of the reference contigs,
    // so validation is expected to accept them.
    SequenceDictionaryUtils.validateCRAMDictionaryAgainstReference(
            createSequenceDictionary(refDictionaryContigs),
            createSequenceDictionary(cramDictionaryContigs));
}
Also used : SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 69 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class SequenceDictionaryUtilsUnitTest method testCRAMValidationDoesRequireSuperset.

/**
 * Runs CRAM-vs-reference dictionary validation on the "NonSupersetData" inputs and
 * expects {@code UserException.IncompatibleSequenceDictionaries} to be thrown.
 */
@Test(dataProvider = "NonSupersetData", expectedExceptions = UserException.IncompatibleSequenceDictionaries.class)
public void testCRAMValidationDoesRequireSuperset(final List<SAMSequenceRecord> refDictionaryContigs, final List<SAMSequenceRecord> cramDictionaryContigs) {
    final SAMSequenceDictionary reference = createSequenceDictionary(refDictionaryContigs);
    final SAMSequenceDictionary cram = createSequenceDictionary(cramDictionaryContigs);

    // Validating a CRAM dictionary against the reference should demand a superset
    // relationship; with these inputs the call below is expected to throw.
    SequenceDictionaryUtils.validateCRAMDictionaryAgainstReference(reference, cram);
}
Also used : SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 70 with SAMSequenceDictionary

use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.

the class BAQUnitTest method createData1.

/**
 * Data provider named "data": assembles a fixed list of {@code BAQTest} cases and
 * returns them as rows, one {@code BAQTest} per row.
 *
 * @return an array with one single-element row per test case, in insertion order
 */
@DataProvider(name = "data")
public Object[][] createData1() {
    final List<BAQTest> cases = new ArrayList<>();

    // Single-contig dictionary ("1") shared by every in-memory reference below.
    final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
    dictionary.addSequence(new SAMSequenceRecord("1", Integer.MAX_VALUE));

    cases.add(new BAQTest(
            "GCTGCTCCTGGTACTGCTGGATGAGGGCCTCGATGAAGCTAAGCTTTTTCTCCTGCTCCTGCGTGATCCGCTGCAG",
            "GCTGCTCCTGGTACTGCTGGATGAGGGCCTCGATGAAGCTAAGCTTTTCCTCCTGCTCCTGCGTGATCCGCTGCAG",
            "?BACCBDDDFFBCFFHHFIHFEIFHIGHHGHBFEIFGIIGEGIIHGGGIHHIIHIIHIIHGICCIGEII@IGIHCG",
            "?BACCBDDDFFBCFFHHFIHFEIFHIGHHGHBFEIFGIIGEGII410..0HIIHIIHIIHGICCIGEII@IGIHCE"));
    cases.add(new BAQTest("GCTTTTTCTCCTCCTG", "GCTTTTCCTCCTCCTG", "IIHGGGIHHIIHHIIH", "EI410..0HIIHHIIE"));

    final String firstRef = "AAATTCAAGATTTCAAAGGCTCTTAACTGCTCAAGATAATTTTTTTTTTTTGAGACAGAGTCTTGCTGTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCACCCACCACCACGCCTGGCCAATTTTTTTGTATTTTTAGTAGAGATAG";
    final ReferenceDataSource firstSource = new ReferenceMemorySource(
            new ReferenceBases(firstRef.getBytes(), new SimpleInterval("1", 9999807, 10000032)),
            dictionary);
    // big and complex, also does a cap from 3 to 4!
    cases.add(new BAQTest(
            -3,
            9999810L,
            "49M1I126M1I20M1I25M",
            firstRef,
            "TTCAAGATTTCAAAGGCTCTTAACTGCTCAAGATAATTTTTTTTTTTTGTAGACAGAGTCTTGCTGTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCCACCCACCACCACGCCTGGCCTAATTTTTTTGTATTTTTAGTAGAGA",
            ">IHFECEBDBBCBCABABAADBD?AABBACEABABC?>?B>@A@@>A?B3BBC?CBDBAABBBBBAABAABBABDACCCBCDAACBCBABBB:ABDBACBBDCCCCABCDCCBCC@@;?<B@BC;CBBBAB=;A>ACBABBBABBCA@@<?>>AAA<CA@AABBABCC?BB8@<@C<>5;<A5=A;>=64>???B>=6497<<;;<;>2?>BA@??A6<<A59",
            ">EHFECEBDBBCBCABABAADBD?AABBACEABABC?>?B>@A@@>A?838BC?CBDBAABBBBBAABAABBABDACCCBCDAACBCBABBB:ABDBACBBDCCCCABCDCCBCC@@;?<B@BC;CBBBAB=;A>ACBABBBABBCA@@<?>>AAA<CA@AABBABCC?BB8@<@%<>5;<A5=A;>=64>???B;86497<<;;<;>2?>BA@??A6<<A59",
            firstSource));

    final String secondRef = "CCGAGTAGCTGGGACTACAGGCACCCACCACCACGCCTGGCC";
    final ReferenceDataSource secondSource = new ReferenceMemorySource(
            new ReferenceBases(secondRef.getBytes(), new SimpleInterval("1", 9999963, 10000004)),
            dictionary);
    // now changes
    cases.add(new BAQTest(
            -3,
            9999966L,
            "36M",
            secondRef,
            "AGTAGCTGGGACTACAGGCACCCACCACCACGCCTG",
            "A?>>@>AA?@@>A?>A@?>@>>?=>?'>?=>7=?A9",
            "A?>>@>AA?@@>A?>A@?>@>>?=>?'>?=>7=?A9",
            secondSource));

    final String thirdRef = "CCACCACGCCTGGCCAATTTTTTTGTATTTTTAGTAGAGATA";
    final ReferenceDataSource thirdSource = new ReferenceMemorySource(
            new ReferenceBases(thirdRef.getBytes(), new SimpleInterval("1", 9999990, 10000031)),
            dictionary);
    // raw base qualities are low -- but they shouldn't be capped
    cases.add(new BAQTest(
            -3,
            9999993L,
            "4=13X2=3X1=4X2=4X1=2X",
            thirdRef,
            "CCACGCTTGGCAAAGTTTTCCGTACGTTTAGCCGAG",
            "33'/(7+270&4),(&&-)$&,%7$',-/61(,6?8",
            "33'/(7+270&4),(&&-)$&,%7$',-/61(,6?8",
            thirdSource));

    // Wrap each case in its own single-element row, preserving insertion order.
    final Object[][] rows = new Object[cases.size()][];
    for (int i = 0; i < rows.length; i++) {
        rows[i] = new Object[] { cases.get(i) };
    }
    return rows;
}
Also used : ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) ReferenceDataSource(org.broadinstitute.hellbender.engine.ReferenceDataSource) ReferenceMemorySource(org.broadinstitute.hellbender.engine.ReferenceMemorySource) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DataProvider(org.testng.annotations.DataProvider)

Aggregations

SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary) 110, Test (org.testng.annotations.Test) 41, SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord) 37, BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest) 37, SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval) 35, File (java.io.File) 31, UserException (org.broadinstitute.hellbender.exceptions.UserException) 24, VariantContext (htsjdk.variant.variantcontext.VariantContext) 23, Argument (org.broadinstitute.barclay.argparser.Argument) 21, Collectors (java.util.stream.Collectors) 20, ReferenceMultiSource (org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) 20, JavaSparkContext (org.apache.spark.api.java.JavaSparkContext) 18, GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead) 17, VCFHeader (htsjdk.variant.vcf.VCFHeader) 16, IntervalUtils (org.broadinstitute.hellbender.utils.IntervalUtils) 16, SAMFileHeader (htsjdk.samtools.SAMFileHeader) 14, List (java.util.List) 14, JavaRDD (org.apache.spark.api.java.JavaRDD) 14, Broadcast (org.apache.spark.broadcast.Broadcast) 12, StreamSupport (java.util.stream.StreamSupport) 11