use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class GATKToolUnitTest method testBestSequenceDictionary_fromReads.
@Test
public void testBestSequenceDictionary_fromReads() throws Exception {
final GATKTool tool = new TestGATKToolWithReads();
final CommandLineParser clp = new CommandLineArgumentParser(tool);
final File bamFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam");
final String[] args = { "-I", bamFile.getCanonicalPath() };
clp.parseArguments(System.out, args);
tool.onStartup();
//read the dict back in and compare to bam dict
final SAMSequenceDictionary toolDict = tool.getBestAvailableSequenceDictionary();
try (final SamReader open = SamReaderFactory.makeDefault().open(bamFile)) {
final SAMSequenceDictionary bamDict = open.getFileHeader().getSequenceDictionary();
toolDict.assertSameDictionary(bamDict);
bamDict.assertSameDictionary(toolDict);
Assert.assertEquals(toolDict, bamDict);
}
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class AddContextDataToReadSparkUnitTest method addContextDataTest.
@Test(dataProvider = "bases", groups = "spark")
public void addContextDataTest(List<GATKRead> reads, List<GATKVariant> variantList, List<KV<GATKRead, ReadContextData>> expectedReadContextData, JoinStrategy joinStrategy) throws IOException {
JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
JavaRDD<GATKRead> rddReads = ctx.parallelize(reads);
JavaRDD<GATKVariant> rddVariants = ctx.parallelize(variantList);
ReferenceMultiSource mockSource = mock(ReferenceMultiSource.class, withSettings().serializable());
when(mockSource.getReferenceBases(any(PipelineOptions.class), any())).then(new ReferenceBasesAnswer());
when(mockSource.getReferenceWindowFunction()).thenReturn(ReferenceWindowFunctions.IDENTITY_FUNCTION);
SAMSequenceDictionary sd = new SAMSequenceDictionary(Lists.newArrayList(new SAMSequenceRecord("1", 100000), new SAMSequenceRecord("2", 100000)));
when(mockSource.getReferenceSequenceDictionary(null)).thenReturn(sd);
JavaPairRDD<GATKRead, ReadContextData> rddActual = AddContextDataToReadSpark.add(ctx, rddReads, mockSource, rddVariants, joinStrategy, sd, 10000, 1000);
Map<GATKRead, ReadContextData> actual = rddActual.collectAsMap();
Assert.assertEquals(actual.size(), expectedReadContextData.size());
for (KV<GATKRead, ReadContextData> kv : expectedReadContextData) {
ReadContextData readContextData = actual.get(kv.getKey());
Assert.assertNotNull(readContextData);
Assert.assertTrue(CollectionUtils.isEqualCollection(Lists.newArrayList(readContextData.getOverlappingVariants()), Lists.newArrayList(kv.getValue().getOverlappingVariants())));
SimpleInterval minimalInterval = kv.getValue().getOverlappingReferenceBases().getInterval();
ReferenceBases subset = readContextData.getOverlappingReferenceBases().getSubset(minimalInterval);
Assert.assertEquals(subset, kv.getValue().getOverlappingReferenceBases());
}
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class FindBadGenomicKmersSparkUnitTest method miniRefTest.
@Test(groups = "spark")
public void miniRefTest() throws IOException {
final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
final ReferenceMultiSource ref = new ReferenceMultiSource((PipelineOptions) null, REFERENCE_FILE_NAME, ReferenceWindowFunctions.IDENTITY_FUNCTION);
final SAMSequenceDictionary dict = ref.getReferenceSequenceDictionary(null);
if (dict == null)
throw new GATKException("No reference dictionary available.");
final Map<SVKmer, Long> kmerMap = new LinkedHashMap<>();
for (final SAMSequenceRecord rec : dict.getSequences()) {
final SimpleInterval interval = new SimpleInterval(rec.getSequenceName(), 1, rec.getSequenceLength());
final byte[] bases = ref.getReferenceBases(null, interval).getBases();
final SVKmerizer kmerizer = new SVKmerizer(bases, KMER_SIZE, new SVKmerLong());
while (kmerizer.hasNext()) {
final SVKmer kmer = kmerizer.next().canonical(KMER_SIZE);
final Long currentCount = kmerMap.getOrDefault(kmer, 0L);
kmerMap.put(kmer, currentCount + 1);
}
}
final Iterator<Map.Entry<SVKmer, Long>> kmerIterator = kmerMap.entrySet().iterator();
while (kmerIterator.hasNext()) {
if (kmerIterator.next().getValue() <= FindBadGenomicKmersSpark.MAX_KMER_FREQ)
kmerIterator.remove();
}
final List<SVKmer> badKmers = FindBadGenomicKmersSpark.findBadGenomicKmers(ctx, KMER_SIZE, Integer.MAX_VALUE, ref, null, null);
final Set<SVKmer> badKmerSet = new HashSet<>(badKmers);
Assert.assertEquals(badKmers.size(), badKmerSet.size());
Assert.assertEquals(badKmerSet, kmerMap.keySet());
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class SequenceDictionaryUtilsUnitTest method testSequenceDictionaryValidation.
@Test(dataProvider = "SequenceDictionaryDataProvider")
public void testSequenceDictionaryValidation(final List<SAMSequenceRecord> firstDictionaryContigs, final List<SAMSequenceRecord> secondDictionaryContigs, //not needed by this test
final SequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility, final Class<? extends UserException> expectedExceptionUponValidation, final boolean requireSuperset, final boolean checkContigOrdering) {
final SAMSequenceDictionary firstDictionary = createSequenceDictionary(firstDictionaryContigs);
final SAMSequenceDictionary secondDictionary = createSequenceDictionary(secondDictionaryContigs);
final String testDescription = String.format("First dictionary: %s Second dictionary: %s", SequenceDictionaryUtils.getDictionaryAsString(firstDictionary), SequenceDictionaryUtils.getDictionaryAsString(secondDictionary));
Exception exceptionThrown = null;
try {
SequenceDictionaryUtils.validateDictionaries("firstDictionary", firstDictionary, "secondDictionary", secondDictionary, requireSuperset, checkContigOrdering);
} catch (Exception e) {
exceptionThrown = e;
}
if (expectedExceptionUponValidation != null) {
Assert.assertTrue(exceptionThrown != null && expectedExceptionUponValidation.isInstance(exceptionThrown), String.format("Expected exception %s but saw %s instead. %s", expectedExceptionUponValidation.getSimpleName(), exceptionThrown == null ? "no exception" : exceptionThrown.getClass().getSimpleName(), testDescription));
} else {
Assert.assertTrue(exceptionThrown == null, String.format("Expected no exception but saw exception %s instead. %s", exceptionThrown != null ? exceptionThrown.getClass().getSimpleName() : "none", testDescription));
}
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class SequenceDictionaryUtilsUnitTest method testStandardValidationDoesNotRequireSuperset.
@Test(dataProvider = "NonSupersetData")
public void testStandardValidationDoesNotRequireSuperset(final List<SAMSequenceRecord> firstDictionaryContigs, final List<SAMSequenceRecord> secondDictionaryContigs) {
final SAMSequenceDictionary firstDictionary = createSequenceDictionary(firstDictionaryContigs);
final SAMSequenceDictionary secondDictionary = createSequenceDictionary(secondDictionaryContigs);
// Standard validation (the overload of validateDictionaries() that doesn't take any boolean args)
// should not require a superset relationship, so we shouldn't get an exception here
SequenceDictionaryUtils.validateDictionaries("first", firstDictionary, "second", secondDictionary);
}
Aggregations