use of htsjdk.samtools.util.Locatable in project gatk by broadinstitute.
the class SparkSharderUnitTest method testSingleContig.
@Test
public void testSingleContig() throws IOException {
JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
// Consider the following reads (divided into four partitions), and intervals.
// This test counts the number of reads that overlap each interval.
// 1 2
// 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7
// ---------------------------------------------------------
// Reads in partition 0
// [-----]
// [-----]
// [-----]
// ---------------------------------------------------------
// Reads in partition 1
// [-----]
// [-----]
// [-----]
// ---------------------------------------------------------
// Reads in partition 2
// [-----]
// [-----]
// [-----]
// ---------------------------------------------------------
// Reads in partition 3
// [-----]
// [-----]
// [-----]
// ---------------------------------------------------------
// Per-partition read extents
// [-----------------]
// [-----]
// [---------------]
// [---------------------]
// ---------------------------------------------------------
// Intervals
// [-----]
// [---------]
// [-----------------------]
//
// 1 2
// ---------------------------------------------------------
// 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7
JavaRDD<TestRead> reads = ctx.parallelize(ImmutableList.of(new TestRead(1, 3), new TestRead(5, 7), new TestRead(7, 9), new TestRead(7, 9), new TestRead(7, 9), new TestRead(7, 9), new TestRead(7, 9), new TestRead(11, 13), new TestRead(12, 14), new TestRead(17, 19), new TestRead(21, 23), new TestRead(25, 27)), 4);
List<SimpleInterval> intervals = ImmutableList.of(new SimpleInterval("1", 2, 4), new SimpleInterval("1", 8, 12), new SimpleInterval("1", 11, 22));
List<ShardBoundary> shardBoundaries = intervals.stream().map(si -> new ShardBoundary(si, si)).collect(Collectors.toList());
ImmutableMap<SimpleInterval, Integer> expectedReadsPerInterval = ImmutableMap.of(intervals.get(0), 1, intervals.get(1), 7, intervals.get(2), 4);
JavaPairRDD<Locatable, Integer> readsPerInterval = SparkSharder.shard(ctx, reads, TestRead.class, sequenceDictionary, shardBoundaries, STANDARD_READ_LENGTH, false).flatMapToPair(new CountOverlappingReadsFunction());
assertEquals(readsPerInterval.collectAsMap(), expectedReadsPerInterval);
JavaPairRDD<Locatable, Integer> readsPerIntervalShuffle = SparkSharder.shard(ctx, reads, TestRead.class, sequenceDictionary, shardBoundaries, STANDARD_READ_LENGTH, true).flatMapToPair(new CountOverlappingReadsFunction());
assertEquals(readsPerIntervalShuffle.collectAsMap(), expectedReadsPerInterval);
try {
// max read length less than actual causes exception
int maxReadLength = STANDARD_READ_LENGTH - 1;
SparkSharder.shard(ctx, reads, TestRead.class, sequenceDictionary, shardBoundaries, maxReadLength, true).flatMapToPair(new CountOverlappingReadsFunction()).collect();
} catch (Exception e) {
assertEquals(e.getCause().getClass(), UserException.class);
}
}
use of htsjdk.samtools.util.Locatable in project gatk by broadinstitute.
the class BAQUnitTest method testGetReferenceWindowForReadStopsAtContigStart.
//regression test for https://github.com/broadinstitute/gatk/issues/1234
@Test
public void testGetReferenceWindowForReadStopsAtContigStart() throws Exception {
final int bandwidth = 7;
GATKRead read = ArtificialReadUtils.createArtificialRead("10M");
final int start = 2;
final Locatable pos = new SimpleInterval("1", start, start + read.getLength());
read.setPosition(pos);
final SimpleInterval referenceWindowForRead = BAQ.getReferenceWindowForRead(read, bandwidth);
//start is at 1 because we hit the front end of the reference
SimpleInterval refWindow = new SimpleInterval("1", 1, read.getEnd() + (bandwidth / 2));
Assert.assertEquals(referenceWindowForRead, refWindow);
}
use of htsjdk.samtools.util.Locatable in project gatk by broadinstitute.
the class IntervalUtilsUnitTest method testLexicographicalOrderComparator.
@Test(dataProvider = "lexicographicalOrderComparatorData")
public void testLexicographicalOrderComparator(final List<Locatable> unsorted) {
final List<Locatable> sorted = unsorted.stream().sorted(IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR).collect(Collectors.toList());
for (int i = 0; i < sorted.size() - 1; i++) {
final Locatable thisLoc = sorted.get(i);
final Locatable nextLoc = sorted.get(i + 1);
if (thisLoc.getContig() == nextLoc.getContig()) {
Assert.assertTrue(thisLoc.getStart() <= nextLoc.getStart());
if (thisLoc.getStart() == nextLoc.getStart()) {
Assert.assertTrue(thisLoc.getEnd() <= nextLoc.getEnd());
}
} else if (thisLoc.getContig() == null) {
Assert.fail("Null contig must go last");
} else if (nextLoc.getContig() != null) {
Assert.assertTrue(thisLoc.getContig().compareTo(nextLoc.getContig()) <= 0);
if (thisLoc.getContig().equals(nextLoc.getContig())) {
Assert.assertTrue(thisLoc.getStart() <= nextLoc.getStart());
if (thisLoc.getStart() == nextLoc.getStart()) {
Assert.assertTrue(thisLoc.getEnd() <= nextLoc.getEnd());
}
}
}
}
}
use of htsjdk.samtools.util.Locatable in project gatk by broadinstitute.
the class GenomeLocParser method parseGenomeLoc.
// --------------------------------------------------------------------------------------------------------------
//
// Parsing genome locs
//
// --------------------------------------------------------------------------------------------------------------
/**
* parse a genome interval, from a location string
*
* Performs interval-style validation:
*
* contig is valid; start and stop less than the end; start <= stop, and start/stop are on the contig
* @param str the string to parse
*
* @return a GenomeLoc representing the String
*
*/
public GenomeLoc parseGenomeLoc(final String str) {
try {
if (isUnmappedGenomeLocString(str)) {
return GenomeLoc.UNMAPPED;
}
final Locatable locatable = new SimpleInterval(str);
final String contig = locatable.getContig();
final int start = locatable.getStart();
int stop = locatable.getEnd();
if (!contigIsInDictionary(contig)) {
throw new UserException.MalformedGenomeLoc("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
}
if (stop == Integer.MAX_VALUE) {
// lookup the actual stop position!
stop = getContigInfo(contig).getSequenceLength();
}
return createGenomeLoc(contig, getContigIndex(contig), start, stop, true);
} catch (UserException.MalformedGenomeLoc e) {
throw e;
} catch (IllegalArgumentException | UserException e) {
throw new UserException.MalformedGenomeLoc("Failed to parse Genome Location string: " + str + ": " + e.getMessage(), e);
}
}
use of htsjdk.samtools.util.Locatable in project gatk by broadinstitute.
the class HaplotypeCallerGenotypingEngineUnitTest method testAddMiscellaneousAllele.
@Test(dataProvider = "AddMiscellaneousDataProvider", enabled = false)
public void testAddMiscellaneousAllele(final String readBases, final int readOffset, final String ref, final int refOffset, final String referenceAllele, final String[] alternatives, final double[] likelihoods, final double[] expected) {
final byte baseQual = (byte) 30;
final byte[] baseQuals = Utils.dupBytes(baseQual, readBases.length());
final GATKRead read = ArtificialReadUtils.createArtificialRead(readBases.getBytes(), baseQuals, readBases.length() + "M");
final Locatable loc = new SimpleInterval("20", refOffset, refOffset);
final ReadPileup pileup = new ReadPileup(loc, Collections.singletonList(read), readOffset);
final VariantContextBuilder vcb = new VariantContextBuilder();
final GenotypeBuilder gb = new GenotypeBuilder();
final List<String> alleleStrings = new ArrayList<>(1 + alternatives.length);
alleleStrings.add(referenceAllele);
alleleStrings.addAll(Arrays.asList(alternatives));
gb.AD(new int[] { 1 });
gb.DP(1);
gb.PL(likelihoods);
vcb.alleles(alleleStrings);
vcb.loc("20", refOffset, refOffset + referenceAllele.length() - 1);
vcb.genotypes(gb.make());
final VariantContext vc = vcb.make();
// GenotypingEngine.addMiscellaneousAllele(vc,pileup,ref.getBytes(),0);
final VariantContext updatedVc = null;
final GenotypeLikelihoods updatedLikelihoods = updatedVc.getGenotype(0).getLikelihoods();
Assert.assertEquals(updatedLikelihoods.getAsVector().length, expected.length);
final double[] updatedLikelihoodsArray = updatedVc.getGenotype(0).getLikelihoods().getAsVector();
for (int i = 0; i < updatedLikelihoodsArray.length; i++) {
Assert.assertEquals(updatedLikelihoodsArray[i], expected[i], 0.0001);
}
Allele altAllele = null;
for (final Allele allele : updatedVc.getAlleles()) if (allele.isSymbolic() && allele.getBaseString().equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME))
altAllele = allele;
Assert.assertNotNull(altAllele);
}
Aggregations