Use of htsjdk.samtools.util.Locatable in project gatk-protected by broadinstitute.
The class ReferenceConfidenceModel, method calculateRefConfidence.
/**
 * Calculate the reference confidence for a single sample given its read data.
 *
 * Returns a list of variant contexts, one for each position in {@code activeRegion.getLoc()}, each containing
 * detailed information about the certainty that the sample is hom-ref for each base in the region.
 *
 * @param refHaplotype the reference haplotype, used to get the reference bases across activeRegion.getLoc()
 * @param calledHaplotypes a list of haplotypes that segregate in this region, for realignment of the reads in the
 * readLikelihoods, corresponding to each read's best haplotype. Must contain the refHaplotype.
 * @param paddedReferenceLoc the location of refHaplotype (which might be larger than activeRegion.getLoc())
 * @param activeRegion the active region we want to get the reference confidence over
 * @param readLikelihoods a map from a single sample to its PerReadAlleleLikelihoodMap for each haplotype in calledHaplotypes
 * @param ploidyModel indicates the ploidy of each sample in {@code stratifiedReadMap}
 * @param variantCalls calls made in this region. The return result will contain any variant call in this list in the
 * correct order by genomic position, and any variant in this list will stop us emitting a ref confidence
 * under any position it covers (for SNPs and insertions that is 1 bp, but for deletions it is the entire ref span)
 * @return an ordered list of variant contexts that spans activeRegion.getLoc() and includes both reference confidence
 * contexts as well as calls from variantCalls if any were provided
 */
public List<VariantContext> calculateRefConfidence(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes, final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion, final ReadLikelihoods<Haplotype> readLikelihoods, final PloidyModel ploidyModel, final List<VariantContext> variantCalls) {
    Utils.nonNull(refHaplotype, "refHaplotype cannot be null");
    Utils.nonNull(calledHaplotypes, "calledHaplotypes cannot be null");
    Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
    Utils.nonNull(paddedReferenceLoc, "paddedReferenceLoc cannot be null");
    Utils.nonNull(activeRegion, "activeRegion cannot be null");
    Utils.nonNull(readLikelihoods, "readLikelihoods cannot be null");
    Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());
    Utils.validateArg(refHaplotype.length() == activeRegion.getExtendedSpan().size(), () -> "refHaplotype " + refHaplotype.length() + " and activeRegion extended span size " + activeRegion.getExtendedSpan().size() + " are different");
    Utils.nonNull(ploidyModel, "the ploidy model cannot be null");
    // the first sample is the only sample in reference-confidence mode
    final int ploidy = ploidyModel.samplePloidy(0);
    final SimpleInterval refSpan = activeRegion.getSpan();
    final List<ReadPileup> refPileups = getPileupsOverReference(refHaplotype, calledHaplotypes, paddedReferenceLoc, activeRegion, refSpan, readLikelihoods);
    final byte[] ref = refHaplotype.getBases();
    final List<VariantContext> results = new ArrayList<>(refSpan.size());
    final String sampleName = readLikelihoods.getSample(0);
    final int globalRefOffset = refSpan.getStart() - activeRegion.getExtendedSpan().getStart();
    for (final ReadPileup pileup : refPileups) {
        final Locatable curPos = pileup.getLocation();
        final int offset = curPos.getStart() - refSpan.getStart();
        final VariantContext overlappingSite = getOverlappingVariantContext(curPos, variantCalls);
        if (overlappingSite != null && overlappingSite.getStart() == curPos.getStart()) {
            // a variant call starts exactly at this position: emit it instead of a ref-confidence block
            results.add(overlappingSite);
        } else {
            // otherwise emit a reference confidence variant context
            results.add(makeReferenceConfidenceVariantContext(ploidy, ref, sampleName, globalRefOffset, pileup, curPos, offset));
        }
    }
    return results;
}
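The per-position decision above hinges on whether one of the calls in variantCalls overlaps the current pileup position. Below is a minimal, self-contained sketch of that kind of Locatable-based containment check; OverlapSketch and firstOverlapping are hypothetical names introduced here, and this is not the GATK getOverlappingVariantContext implementation, just the same idea applied to plain htsjdk Intervals.

import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Locatable;
import java.util.Arrays;
import java.util.List;

public final class OverlapSketch {
    // Hypothetical stand-in: return the first locatable whose span covers the
    // query position (same contig, start within [call.start, call.end]), or null.
    static Locatable firstOverlapping(final Locatable pos, final List<? extends Locatable> calls) {
        for (final Locatable call : calls) {
            final boolean sameContig = call.getContig().equals(pos.getContig());
            if (sameContig && call.getStart() <= pos.getStart() && pos.getStart() <= call.getEnd()) {
                return call;
            }
        }
        return null;
    }

    public static void main(String[] args) {
        // A deletion call spanning chr1:100-102 suppresses ref-confidence emission
        // at every position it covers, not just its start position.
        final Locatable deletion = new Interval("chr1", 100, 102);
        final Locatable pos = new Interval("chr1", 101, 101);
        System.out.println(firstOverlapping(pos, Arrays.asList(deletion)) != null); // true
    }
}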
Use of htsjdk.samtools.util.Locatable in project gatk-protected by broadinstitute.
The class GATKProtectedVariantContextUtilsUnitTest, method testGetPileup.
@Test
public void testGetPileup() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    final Locatable loc = new SimpleInterval("chr1", 10, 10);
    final int readLength = 3;
    // this read doesn't overlap {@code loc}
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, readLength);
    read1.setBases(Utils.dupBytes((byte) 'A', readLength));
    read1.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read1.setCigar("3M");
    // this read overlaps {@code loc} with a Q30 'A'
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 0, 10, readLength);
    read2.setBases(Utils.dupBytes((byte) 'A', readLength));
    read2.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read2.setCigar("3M");
    // this read overlaps {@code loc} with a Q20 'C': the 1 bp deletion shifts its last base onto position 10
    final GATKRead read3 = ArtificialReadUtils.createArtificialRead(header, "read3", 0, 7, readLength);
    read3.setBases(Utils.dupBytes((byte) 'C', readLength));
    read3.setBaseQualities(Utils.dupBytes((byte) 20, readLength));
    read3.setCigar("1M1D2M");
    // this read doesn't overlap {@code loc}: the 5 bp deletion places position 10 inside the deleted span
    final GATKRead read4 = ArtificialReadUtils.createArtificialRead(header, "read4", 0, 7, readLength);
    read4.setBases(Utils.dupBytes((byte) 'C', readLength));
    read4.setBaseQualities(Utils.dupBytes((byte) 20, readLength));
    read4.setCigar("1M5D2M");
    final ReadPileup pileup = GATKProtectedVariantContextUtils.getPileup(loc, Arrays.asList(read1, read2, read3, read4));
    // the pileup should contain a Q30 'A' and a Q20 'C' (base counts are ordered A, C, G, T)
    final int[] counts = pileup.getBaseCounts();
    Assert.assertEquals(counts, new int[] { 1, 1, 0, 0 });
}
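The reason read3 reaches position 10 while read4 does not is pure CIGAR arithmetic: deletion operators consume reference bases without contributing read bases. The sketch below works that out with htsjdk's CIGAR parsing; CigarSpanSketch and endPosition are names introduced here for illustration.

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.TextCigarCodec;

public final class CigarSpanSketch {
    // Last reference position covered by a read starting at refStart (1-based):
    // sum the lengths of all reference-consuming operators (M and D here).
    static int endPosition(final int refStart, final String cigarString) {
        final Cigar cigar = TextCigarCodec.decode(cigarString);
        int refConsumed = 0;
        for (final CigarElement e : cigar.getCigarElements()) {
            if (e.getOperator().consumesReferenceBases()) {
                refConsumed += e.getLength();
            }
        }
        return refStart + refConsumed - 1;
    }

    public static void main(String[] args) {
        // read3: 7..10, and position 10 is an aligned 'C' base
        System.out.println(endPosition(7, "1M1D2M")); // 10
        // read4: 7..14, but position 10 falls inside the 5 bp deletion (8..12),
        // so it contributes no base to the pileup at chr1:10
        System.out.println(endPosition(7, "1M5D2M")); // 14
    }
}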
Use of htsjdk.samtools.util.Locatable in project gatk by broadinstitute.
The class SparkSharder, method computePartitionReadExtents.
/**
 * For each partition, find the interval that spans it.
 */
static <L extends Locatable> List<PartitionLocatable<SimpleInterval>> computePartitionReadExtents(JavaRDD<L> locatables, SAMSequenceDictionary sequenceDictionary, int maxLocatableLength) {
    // Find the first locatable in each partition. This is very efficient since only the first record in each partition is read.
    // If a partition is empty then set the locatable to null.
    List<PartitionLocatable<L>> allSplitPoints = locatables.mapPartitions(
            (FlatMapFunction<Iterator<L>, PartitionLocatable<L>>) it ->
                    ImmutableList.of(new PartitionLocatable<>(-1, it.hasNext() ? it.next() : null)).iterator()
    ).collect();
    // fill in index and remove nulls (empty partitions)
    List<PartitionLocatable<L>> splitPoints = new ArrayList<>();
    for (int i = 0; i < allSplitPoints.size(); i++) {
        L locatable = allSplitPoints.get(i).getLocatable();
        if (locatable != null) {
            splitPoints.add(new PartitionLocatable<L>(i, locatable));
        }
    }
    List<PartitionLocatable<SimpleInterval>> extents = new ArrayList<>();
    for (int i = 0; i < splitPoints.size(); i++) {
        PartitionLocatable<L> splitPoint = splitPoints.get(i);
        int partitionIndex = splitPoint.getPartitionIndex();
        Locatable current = splitPoint.getLocatable();
        int intervalContigIndex = sequenceDictionary.getSequenceIndex(current.getContig());
        final Locatable next;
        final int nextContigIndex;
        if (i < splitPoints.size() - 1) {
            next = splitPoints.get(i + 1);
            nextContigIndex = sequenceDictionary.getSequenceIndex(next.getContig());
        } else {
            next = null;
            nextContigIndex = sequenceDictionary.getSequences().size();
        }
        if (intervalContigIndex == nextContigIndex) {
            // same contig
            addPartitionReadExtent(extents, partitionIndex, current.getContig(), current.getStart(), next.getStart() + maxLocatableLength);
        } else {
            // complete current contig
            int contigEnd = sequenceDictionary.getSequence(current.getContig()).getSequenceLength();
            addPartitionReadExtent(extents, partitionIndex, current.getContig(), current.getStart(), contigEnd);
            // add any whole contigs up to next (exclusive)
            for (int contigIndex = intervalContigIndex + 1; contigIndex < nextContigIndex; contigIndex++) {
                SAMSequenceRecord sequence = sequenceDictionary.getSequence(contigIndex);
                addPartitionReadExtent(extents, partitionIndex, sequence.getSequenceName(), 1, sequence.getSequenceLength());
            }
            // add start of next contig
            if (next != null) {
                addPartitionReadExtent(extents, partitionIndex, next.getContig(), 1, next.getStart() + maxLocatableLength);
            }
        }
    }
    return extents;
}
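To make the cross-contig branch concrete, here is a hedged, standalone sketch of the extents that would be produced for two consecutive split points on different contigs. ExtentSketch, the toy dictionary, and maxLocatableLength = 100 are all assumptions for illustration, and the print statements stand in for addPartitionReadExtent, which is not reimplemented here.

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Locatable;
import java.util.Arrays;

public final class ExtentSketch {
    public static void main(String[] args) {
        // toy dictionary: contig order drives the "complete current contig, add
        // whole intermediate contigs, start the next contig" logic above
        final SAMSequenceDictionary dict = new SAMSequenceDictionary(Arrays.asList(
                new SAMSequenceRecord("chr1", 1000),
                new SAMSequenceRecord("chr2", 500),
                new SAMSequenceRecord("chr3", 800)));
        final int maxLocatableLength = 100;
        final Locatable current = new Interval("chr1", 200, 250); // first record of this partition
        final Locatable next = new Interval("chr3", 40, 60);      // first record of the next partition

        // different contigs: finish chr1, cover all of chr2, then the start of chr3
        System.out.printf("%s:%d-%d%n", current.getContig(), current.getStart(),
                dict.getSequence(current.getContig()).getSequenceLength());              // chr1:200-1000
        System.out.printf("chr2:1-%d%n", dict.getSequence("chr2").getSequenceLength()); // chr2:1-500
        System.out.printf("%s:1-%d%n", next.getContig(),
                next.getStart() + maxLocatableLength);                                   // chr3:1-140
    }
}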
Use of htsjdk.samtools.util.Locatable in project gatk by broadinstitute.
The class IntervalsSkipListOneContig, method buildIndexAndCheck.
// build index and check everyone's in the same contig
private int[] buildIndexAndCheck() {
    int max = 0;
    int key = 0;
    int idx = 0;
    // result: bucket# -> how far that bucket reaches
    final int[] result = new int[(vs.size() >> shift) + 1];
    for (Locatable v : vs) {
        int k = idx >> shift;
        if (k > key) {
            result[key] = max;
            key = k;
        }
        if (v.getEnd() > max) {
            max = v.getEnd();
        }
        idx++;
    }
    result[key] = max;
    return result;
}
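To see what this index looks like on concrete data, the sketch below applies the same loop to a small sorted interval list with shift = 1 (buckets of two intervals). BucketIndexSketch and buildIndex are names introduced here, and the reading of result[k] as the furthest end reached through bucket k, which lets a query skip buckets that end before its start, is my interpretation of the comment above.

import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Locatable;
import java.util.Arrays;
import java.util.List;

public final class BucketIndexSketch {
    // Same logic as buildIndexAndCheck, lifted out so it can run on a toy list.
    static int[] buildIndex(final List<? extends Locatable> vs, final int shift) {
        int max = 0;
        int key = 0;
        int idx = 0;
        final int[] result = new int[(vs.size() >> shift) + 1];
        for (final Locatable v : vs) {
            final int k = idx >> shift;
            if (k > key) {
                result[key] = max;
                key = k;
            }
            if (v.getEnd() > max) {
                max = v.getEnd();
            }
            idx++;
        }
        result[key] = max;
        return result;
    }

    public static void main(String[] args) {
        final List<Interval> vs = Arrays.asList(
                new Interval("chr1", 1, 30),
                new Interval("chr1", 5, 10),
                new Interval("chr1", 12, 15),
                new Interval("chr1", 20, 22),
                new Interval("chr1", 25, 40));
        // bucket 0 = {1-30, 5-10}, bucket 1 = {12-15, 20-22}, bucket 2 = {25-40}
        System.out.println(Arrays.toString(buildIndex(vs, 1))); // [30, 30, 40]
    }
}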
Use of htsjdk.samtools.util.Locatable in project gatk by broadinstitute.
The class SimpleIntervalUnitTest, method badIntervalsFromLocatable.
@Test(dataProvider = "badIntervals", expectedExceptions = IllegalArgumentException.class)
public void badIntervalsFromLocatable(String contig, int start, int end, String name) {
    Locatable l = getLocatable(contig, start, end);
    new SimpleInterval(l);
}
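For contrast with the failing cases supplied by the badIntervals data provider (not shown here), a well-formed Locatable converts cleanly. The sketch below assumes chr1:10-20 is valid input and uses htsjdk's Interval as the Locatable implementation; FromLocatableSketch is a name introduced here.

import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Locatable;
import org.broadinstitute.hellbender.utils.SimpleInterval;

public final class FromLocatableSketch {
    public static void main(String[] args) {
        // wrap an arbitrary Locatable in a SimpleInterval, as the test above does
        final Locatable l = new Interval("chr1", 10, 20);
        final SimpleInterval si = new SimpleInterval(l);
        System.out.println(si.getContig() + ":" + si.getStart() + "-" + si.getEnd()); // chr1:10-20
    }
}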