Use of htsjdk.samtools.util.Locatable in project gatk by broadinstitute.
The class GATKProtectedVariantContextUtilsUnitTest, method testGetPileup.
@Test
public void testGetPileup() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    final Locatable loc = new SimpleInterval("chr1", 10, 10);
    final int readLength = 3;
    // this read doesn't overlap {@code loc}
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, readLength);
    read1.setBases(Utils.dupBytes((byte) 'A', readLength));
    read1.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read1.setCigar("3M");
    // this read overlaps {@code loc} with a Q30 'A'
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 0, 10, readLength);
    read2.setBases(Utils.dupBytes((byte) 'A', readLength));
    read2.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read2.setCigar("3M");
    // this read overlaps {@code loc} with a Q20 'C' (due to the deletion)
    final GATKRead read3 = ArtificialReadUtils.createArtificialRead(header, "read3", 0, 7, readLength);
    read3.setBases(Utils.dupBytes((byte) 'C', readLength));
    read3.setBaseQualities(Utils.dupBytes((byte) 20, readLength));
    read3.setCigar("1M1D2M");
    // this read doesn't overlap {@code loc} due to the deletion
    final GATKRead read4 = ArtificialReadUtils.createArtificialRead(header, "read4", 0, 7, readLength);
    read4.setBases(Utils.dupBytes((byte) 'C', readLength));
    read4.setBaseQualities(Utils.dupBytes((byte) 20, readLength));
    read4.setCigar("1M5D2M");
    final ReadPileup pileup = GATKProtectedVariantContextUtils.getPileup(loc, Arrays.asList(read1, read2, read3, read4));
    // the pileup should contain a Q30 'A' and a Q20 'C'
    final int[] counts = pileup.getBaseCounts();
    Assert.assertEquals(counts, new int[] { 1, 1, 0, 0 });
}
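The overlap decisions exercised above reduce to contig/start/end comparisons on Locatable. The following editorial sketch (not part of the GATK test) restates them with SimpleInterval.overlaps; the reference spans are worked out by hand from each read's start position and CIGAR (M and D both consume reference bases).

// Sketch only: spans derived from the reads constructed in the test above.
final Locatable loc = new SimpleInterval("chr1", 10, 10);
final SimpleInterval read1Span = new SimpleInterval("chr1", 1, 3);   // start 1,  "3M"
final SimpleInterval read2Span = new SimpleInterval("chr1", 10, 12); // start 10, "3M"
final SimpleInterval read3Span = new SimpleInterval("chr1", 7, 10);  // start 7,  "1M1D2M" -> 1 + 1 + 2 reference bases
Assert.assertFalse(read1Span.overlaps(loc)); // read1 is excluded from the pileup
Assert.assertTrue(read2Span.overlaps(loc));  // read2 contributes its Q30 'A'
Assert.assertTrue(read3Span.overlaps(loc));  // read3 contributes its Q20 'C' (the deletion shifts it onto position 10)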
Use of htsjdk.samtools.util.Locatable in project gatk by broadinstitute.
The class IntervalUtilsUnitTest, method lexicographicalOrderComparatorData.
@DataProvider(name = "lexicographicalOrderComparatorData")
public Object[][] lexicographicalOrderComparatorData() {
    final String[] CONTIG_NAMES = new String[] { "A", "AA", "B", "C", null };
    final int[] CONTIG_SIZES = new int[] { 200, 300, 400, 500, 600 };
    final int MIN_INTERVAL_SIZE = 1;
    final int MAX_INTERVAL_SIZE = 100;
    final Random rdn = new Random(1131312131);
    final int CASE_NUMBER = 100;
    final List<Object[]> result = new ArrayList<>(100);
    for (int i = 0; i < CASE_NUMBER; i++) {
        final int locatableCount = rdn.nextInt(100) + 1;
        final List<Locatable> locatables = new ArrayList<>(locatableCount);
        for (int j = 0; j < locatableCount; j++) {
            final int contigIdx = rdn.nextInt(CONTIG_NAMES.length);
            final String contig = CONTIG_NAMES[contigIdx];
            final boolean useSimpleInterval = contig == null ? false : rdn.nextBoolean();
            final int intervalSize = rdn.nextInt(MAX_INTERVAL_SIZE - MIN_INTERVAL_SIZE + 1) + MIN_INTERVAL_SIZE;
            final int start = rdn.nextInt(CONTIG_SIZES[contigIdx] - intervalSize) + 1;
            final int end = start + intervalSize - 1;
            final Locatable loc = useSimpleInterval ? new SimpleInterval(contig, start, end) : new SimpleFeature(contig, start, end);
            locatables.add(loc);
        }
        result.add(new Object[] { locatables });
    }
    return result.toArray(new Object[result.size()][]);
}
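The test method that consumes this provider is not shown in the excerpt. A plausible shape for it, assuming the comparator under test is IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR (an assumption suggested only by the provider's name), would be:

// Hedged sketch only: the comparator field name and the exact assertions are assumptions, not quoted source.
@Test(dataProvider = "lexicographicalOrderComparatorData")
public void testLexicographicalOrderComparator(final List<Locatable> locatables) {
    final List<Locatable> sorted = new ArrayList<>(locatables);
    sorted.sort(IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR);
    // after sorting, every adjacent pair must be non-decreasing under the same comparator
    for (int i = 1; i < sorted.size(); i++) {
        Assert.assertTrue(IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR.compare(sorted.get(i - 1), sorted.get(i)) <= 0);
    }
}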
Use of htsjdk.samtools.util.Locatable in project gatk-protected by broadinstitute.
The class ReadCountCollectionUtils, method writeReadCountsFromSimpleInterval.
/**
 * Write a read-counts file of targets with coverage to the given writer, using dummy target names.
 * @param outWriter Writer to write targets with coverage. Never {@code null}
 * @param outName Name of the output writer. Never {@code null}
 * @param sampleName Name of the sample being written. Never {@code null}
 * @param byKeySorted Map of simple-intervals to their copy-ratio. Never {@code null}
 * @param comments Comments to add to the header of the coverage file. Never {@code null}
 */
public static <N extends Number> void writeReadCountsFromSimpleInterval(final Writer outWriter, final String outName, final String sampleName, final SortedMap<SimpleInterval, N> byKeySorted, final String[] comments) {
    Utils.nonNull(outWriter, "Output writer cannot be null.");
    Utils.nonNull(sampleName, "Sample name cannot be null.");
    Utils.nonNull(byKeySorted, "Targets cannot be null.");
    Utils.nonNull(comments, "Comments cannot be null.");
    final boolean areTargetIntervalsAllPopulated = byKeySorted.keySet().stream().allMatch(t -> t != null);
    if (!areTargetIntervalsAllPopulated) {
        throw new UserException("Cannot write target coverage file with any null intervals.");
    }
    try (final TableWriter<ReadCountRecord> writer = writerWithIntervals(outWriter, Collections.singletonList(sampleName))) {
        for (String comment : comments) {
            writer.writeComment(comment);
        }
        for (final Locatable locatable : byKeySorted.keySet()) {
            final SimpleInterval interval = new SimpleInterval(locatable);
            final double coverage = byKeySorted.get(locatable).doubleValue();
            writer.writeRecord(new ReadCountRecord.SingleSampleRecord(new Target(interval), coverage));
        }
    } catch (final IOException e) {
        throw new UserException.CouldNotCreateOutputFile(outName, e);
    }
}
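A minimal usage sketch of this method follows; the file name, sample name, intervals, and copy-ratio values are made up for illustration, and the calling code is assumed to handle the IOException thrown when opening the writer.

// Illustrative call only: names and numbers are invented, not taken from a real analysis.
final SortedMap<SimpleInterval, Double> byKeySorted = new TreeMap<>(
        Comparator.comparing(SimpleInterval::getContig).thenComparingInt(SimpleInterval::getStart));
byKeySorted.put(new SimpleInterval("1", 1000, 2000), 0.93);
byKeySorted.put(new SimpleInterval("1", 5000, 6000), 1.07);
try (final Writer outWriter = new FileWriter("sample1.coverage.tsv")) { // FileWriter may throw IOException
    ReadCountCollectionUtils.writeReadCountsFromSimpleInterval(
            outWriter, "sample1.coverage.tsv", "sample1", byKeySorted, new String[] { "illustrative comment line" });
}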
Use of htsjdk.samtools.util.Locatable in project gatk-protected by broadinstitute.
The class SegmentUtils, method fixLegacyTargetSegmentStarts.
/*===============================================================================================================*
 *                  PUBLIC METHODS FOR SEGMENT UNION (COMBINING TARGET AND SNP SEGMENTS)                         *
 *===============================================================================================================*/
/**
 * Legacy target segments from CBS (i.e., from legacy versions of {@link PerformSegmentation})
 * are specified by target-end--target-end intervals. This method converts these to the corresponding
 * target-start--target-end intervals, returning a new, modifiable list of segments. Non-legacy segments that are
 * already specified by target-start--target-end intervals are also returned as a new, modifiable list of segments
 * with no changes.
 */
public static List<SimpleInterval> fixLegacyTargetSegmentStarts(final List<SimpleInterval> targetSegments, final TargetCollection<? extends Locatable> targets) {
    Utils.nonNull(targetSegments, "The list of target segments cannot be null.");
    Utils.nonNull(targets, "The collection of targets cannot be null.");
    final List<SimpleInterval> targetSegmentsFixed = new ArrayList<>();
    for (final SimpleInterval segment : targetSegments) {
        try {
            final Locatable firstTarget = targets.targets(segment).get(0);
            Utils.validateArg(firstTarget.getEnd() == segment.getStart() || firstTarget.getStart() == segment.getStart(), "List of targets and segments do not correspond.");
            targetSegmentsFixed.add(new SimpleInterval(segment.getContig(), firstTarget.getStart(), segment.getEnd()));
        } catch (final IndexOutOfBoundsException ex) {
            throw new IllegalArgumentException("List of targets and segments do not correspond.");
        }
    }
    return targetSegmentsFixed;
}
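As a concrete illustration of the conversion (with invented coordinates): suppose the first target overlapping a legacy segment is 1:100-150 and the legacy segment itself is specified end-to-end as 1:150-400. The fixed segment keeps the legacy end but takes its start from the first target, exactly as the loop above does:

// Illustrative values only; this just restates the interval arithmetic in the method above.
final SimpleInterval legacySegment = new SimpleInterval("1", 150, 400); // starts at the first target's END
final Locatable firstTarget = new SimpleInterval("1", 100, 150);        // first target overlapping the segment
final SimpleInterval fixed = new SimpleInterval(legacySegment.getContig(), firstTarget.getStart(), legacySegment.getEnd());
// fixed is 1:100-400, i.e. a target-start--target-end interval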
Use of htsjdk.samtools.util.Locatable in project gatk by broadinstitute.
The class ReferenceConfidenceModel, method calculateRefConfidence.
/**
 * Calculate the reference confidence for a single sample given its read data
 *
 * Returns a list of variant contexts, one for each position in the {@code activeRegion.getLoc()}, each containing
 * detailed information about the certainty that the sample is hom-ref for each base in the region.
 *
 * @param refHaplotype the reference haplotype, used to get the reference bases across activeRegion.getLoc()
 * @param calledHaplotypes a list of haplotypes that segregate in this region, for realignment of the reads in the
 *                         readLikelihoods, corresponding to each read's best haplotype. Must contain the refHaplotype.
 * @param paddedReferenceLoc the location of refHaplotype (which might be larger than activeRegion.getLoc())
 * @param activeRegion the active region we want to get the reference confidence over
 * @param readLikelihoods a map from a single sample to its PerReadAlleleLikelihoodMap for each haplotype in calledHaplotypes
 * @param ploidyModel indicates the ploidy of each sample in {@code readLikelihoods}.
 * @param variantCalls calls made in this region. The return result will contain any variant call in this list in the
 *                     correct order by genomic position, and any variant in this list will stop us emitting a ref confidence
 *                     under any position it covers (for SNPs and insertions that is 1 bp, but for deletions it's the entire ref span)
 * @return an ordered list of variant contexts that spans activeRegion.getLoc() and includes both reference confidence
 *         contexts as well as calls from variantCalls if any were provided
 */
public List<VariantContext> calculateRefConfidence(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes, final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion, final ReadLikelihoods<Haplotype> readLikelihoods, final PloidyModel ploidyModel, final List<VariantContext> variantCalls) {
    Utils.nonNull(refHaplotype, "refHaplotype cannot be null");
    Utils.nonNull(calledHaplotypes, "calledHaplotypes cannot be null");
    Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
    Utils.nonNull(paddedReferenceLoc, "paddedReferenceLoc cannot be null");
    Utils.nonNull(activeRegion, "activeRegion cannot be null");
    Utils.nonNull(readLikelihoods, "readLikelihoods cannot be null");
    Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());
    Utils.validateArg(refHaplotype.length() == activeRegion.getExtendedSpan().size(), () -> "refHaplotype " + refHaplotype.length() + " and activeRegion location size " + activeRegion.getExtendedSpan().size() + " are different");
    Utils.nonNull(ploidyModel, "the ploidy model cannot be null");
    // the first sample = the only sample in reference-confidence mode.
    final int ploidy = ploidyModel.samplePloidy(0);
    final SimpleInterval refSpan = activeRegion.getSpan();
    final List<ReadPileup> refPileups = getPileupsOverReference(refHaplotype, calledHaplotypes, paddedReferenceLoc, activeRegion, refSpan, readLikelihoods);
    final byte[] ref = refHaplotype.getBases();
    final List<VariantContext> results = new ArrayList<>(refSpan.size());
    final String sampleName = readLikelihoods.getSample(0);
    final int globalRefOffset = refSpan.getStart() - activeRegion.getExtendedSpan().getStart();
    for (final ReadPileup pileup : refPileups) {
        final Locatable curPos = pileup.getLocation();
        final int offset = curPos.getStart() - refSpan.getStart();
        final VariantContext overlappingSite = getOverlappingVariantContext(curPos, variantCalls);
        if (overlappingSite != null && overlappingSite.getStart() == curPos.getStart()) {
            results.add(overlappingSite);
        } else {
            // otherwise emit a reference confidence variant context
            results.add(makeReferenceConfidenceVariantContext(ploidy, ref, sampleName, globalRefOffset, pileup, curPos, offset));
        }
    }
    return results;
}
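The offset bookkeeping in the loop can be illustrated with invented coordinates (not from a real run); how makeReferenceConfidenceVariantContext combines the two offsets is an assumption, since that method's body is not shown here.

// Illustrative arithmetic only: extended (padded) span 900-1100, refSpan (unpadded) 1000-1050.
final int globalRefOffset = 1000 - 900;  // 100: where refSpan begins inside refHaplotype's bases
final int offset = 1005 - 1000;          // 5:   a pileup at position 1005, relative to refSpan
// the reference base for that position presumably sits at index globalRefOffset + offset = 105
// within ref (an assumption about makeReferenceConfidenceVariantContext's internals)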