Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.
The class SegmentUtils, method mergeSpuriousStartsAndEnds.
//remerge spurious target-only segments introduced at starts and ends of the
//original target-coverage segments by union of breakpoints
private static List<SimpleInterval> mergeSpuriousStartsAndEnds(final List<SimpleInterval> segments,
                                                               final List<SimpleInterval> targetSegments,
                                                               final TargetCollection<AllelicCount> snps) {
    //get original target-segment starts and ends
    final Set<SimpleInterval> targetSegmentStarts = targetSegments.stream()
            .map(s -> new SimpleInterval(s.getContig(), s.getStart(), s.getStart()))
            .collect(Collectors.toSet());
    final Set<SimpleInterval> targetSegmentEnds = targetSegments.stream()
            .map(s -> new SimpleInterval(s.getContig(), s.getEnd(), s.getEnd()))
            .collect(Collectors.toSet());

    final List<SimpleInterval> mergedSegments = new ArrayList<>();
    final ListIterator<SimpleInterval> segmentsIter = segments.listIterator();
    while (segmentsIter.hasNext()) {
        final SimpleInterval segment = segmentsIter.next();
        //do not remerge non-target-only segments (i.e., those containing SNPs)
        if (snps.targetCount(segment) > 0) {
            mergedSegments.add(segment);
            continue;
        }
        final SimpleInterval segmentStart = new SimpleInterval(segment.getContig(), segment.getStart(), segment.getStart());
        final SimpleInterval segmentEnd = new SimpleInterval(segment.getContig(), segment.getEnd(), segment.getEnd());
        if (targetSegmentStarts.contains(segmentStart) && !targetSegmentEnds.contains(segmentEnd)) {
            //remerge segments introduced at starts to the right
            final SimpleInterval nextSegment = segmentsIter.next();
            mergedSegments.add(SegmentMergeUtils.mergeSegments(segment, nextSegment));
        } else if (!targetSegmentStarts.contains(segmentStart) && targetSegmentEnds.contains(segmentEnd)) {
            //remerge segments introduced at ends to the left
            final int previousIndex = mergedSegments.size() - 1;
            final SimpleInterval previousSegment = mergedSegments.get(previousIndex);
            mergedSegments.set(previousIndex, SegmentMergeUtils.mergeSegments(previousSegment, segment));
        } else {
            //do not merge otherwise; although some spurious segments remain, they will be merged in a later step
            mergedSegments.add(segment);
        }
    }
    return mergedSegments;
}
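The remerging above relies on SegmentMergeUtils.mergeSegments to collapse two neighboring segments into a single interval spanning both. A minimal sketch of that interval union for same-contig segments (the helper name mergeAdjacent is hypothetical and stands in for the actual GATK implementation, which may handle additional cases):

    //hypothetical helper illustrating the interval union assumed of SegmentMergeUtils.mergeSegments;
    //it simply spans from the leftmost start to the rightmost end of two same-contig segments
    private static SimpleInterval mergeAdjacent(final SimpleInterval left, final SimpleInterval right) {
        if (!left.getContig().equals(right.getContig())) {
            throw new IllegalArgumentException("Segments to merge must be on the same contig.");
        }
        return new SimpleInterval(left.getContig(),
                Math.min(left.getStart(), right.getStart()),
                Math.max(left.getEnd(), right.getEnd()));
    }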
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.
The class AllelicPanelOfNormals, method initializeSiteToHyperparameterPairMap.
//transforms ref/alt counts at each site to hyperparameters, see docs/CNVs/CNV-methods.pdf for details
private void initializeSiteToHyperparameterPairMap(final AllelicCountCollection counts) {
    logger.info("Initializing allelic panel of normals...");
    for (final AllelicCount count : counts.getCounts()) {
        final SimpleInterval site = count.getInterval();
        final HyperparameterValues hyperparameterValues = new HyperparameterValues(
                globalHyperparameterValues.alpha, globalHyperparameterValues.beta,
                count.getAltReadCount(), count.getRefReadCount());
        if (siteToHyperparameterValuesMap.containsKey(site)) {
            throw new UserException.BadInput("Input AllelicCountCollection for allelic panel of normals contains duplicate sites.");
        } else {
            siteToHyperparameterValuesMap.put(site, hyperparameterValues);
        }
    }
    logger.info("Allelic panel of normals initialized.");
}
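As a usage sketch, an AllelicCountCollection assembled in memory can seed a panel of normals; the constructor call mirrors the one in the create method below, and it is assumed (not shown here) that construction performs the site-to-hyperparameter initialization above. A collection containing the same site twice would be rejected with UserException.BadInput. Coordinates and counts are illustrative:

    //illustrative counts at two distinct SNP sites; repeating a site would trigger the
    //UserException.BadInput thrown above
    final AllelicCountCollection counts = new AllelicCountCollection();
    counts.add(new AllelicCount(new SimpleInterval("1", 1000, 1000), 35, 28));
    counts.add(new AllelicCount(new SimpleInterval("1", 2000, 2000), 40, 31));
    final AllelicPanelOfNormals allelicPoN = new AllelicPanelOfNormals(counts);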
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.
The class AllelicPanelOfNormalsCreator, method create.
/**
 * Creates an {@link AllelicPanelOfNormals} given a site-frequency threshold;
 * sites appearing in strictly less than this fraction of samples will not be included in the panel of normals.
 * @param siteFrequencyThreshold    site-frequency threshold
 * @return                          an {@link AllelicPanelOfNormals} containing sites
 *                                  above the site-frequency threshold
 */
public AllelicPanelOfNormals create(final double siteFrequencyThreshold) {
    logger.info("Creating allelic panel of normals...");
    //used to filter on frequency
    final Map<SimpleInterval, MutableInt> numberOfSamplesMap = new HashMap<>();
    //store only the total counts (smaller memory footprint)
    final Map<SimpleInterval, AllelicCount> totalCountsMap = new HashMap<>();
    int pulldownFileCounter = 1;
    final int totalNumberOfSamples = pulldownFiles.size();
    for (final File pulldownFile : pulldownFiles) {
        logger.info("Processing pulldown file " + pulldownFileCounter++ + "/" + totalNumberOfSamples + " (" + pulldownFile + ")...");
        final AllelicCountCollection pulldownCounts = new AllelicCountCollection(pulldownFile);
        for (final AllelicCount count : pulldownCounts.getCounts()) {
            //update the sum of ref and alt counts at each site
            final SimpleInterval site = count.getInterval();
            final AllelicCount currentCountAtSite = totalCountsMap.getOrDefault(site, new AllelicCount(site, 0, 0));
            final AllelicCount updatedCountAtSite = new AllelicCount(site,
                    currentCountAtSite.getRefReadCount() + count.getRefReadCount(),
                    currentCountAtSite.getAltReadCount() + count.getAltReadCount());
            totalCountsMap.put(site, updatedCountAtSite);
            //update the number of samples seen possessing each site
            final MutableInt numberOfSamplesAtSite = numberOfSamplesMap.get(site);
            if (numberOfSamplesAtSite == null) {
                numberOfSamplesMap.put(site, new MutableInt(1));
            } else {
                numberOfSamplesAtSite.increment();
            }
        }
    }
    logger.info("Total number of unique sites present in samples: " + totalCountsMap.size());
    //filter out sites that appear at a frequency strictly less than the provided threshold
    final AllelicCountCollection totalCounts = new AllelicCountCollection();
    numberOfSamplesMap.entrySet().stream()
            .filter(e -> e.getValue().doubleValue() / totalNumberOfSamples >= siteFrequencyThreshold)
            .map(e -> totalCountsMap.get(e.getKey()))
            .forEach(totalCounts::add);
    logger.info(String.format("Number of unique sites present in samples above site frequency = %4.3f: %d",
            siteFrequencyThreshold, totalCounts.getCounts().size()));
    return new AllelicPanelOfNormals(totalCounts);
}
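The filtering step keeps a site when it is seen in at least the threshold fraction of samples (the comparison is >=, so sites exactly at the threshold are retained). A self-contained illustration of that arithmetic with made-up sites, assuming 4 samples and a threshold of 0.5:

    //with 4 samples and a 0.5 threshold, a site seen in 1 sample (0.25) is dropped
    //and a site seen in 2 samples (0.50) is kept, since the filter uses >=
    final int totalNumberOfSamples = 4;
    final double siteFrequencyThreshold = 0.5;
    final Map<SimpleInterval, MutableInt> numberOfSamplesMap = new HashMap<>();
    numberOfSamplesMap.put(new SimpleInterval("1", 100, 100), new MutableInt(1));   //dropped
    numberOfSamplesMap.put(new SimpleInterval("1", 200, 200), new MutableInt(2));   //kept
    final List<SimpleInterval> keptSites = numberOfSamplesMap.entrySet().stream()
            .filter(e -> e.getValue().doubleValue() / totalNumberOfSamples >= siteFrequencyThreshold)
            .map(Map.Entry::getKey)
            .collect(Collectors.toList());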
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.
The class AlleleFractionSegmenterUnitTest, method generateAllelicCount.
protected static AllelicCount generateAllelicCount(final double minorFraction, final SimpleInterval position,
                                                   final RandomGenerator rng, final GammaDistribution biasGenerator,
                                                   final double outlierProbability) {
    final int numReads = 100;
    final double bias = biasGenerator.sample();
    //flip a coin to decide alt minor (alt fraction = minor fraction) or ref minor (alt fraction = 1 - minor fraction)
    final double altFraction = rng.nextDouble() < 0.5 ? minorFraction : 1 - minorFraction;
    //the probability of an alt read is the alt fraction modified by the bias or, in the case of an outlier, random
    final double pAlt = rng.nextDouble() < outlierProbability
            ? rng.nextDouble()
            : altFraction / (altFraction + (1 - altFraction) * bias);
    final int numAltReads = new BinomialDistribution(rng, numReads, pAlt).sample();
    final int numRefReads = numReads - numAltReads;
    return new AllelicCount(position, numAltReads, numRefReads);
}
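A usage sketch for this test helper: drawing one simulated het site with minor-allele fraction 0.3, a gamma-distributed allelic bias with mean 1 (shape 10, scale 0.1), and a 1% outlier probability. The seed, distribution parameters, and interval are illustrative, not values taken from the GATK tests:

    //Well19937c and GammaDistribution come from Apache Commons Math3, as do the
    //RandomGenerator and BinomialDistribution used above
    final RandomGenerator rng = new Well19937c(13);
    final GammaDistribution biasGenerator = new GammaDistribution(rng, 10.0, 0.1);  //mean = shape * scale = 1.0
    final SimpleInterval position = new SimpleInterval("1", 100000, 100000);
    final AllelicCount simulatedCount = generateAllelicCount(0.3, position, rng, biasGenerator, 0.01);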
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.
The class HetPulldownCalculatorUnitTest, method inputGetTumorHetPulldown.
@DataProvider(name = "inputGetTumorHetPulldown")
public Object[][] inputGetTumorHetPulldown() {
    final Pulldown tumorHetPulldown = new Pulldown(normalHeader);
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));
    final IntervalList normalHetIntervals = new IntervalList(tumorHeader);
    normalHetIntervals.add(new Interval("1", 11522, 11522));
    normalHetIntervals.add(new Interval("1", 12098, 12098));
    normalHetIntervals.add(new Interval("1", 14630, 14630));
    normalHetIntervals.add(new Interval("2", 14689, 14689));
    normalHetIntervals.add(new Interval("2", 14982, 14982));
    return new Object[][] { { normalHetIntervals, tumorHetPulldown } };
}
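A TestNG test method consumes this data provider by name; the sketch below is hypothetical and only shows the wiring, not the actual assertions in HetPulldownCalculatorUnitTest:

    //hypothetical consumer of the data provider above; a real test would compute the tumor
    //het pulldown over normalHetIntervals and compare it to the expected Pulldown
    @Test(dataProvider = "inputGetTumorHetPulldown")
    public void testGetTumorHetPulldown(final IntervalList normalHetIntervals, final Pulldown expectedTumorHetPulldown) {
        //compute the pulldown and assert equality with expectedTumorHetPulldown here
    }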