use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.
the class GetBayesianHetCoverageIntegrationTest method testNormalJob.
@Test
public void testNormalJob() {
    final File normalOutputFile = createTempFile("normal-test", ".tsv");
    Pulldown normalHetPulldownExpected, normalHetPulldownResult;

    /* test 1: tight calling stringency */
    final String[] args_1 = {
            "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REF_FILE.getAbsolutePath(),
            "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME, SNP_FILE.getAbsolutePath(),
            "-" + ExomeStandardArgumentDefinitions.NORMAL_BAM_FILE_SHORT_NAME, NORMAL_BAM_FILE.getAbsolutePath(),
            "-" + ExomeStandardArgumentDefinitions.NORMAL_ALLELIC_COUNTS_FILE_SHORT_NAME, normalOutputFile.getAbsolutePath(),
            "-" + GetBayesianHetCoverage.READ_DEPTH_THRESHOLD_SHORT_NAME, Integer.toString(10),
            "-" + GetBayesianHetCoverage.HET_CALLING_STRINGENCY_SHORT_NAME, Double.toString(15.0)
    };
    runCommandLine(args_1);
    normalHetPulldownExpected = new Pulldown(normalHeader);
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 39.99));
    normalHetPulldownResult = new Pulldown(normalOutputFile, normalHeader);
    Assert.assertEquals(normalHetPulldownExpected, normalHetPulldownResult);

    /* test 2: loose calling stringency */
    final String[] args_2 = {
            "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REF_FILE.getAbsolutePath(),
            "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME, SNP_FILE.getAbsolutePath(),
            "-" + ExomeStandardArgumentDefinitions.NORMAL_BAM_FILE_SHORT_NAME, NORMAL_BAM_FILE.getAbsolutePath(),
            "-" + ExomeStandardArgumentDefinitions.NORMAL_ALLELIC_COUNTS_FILE_SHORT_NAME, normalOutputFile.getAbsolutePath(),
            "-" + GetBayesianHetCoverage.READ_DEPTH_THRESHOLD_SHORT_NAME, Integer.toString(10),
            "-" + GetBayesianHetCoverage.HET_CALLING_STRINGENCY_SHORT_NAME, Double.toString(2.0)
    };
    runCommandLine(args_2);
    normalHetPulldownExpected = new Pulldown(normalHeader);
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4, Nucleotide.G, Nucleotide.A, 11, 18.60));
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14, 29.29));
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 39.99));
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15, 28.60));
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C, 11, 24.99));
    normalHetPulldownResult = new Pulldown(normalOutputFile, normalHeader);
    Assert.assertEquals(normalHetPulldownExpected, normalHetPulldownResult);
}
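For reference, a hedged sketch of the seven-argument AllelicCount constructor used above, with each positional argument labeled as inferred from this test. Reading the final double as a het-call quality/log-odds score is an assumption, not something this page confirms.

// Hedged sketch (not from the GATK sources): what each positional argument of the
// seven-argument AllelicCount constructor appears to mean, inferred from the test above.
private static AllelicCount exampleFullAllelicCount() {
    return new AllelicCount(
            new SimpleInterval("1", 14630, 14630), // single-base SNP site
            9,            // ref read count
            8,            // alt read count
            Nucleotide.T, // ref base
            Nucleotide.G, // alt base
            17,           // total read depth at the site
            39.99);       // het-call score; reading this as a log-odds-style quality is an assumption
}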
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class BayesianHetPulldownCalculatorUnitTest method testGetHetPulldown.
@Test
public void testGetHetPulldown() {
    Pulldown testPulldown, expectedPulldown;
    File tempFile;
    /* write each Pulldown to a file while checking the results */
    try {
        /* test 1: normal, loose threshold */
        testPulldown = calculator.getHetPulldown(NORMAL_BAM_FILE, 2);
        tempFile = File.createTempFile("testPulldownNormalLoose", ".txt");
        testPulldown.write(tempFile, AllelicCountTableColumn.AllelicCountTableVerbosity.FULL);
        expectedPulldown = new Pulldown(normalHeader);
        expectedPulldown.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4, Nucleotide.G, Nucleotide.A, 11, 18.38));
        expectedPulldown.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14, 28.84));
        expectedPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 39.39));
        expectedPulldown.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15, 28.23));
        expectedPulldown.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C, 11, 24.54));
        Assert.assertEquals(new Pulldown(tempFile, normalHeader), expectedPulldown);

        /* test 2: normal, tight threshold */
        testPulldown = calculator.getHetPulldown(NORMAL_BAM_FILE, 12);
        tempFile = File.createTempFile("testPulldownNormalTight", ".txt");
        testPulldown.write(tempFile, AllelicCountTableColumn.AllelicCountTableVerbosity.FULL);
        expectedPulldown = new Pulldown(normalHeader);
        expectedPulldown.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14, 28.84));
        expectedPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 39.39));
        expectedPulldown.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15, 28.23));
        Assert.assertEquals(new Pulldown(tempFile, normalHeader), expectedPulldown);

        /* test 3: tumor, loose threshold */
        testPulldown = calculator.getHetPulldown(TUMOR_BAM_FILE, 2);
        tempFile = File.createTempFile("testPulldownTumorLoose", ".txt");
        testPulldown.write(tempFile, AllelicCountTableColumn.AllelicCountTableVerbosity.FULL);
        expectedPulldown = new Pulldown(tumorHeader);
        expectedPulldown.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4, Nucleotide.G, Nucleotide.A, 11, 15.63));
        expectedPulldown.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14, 24.72));
        expectedPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 33.89));
        expectedPulldown.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15, 24.11));
        expectedPulldown.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C, 11, 21.10));
        Assert.assertEquals(new Pulldown(tempFile, normalHeader), expectedPulldown);

        /* test 4: tumor, tight threshold */
        testPulldown = calculator.getHetPulldown(TUMOR_BAM_FILE, 12);
        tempFile = File.createTempFile("testPulldownTumorTight", ".txt");
        testPulldown.write(tempFile, AllelicCountTableColumn.AllelicCountTableVerbosity.FULL);
        expectedPulldown = new Pulldown(tumorHeader);
        expectedPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 33.89));
        Assert.assertEquals(new Pulldown(tempFile, normalHeader), expectedPulldown);
    } catch (final IOException e) {
        throw new UserException.CouldNotCreateOutputFile("Could not write pulldown to file.", e);
    }
}
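Each of the four cases above is the same write/read round trip. A hedged sketch of that pattern as a helper, assuming normalHeader and tumorHeader are SAMFileHeader instances and that Pulldown equality is driven by the contained AllelicCounts (both assumptions based on how the test uses them):

// Hedged sketch of the round-trip pattern the tests rely on: a passing assertion
// means the FULL-verbosity write preserved every column needed to reconstruct
// the AllelicCounts from the file.
private static void assertPulldownRoundTrips(final Pulldown pulldown,
                                             final SAMFileHeader header) throws IOException {
    final File tempFile = File.createTempFile("pulldown-roundtrip", ".txt");
    pulldown.write(tempFile, AllelicCountTableColumn.AllelicCountTableVerbosity.FULL);
    Assert.assertEquals(new Pulldown(tempFile, header), pulldown);
}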
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class HetPulldownCalculatorUnitTest method inputGetTumorHetPulldown15.
@DataProvider(name = "inputGetTumorHetPulldownMin15")
public Object[][] inputGetTumorHetPulldown15() {
    final Pulldown tumorHetPulldown = new Pulldown(normalHeader);
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
    final IntervalList normalHetIntervals = new IntervalList(tumorHeader);
    normalHetIntervals.add(new Interval("1", 14630, 14630));
    normalHetIntervals.add(new Interval("2", 14689, 14689));
    return new Object[][] { { normalHetIntervals, tumorHetPulldown } };
}
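A TestNG data provider like this is consumed by a @Test that references it by name. A hypothetical consumer is sketched below; the calculator field and the getTumorHetPulldown signature are assumptions modeled on the other snippets on this page, not code shown here:

// Hypothetical consumer of the data provider above. Assumes the test class has a
// HetPulldownCalculator field named calculator with a method that counts ref/alt
// reads in the tumor BAM at the normal-derived het sites.
@Test(dataProvider = "inputGetTumorHetPulldownMin15")
public void testGetTumorHetPulldownMin15(final IntervalList normalHetIntervals,
                                         final Pulldown expectedTumorHetPulldown) {
    final Pulldown tumorHetPulldown = calculator.getTumorHetPulldown(TUMOR_BAM_FILE, normalHetIntervals);
    Assert.assertEquals(tumorHetPulldown, expectedTumorHetPulldown);
}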
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class SegmentUtils method mergeSpuriousStartsAndEnds.
//remerge spurious target-only segments introduced at the starts and ends of the
//original target-coverage segments by the union of breakpoints
private static List<SimpleInterval> mergeSpuriousStartsAndEnds(final List<SimpleInterval> segments,
                                                               final List<SimpleInterval> targetSegments,
                                                               final TargetCollection<AllelicCount> snps) {
    //get original target-segment starts and ends
    final Set<SimpleInterval> targetSegmentStarts = targetSegments.stream()
            .map(s -> new SimpleInterval(s.getContig(), s.getStart(), s.getStart()))
            .collect(Collectors.toSet());
    final Set<SimpleInterval> targetSegmentEnds = targetSegments.stream()
            .map(s -> new SimpleInterval(s.getContig(), s.getEnd(), s.getEnd()))
            .collect(Collectors.toSet());
    final List<SimpleInterval> mergedSegments = new ArrayList<>();
    final ListIterator<SimpleInterval> segmentsIter = segments.listIterator();
    while (segmentsIter.hasNext()) {
        final SimpleInterval segment = segmentsIter.next();
        //do not remerge non-target-only segments (i.e., those containing SNPs)
        if (snps.targetCount(segment) > 0) {
            mergedSegments.add(segment);
            continue;
        }
        final SimpleInterval segmentStart = new SimpleInterval(segment.getContig(), segment.getStart(), segment.getStart());
        final SimpleInterval segmentEnd = new SimpleInterval(segment.getContig(), segment.getEnd(), segment.getEnd());
        if (targetSegmentStarts.contains(segmentStart) && !targetSegmentEnds.contains(segmentEnd)) {
            //segment was introduced at an original start: remerge it with the segment to its right
            final SimpleInterval nextSegment = segmentsIter.next();
            mergedSegments.add(SegmentMergeUtils.mergeSegments(segment, nextSegment));
        } else if (!targetSegmentStarts.contains(segmentStart) && targetSegmentEnds.contains(segmentEnd)) {
            //segment was introduced at an original end: remerge it with the segment to its left
            final int previousIndex = mergedSegments.size() - 1;
            final SimpleInterval previousSegment = mergedSegments.get(previousIndex);
            mergedSegments.set(previousIndex, SegmentMergeUtils.mergeSegments(previousSegment, segment));
        } else {
            //do not merge otherwise; although some spurious segments remain, they will be merged in a later step
            mergedSegments.add(segment);
        }
    }
    return mergedSegments;
}
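A toy walk-through of the remerge-to-the-right branch may help; it assumes SegmentMergeUtils.mergeSegments returns the interval spanning both inputs, which is an inference from how the method uses it:

// Toy illustration (hedged): a SNP-free segment that shares its start, but not its
// end, with an original target segment gets fused with its right-hand neighbor.
final SimpleInterval spuriousAtStart = new SimpleInterval("1", 100, 150); // start matches an original target-segment start
final SimpleInterval rightNeighbor = new SimpleInterval("1", 151, 500);
// assumed behavior: the result spans both inputs, i.e. 1:100-500
final SimpleInterval merged = SegmentMergeUtils.mergeSegments(spuriousAtStart, rightNeighbor);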
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class AlleleFractionInitializer method initialMinorFractions.
/**
 * Initialize minor fractions assuming no allelic bias. <p></p>
 *
 * We integrate over the minor fraction f to get posterior probabilities (responsibilities)
 * of alt minor vs. ref minor; that is, the responsibility of alt minor is
 * int_{0 to 1/2} f^a (1-f)^r df and the responsibility of ref minor is
 * int_{0 to 1/2} f^r (1-f)^a df. These are proportional to I(1/2; a + 1, r + 1) and
 * I(1/2; r + 1, a + 1), respectively, where I is the regularized incomplete Beta function.
 * By the symmetry of I, these two responsibilities sum to 1, i.e., they are already
 * normalized. <p></p>
 *
 * Finally, we set each minor fraction to the responsibility-weighted total count of
 * minor-allele reads divided by the total read count, ignoring outliers.
 */
private AlleleFractionState.MinorFractions initialMinorFractions(final AlleleFractionData data) {
    final int numSegments = data.getNumSegments();
    final AlleleFractionState.MinorFractions result = new AlleleFractionState.MinorFractions(numSegments);
    for (int segment = 0; segment < numSegments; segment++) {
        double responsibilityWeightedMinorAlleleReadCount = 0.0;
        double responsibilityWeightedTotalReadCount = 0.0;
        for (final AllelicCount count : data.getCountsInSegment(segment)) {
            final int a = count.getAltReadCount();
            final int r = count.getRefReadCount();
            double altMinorResponsibility;
            try {
                altMinorResponsibility = Beta.regularizedBeta(0.5, a + 1, r + 1);
            } catch (final MaxCountExceededException e) {
                //if the special function can't be computed, fall back to an all-or-nothing responsibility
                altMinorResponsibility = a < r ? 1.0 : 0.0;
            }
            responsibilityWeightedMinorAlleleReadCount += altMinorResponsibility * a + (1 - altMinorResponsibility) * r;
            responsibilityWeightedTotalReadCount += a + r;
        }
        //we achieve a flat prior via a single pseudocount each for minor and non-minor reads, hence the +1 and +2
        result.add((responsibilityWeightedMinorAlleleReadCount + 1) / (responsibilityWeightedTotalReadCount + 2));
    }
    return result;
}
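A worked numeric check of the responsibility formula from the javadoc, using the same org.apache.commons.math3.special.Beta call as the method; the read counts are illustrative, not taken from any dataset on this page:

// Worked example (hedged) of the responsibilities computed above.
private static void demonstrateResponsibilities() {
    final int a = 8; // alt read count (illustrative)
    final int r = 9; // ref read count (illustrative)
    final double altMinor = Beta.regularizedBeta(0.5, a + 1, r + 1); // I(1/2; 9, 10) ~ 0.59
    final double refMinor = Beta.regularizedBeta(0.5, r + 1, a + 1); // I(1/2; 10, 9) ~ 0.41
    // The two responsibilities sum to 1 by the symmetry I(1/2; p, q) + I(1/2; q, p) = 1,
    // which is why the method never needs an explicit normalization step.
    // The responsibility-weighted minor-allele read count used by the method:
    final double weightedMinorCount = altMinor * a + (1 - altMinor) * r; // ~ 8.4
}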