use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.
the class SNPSegmenter method writeSegmentFile.
/**
* Write segment file based on maximum-likelihood estimates of the minor allele fraction at SNP sites,
* assuming the specified allelic bias. These estimates are converted to target coverages,
* which are written to a temporary file and then passed to {@link RCBSSegmenter}.
* @param snps TargetCollection of allelic counts at SNP sites
* @param sampleName sample name
* @param outputFile segment file to write to and return
* @param allelicBias allelic bias to use in estimate of minor allele fraction
*/
public static void writeSegmentFile(final TargetCollection<AllelicCount> snps, final String sampleName, final File outputFile, final double allelicBias) {
Utils.validateArg(snps.totalSize() > 0, "Must have a positive number of SNPs to perform SNP segmentation.");
try {
final File targetsFromSNPCountsFile = File.createTempFile("targets-from-snps", ".tsv");
final List<Target> targets = snps.targets().stream().map(ac -> new Target(name(ac), ac.getInterval())).collect(Collectors.toList());
final RealMatrix minorAlleleFractions = new Array2DRowRealMatrix(snps.targetCount(), 1);
minorAlleleFractions.setColumn(0, snps.targets().stream().mapToDouble(ac -> ac.estimateMinorAlleleFraction(allelicBias)).toArray());
ReadCountCollectionUtils.write(targetsFromSNPCountsFile, new ReadCountCollection(targets, Collections.singletonList(sampleName), minorAlleleFractions));
//segment SNPs based on observed log_2 minor allele fraction (log_2 is applied in CBS.R)
RCBSSegmenter.writeSegmentFile(sampleName, targetsFromSNPCountsFile.getAbsolutePath(), outputFile.getAbsolutePath(), false);
} catch (final IOException e) {
throw new UserException.CouldNotCreateOutputFile("Could not create temporary output file during " + "SNP segmentation.", e);
}
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.
the class CalculatePulldownPhasePosteriors method calculatePhasePosteriors.
@VisibleForTesting
protected static AllelicCountWithPhasePosteriorsCollection calculatePhasePosteriors(final AllelicCountCollection counts, final List<SimpleInterval> segments, final AlleleFractionState state, final AllelicPanelOfNormals allelicPoN) {
final TargetCollection<SimpleInterval> segmentTargetCollection = new HashedListTargetCollection<>(segments);
final AllelicCountWithPhasePosteriorsCollection countsWithPhasePosteriors = new AllelicCountWithPhasePosteriorsCollection();
for (final AllelicCount count : counts.getCounts()) {
final int segmentIndex = segmentTargetCollection.index(count.getInterval());
if (segmentIndex < 0) {
throw new UserException.EmptyIntersection(String.format("The AllelicCount at %s is not located within one of the input segments.", count.getInterval()));
}
final AlleleFractionGlobalParameters parameters = state.globalParameters();
final double minorFraction = state.segmentMinorFraction(segmentIndex);
final double refMinorLogProb = AlleleFractionLikelihoods.hetLogLikelihood(parameters, minorFraction, count, AlleleFractionIndicator.REF_MINOR, allelicPoN);
final double altMinorLogProb = AlleleFractionLikelihoods.hetLogLikelihood(parameters, minorFraction, count, AlleleFractionIndicator.ALT_MINOR, allelicPoN);
final double outlierLogProb = AlleleFractionLikelihoods.hetLogLikelihood(parameters, minorFraction, count, AlleleFractionIndicator.OUTLIER, allelicPoN);
final AllelicCountWithPhasePosteriors countWithPhasePosteriors = new AllelicCountWithPhasePosteriors(count, refMinorLogProb, altMinorLogProb, outlierLogProb);
countsWithPhasePosteriors.add(countWithPhasePosteriors);
}
return countsWithPhasePosteriors;
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class ACNVModellerUnitTest method testMergeSimilarSegmentsCopyRatio.
/**
* Test of similar-segment merging using only copy-ratio data (simulated coverages and segments).
* Spurious breakpoints have been introduced into the list of true segments; similar-segment merging should
* remerge segments broken by these breakpoints and reproduce the original list of true segments.
*/
@Test
public void testMergeSimilarSegmentsCopyRatio() throws IOException {
final boolean doRefit = true;
final String tempDir = publicTestDir + "similar-segment-copy-ratio-test";
final File tempDirFile = createTempDir(tempDir);
//load data (coverages and segments)
final ReadCountCollection coverage = ReadCountCollectionUtils.parse(COVERAGES_FILE);
final List<AllelicCount> snpCountsDummy = Collections.singletonList(new AllelicCount(new SimpleInterval("1", 1, 1), 0, 1));
final Genome genome = new Genome(coverage, snpCountsDummy);
final SegmentedGenome segmentedGenome = new SegmentedGenome(SEGMENT_FILE, genome);
//initial MCMC model fitting performed by ACNVModeller constructor
final ACNVModeller modeller = new ACNVModeller(segmentedGenome, NUM_SAMPLES, NUM_BURN_IN, 10, 0, ctx);
//check that model is completely fit at construction
Assert.assertTrue(modeller.isModelFit());
//perform iterations of similar-segment merging until all similar segments are merged
int prevNumSegments;
for (int numIterations = 1; numIterations <= MAX_SIMILAR_SEGMENT_MERGE_ITERATIONS; numIterations++) {
prevNumSegments = modeller.getACNVModeledSegments().size();
modeller.performSimilarSegmentMergingIteration(INTERVAL_THRESHOLD, Double.POSITIVE_INFINITY, doRefit);
if (modeller.getACNVModeledSegments().size() == prevNumSegments) {
break;
}
}
//write final segments to file
final File finalModeledSegmentsFile = new File(tempDirFile, "test-" + FINAL_FIT_FILE_TAG + SEGMENT_FILE_SUFFIX);
modeller.writeACNVModeledSegmentFile(finalModeledSegmentsFile);
//check equality of segments
final List<SimpleInterval> segmentsResult = SegmentUtils.readIntervalsFromSegmentFile(finalModeledSegmentsFile);
Assert.assertEquals(segmentsResult, SEGMENTS_TRUTH);
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class GetBayesianHetCoverageIntegrationTest method testMatchedNormalTumorJob.
@Test
public void testMatchedNormalTumorJob() {
final File normalOutputFile = createTempFile("normal-test", ".tsv");
final File tumorOutputFile = createTempFile("tumor-test", ".tsv");
Pulldown tumorHetPulldownExpected, tumorHetPulldownResult;
Pulldown normalHetPulldownExpected, normalHetPulldownResult;
final String[] arguments = { "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REF_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME, SNP_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.NORMAL_BAM_FILE_SHORT_NAME, NORMAL_BAM_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.NORMAL_ALLELIC_COUNTS_FILE_SHORT_NAME, normalOutputFile.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TUMOR_BAM_FILE_SHORT_NAME, TUMOR_BAM_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TUMOR_ALLELIC_COUNTS_FILE_SHORT_NAME, tumorOutputFile.getAbsolutePath(), "-" + GetBayesianHetCoverage.READ_DEPTH_THRESHOLD_SHORT_NAME, Integer.toString(10), "-" + GetBayesianHetCoverage.HET_CALLING_STRINGENCY_SHORT_NAME, Double.toString(10.0) };
runCommandLine(arguments);
normalHetPulldownResult = new Pulldown(normalOutputFile, normalHeader);
tumorHetPulldownResult = new Pulldown(tumorOutputFile, tumorHeader);
normalHetPulldownExpected = new Pulldown(normalHeader);
normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14, 29.29));
normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 39.98));
normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15, 28.60));
normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C, 11, 24.99));
tumorHetPulldownExpected = new Pulldown(tumorHeader);
tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14));
tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17));
tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15));
tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C, 11));
Assert.assertEquals(normalHetPulldownExpected, normalHetPulldownResult);
Assert.assertEquals(tumorHetPulldownExpected, tumorHetPulldownResult);
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class AlleleFractionLikelihoodsUnitTest method testHetLogLikelihoodTightDistribution.
//if variance is tiny we can approximate lambda ~ mu in the tricky part of the integral to get an analytic result
@Test
public void testHetLogLikelihoodTightDistribution() {
//pi is just a prefactor so we don't need to test it thoroughly here
final double pi = 0.01;
for (final double f : Arrays.asList(0.1, 0.2, 0.3)) {
for (final double mean : Arrays.asList(0.9, 1.0, 1.1)) {
for (final double variance : Arrays.asList(1e-6, 1e-7, 1e-8)) {
final AlleleFractionGlobalParameters parameters = new AlleleFractionGlobalParameters(mean, variance, pi);
for (final int a : Arrays.asList(1, 10, 20)) {
//alt count
for (final int r : Arrays.asList(1, 10, 20)) {
//ref count
final AllelicCount count = new AllelicCount(DUMMY, r, a);
final double actual = AlleleFractionLikelihoods.hetLogLikelihood(parameters, f, count, AlleleFractionIndicator.ALT_MINOR);
final double expected = log((1 - pi) / 2) + a * log(f) + r * log(1 - f) + r * log(mean) - (a + r) * log(f + (1 - f) * mean);
Assert.assertEquals(actual, expected, 1e-3);
}
}
}
}
}
}
Aggregations