Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute: class AlleleFractionModellerUnitTest, method testMCMCWithAllelicPoN.
/**
 * Test MCMC inference on simulated data using an allelic PoN. Note that these MCMC tests were written to use
 * simulated hets before the allelic PoN was introduced. Rather than generate a simulated PoN on the fly,
 * we simply use a fixed simulated PoN loaded from a file and check that its MLE hyperparameters are "sampled"
 * correctly by simply taking the MLE PoN values---i.e., the PoN does not actually cover the simulated sites and
 * hence is not used to correct reference bias in the simulated data in any way.
 * This latter functionality is tested on fixed data loaded from files in
 * {@link AlleleFractionModellerUnitTest#testBiasCorrection} instead.
 */
@Test
public void testMCMCWithAllelicPoN() {
    // Ground-truth global bias hyperparameters used to simulate the het data.
    final double simulatedMeanBias = 1.2;
    final double simulatedBiasVariance = 0.04;
    // MLE hyperparameters of the fixed PoN (generated with alpha = 65, beta = 60).
    final double ponMeanBias = 1.083;
    final double ponBiasVariance = 0.0181;
    final AllelicCountCollection ponCounts = new AllelicCountCollection(ALLELIC_PON_NORMAL_COUNTS_FILE);
    final AllelicPanelOfNormals allelicPoN = new AllelicPanelOfNormals(ponCounts);
    testMCMC(simulatedMeanBias, simulatedBiasVariance, ponMeanBias, ponBiasVariance, allelicPoN);
}
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute: class PerformAlleleFractionSegmentation, method doWork.
/**
 * Reads het SNP counts (and an optional allelic PoN), segments the allele-fraction data,
 * and writes the modeled segments to the output file.
 *
 * @return the string "SUCCESS" on completion
 */
@Override
public Object doWork() {
    // Derive the sample name from the SNP-counts file name, dropping the extension.
    final String sampleName = FilenameUtils.getBaseName(snpCountsFile.getAbsolutePath());
    // Fall back to the empty PoN when no allelic-PoN file was supplied.
    final AllelicPanelOfNormals allelicPoN =
            allelicPoNFile == null ? AllelicPanelOfNormals.EMPTY_PON : AllelicPanelOfNormals.read(allelicPoNFile);
    final AllelicCountCollection counts = new AllelicCountCollection(snpCountsFile);
    final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(initialNumStates, counts, allelicPoN);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    SegmentUtils.writeModeledSegmentFile(outputSegmentsFile, segments, sampleName, true);
    return "SUCCESS";
}
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute: class PerformJointSegmentation, method doWork.
/**
 * Jointly segments copy-ratio and allele-fraction data and writes the resulting
 * ACNV modeled segments to the output file.
 *
 * @return the string "SUCCESS" on completion
 * @throws UserException.BadInput if the coverage file cannot be read
 */
@Override
public Object doWork() {
    ParamUtils.isPositive(initialNumAFStates, "Must have at least one allele-fraction state.");
    ParamUtils.isPositive(initialNumCRStates, "Must have at least one copy-ratio state.");
    // Fall back to the empty PoN when no allelic-PoN file was supplied.
    final AllelicPanelOfNormals allelicPoN = allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
    final AllelicCountCollection acc = new AllelicCountCollection(snpCountsFile);
    final ReadCountCollection rcc;
    try {
        rcc = ReadCountCollectionUtils.parse(new File(coverageFile));
    } catch (final IOException ex) {
        // Name the offending file and preserve the cause instead of swallowing the exception.
        throw new UserException.BadInput("could not read input file " + coverageFile, ex);
    }
    final JointAFCRSegmenter jointSegmenter = JointAFCRSegmenter.createJointSegmenter(initialNumCRStates, rcc, initialNumAFStates, acc, allelicPoN);
    final List<Pair<SimpleInterval, AFCRHiddenState>> segmentation = jointSegmenter.findSegments();
    // Wrap each segment's point estimates in error-free posteriors for ACNV-style output.
    final List<ACNVModeledSegment> segments = segmentation.stream()
            .map(pair -> new ACNVModeledSegment(pair.getLeft(),
                    errorlessPosterior(pair.getRight().getLog2CopyRatio()),
                    errorlessPosterior(pair.getRight().getMinorAlleleFraction())))
            .collect(Collectors.toList());
    //TODO: make more reasonable output for ACNV 2.0
    SegmentUtils.writeACNVModeledSegmentFile(outputSegmentsFile, segments, new Genome(rcc, acc.getCounts()));
    return "SUCCESS";
}
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute: class AllelicPanelOfNormalsUnitTest, method testPoNHyperparameterInitialization.
/**
 * Tests that site-specific hyperparameters (alpha, beta) are initialized correctly for sites
 * covered by the PoN, and that MLE hyperparameters are used as defaults for uncovered sites.
 */
@Test
public void testPoNHyperparameterInitialization() {
    LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
    final AllelicPanelOfNormals allelicPoN =
            new AllelicPanelOfNormals(new AllelicCountCollection(ALLELIC_PON_NORMAL_COUNTS_FILE));

    // First site present in the PoN (a = 1218, r = 1317); all PoN sites lie on chr1,
    // so any site on chr2 is guaranteed to be absent from the PoN.
    final SimpleInterval siteInPoN = new SimpleInterval("1", 1, 1);
    final SimpleInterval siteAbsentFromPoN = new SimpleInterval("2", 1, 1);

    // Site-specific hyperparameters for a site covered by the PoN.
    Assert.assertEquals(allelicPoN.getAlpha(siteInPoN), ALPHA_EXPECTED_AT_FIRST_SITE, DELTA);
    Assert.assertEquals(allelicPoN.getBeta(siteInPoN), BETA_EXPECTED_AT_FIRST_SITE, DELTA);

    // MLE hyperparameters, which serve as the defaults for sites not covered by the PoN.
    Assert.assertEquals(allelicPoN.getAlpha(siteAbsentFromPoN), MLE_ALPHA_EXPECTED, DELTA);
    Assert.assertEquals(allelicPoN.getBeta(siteAbsentFromPoN), MLE_BETA_EXPECTED, DELTA);
    Assert.assertEquals(allelicPoN.getGlobalMeanBias(), MLE_MEAN_BIAS_EXPECTED, DELTA);
    Assert.assertEquals(allelicPoN.getGlobalBiasVariance(), MLE_BIAS_VARIANCE_EXPECTED, DELTA);
}
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute: class JointAFCRSegmenterUnitTest, method testSegmentation.
/**
 * End-to-end test of joint allele-fraction / copy-ratio segmentation on simulated data:
 * draws a hidden-state chain from a known joint HMM, generates het and coverage data from it,
 * runs the segmenter, and checks that the recovered per-position minor-allele fractions and
 * copy ratios are close to the truth on average.
 * NOTE: statement order matters here — the data generated depend on the exact sequence of
 * draws from the shared RandomGenerator, so do not reorder the rng-consuming calls.
 */
@Test
public void testSegmentation() {
// Fixed seed keeps the simulated chain and data deterministic across runs.
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
// probability that a datum is a het i.e. #hets / (#hets + #targets)
final double hetProportion = 0.25;
// Ground-truth HMM: three hidden states, each a (minor-allele fraction, log2 copy ratio) pair.
final List<Double> trueWeights = Arrays.asList(0.2, 0.5, 0.3);
final double[] trueMinorAlleleFractions = new double[] { 0.12, 0.32, 0.5 };
final double[] trueLog2CopyRatios = new double[] { -2.0, 0.0, 1.7 };
final List<AFCRHiddenState> trueJointStates = IntStream.range(0, trueLog2CopyRatios.length).mapToObj(n -> new AFCRHiddenState(trueMinorAlleleFractions[n], trueLog2CopyRatios[n])).collect(Collectors.toList());
final double trueMemoryLength = 1e5;
final double trueCauchyWidth = 0.2;
// The segmenter starts with deliberately more states than the truth (3) and must prune down.
final int initialNumCRStates = 20;
final int initialNumAFStates = 20;
final AlleleFractionGlobalParameters trueAFParams = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);
final JointAFCRHMM trueJointModel = new JointAFCRHMM(trueJointStates, trueWeights, trueMemoryLength, trueAFParams, AllelicPanelOfNormals.EMPTY_PON, trueCauchyWidth);
// generate joint truth
final int chainLength = 10000;
// Positions spaced ~trueMemoryLength/4 apart so several transitions occur along the chain.
final List<SimpleInterval> positions = CopyRatioSegmenterUnitTest.randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
final List<Integer> trueHiddenStates = trueJointModel.generateHiddenStateChain(positions);
final List<AFCRHiddenState> trueAFCRSequence = trueHiddenStates.stream().map(trueJointModel::getHiddenStateValue).collect(Collectors.toList());
// Per-position truth sequences used later to measure recovery error.
final double[] trueCopyRatioSequence = trueAFCRSequence.stream().mapToDouble(AFCRHiddenState::getLog2CopyRatio).toArray();
final double[] trueAlleleFractionSequence = trueAFCRSequence.stream().mapToDouble(AFCRHiddenState::getMinorAlleleFraction).toArray();
// generate separate af and cr data
final GammaDistribution biasGenerator = AlleleFractionSegmenterUnitTest.getGammaDistribution(trueAFParams, rng);
final double outlierProbability = trueAFParams.getOutlierProbability();
final AllelicCountCollection afData = new AllelicCountCollection();
final List<Double> crData = new ArrayList<>();
final List<Target> crTargets = new ArrayList<>();
// Each position becomes either a het (allele-fraction datum) or a target (copy-ratio datum).
for (int n = 0; n < positions.size(); n++) {
final SimpleInterval position = positions.get(n);
final AFCRHiddenState jointState = trueAFCRSequence.get(n);
final double minorFraction = jointState.getMinorAlleleFraction();
final double log2CopyRatio = jointState.getLog2CopyRatio();
if (rng.nextDouble() < hetProportion) {
// het datum
afData.add(AlleleFractionSegmenterUnitTest.generateAllelicCount(minorFraction, position, rng, biasGenerator, outlierProbability));
} else {
//target datum
crTargets.add(new Target(position));
crData.add(CopyRatioSegmenterUnitTest.generateData(trueCauchyWidth, log2CopyRatio, rng));
}
}
// Single-sample read-count collection built from the simulated coverage data.
final ReadCountCollection rcc = new ReadCountCollection(crTargets, Arrays.asList("SAMPLE"), new Array2DRowRealMatrix(crData.stream().mapToDouble(x -> x).toArray()));
final JointAFCRSegmenter segmenter = JointAFCRSegmenter.createJointSegmenter(initialNumCRStates, rcc, initialNumAFStates, afData, AllelicPanelOfNormals.EMPTY_PON);
final TargetCollection<SimpleInterval> tc = new HashedListTargetCollection<>(positions);
final List<Pair<SimpleInterval, AFCRHiddenState>> segmentation = segmenter.findSegments();
// Wrap each segment's point estimates in error-free posteriors for comparison with truth.
final List<ACNVModeledSegment> jointSegments = segmentation.stream().map(pair -> {
final SimpleInterval position = pair.getLeft();
final AFCRHiddenState jointState = pair.getRight();
final PosteriorSummary crSummary = PerformJointSegmentation.errorlessPosterior(jointState.getLog2CopyRatio());
final PosteriorSummary afSummary = PerformJointSegmentation.errorlessPosterior(jointState.getMinorAlleleFraction());
return new ACNVModeledSegment(position, crSummary, afSummary);
}).collect(Collectors.toList());
// Expand segment-level estimates back to per-position sequences (one entry per position
// in each segment) so they can be compared elementwise against the truth sequences.
final double[] segmentCopyRatios = jointSegments.stream().flatMap(s -> Collections.nCopies(tc.targetCount(s.getInterval()), s.getSegmentMeanPosteriorSummary().getCenter()).stream()).mapToDouble(x -> x).toArray();
final double[] segmentMinorFractions = jointSegments.stream().flatMap(s -> Collections.nCopies(tc.targetCount(s.getInterval()), s.getMinorAlleleFractionPosteriorSummary().getCenter()).stream()).mapToDouble(x -> x).toArray();
// Mean absolute per-position error between recovered and true sequences.
final double averageMinorFractionError = Arrays.stream(MathArrays.ebeSubtract(trueAlleleFractionSequence, segmentMinorFractions)).map(Math::abs).average().getAsDouble();
final double averageCopyRatioError = Arrays.stream(MathArrays.ebeSubtract(trueCopyRatioSequence, segmentCopyRatios)).map(Math::abs).average().getAsDouble();
Assert.assertEquals(averageMinorFractionError, 0, 0.04);
Assert.assertEquals(averageCopyRatioError, 0, 0.04);
}
Aggregations