use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.
the class AlleleFractionSegmenterUnitTest method testSegmentation.
@Test
public void testSegmentation() {
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
final List<Double> trueWeights = Arrays.asList(0.2, 0.5, 0.3);
final List<Double> trueMinorAlleleFractions = Arrays.asList(0.12, 0.32, 0.5);
final double trueMemoryLength = 1e5;
final AlleleFractionGlobalParameters trueParams = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);
final AlleleFractionHMM trueModel = new AlleleFractionHMM(trueMinorAlleleFractions, trueWeights, trueMemoryLength, AllelicPanelOfNormals.EMPTY_PON, trueParams);
// randomly set positions
final int chainLength = 10000;
final List<SimpleInterval> positions = CopyRatioSegmenterUnitTest.randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
final List<Integer> trueStates = trueModel.generateHiddenStateChain(positions);
final List<Double> truthMinorFractions = trueStates.stream().map(trueModel::getMinorAlleleFraction).collect(Collectors.toList());
final AllelicCountCollection counts = generateCounts(truthMinorFractions, positions, rng, trueParams);
final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(10, counts, AllelicPanelOfNormals.EMPTY_PON);
final List<ModeledSegment> segments = segmenter.getModeledSegments();
final double[] segmentMinorFractions = segments.stream().flatMap(s -> Collections.nCopies((int) s.getTargetCount(), s.getSegmentMean()).stream()).mapToDouble(x -> x).toArray();
final double averageMinorFractionError = IntStream.range(0, truthMinorFractions.size()).mapToDouble(n -> Math.abs(segmentMinorFractions[n] - truthMinorFractions.get(n))).average().getAsDouble();
Assert.assertEquals(averageMinorFractionError, 0, 0.01);
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.
the class JointAFCRSegmenterUnitTest method testSegmentation.
@Test
public void testSegmentation() {
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
// probability that a datum is a het i.e. #hets / (#hets + #targets)
final double hetProportion = 0.25;
final List<Double> trueWeights = Arrays.asList(0.2, 0.5, 0.3);
final double[] trueMinorAlleleFractions = new double[] { 0.12, 0.32, 0.5 };
final double[] trueLog2CopyRatios = new double[] { -2.0, 0.0, 1.7 };
final List<AFCRHiddenState> trueJointStates = IntStream.range(0, trueLog2CopyRatios.length).mapToObj(n -> new AFCRHiddenState(trueMinorAlleleFractions[n], trueLog2CopyRatios[n])).collect(Collectors.toList());
final double trueMemoryLength = 1e5;
final double trueCauchyWidth = 0.2;
final int initialNumCRStates = 20;
final int initialNumAFStates = 20;
final AlleleFractionGlobalParameters trueAFParams = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);
final JointAFCRHMM trueJointModel = new JointAFCRHMM(trueJointStates, trueWeights, trueMemoryLength, trueAFParams, AllelicPanelOfNormals.EMPTY_PON, trueCauchyWidth);
// generate joint truth
final int chainLength = 10000;
final List<SimpleInterval> positions = CopyRatioSegmenterUnitTest.randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
final List<Integer> trueHiddenStates = trueJointModel.generateHiddenStateChain(positions);
final List<AFCRHiddenState> trueAFCRSequence = trueHiddenStates.stream().map(trueJointModel::getHiddenStateValue).collect(Collectors.toList());
final double[] trueCopyRatioSequence = trueAFCRSequence.stream().mapToDouble(AFCRHiddenState::getLog2CopyRatio).toArray();
final double[] trueAlleleFractionSequence = trueAFCRSequence.stream().mapToDouble(AFCRHiddenState::getMinorAlleleFraction).toArray();
// generate separate af and cr data
final GammaDistribution biasGenerator = AlleleFractionSegmenterUnitTest.getGammaDistribution(trueAFParams, rng);
final double outlierProbability = trueAFParams.getOutlierProbability();
final AllelicCountCollection afData = new AllelicCountCollection();
final List<Double> crData = new ArrayList<>();
final List<Target> crTargets = new ArrayList<>();
for (int n = 0; n < positions.size(); n++) {
final SimpleInterval position = positions.get(n);
final AFCRHiddenState jointState = trueAFCRSequence.get(n);
final double minorFraction = jointState.getMinorAlleleFraction();
final double log2CopyRatio = jointState.getLog2CopyRatio();
if (rng.nextDouble() < hetProportion) {
// het datum
afData.add(AlleleFractionSegmenterUnitTest.generateAllelicCount(minorFraction, position, rng, biasGenerator, outlierProbability));
} else {
//target datum
crTargets.add(new Target(position));
crData.add(CopyRatioSegmenterUnitTest.generateData(trueCauchyWidth, log2CopyRatio, rng));
}
}
final ReadCountCollection rcc = new ReadCountCollection(crTargets, Arrays.asList("SAMPLE"), new Array2DRowRealMatrix(crData.stream().mapToDouble(x -> x).toArray()));
final JointAFCRSegmenter segmenter = JointAFCRSegmenter.createJointSegmenter(initialNumCRStates, rcc, initialNumAFStates, afData, AllelicPanelOfNormals.EMPTY_PON);
final TargetCollection<SimpleInterval> tc = new HashedListTargetCollection<>(positions);
final List<Pair<SimpleInterval, AFCRHiddenState>> segmentation = segmenter.findSegments();
final List<ACNVModeledSegment> jointSegments = segmentation.stream().map(pair -> {
final SimpleInterval position = pair.getLeft();
final AFCRHiddenState jointState = pair.getRight();
final PosteriorSummary crSummary = PerformJointSegmentation.errorlessPosterior(jointState.getLog2CopyRatio());
final PosteriorSummary afSummary = PerformJointSegmentation.errorlessPosterior(jointState.getMinorAlleleFraction());
return new ACNVModeledSegment(position, crSummary, afSummary);
}).collect(Collectors.toList());
final double[] segmentCopyRatios = jointSegments.stream().flatMap(s -> Collections.nCopies(tc.targetCount(s.getInterval()), s.getSegmentMeanPosteriorSummary().getCenter()).stream()).mapToDouble(x -> x).toArray();
final double[] segmentMinorFractions = jointSegments.stream().flatMap(s -> Collections.nCopies(tc.targetCount(s.getInterval()), s.getMinorAlleleFractionPosteriorSummary().getCenter()).stream()).mapToDouble(x -> x).toArray();
final double averageMinorFractionError = Arrays.stream(MathArrays.ebeSubtract(trueAlleleFractionSequence, segmentMinorFractions)).map(Math::abs).average().getAsDouble();
final double averageCopyRatioError = Arrays.stream(MathArrays.ebeSubtract(trueCopyRatioSequence, segmentCopyRatios)).map(Math::abs).average().getAsDouble();
Assert.assertEquals(averageMinorFractionError, 0, 0.04);
Assert.assertEquals(averageCopyRatioError, 0, 0.04);
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute.
the class AlleleFractionSegmenterUnitTest method testChromosomesOnDifferentSegments.
@Test
public void testChromosomesOnDifferentSegments() {
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
final double[] trueMinorAlleleFractions = new double[] { 0.12, 0.32, 0.5 };
final double trueMemoryLength = 1e5;
final AlleleFractionGlobalParameters trueParams = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);
// randomly set positions
final int chainLength = 100;
final List<SimpleInterval> positions = CopyRatioSegmenterUnitTest.randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
positions.addAll(CopyRatioSegmenterUnitTest.randomPositions("chr2", chainLength, rng, trueMemoryLength / 4));
positions.addAll(CopyRatioSegmenterUnitTest.randomPositions("chr3", chainLength, rng, trueMemoryLength / 4));
//fix everything to the same state 2
final int trueState = 2;
final List<Double> minorAlleleFractionSequence = Collections.nCopies(positions.size(), trueMinorAlleleFractions[trueState]);
final AllelicCountCollection counts = generateCounts(minorAlleleFractionSequence, positions, rng, trueParams);
final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(10, counts, AllelicPanelOfNormals.EMPTY_PON);
final List<ModeledSegment> segments = segmenter.getModeledSegments();
//check that each chromosome has at least one segment
final int numDifferentContigsInSegments = (int) segments.stream().map(ModeledSegment::getContig).distinct().count();
Assert.assertEquals(numDifferentContigsInSegments, 3);
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.
the class CalculatePulldownPhasePosteriorsIntegrationTest method testCalculatePhasePosteriors.
/**
* Uses {@link AlleleFractionSimulatedData} to test recovery of phase indicators. Phase with highest posterior
* probability is compared to the true phase; we require that
* {@link CalculatePulldownPhasePosteriorsIntegrationTest#FRACTION_OF_INDICATORS_CORRECT_THRESHOLD} of the
* indicators are recovered correctly.
*/
@Test
public void testCalculatePhasePosteriors() {
final double averageHetsPerSegment = 100;
final int numSegments = 100;
final int averageDepth = 100;
final double biasMean = 1.1;
final double biasVariance = 0.01;
final double outlierProbability = 0.02;
final AlleleFractionSimulatedData simulatedData = new AlleleFractionSimulatedData(averageHetsPerSegment, numSegments, averageDepth, biasMean, biasVariance, outlierProbability);
final SegmentedGenome segmentedGenome = simulatedData.getSegmentedGenome();
final AlleleFractionState trueState = simulatedData.getTrueState();
final AlleleFractionSimulatedData.PhaseIndicators truePhases = simulatedData.getTruePhases();
final AllelicCountCollection counts = new AllelicCountCollection();
//note that chromosomes are in lexicographical order
segmentedGenome.getGenome().getSNPs().targets().stream().forEach(counts::add);
final AllelicCountWithPhasePosteriorsCollection countsWithPhasePosteriors = CalculatePulldownPhasePosteriors.calculatePhasePosteriors(counts, segmentedGenome.getSegments(), trueState, AllelicPanelOfNormals.EMPTY_PON);
int numIndicatorsCorrect = 0;
//order is ALT_MINOR, REF_MINOR, OUTLIER
final Iterator<AlleleFractionIndicator> phaseIterator = truePhases.iterator();
final Iterator<AllelicCountWithPhasePosteriors> countWithPhasePosteriorsIterator = countsWithPhasePosteriors.getCounts().iterator();
while (phaseIterator.hasNext() && countWithPhasePosteriorsIterator.hasNext()) {
final AlleleFractionIndicator truePhase = phaseIterator.next();
final AllelicCountWithPhasePosteriors countWithPhasePosteriors = countWithPhasePosteriorsIterator.next();
final List<Double> phaseProbabilities = Arrays.asList(countWithPhasePosteriors.getAltMinorProb(), countWithPhasePosteriors.getRefMinorProb(), countWithPhasePosteriors.getOutlierProb());
final int indexOfMaxProbPhase = phaseProbabilities.indexOf(Collections.max(phaseProbabilities));
final AlleleFractionIndicator maxProbPhase = AlleleFractionIndicator.values()[indexOfMaxProbPhase];
if (maxProbPhase.equals(truePhase)) {
numIndicatorsCorrect++;
}
}
final double fractionOfIndicatorsCorrect = (double) numIndicatorsCorrect / countsWithPhasePosteriors.getCounts().size();
Assert.assertTrue(fractionOfIndicatorsCorrect >= FRACTION_OF_INDICATORS_CORRECT_THRESHOLD);
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.
the class CalculatePulldownPhasePosteriors method doWork.
@Override
public Object doWork() {
if (!new HDF5Library().load(null)) {
//Note: passing null means using the default temp dir.
throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
}
//read counts, segments, and parameters from files
final AllelicCountCollection counts = new AllelicCountCollection(snpCountsFile);
final List<ACNVModeledSegment> segments = SegmentUtils.readACNVModeledSegmentFile(segmentsFile);
final AlleleFractionState state = reconstructState(segments, parametersFile);
//load allelic-bias panel of normals if provided
final AllelicPanelOfNormals allelicPoN = allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
//calculate phase posteriors
final List<SimpleInterval> unmodeledSegments = segments.stream().map(ACNVModeledSegment::getInterval).collect(Collectors.toList());
final AllelicCountWithPhasePosteriorsCollection countsWithPhasePosteriors = calculatePhasePosteriors(counts, unmodeledSegments, state, allelicPoN);
//write phase posteriors to file with same verbosity as input file
countsWithPhasePosteriors.write(outputFile, counts.getVerbosity());
return "SUCCESS";
}
Aggregations