Use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.
The class ACNVModeller, method fitModel:
/**
 * Performs Markov-Chain Monte Carlo model fitting using the
 * total number of samples and number of burn-in samples specified at construction.
 */
public void fitModel() {
    //perform MCMC to generate posterior samples
    logger.info("Fitting copy-ratio model...");
    copyRatioModeller = new CopyRatioModeller(segmentedGenome);
    copyRatioModeller.fitMCMC(numSamplesCopyRatio, numBurnInCopyRatio);
    logger.info("Fitting allele-fraction model...");
    alleleFractionModeller = new AlleleFractionModeller(segmentedGenome, allelicPoN);
    alleleFractionModeller.fitMCMC(numSamplesAlleleFraction, numBurnInAlleleFraction);
    //update list of ACNVModeledSegments with new PosteriorSummaries
    segments.clear();
    final List<SimpleInterval> unmodeledSegments = segmentedGenome.getSegments();
    final List<PosteriorSummary> segmentMeansPosteriorSummaries =
            copyRatioModeller.getSegmentMeansPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final List<PosteriorSummary> minorAlleleFractionsPosteriorSummaries =
            alleleFractionModeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    for (int segment = 0; segment < unmodeledSegments.size(); segment++) {
        segments.add(new ACNVModeledSegment(unmodeledSegments.get(segment),
                segmentMeansPosteriorSummaries.get(segment),
                minorAlleleFractionsPosteriorSummaries.get(segment)));
    }
    isModelFit = true;
}
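A minimal usage sketch, assuming the variables from the runPipeline snippet further down this page are in scope: per the comment in runPipeline, the ACNVModeller constructor performs the initial MCMC fit, after which fitModel() can be called again to regenerate the posterior summaries, e.g. once the segmentation has changed.

//sketch, not the project's own code: assumes segmentedGenome, allelicPoN, the
//sample/burn-in counts, and a JavaSparkContext ctx are in scope, as in the
//runPipeline snippet below
final ACNVModeller modeller = new ACNVModeller(segmentedGenome, allelicPoN,
        numSamplesCopyRatio, numBurnInCopyRatio, numSamplesAlleleFraction, numBurnInAlleleFraction, ctx);
//the constructor performs the initial fit; calling fitModel() again refreshes
//the posterior summaries, e.g. after segments are merged
modeller.fitModel();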
Use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.
The class AllelicCNV, method performSNPSegmentationStep:
//segment SNPs using CBS on per-SNP MLE minor allele fraction iteratively until convergence
//(final segment file is output as a side effect)
private List<SimpleInterval> performSNPSegmentationStep(final String sampleName, final Genome genome) {
    //initial segmentation of SNPs on per-SNP MLE minor allele fraction, assuming no allelic bias
    //(segments are written to temporary file)
    logger.info("Performing initial SNP segmentation (assuming no allelic bias)...");
    final File snpSegmentFile;
    try {
        snpSegmentFile = File.createTempFile(outputPrefix + "-" + SNP_MAF_SEG_FILE_TAG + "-tmp", SEGMENT_FILE_SUFFIX);
    } catch (final IOException e) {
        throw new UserException.CouldNotCreateOutputFile("Could not create temporary SNP segmentation file.", e);
    }
    List<SimpleInterval> snpSegments =
            calculateAndWriteSNPSegmentation(sampleName, genome, snpSegmentFile, INITIAL_ALLELIC_BIAS_GUESS);
    logger.info("Initial number of SNP segments: " + snpSegments.size());
    final Set<List<SimpleInterval>> snpSegmentationsFound = new HashSet<>();
    //perform iterations of SNP segmentation until convergence
    for (int numIterations = 1; numIterations <= maxNumSNPSegmentationIterations; numIterations++) {
        logger.info("SNP-segmentation iteration: " + numIterations);
        snpSegmentationsFound.add(snpSegments);
        //use AlleleFractionInitializer to determine mean of global allelic-bias distribution given current SNP segmentation
        final double allelicBias = calculateAllelicBias(genome, snpSegmentFile);
        logger.info(String.format("Mean allelic bias: %.3f", allelicBias));
        //resegment SNPs on per-SNP MLE minor allele fraction assuming mean allelic bias found by AlleleFractionInitializer
        //(segments are written to temporary file)
        snpSegments = calculateAndWriteSNPSegmentation(sampleName, genome, snpSegmentFile, allelicBias);
        logger.info("Number of SNP segments: " + snpSegments.size());
        //stop if a previously found SNP segmentation is found again
        if (snpSegmentationsFound.contains(snpSegments)) {
            //take final SNP segmentation to be the same as the last one found
            logger.info("Final number of SNP segments: " + snpSegments.size());
            final File finalSNPSegmentFile = new File(outputPrefix + "-" + SNP_MAF_SEG_FILE_TAG + SEGMENT_FILE_SUFFIX);
            snpSegments = calculateAndWriteSNPSegmentation(sampleName, genome, finalSNPSegmentFile, allelicBias);
            logSegmentFileWrittenMessage(finalSNPSegmentFile);
            break;
        }
    }
    return snpSegments;
}
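The loop above terminates as soon as a segmentation recurs, which catches fixed points as well as cycles. This works because List equality is element-wise and SimpleInterval is a value class with equals/hashCode, so a HashSet of segmentations can serve as the "seen" set. A self-contained toy sketch of the same pattern, where resegment is a hypothetical stand-in for calculateAndWriteSNPSegmentation:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.broadinstitute.hellbender.utils.SimpleInterval;

public final class ConvergenceSketch {
    public static void main(final String[] args) {
        final Set<List<SimpleInterval>> seen = new HashSet<>();
        List<SimpleInterval> current = Arrays.asList(new SimpleInterval("chr1", 1, 100));
        //iterate until a previously seen segmentation recurs
        while (!seen.contains(current)) {
            seen.add(current);
            current = resegment(current);
        }
        System.out.println("Converged to " + current.size() + " segment(s).");
    }

    //hypothetical stand-in: returns an equal segmentation, so the loop
    //exits on the second pass (a distinct List object, but element-wise equal)
    private static List<SimpleInterval> resegment(final List<SimpleInterval> segments) {
        return new ArrayList<>(segments);
    }
}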
Use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.
The class AllelicCNV, method runPipeline:
@Override
protected void runPipeline(final JavaSparkContext ctx) {
    validateArguments();
    if (!new HDF5Library().load(null)) {  //Note: passing null means using the default temp dir.
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " +
                "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    final String originalLogLevel = (ctx.getLocalProperty("logLevel") != null) ? ctx.getLocalProperty("logLevel") : "INFO";
    ctx.setLogLevel("WARN");
    //the string after the final slash in the output prefix (which may be an absolute file path) will be used as the sample name
    final String sampleName = outputPrefix.substring(outputPrefix.lastIndexOf("/") + 1);
    logger.info("Starting workflow for " + sampleName + "...");
    //make Genome from input target coverages and SNP counts
    logger.info("Loading input files...");
    final Genome genome = new Genome(tangentNormalizedCoverageFile, snpCountsFile);
    //load allelic-bias panel of normals if provided
    final AllelicPanelOfNormals allelicPoN =
            allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
    //load target-coverage segments from input file
    final List<ModeledSegment> targetSegmentsWithCalls = SegmentUtils.readModeledSegmentsFromSegmentFile(targetSegmentsFile);
    logger.info("Number of input target-coverage segments: " + targetSegmentsWithCalls.size());
    //merge copy-neutral and uncalled segments (unless disabled) and fix up target-segment start breakpoints
    logger.info("Preparing target-coverage segments...");
    final List<SimpleInterval> targetSegments = prepareTargetSegments(genome, targetSegmentsWithCalls);
    //segment SNPs using CBS on per-SNP MLE minor allele fraction iteratively until convergence
    //(final segment file is output as a side effect)
    logger.info("Performing SNP segmentation...");
    final List<SimpleInterval> snpSegments = performSNPSegmentationStep(sampleName, genome);
    //combine SNP and target-coverage segments
    logger.info("Combining SNP and target-coverage segments...");
    final List<SimpleInterval> unionedSegments = SegmentUtils.unionSegments(targetSegments, snpSegments, genome);
    logger.info("Number of segments after segment union: " + unionedSegments.size());
    final File unionedSegmentsFile = new File(outputPrefix + "-" + UNION_SEG_FILE_TAG + SEGMENT_FILE_SUFFIX);
    SegmentUtils.writeSegmentFileWithNumTargetsAndNumSNPs(unionedSegmentsFile, unionedSegments, genome);
    logSegmentFileWrittenMessage(unionedSegmentsFile);
    //small-segment merging (note that X and Y are always small segments and dropped, since GATK CNV drops them)
    logger.info("Merging small segments...");
    final SegmentedGenome segmentedGenomeWithSmallSegments = new SegmentedGenome(unionedSegments, genome);
    final SegmentedGenome segmentedGenome = segmentedGenomeWithSmallSegments.mergeSmallSegments(smallSegmentTargetNumberThreshold);
    logger.info("Number of segments after small-segment merging: " + segmentedGenome.getSegments().size());
    //initial MCMC model fitting performed by ACNVModeller constructor
    final ACNVModeller modeller = new ACNVModeller(segmentedGenome, allelicPoN,
            numSamplesCopyRatio, numBurnInCopyRatio, numSamplesAlleleFraction, numBurnInAlleleFraction, ctx);
    //write initial segments and parameters to file
    writeACNVModeledSegmentAndParameterFiles(modeller, INITIAL_FIT_FILE_TAG);
    //similar-segment merging (segment files are output for each merge iteration)
    logger.info("Merging similar segments...");
    performSimilarSegmentMergingStep(modeller);
    //write final segments and parameters to file
    writeACNVModeledSegmentAndParameterFiles(modeller, FINAL_FIT_FILE_TAG);
    ctx.setLogLevel(originalLogLevel);
    logger.info("SUCCESS: Allelic CNV run complete for sample " + sampleName + ".");
}
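One detail worth noting in the sample-name extraction above: String.lastIndexOf returns -1 when no slash is present, so substring(-1 + 1) yields the whole prefix, and both path-qualified and bare prefixes work without a special case. A minimal self-contained demonstration:

public final class SampleNameSketch {
    public static void main(final String[] args) {
        //both a path-qualified prefix and a bare prefix print "sampleA"
        for (final String outputPrefix : new String[] {"/tmp/out/sampleA", "sampleA"}) {
            System.out.println(outputPrefix.substring(outputPrefix.lastIndexOf("/") + 1));
        }
    }
}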
Use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.
The class AlleleFractionSegmenterUnitTest, method testSegmentation:
@Test
public void testSegmentation() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final List<Double> trueWeights = Arrays.asList(0.2, 0.5, 0.3);
    final List<Double> trueMinorAlleleFractions = Arrays.asList(0.12, 0.32, 0.5);
    final double trueMemoryLength = 1e5;
    final AlleleFractionGlobalParameters trueParams = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);
    final AlleleFractionHMM trueModel = new AlleleFractionHMM(trueMinorAlleleFractions, trueWeights,
            trueMemoryLength, AllelicPanelOfNormals.EMPTY_PON, trueParams);
    //randomly set positions
    final int chainLength = 10000;
    final List<SimpleInterval> positions = CopyRatioSegmenterUnitTest.randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
    final List<Integer> trueStates = trueModel.generateHiddenStateChain(positions);
    final List<Double> truthMinorFractions = trueStates.stream().map(trueModel::getMinorAlleleFraction).collect(Collectors.toList());
    final AllelicCountCollection counts = generateCounts(truthMinorFractions, positions, rng, trueParams);
    final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(10, counts, AllelicPanelOfNormals.EMPTY_PON);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    final double[] segmentMinorFractions = segments.stream()
            .flatMap(s -> Collections.nCopies((int) s.getTargetCount(), s.getSegmentMean()).stream())
            .mapToDouble(x -> x)
            .toArray();
    final double averageMinorFractionError = IntStream.range(0, truthMinorFractions.size())
            .mapToDouble(n -> Math.abs(segmentMinorFractions[n] - truthMinorFractions.get(n)))
            .average().getAsDouble();
    Assert.assertEquals(averageMinorFractionError, 0, 0.01);
}
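The flatMap/nCopies expression above expands segment-level means back into a per-data-point array so they can be compared element-wise against the simulated truth. A self-contained toy illustration of that idiom (the counts and means here are made up):

import java.util.Arrays;
import java.util.Collections;
import java.util.stream.IntStream;

public final class SegmentExpansionSketch {
    public static void main(final String[] args) {
        final int[] pointsPerSegment = {3, 2};      //data points covered by each segment
        final double[] segmentMeans = {0.12, 0.32}; //one fitted mean per segment
        //repeat each segment's mean once per data point it covers
        final double[] perPoint = IntStream.range(0, pointsPerSegment.length)
                .boxed()
                .flatMap(i -> Collections.nCopies(pointsPerSegment[i], segmentMeans[i]).stream())
                .mapToDouble(x -> x)
                .toArray();
        System.out.println(Arrays.toString(perPoint)); //[0.12, 0.12, 0.12, 0.32, 0.32]
    }
}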
Use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.
The class CopyRatioSegmenterUnitTest, method testChromosomesOnDifferentSegments:
@Test
public void testChromosomesOnDifferentSegments() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final double[] trueLog2CopyRatios = new double[] {-2.0, 0.0, 1.7};
    final double trueMemoryLength = 1e5;
    final double trueStandardDeviation = 0.2;
    //randomly set positions
    final int chainLength = 100;
    final List<SimpleInterval> positions = randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
    positions.addAll(randomPositions("chr2", chainLength, rng, trueMemoryLength / 4));
    positions.addAll(randomPositions("chr3", chainLength, rng, trueMemoryLength / 4));
    //fix everything to the same state 2
    final int trueState = 2;
    final List<Double> data = new ArrayList<>();
    for (int n = 0; n < positions.size(); n++) {
        final double copyRatio = trueLog2CopyRatios[trueState];
        final double observed = generateData(trueStandardDeviation, copyRatio, rng);
        data.add(observed);
    }
    final List<Target> targets = positions.stream().map(Target::new).collect(Collectors.toList());
    final ReadCountCollection rcc = new ReadCountCollection(targets, Arrays.asList("SAMPLE"),
            new Array2DRowRealMatrix(data.stream().mapToDouble(x -> x).toArray()));
    final CopyRatioSegmenter segmenter = new CopyRatioSegmenter(10, rcc);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    //check that each chromosome has at least one segment
    final int numDifferentContigsInSegments = (int) segments.stream().map(ModeledSegment::getContig).distinct().count();
    Assert.assertEquals(numDifferentContigsInSegments, 3);
}
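Both tests above depend on a randomPositions helper from CopyRatioSegmenterUnitTest that this page does not show. A plausible sketch under stated assumptions (single-base loci with exponentially distributed spacing of the given scale; the project's actual helper may differ in details):

import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math3.random.RandomGenerator;
import org.broadinstitute.hellbender.utils.SimpleInterval;

public final class RandomPositionsSketch {
    //hypothetical stand-in for CopyRatioSegmenterUnitTest.randomPositions
    public static List<SimpleInterval> randomPositions(final String contig, final int chainLength,
                                                       final RandomGenerator rng, final double separationScale) {
        final List<SimpleInterval> positions = new ArrayList<>();
        int position = 1;
        for (int n = 0; n < chainLength; n++) {
            //exponentially distributed spacing with mean separationScale:
            //-scale * ln(U) for U ~ Uniform(0,1), plus 1 to keep positions strictly increasing
            position += (int) (-separationScale * Math.log(rng.nextDouble())) + 1;
            positions.add(new SimpleInterval(contig, position, position));
        }
        return positions;  //mutable, so callers can addAll further contigs as the tests do
    }
}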