Use of org.broadinstitute.hellbender.tools.exome.sexgenotyper.SexGenotypeDataCollection in projects gatk-protected and gatk by broadinstitute.
From class GermlineCNVCaller, method runPipeline:
/**
 * The main routine
 *
 * @param ctx a nullable Spark context
 */
@Override
protected void runPipeline(@Nullable JavaSparkContext ctx) {
    final TargetCollection<Target> optionalTargetsCollections = optionalTargets.readTargetCollection(true);
    if (optionalTargetsCollections == null) {
        logger.info("No target file was provided: using all targets in the combined read counts table");
    }
    logger.info("Parsing the read counts table...");
    final ReadCountCollection readCounts = loadReadCountCollection(optionalTargetsCollections);
    logger.info("Parsing the sample sex genotypes table...");
    final SexGenotypeDataCollection sexGenotypeDataCollection = loadSexGenotypeDataCollection();
    logger.info("Parsing the germline contig ploidy annotation table...");
    final GermlinePloidyAnnotatedTargetCollection ploidyAnnotatedTargetCollection =
            loadGermlinePloidyAnnotatedTargetCollection(readCounts);
    logger.info("Parsing the copy number transition prior table and initializing the caches...");
    final IntegerCopyNumberTransitionProbabilityCacheCollection transitionProbabilityCacheCollection =
            createIntegerCopyNumberTransitionProbabilityCacheCollection();
    final IntegerCopyNumberExpectationsCalculator integerCopyNumberExpectationsCalculator =
            new IntegerCopyNumberExpectationsCalculator(transitionProbabilityCacheCollection,
                    params.getMinLearningReadCount());
    final CoverageModelParameters model = getCoverageModelParameters();
    Utils.validateArg(model != null || !jobType.equals(JobType.CALL_ONLY),
            "Model parameters are not given; can not run the tool in the CALL_ONLY mode.");
    logger.info("Initializing the EM algorithm workspace...");
    final IntegerCopyNumberReferenceStateFactory referenceStateFactory =
            new IntegerCopyNumberReferenceStateFactory(ploidyAnnotatedTargetCollection);
    final CoverageModelEMWorkspace<IntegerCopyNumberState> workspace =
            new CoverageModelEMWorkspace<>(readCounts, ploidyAnnotatedTargetCollection, sexGenotypeDataCollection,
                    integerCopyNumberExpectationsCalculator, params, model, referenceStateFactory, ctx);
    final CoverageModelEMAlgorithm<IntegerCopyNumberState> algo =
            new CoverageModelEMAlgorithm<>(params, workspace);
    switch (jobType) {
        case LEARN_AND_CALL:
            algo.runExpectationMaximization();
            logger.info("Saving the model to disk...");
            workspace.writeModel(new File(outputPath, FINAL_MODEL_SUBDIR).getAbsolutePath());
            break;
        case CALL_ONLY:
            algo.runExpectation();
            break;
        default:
            throw new UnsupportedOperationException(
                    String.format("\"%s\" is not recognized as a supported job type", jobType.name()));
    }
    logger.info("Saving posteriors to disk...");
    workspace.writePosteriors(new File(outputPath, FINAL_POSTERIORS_SUBDIR).getAbsolutePath(),
            CoverageModelEMWorkspace.PosteriorVerbosityLevel.EXTENDED);
}
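In the pipeline above, the collection is built by the tool's own loadSexGenotypeDataCollection() helper, but the class can also be constructed directly from a file, as the test snippets below do. A minimal standalone sketch of loading a table and looking up one sample's genotype; the file path, sample name, and genotype tag in the comment are hypothetical, and only the constructor and accessors already shown on this page are used:

import java.io.File;
import java.io.IOException;

import org.broadinstitute.hellbender.tools.exome.sexgenotyper.SexGenotypeDataCollection;

public final class SexGenotypeLookupExample {
    public static void main(final String[] args) throws IOException {
        // Hypothetical path to a sample sex genotype table
        final File sexGenotypeTableFile = new File("sample_sex_genotypes.tsv");

        // The constructor parses the table; the test init() below declares IOException for this call
        final SexGenotypeDataCollection sexGenotypes = new SexGenotypeDataCollection(sexGenotypeTableFile);

        // Look up the sex genotype tag (e.g. a name such as "SEX_XX") for a hypothetical sample
        final String genotype = sexGenotypes.getSampleSexGenotypeData("SAMPLE_001").getSexGenotype();
        System.out.println("Sex genotype of SAMPLE_001: " + genotype);
    }
}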
Use of org.broadinstitute.hellbender.tools.exome.sexgenotyper.SexGenotypeDataCollection in projects gatk-protected and gatk by broadinstitute.
From class GermlineCNVCallerIntegrationTest, method init:
@BeforeSuite
public void init() throws IOException {
    LEARNING_SEX_GENOTYPES_DATA = new SexGenotypeDataCollection(TEST_LEARNING_SAMPLE_SEX_GENOTYPES_FILE);
    CALLING_SEX_GENOTYPES_DATA = new SexGenotypeDataCollection(TEST_CALLING_SAMPLE_SEX_GENOTYPES_FILE);
    GERMLINE_PLOIDY_ANNOTATIONS = new GermlinePloidyAnnotatedTargetCollection(
            ContigGermlinePloidyAnnotationTableReader.readContigGermlinePloidyAnnotationsFromFile(TEST_CONTIG_PLOIDY_ANNOTATIONS_FILE),
            TargetTableReader.readTargetFile(TEST_TARGETS_FILE));
}
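The @BeforeSuite annotation implies TestNG, and the fixture files live in the test resources. A self-contained sketch of the same parsing step written as a TestNG test; the resource path and sample name are hypothetical, and the constructor/accessor calls are the ones used in the snippets on this page:

import java.io.File;
import java.io.IOException;

import org.broadinstitute.hellbender.tools.exome.sexgenotyper.SexGenotypeDataCollection;
import org.testng.Assert;
import org.testng.annotations.Test;

public class SexGenotypeTableParsingTest {
    // Hypothetical fixture path; the real tests point at files under the project's test resources
    private static final File SEX_GENOTYPES_FILE = new File("src/test/resources/sex_genotypes.tsv");

    @Test
    public void testTableParsesAndResolvesSamples() throws IOException {
        final SexGenotypeDataCollection collection = new SexGenotypeDataCollection(SEX_GENOTYPES_FILE);
        // "SAMPLE_001" is a hypothetical sample name assumed to appear in the fixture table
        Assert.assertNotNull(collection.getSampleSexGenotypeData("SAMPLE_001").getSexGenotype(),
                "a sex genotype should be parsed for every sample in the table");
    }
}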
Use of org.broadinstitute.hellbender.tools.exome.sexgenotyper.SexGenotypeDataCollection in project gatk by broadinstitute.
From class GermlineCNVCallerIntegrationTest, method reportCopyNumberSummaryStatistics:
/* Shame on me for using {@link ReadCountCollection} to store copy numbers! */
private void reportCopyNumberSummaryStatistics(@Nonnull final File posteriorsOutputPath,
                                               @Nonnull final File truthCopyNumberFile,
                                               @Nonnull final List<Target> targets,
                                               @Nonnull final SexGenotypeDataCollection sexGenotypeDataCollection) {
    final ReadCountCollection truthCopyNumberCollection = loadTruthCopyNumberTable(truthCopyNumberFile, targets);
    final RealMatrix calledCopyNumberMatrix = Nd4jApacheAdapterUtils.convertINDArrayToApacheMatrix(
            Nd4jIOUtils.readNDArrayMatrixFromTextFile(
                    new File(posteriorsOutputPath, CoverageModelGlobalConstants.COPY_RATIO_VITERBI_FILENAME)));
    final ReadCountCollection calledCopyNumberCollection =
            new ReadCountCollection(targets, truthCopyNumberCollection.columnNames(), calledCopyNumberMatrix);
    final int numSamples = calledCopyNumberCollection.columnNames().size();
    final List<String> sampleSexGenotypes = truthCopyNumberCollection.columnNames().stream()
            .map(sampleName -> sexGenotypeDataCollection.getSampleSexGenotypeData(sampleName).getSexGenotype())
            .collect(Collectors.toList());
    final List<SampleCopyNumberSummaryStatistics> sampleSummaryStatisticsList = IntStream.range(0, numSamples)
            .mapToObj(si -> calculateSampleCopyNumberConcordance(truthCopyNumberCollection,
                    calledCopyNumberCollection, si, sampleSexGenotypes.get(si)))
            .collect(Collectors.toList());
    /* calculate various summary statistics */
    final AbstractUnivariateStatistic calculator = new Mean();
    final ConfusionRates homDelMedianRates = ConfusionMatrix.getConfusionRates(sampleSummaryStatisticsList.stream()
            .map(ss -> ss.homozygousDeletionConfusionMatrix).collect(Collectors.toList()), calculator);
    final ConfusionRates hetDelMedianRates = ConfusionMatrix.getConfusionRates(sampleSummaryStatisticsList.stream()
            .map(ss -> ss.heterozygousDeletionConfusionMatrix).collect(Collectors.toList()), calculator);
    final ConfusionRates dupMedianRates = ConfusionMatrix.getConfusionRates(sampleSummaryStatisticsList.stream()
            .map(ss -> ss.duplicationConfusionMatrix).collect(Collectors.toList()), calculator);
    final double absoluteConcordance = Concordance.getCollectionConcordance(sampleSummaryStatisticsList.stream()
            .map(ss -> ss.absoluteCopyNumberConcordance).collect(Collectors.toList()), calculator);
    /* log */
    logger.info("Homozygous deletion statistics: " + homDelMedianRates);
    logger.info("Heterozygous deletion statistics: " + hetDelMedianRates);
    logger.info("Duplication statistics: " + dupMedianRates);
    logger.info(String.format("Absolute copy number calling concordance: %f", absoluteConcordance));
}
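One detail worth noting: the *MedianRates variables above are actually aggregated with a Mean. If a median were intended, Apache Commons Math's Median is a drop-in replacement, since getConfusionRates only needs an AbstractUnivariateStatistic. A minimal sketch of the swap, reusing the names from the method above:

import org.apache.commons.math3.stat.descriptive.AbstractUnivariateStatistic;
import org.apache.commons.math3.stat.descriptive.rank.Median;

// Median extends AbstractUnivariateStatistic, so it can replace Mean directly
final AbstractUnivariateStatistic medianCalculator = new Median();
final ConfusionRates homDelTrueMedianRates = ConfusionMatrix.getConfusionRates(
        sampleSummaryStatisticsList.stream()
                .map(ss -> ss.homozygousDeletionConfusionMatrix)
                .collect(Collectors.toList()),
        medianCalculator);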