use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.
the class GermlineCNVCallerIntegrationTest method runCaseSampleCallingTestOnLearnedModelParams.
private void runCaseSampleCallingTestOnLearnedModelParams(final String... extraArgs) {
runCommandLine(getCallingOnLearnedModelArgs(ArrayUtils.addAll(new String[] { "--" + GermlineCNVCaller.INPUT_MODEL_PATH_LONG_NAME, LEARNING_MODEL_OUTPUT_PATH.getAbsolutePath() }, extraArgs)));
final List<Target> callingTargets = TargetTableReader.readTargetFile(new File(CALLING_POSTERIORS_OUTPUT_PATH, CoverageModelGlobalConstants.TARGET_LIST_OUTPUT_FILE));
reportCopyNumberSummaryStatistics(CALLING_POSTERIORS_OUTPUT_PATH, TEST_CALLING_COMBINED_COPY_NUMBER_FILE, callingTargets, CALLING_SEX_GENOTYPES_DATA);
logger.info("Copy number concordance test passed for case sample calling");
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.
the class GermlineCNVCallerIntegrationTest method reportCopyNumberSummaryStatistics.
/* Shame on me for using {@link ReadCountCollection} to store copy numbers! */
private void reportCopyNumberSummaryStatistics(@Nonnull final File posteriorsOutputPath, @Nonnull final File truthCopyNumberFile, @Nonnull final List<Target> targets, @Nonnull final SexGenotypeDataCollection sexGenotypeDataCollection) {
final ReadCountCollection truthCopyNumberCollection = loadTruthCopyNumberTable(truthCopyNumberFile, targets);
final RealMatrix calledCopyNumberMatrix = Nd4jApacheAdapterUtils.convertINDArrayToApacheMatrix(Nd4jIOUtils.readNDArrayMatrixFromTextFile(new File(posteriorsOutputPath, CoverageModelGlobalConstants.COPY_RATIO_VITERBI_FILENAME)));
final ReadCountCollection calledCopyNumberCollection = new ReadCountCollection(targets, truthCopyNumberCollection.columnNames(), calledCopyNumberMatrix);
final int numSamples = calledCopyNumberCollection.columnNames().size();
final List<String> sampleSexGenotypes = truthCopyNumberCollection.columnNames().stream().map(sampleName -> sexGenotypeDataCollection.getSampleSexGenotypeData(sampleName).getSexGenotype()).collect(Collectors.toList());
final List<SampleCopyNumberSummaryStatistics> sampleSummaryStatisticsList = IntStream.range(0, numSamples).mapToObj(si -> calculateSampleCopyNumberConcordance(truthCopyNumberCollection, calledCopyNumberCollection, si, sampleSexGenotypes.get(si))).collect(Collectors.toList());
/* calculation various summary statistics */
final AbstractUnivariateStatistic calculator = new Mean();
final ConfusionRates homDelMedianRates = ConfusionMatrix.getConfusionRates(sampleSummaryStatisticsList.stream().map(ss -> ss.homozygousDeletionConfusionMatrix).collect(Collectors.toList()), calculator);
final ConfusionRates hetDelMedianRates = ConfusionMatrix.getConfusionRates(sampleSummaryStatisticsList.stream().map(ss -> ss.heterozygousDeletionConfusionMatrix).collect(Collectors.toList()), calculator);
final ConfusionRates dupMedianRates = ConfusionMatrix.getConfusionRates(sampleSummaryStatisticsList.stream().map(ss -> ss.duplicationConfusionMatrix).collect(Collectors.toList()), calculator);
final double absoluteConcordance = Concordance.getCollectionConcordance(sampleSummaryStatisticsList.stream().map(ss -> ss.absoluteCopyNumberConcordance).collect(Collectors.toList()), calculator);
/* log */
logger.info("Homozygous deletion statistics: " + homDelMedianRates.toString());
logger.info("Heterozygous deletion statistics: " + hetDelMedianRates.toString());
logger.info("Duplication statistics: " + dupMedianRates.toString());
logger.info(String.format("Absolute copy number calling concordance: %f", absoluteConcordance));
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputTargetNumbers.
private void checkOutputTargetNumbers(final File targetsFile, final File vcfOutput) {
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext context : outputVariants) {
final List<Target> overlappingTargets = targets.targets(context);
Assert.assertEquals(context.getAttributeAsInt(XHMMSegmentGenotyper.NUMBER_OF_TARGETS_KEY, -1), overlappingTargets.size());
}
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method doWork.
@Override
protected Object doWork() {
final TargetCollection<Target> targets = targetArguments.readTargetCollection(false);
final VCFFileReader truthReader = openVCFReader(truthFile);
final VCFFileReader callsReader = openVCFReader(callsFile);
final GenotypeEvaluationRecordWriter caseWriter = openGenotypeEvaluationOutputWriter(caseDetailOutputFile);
if (samples.isEmpty()) {
samples = composeSetOfSamplesToEvaluate(callsReader);
}
final VariantContextWriter outputWriter = openVCFWriter(outputFile, samples);
final Map<String, EvaluationSampleSummaryRecord> sampleStats = samples.stream().collect(Collectors.toMap(s -> s, EvaluationSampleSummaryRecord::new));
final List<SimpleInterval> intervals = composeListOfProcessingIntervalsFromInputs(truthReader, callsReader);
for (final SimpleInterval interval : intervals) {
for (final VariantEvaluationContext vc : processInterval(truthReader, callsReader, interval, targets)) {
outputWriter.add(vc);
outputCases(caseWriter, vc, targets);
updateSampleStats(sampleStats, vc);
}
}
truthReader.close();
callsReader.close();
outputWriter.close();
closeCaseRecordWriter(caseWriter);
writeSampleSummaryFile(sampleSummaryOutputFile, sampleStats);
return "SUCCESS";
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method processInterval.
/**
* Processes a cluster of truth and called variants that may overlap over a genome region.
* <p>
* It returns the list of {@link VariantEvaluationContext} instances to be output for the given interval. These
* are already sorted by coordinates.
* </p>
* @param truthReader reader to the truth variants.
* @param callsReader reader to the called variants.
* @param interval the interval to analyze.
* @return never {@code null}.
*/
private List<VariantEvaluationContext> processInterval(final VCFFileReader truthReader, final VCFFileReader callsReader, final SimpleInterval interval, final TargetCollection<Target> targets) {
final List<VariantContext> truthVariants = variantQueryToList(truthReader, interval);
final List<VariantContext> callsVariants = variantQueryToList(callsReader, interval);
final List<VariantEvaluationContext> evaluatedVariants = new ArrayList<>(truthVariants.size() + callsVariants.size());
for (final VariantContext truth : truthVariants) {
// skip truth that does not overlap a single target.
if (targets.targetCount(truth) == 0) {
continue;
}
final List<VariantContext> overlappingCalls = callsVariants.stream().filter(vc -> IntervalUtils.overlaps(truth, vc)).collect(Collectors.toList());
evaluatedVariants.add(composeTruthOverlappingVariantContext(truth, overlappingCalls, targets));
}
for (final VariantContext call : callsVariants) {
// skip call that does not overlap a single target (the user might want to call on a smaller set of targets)
if (targets.targetCount(call) == 0) {
continue;
}
final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> IntervalUtils.overlaps(call, vc)).collect(Collectors.toList());
if (overlappingTruth.isEmpty()) {
evaluatedVariants.add(composeNonTruthOverlappingVariantContext(call, targets));
}
}
return evaluatedVariants.stream().sorted(IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR).map(this::applyVariantContextFilters).collect(Collectors.toList());
}
Aggregations