use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.
the class GCCorrectorUnitTest method testGCCorrection.
@Test
public void testGCCorrection() {
final int numSamples = 5;
final int numTargets = 10000;
final Pair<ReadCountCollection, double[]> data = GCBiasSimulatedData.simulatedData(numTargets, numSamples);
final ReadCountCollection rcc1 = data.getLeft();
final double[] gcContentByTarget = data.getRight();
final ReadCountCollection correctedCounts1 = GCCorrector.correctCoverage(rcc1, gcContentByTarget);
final double[] correctedNoiseBySample = GATKProtectedMathUtils.columnStdDevs(correctedCounts1.counts());
Arrays.stream(correctedNoiseBySample).forEach(x -> Assert.assertTrue(x < 0.02));
//check that GC correction is approximately idempotent -- if you correct again, very little should happen
final ReadCountCollection correctedCounts2 = GCCorrector.correctCoverage(correctedCounts1, gcContentByTarget);
final double change1 = correctedCounts1.counts().subtract(rcc1.counts()).getFrobeniusNorm();
final double change2 = correctedCounts2.counts().subtract(correctedCounts1.counts()).getFrobeniusNorm();
Assert.assertTrue(change2 < change1 / 10);
}
use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.
the class TargetCoverageSexGenotyper method doWork.
@Override
protected Object doWork() {
/* check args */
IOUtils.canReadFile(inputRawReadCountsFile);
IOUtils.canReadFile(inputContigAnnotsFile);
Utils.validateArg(baselineMappingErrorProbability > 0 && baselineMappingErrorProbability < 1, "Must have 0 < baseline mapping error probability < 1.");
/* read input read counts */
final ReadCountCollection rawReadCounts;
try {
logger.info("Parsing raw read count collection file...");
rawReadCounts = ReadCountCollectionUtils.parse(inputRawReadCountsFile);
} catch (final IOException ex) {
throw new UserException.CouldNotReadInputFile("Could not read raw read count collection file");
}
/* parse contig genotype ploidy annotations */
final List<ContigGermlinePloidyAnnotation> contigGermlinePloidyAnnotationList;
logger.info("Parsing contig genotype ploidy annotations file...");
contigGermlinePloidyAnnotationList = ContigGermlinePloidyAnnotationTableReader.readContigGermlinePloidyAnnotationsFromFile(inputContigAnnotsFile);
/* parse target list */
final List<Target> inputTargetList;
if (inputTargetListFile != null) {
inputTargetList = TargetTableReader.readTargetFile(inputTargetListFile);
} else {
logger.info("Target list was not provided -- getting target list from the read count collection.");
inputTargetList = rawReadCounts.targets();
}
/* perform genotyping */
final TargetCoverageSexGenotypeCalculator genotyper = new TargetCoverageSexGenotypeCalculator(rawReadCounts, inputTargetList, contigGermlinePloidyAnnotationList, baselineMappingErrorProbability);
final SexGenotypeDataCollection sampleSexGenotypeCollection = genotyper.inferSexGenotypes();
/* save results */
try {
sampleSexGenotypeCollection.write(new FileWriter(outputSampleGenotypesFile));
} catch (final IOException ex) {
throw new UserException.CouldNotCreateOutputFile("Could not write inferred sample genotypes to file", ex);
}
return "SUCCESS";
}
use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.
the class PerformCopyRatioSegmentation method doWork.
@Override
public Object doWork() {
final String sampleName = ReadCountCollectionUtils.getSampleNameForCLIsFromReadCountsFile(new File(coverageFile));
final ReadCountCollection rcc;
try {
rcc = ReadCountCollectionUtils.parse(new File(coverageFile));
} catch (final IOException ex) {
throw new UserException.BadInput("could not read input file");
}
final CopyRatioSegmenter segmenter = new CopyRatioSegmenter(initialNumStates, rcc);
final List<ModeledSegment> segments = segmenter.getModeledSegments();
SegmentUtils.writeModeledSegmentFile(outputSegmentsFile, segments, sampleName, false);
return "SUCCESS";
}
use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.
the class GCCorrectorUnitTest method testGCCorrection.
@Test
public void testGCCorrection() {
final int numSamples = 5;
final int numTargets = 10000;
final Pair<ReadCountCollection, double[]> data = GCBiasSimulatedData.simulatedData(numTargets, numSamples);
final ReadCountCollection rcc1 = data.getLeft();
final double[] gcContentByTarget = data.getRight();
final ReadCountCollection correctedCounts1 = GCCorrector.correctCoverage(rcc1, gcContentByTarget);
final double[] correctedNoiseBySample = GATKProtectedMathUtils.columnStdDevs(correctedCounts1.counts());
Arrays.stream(correctedNoiseBySample).forEach(x -> Assert.assertTrue(x < 0.02));
//check that GC correction is approximately idempotent -- if you correct again, very little should happen
final ReadCountCollection correctedCounts2 = GCCorrector.correctCoverage(correctedCounts1, gcContentByTarget);
final double change1 = correctedCounts1.counts().subtract(rcc1.counts()).getFrobeniusNorm();
final double change2 = correctedCounts2.counts().subtract(correctedCounts1.counts()).getFrobeniusNorm();
Assert.assertTrue(change2 < change1 / 10);
}
use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.
the class TargetCoverageSexGenotypeCalculator method processReadCountsAndTargets.
/**
* Processes raw read counts and targets:
* <dl>
* <dt> If more than one sample is present in the collection, filters out fully uncovered targets
* from read counts and removes the uncovered targets from the target list</dt>
*
* <dt> Otherwise, does nothing and warns the user
* </dt>
* </dl>
*
* @param rawReadCounts raw read count collection
* @param targetList user provided target list
* @return pair of processed read counts and targets
*/
private ImmutablePair<ReadCountCollection, List<Target>> processReadCountsAndTargets(@Nonnull final ReadCountCollection rawReadCounts, @Nonnull final List<Target> targetList) {
final ReadCountCollection finalReadCounts;
final List<Target> finalTargetList;
/* remove totally uncovered targets */
if (rawReadCounts.columnNames().size() > 1) {
finalReadCounts = ReadCountCollectionUtils.removeTotallyUncoveredTargets(rawReadCounts, logger);
final Set<Target> targetSetFromProcessedReadCounts = new HashSet<>(finalReadCounts.targets());
finalTargetList = targetList.stream().filter(targetSetFromProcessedReadCounts::contains).collect(Collectors.toList());
} else {
final long numUncoveredTargets = rawReadCounts.records().stream().filter(rec -> (int) rec.getDouble(0) == 0).count();
final long numAllTargets = rawReadCounts.targets().size();
logger.info("Since only one sample is given for genotyping, the user is responsible for asserting" + " the aptitude of targets. Fully uncovered (irrelevant) targets can not be automatically" + " identified (total targets: " + numAllTargets + ", uncovered targets: " + numUncoveredTargets + ")");
finalReadCounts = rawReadCounts;
finalTargetList = targetList;
}
return ImmutablePair.of(finalReadCounts, finalTargetList);
}
Aggregations