Usage of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in the broadinstitute gatk-protected project: class NormalizeSomaticReadCounts, method doWork.
/**
 * Tangent-normalizes the input proportional coverage against the panel of normals (PoN)
 * and writes the normalization results to the configured output files.
 *
 * @return the string {@code "SUCCESS"} on completion
 * @throws UserException.HardwareFeatureException if the native HDF5 library cannot be
 *         loaded on this platform (only x86-64 Linux and OSX are supported)
 */
@Override
protected Object doWork() {
    if (!new HDF5Library().load(null)) {
        // Note: passing null means using the default temp dir.
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    // Fail fast with a clear message if the PoN file is unreadable.
    IOUtils.canReadFile(ponFile);
    // try-with-resources guarantees the HDF5 handle is closed even if normalization throws.
    try (final HDF5File hdf5PoNFile = new HDF5File(ponFile)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(hdf5PoNFile, logger);
        final TargetCollection<Target> targetCollection = readTargetCollection(targetFile);
        final ReadCountCollection proportionalCoverageProfile = readInputReadCounts(readCountsFile, targetCollection);
        // Removed a stray empty statement (";") that followed this call in the original.
        final PCATangentNormalizationResult tangentNormalizationResult = pon.normalize(proportionalCoverageProfile);
        tangentNormalizationResult.write(getCommandLine(), tangentNormalizationOutFile, preTangentNormalizationOutFile, betaHatsOutFile, fntOutFile);
        return "SUCCESS";
    }
}
Usage of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in the broadinstitute gatk project: class NormalizeSomaticReadCountsIntegrationTest, method assertFactorNormalizedValues.
/**
 * Verifies that every entry of {@code factorNormalized} equals the corresponding
 * entry of {@code input} divided by the PoN target factor for that row's target.
 */
private void assertFactorNormalizedValues(final ReadCountCollection input, final ReadCountCollection factorNormalized) {
    try (final HDF5File reader = new HDF5File(TEST_PON)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(reader);
        final double[] factors = pon.getTargetFactors();
        // Build a lookup from PoN target name to its index in the factors array.
        final List<String> targetNames = pon.getTargetNames();
        final Map<String, Integer> indexByName = new HashMap<>(targetNames.size());
        int index = 0;
        for (final String name : targetNames) {
            indexByName.put(name, index++);
        }
        final RealMatrix before = input.counts();
        final RealMatrix after = factorNormalized.counts();
        final int rowCount = after.getRowDimension();
        for (int row = 0; row < rowCount; row++) {
            final String targetName = factorNormalized.targets().get(row).getName();
            final double factor = factors[indexByName.get(targetName)];
            final double[] original = before.getRow(row);
            final double[] normalized = after.getRow(row);
            for (int col = 0; col < original.length; col++) {
                final double expected = original[col] / factor;
                Assert.assertEquals(normalized[col], expected, 0.0000001, "" + row + " , " + col);
            }
        }
    }
}
Usage of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in the broadinstitute gatk-protected project: class CreatePanelOfNormalsIntegrationTest, method assertBasicPoNAssumptions.
/**
 * Asserts structural invariants of a freshly created PoN file: panel targets are a
 * subset of targets, targets are a strict subset of raw targets, each target list
 * matches its name list in size, and (when the original target file is given) the
 * raw targets exactly reproduce the targets used to build the PoN, in order.
 *
 * @param ponFile the HDF5 PoN file to check
 * @param initialTargetsFileUsedToCreatePoN the target file used to create the PoN,
 *        or {@code null} to skip the raw-target comparison
 */
private void assertBasicPoNAssumptions(final File ponFile, final File initialTargetsFileUsedToCreatePoN) {
    try (final HDF5File ponHDF5File = new HDF5File(ponFile)) {
        final HDF5PCACoveragePoN pon = new HDF5PCACoveragePoN(ponHDF5File);
        Assert.assertTrue(pon.getTargets().size() >= pon.getPanelTargets().size());
        Assert.assertTrue(pon.getRawTargets().size() > pon.getTargets().size());
        // Each name list must be the same size as its corresponding target list.
        // The last two checks previously compared a list's size to itself (always true);
        // they now compare the name lists against the target lists as intended.
        Assert.assertEquals(pon.getTargetNames().size(), pon.getTargets().size());
        Assert.assertEquals(pon.getPanelTargetNames().size(), pon.getPanelTargets().size());
        Assert.assertEquals(pon.getRawTargetNames().size(), pon.getRawTargets().size());
        if (initialTargetsFileUsedToCreatePoN != null) {
            final TargetCollection<Target> tc = TargetArgumentCollection.readTargetCollection(initialTargetsFileUsedToCreatePoN);
            Assert.assertEquals(pon.getRawTargets().size(), tc.targetCount());
            // Check that the raw targets are the same, element by element and in order.
            Assert.assertTrue(IntStream.range(0, pon.getRawTargets().size()).allMatch(i -> pon.getRawTargets().get(i).equals(tc.target(i))));
        }
    }
}
Usage of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in the broadinstitute gatk project: class CreatePanelOfNormals, method runPipeline.
/**
 * Creates the panel of normals (PoN) HDF5 file, optionally running a QC pass first.
 *
 * When QC is enabled (the default), a temporary QC PoN is built, samples showing
 * arm-level events are collected into a blacklist, and the final PoN is rebuilt
 * without them (or, if none fail QC, only the reduction step is redone to save time).
 * With QC disabled, the PoN is built directly from all inputs. Target weights are
 * written as a side output unless this is a dry run.
 *
 * @param ctx the Spark context used for the PoN creation and QC computations
 * @throws UserException.HardwareFeatureException if the native HDF5 library cannot
 *         be loaded on this platform
 */
@Override
protected void runPipeline(final JavaSparkContext ctx) {
if (!new HDF5Library().load(null)) {
//Note: passing null means using the default temp dir.
throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
}
// Default the side-output paths to be derived from the main output file name.
if (blacklistOutFile == null) {
blacklistOutFile = new File(outFile + BLACKLIST_FILE_APPEND);
}
if (targetWeightsOutFile == null) {
targetWeightsOutFile = new File(outFile + TARGET_WEIGHTS_FILE_APPEND);
}
// Check parameters and load values to meet the backend PoN creation interface
validateArguments();
final TargetCollection<Target> targets = targetArguments.readTargetCollection(true);
// numberOfEigensamples is user-supplied as a string; empty means "choose automatically".
final OptionalInt numberOfEigensamples = parseNumberOfEigensamples(numberOfEigensamplesString);
// Create the PoN, including QC, if specified.
if (!isNoQc && !dryRun) {
logger.info("QC: Beginning creation of QC PoN...");
// The QC PoN is a throwaway intermediate; build it in a temp file with a fixed
// (NUM_QC_EIGENSAMPLES) eigensample count and no sample blacklist.
final File outputQCFile = IOUtils.createTempFile("qc-pon-", ".hd5");
HDF5PCACoveragePoNCreationUtils.create(ctx, outputQCFile, HDF5File.OpenMode.READ_WRITE, inputFile, targets, new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, OptionalInt.of(NUM_QC_EIGENSAMPLES), dryRun);
logger.info("QC: QC PoN created...");
logger.info("QC: Collecting suspicious samples...");
try (final HDF5File ponReader = new HDF5File(outputQCFile, HDF5File.OpenMode.READ_ONLY)) {
final PCACoveragePoN qcPoN = new HDF5PCACoveragePoN(ponReader);
// Samples with arm-level events are suspicious and excluded from the final PoN.
final List<String> failingSampleNames = CoveragePoNQCUtils.retrieveSamplesWithArmLevelEvents(qcPoN, ctx);
// The blacklist is always written, even when empty.
ParamUtils.writeStringListToFile(failingSampleNames, blacklistOutFile);
// If no suspicious samples were found, just redo the PoN reduction to save time.
if (failingSampleNames.size() != 0) {
logger.info("QC: Suspicious sample list created...");
logger.info("Creating final PoN with " + failingSampleNames.size() + " suspicious samples removed...");
// Full rebuild from the raw inputs, this time excluding the failing samples.
HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets, failingSampleNames, targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
} else {
logger.info("QC: No suspicious samples found ...");
logger.info("Creating final PoN only redo'ing the reduction step ...");
// Reuse the QC PoN's intermediate results; only the reduction is recomputed
// with the user-requested eigensample count.
HDF5PCACoveragePoNCreationUtils.redoReduction(ctx, numberOfEigensamples, outputQCFile, outFile, HDF5File.OpenMode.CREATE);
}
}
} else {
logger.info("Creating PoN directly (skipping QC)...");
HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets, new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
}
// Target weights are derived from the finished PoN; skip on dry runs since no
// usable PoN was produced.
if (!dryRun) {
logger.info("Writing target weights file to " + targetWeightsOutFile + "...");
writeTargetWeightsFile(outFile, targetWeightsOutFile);
}
logger.info("Done...");
}
Usage of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in the broadinstitute gatk-protected project: class NormalizeSomaticReadCountsIntegrationTest, method assertFactorNormalizedValues.
/**
 * Asserts that factor normalization divided each input count by the PoN target
 * factor of the corresponding target.
 *
 * For every row of {@code factorNormalized}, the target's factor is looked up by
 * name in the PoN and each normalized value is compared (within 1e-7) against the
 * matching input value divided by that factor.
 *
 * @param input the read counts before factor normalization
 * @param factorNormalized the read counts after factor normalization
 */
private void assertFactorNormalizedValues(final ReadCountCollection input, final ReadCountCollection factorNormalized) {
try (final HDF5File ponReader = new HDF5File(TEST_PON)) {
final PCACoveragePoN pon = new HDF5PCACoveragePoN(ponReader);
final double[] targetFactors = pon.getTargetFactors();
final List<String> ponTargets = pon.getTargetNames();
// Map each PoN target name to its index in the targetFactors array.
final Map<String, Integer> ponTargetIndexes = new HashMap<>(ponTargets.size());
for (int i = 0; i < ponTargets.size(); i++) {
ponTargetIndexes.put(ponTargets.get(i), i);
}
final RealMatrix inputCounts = input.counts();
final RealMatrix factorNormalizedCounts = factorNormalized.counts();
// Rows are targets; look up each row's factor by the target's name.
for (int i = 0; i < factorNormalizedCounts.getRowDimension(); i++) {
final double factor = targetFactors[ponTargetIndexes.get(factorNormalized.targets().get(i).getName())];
final double[] inputValues = inputCounts.getRow(i);
final double[] outputValues = factorNormalizedCounts.getRow(i);
for (int j = 0; j < inputValues.length; j++) {
final double expected = inputValues[j] / factor;
// The message encodes the failing (row, column) position.
Assert.assertEquals(outputValues[j], expected, 0.0000001, "" + i + " , " + j);
}
}
}
}
Aggregations