Search in sources :

Example 71 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.

The class XHMMModelUnitTest, method testLogTransitionProbabilityInDifferentChromosomes.

// Targets on different chromosomes belong to independent Markov chains, so the
// transition probability between them must reduce to the prior probability.
@Test(dataProvider = "testData", dependsOnMethods = { "testInstantiation", "testLogPrior" })
public void testLogTransitionProbabilityInDifferentChromosomes(final double eventStartProbability, final double meanEventSize, final double deletionMean, final double duplicationMean) {
    final XHMMModel hmm = new XHMMModel(eventStartProbability, meanEventSize, deletionMean, duplicationMean);
    // Identical coordinates but distinct contigs ("1" vs. "2") force the cross-chromosome case.
    final SimpleInterval sourceInterval = new SimpleInterval("1", 1, 100);
    final SimpleInterval destinationInterval = new SimpleInterval("2", 1, 100);
    final Target source = new Target("target1", sourceInterval);
    final Target destination = new Target("target2", destinationInterval);
    assertTransitionProbabilities(eventStartProbability, meanEventSize, hmm, source, destination);
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Example 72 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.

The class XHMMModelUnitTest, method testLogTransitionProbability.

// Checks transition probabilities between targets on the same chromosome at a
// range of inter-target distances, both downstream and upstream of the source.
@Test(dataProvider = "testData", dependsOnMethods = "testInstantiation")
public void testLogTransitionProbability(final double eventStartProbability, final double meanEventSize, final double deletionMean, final double duplicationMean) {
    final XHMMModel hmm = new XHMMModel(eventStartProbability, meanEventSize, deletionMean, duplicationMean);
    final Target source = new Target("target1", new SimpleInterval("1", 1000000000, 1000000100));
    for (final int distance : TEST_TARGET_DISTANCES) {
        // One candidate target on each side of the source, separated by the given distance.
        final Target downstream = new Target("target2", new SimpleInterval("1", 1000000100 + distance + 1, 1000000200 + distance + 1));
        final Target upstream = new Target("target2", new SimpleInterval("1", 999999900 - distance - 1, 1000000000 - distance - 1));
        // Same evaluation order as before: downstream first, then upstream.
        assertTransitionProbabilities(eventStartProbability, meanEventSize, hmm, source, downstream);
        assertTransitionProbabilities(eventStartProbability, meanEventSize, hmm, source, upstream);
    }
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Example 73 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.

The class CoverageModelParameters, method read.

/**
     * Reads the model from disk.
     *
     * @param modelPath input model path (a directory containing the model files)
     * @return an instance of {@link CoverageModelParameters}
     * @throws IllegalArgumentException if {@code modelPath} is null or does not exist
     * @throws UserException.CouldNotReadInputFile if the target list file cannot be read
     */
public static CoverageModelParameters read(@Nonnull final String modelPath) {
    final File modelPathFile = new File(Utils.nonNull(modelPath, "The input model path must be non-null"));
    Utils.validateArg(modelPathFile.exists(), "The model path does not exist: " + modelPathFile.getAbsolutePath());
    /* the target list is mandatory */
    final File targetListFile = new File(modelPath, CoverageModelGlobalConstants.TARGET_LIST_OUTPUT_FILE);
    final List<Target> targetList;
    try (final Reader reader = new FileReader(targetListFile)) {
        targetList = TargetTableReader.readTargetFromReader(targetListFile.getAbsolutePath(), reader);
    } catch (final IOException ex) {
        /* bug fix: this is a read failure, not a write failure — throw the matching
         * exception type and preserve the cause instead of dropping it */
        throw new UserException.CouldNotReadInputFile(targetListFile, ex);
    }
    /* all models must provide mean log bias and target-specific unexplained variance */
    final File targetMeanLogBiasFile = new File(modelPath, CoverageModelGlobalConstants.TARGET_MEAN_LOG_BIAS_OUTPUT_FILE);
    final INDArray targetMeanLogBias = Nd4jIOUtils.readNDArrayMatrixFromTextFile(targetMeanLogBiasFile).transpose();
    final File targetUnexplainedVarianceFile = new File(modelPath, CoverageModelGlobalConstants.TARGET_UNEXPLAINED_VARIANCE_OUTPUT_FILE);
    final INDArray targetUnexplainedVariance = Nd4jIOUtils.readNDArrayMatrixFromTextFile(targetUnexplainedVarianceFile).transpose();
    /* if the bias covariates file is found, then the model has bias covariates enabled */
    final File meanBiasCovariatesFile = new File(modelPath, CoverageModelGlobalConstants.MEAN_BIAS_COVARIATES_OUTPUT_FILE);
    final INDArray meanBiasCovariates, biasCovariatesARDCoefficients;
    if (meanBiasCovariatesFile.exists()) {
        meanBiasCovariates = Nd4jIOUtils.readNDArrayMatrixFromTextFile(meanBiasCovariatesFile);
        /* check to see if we have ARD coefficients */
        final File biasCovariatesARDCoefficientsFile = new File(modelPath, CoverageModelGlobalConstants.BIAS_COVARIATES_ARD_COEFFICIENTS_OUTPUT_FILE);
        if (biasCovariatesARDCoefficientsFile.exists()) {
            /* ARD enabled */
            biasCovariatesARDCoefficients = Nd4jIOUtils.readNDArrayMatrixFromTextFile(biasCovariatesARDCoefficientsFile);
        } else {
            biasCovariatesARDCoefficients = null;
        }
    } else {
        /* bias covariates disabled: neither mean covariates nor ARD coefficients */
        meanBiasCovariates = null;
        biasCovariatesARDCoefficients = null;
    }
    return new CoverageModelParameters(targetList, targetMeanLogBias, targetUnexplainedVariance, meanBiasCovariates, biasCovariatesARDCoefficients);
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) INDArray(org.nd4j.linalg.api.ndarray.INDArray) TargetTableReader(org.broadinstitute.hellbender.tools.exome.TargetTableReader) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 74 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.

The class CoverageModelParameters, method adaptModelToReadCountCollection (project gatk-protected).

/**
     * This method "adapts" a model to a read count collection in the following sense:
     *
     *     - removes targets that are not included in the model from the read counts collection
     *     - removes targets that are not in the read count collection from the model
     *     - rearranges model targets in the same order as read count collection targets
     *
     * The modifications are not done in-place and the original input parameters remain intact.
     *
     * @param model a model
     * @param readCounts a read count collection
     * @param logger a logger instance for progress reporting
     * @return a pair of the adapted model and the adapted read count collection
     * @throws IllegalArgumentException if any argument is null or the model and read counts share no targets
     */
public static ImmutablePair<CoverageModelParameters, ReadCountCollection> adaptModelToReadCountCollection(@Nonnull final CoverageModelParameters model, @Nonnull final ReadCountCollection readCounts, @Nonnull final Logger logger) {
    logger.info("Adapting model to read counts...");
    Utils.nonNull(model, "The model parameters must be non-null");
    Utils.nonNull(readCounts, "The read count collection must be non-null");
    Utils.nonNull(logger, "The logger must be non-null");
    final List<Target> modelTargetList = model.getTargetList();
    final List<Target> readCountsTargetList = readCounts.targets();
    /* targets present in both the model and the read counts; the list preserves read-count order */
    final Set<Target> mutualTargetSet = Sets.intersection(new HashSet<>(modelTargetList), new HashSet<>(readCountsTargetList));
    final List<Target> mutualTargetList = readCountsTargetList.stream().filter(mutualTargetSet::contains).collect(Collectors.toList());
    logger.info("Number of mutual targets: " + mutualTargetList.size());
    /* bug fix: error message typo "check there" corrected to "check that" */
    Utils.validateArg(mutualTargetList.size() > 0, "The intersection between model targets and targets from read count" + " collection is empty. Please check that the model is compatible with the given read count" + " collection.");
    if (modelTargetList.size() > mutualTargetList.size()) {
        logger.info("The following targets dropped from the model: " + Sets.difference(new HashSet<>(modelTargetList), mutualTargetSet).stream().map(Target::getName).collect(Collectors.joining(", ", "[", "]")));
    }
    if (readCountsTargetList.size() > mutualTargetList.size()) {
        logger.info("The following targets dropped from read counts: " + Sets.difference(new HashSet<>(readCountsTargetList), mutualTargetSet).stream().map(Target::getName).collect(Collectors.joining(", ", "[", "]")));
    }
    /* the targets in {@code subsetReadCounts} follow the original order of targets in {@code readCounts} */
    final ReadCountCollection subsetReadCounts = readCounts.subsetTargets(mutualTargetSet);
    /* fetch original model parameters */
    final INDArray originalModelTargetMeanBias = model.getTargetMeanLogBias();
    final INDArray originalModelTargetUnexplainedVariance = model.getTargetUnexplainedVariance();
    final INDArray originalModelMeanBiasCovariates = model.getMeanBiasCovariates();
    /* re-arrange targets, mean log bias, and target-specific unexplained variance */
    final Map<Target, Integer> modelTargetsToIndexMap = IntStream.range(0, modelTargetList.size()).mapToObj(ti -> ImmutablePair.of(modelTargetList.get(ti), ti)).collect(Collectors.toMap(Pair<Target, Integer>::getLeft, Pair<Target, Integer>::getRight));
    /* for each retained target, its column index in the original model arrays */
    final int[] newTargetIndicesInOriginalModel = mutualTargetList.stream().mapToInt(modelTargetsToIndexMap::get).toArray();
    final INDArray newModelTargetMeanBias = Nd4j.create(new int[] { 1, mutualTargetList.size() });
    final INDArray newModelTargetUnexplainedVariance = Nd4j.create(new int[] { 1, mutualTargetList.size() });
    IntStream.range(0, mutualTargetList.size()).forEach(ti -> {
        newModelTargetMeanBias.put(0, ti, originalModelTargetMeanBias.getDouble(0, newTargetIndicesInOriginalModel[ti]));
        newModelTargetUnexplainedVariance.put(0, ti, originalModelTargetUnexplainedVariance.getDouble(0, newTargetIndicesInOriginalModel[ti]));
    });
    /* if model has bias covariates and/or ARD, re-arrange mean/var of bias covariates as well */
    final INDArray newModelMeanBiasCovariates;
    if (model.isBiasCovariatesEnabled()) {
        newModelMeanBiasCovariates = Nd4j.create(new int[] { mutualTargetList.size(), model.getNumLatents() });
        IntStream.range(0, mutualTargetList.size()).forEach(ti -> {
            newModelMeanBiasCovariates.get(NDArrayIndex.point(ti), NDArrayIndex.all()).assign(originalModelMeanBiasCovariates.get(NDArrayIndex.point(newTargetIndicesInOriginalModel[ti]), NDArrayIndex.all()));
        });
    } else {
        newModelMeanBiasCovariates = null;
    }
    return ImmutablePair.of(new CoverageModelParameters(mutualTargetList, newModelTargetMeanBias, newModelTargetUnexplainedVariance, newModelMeanBiasCovariates, model.getBiasCovariateARDCoefficients()), subsetReadCounts);
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) NDArrayIndex(org.nd4j.linalg.indexing.NDArrayIndex) Nd4jIOUtils(org.broadinstitute.hellbender.tools.coveragemodel.nd4jutils.Nd4jIOUtils) Nd4j(org.nd4j.linalg.factory.Nd4j) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Sets(com.google.cloud.dataflow.sdk.repackaged.com.google.common.collect.Sets) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Pair(org.apache.commons.lang3.tuple.Pair) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) Target(org.broadinstitute.hellbender.tools.exome.Target) TargetTableReader(org.broadinstitute.hellbender.tools.exome.TargetTableReader) INDArray(org.nd4j.linalg.api.ndarray.INDArray) TargetWriter(org.broadinstitute.hellbender.tools.exome.TargetWriter) Utils(org.broadinstitute.hellbender.utils.Utils) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Target(org.broadinstitute.hellbender.tools.exome.Target) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection)

Example 75 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

The class CoverageModelParameters, method adaptModelToReadCountCollection (project gatk).

/**
     * This method "adapts" a model to a read count collection in the following sense:
     *
     *     - removes targets that are not included in the model from the read counts collection
     *     - removes targets that are not in the read count collection from the model
     *     - rearranges model targets in the same order as read count collection targets
     *
     * The modifications are not done in-place and the original input parameters remain intact.
     *
     * @param model a model
     * @param readCounts a read count collection
     * @param logger a logger instance for progress reporting
     * @return a pair of the adapted model and the adapted read count collection
     * @throws IllegalArgumentException if any argument is null or the model and read counts share no targets
     */
public static ImmutablePair<CoverageModelParameters, ReadCountCollection> adaptModelToReadCountCollection(@Nonnull final CoverageModelParameters model, @Nonnull final ReadCountCollection readCounts, @Nonnull final Logger logger) {
    logger.info("Adapting model to read counts...");
    Utils.nonNull(model, "The model parameters must be non-null");
    Utils.nonNull(readCounts, "The read count collection must be non-null");
    Utils.nonNull(logger, "The logger must be non-null");
    final List<Target> modelTargetList = model.getTargetList();
    final List<Target> readCountsTargetList = readCounts.targets();
    /* targets present in both the model and the read counts; the list preserves read-count order */
    final Set<Target> mutualTargetSet = Sets.intersection(new HashSet<>(modelTargetList), new HashSet<>(readCountsTargetList));
    final List<Target> mutualTargetList = readCountsTargetList.stream().filter(mutualTargetSet::contains).collect(Collectors.toList());
    logger.info("Number of mutual targets: " + mutualTargetList.size());
    /* bug fix: error message typo "check there" corrected to "check that" */
    Utils.validateArg(mutualTargetList.size() > 0, "The intersection between model targets and targets from read count" + " collection is empty. Please check that the model is compatible with the given read count" + " collection.");
    if (modelTargetList.size() > mutualTargetList.size()) {
        logger.info("The following targets dropped from the model: " + Sets.difference(new HashSet<>(modelTargetList), mutualTargetSet).stream().map(Target::getName).collect(Collectors.joining(", ", "[", "]")));
    }
    if (readCountsTargetList.size() > mutualTargetList.size()) {
        logger.info("The following targets dropped from read counts: " + Sets.difference(new HashSet<>(readCountsTargetList), mutualTargetSet).stream().map(Target::getName).collect(Collectors.joining(", ", "[", "]")));
    }
    /* the targets in {@code subsetReadCounts} follow the original order of targets in {@code readCounts} */
    final ReadCountCollection subsetReadCounts = readCounts.subsetTargets(mutualTargetSet);
    /* fetch original model parameters */
    final INDArray originalModelTargetMeanBias = model.getTargetMeanLogBias();
    final INDArray originalModelTargetUnexplainedVariance = model.getTargetUnexplainedVariance();
    final INDArray originalModelMeanBiasCovariates = model.getMeanBiasCovariates();
    /* re-arrange targets, mean log bias, and target-specific unexplained variance */
    final Map<Target, Integer> modelTargetsToIndexMap = IntStream.range(0, modelTargetList.size()).mapToObj(ti -> ImmutablePair.of(modelTargetList.get(ti), ti)).collect(Collectors.toMap(Pair<Target, Integer>::getLeft, Pair<Target, Integer>::getRight));
    /* for each retained target, its column index in the original model arrays */
    final int[] newTargetIndicesInOriginalModel = mutualTargetList.stream().mapToInt(modelTargetsToIndexMap::get).toArray();
    final INDArray newModelTargetMeanBias = Nd4j.create(new int[] { 1, mutualTargetList.size() });
    final INDArray newModelTargetUnexplainedVariance = Nd4j.create(new int[] { 1, mutualTargetList.size() });
    IntStream.range(0, mutualTargetList.size()).forEach(ti -> {
        newModelTargetMeanBias.put(0, ti, originalModelTargetMeanBias.getDouble(0, newTargetIndicesInOriginalModel[ti]));
        newModelTargetUnexplainedVariance.put(0, ti, originalModelTargetUnexplainedVariance.getDouble(0, newTargetIndicesInOriginalModel[ti]));
    });
    /* if model has bias covariates and/or ARD, re-arrange mean/var of bias covariates as well */
    final INDArray newModelMeanBiasCovariates;
    if (model.isBiasCovariatesEnabled()) {
        newModelMeanBiasCovariates = Nd4j.create(new int[] { mutualTargetList.size(), model.getNumLatents() });
        IntStream.range(0, mutualTargetList.size()).forEach(ti -> {
            newModelMeanBiasCovariates.get(NDArrayIndex.point(ti), NDArrayIndex.all()).assign(originalModelMeanBiasCovariates.get(NDArrayIndex.point(newTargetIndicesInOriginalModel[ti]), NDArrayIndex.all()));
        });
    } else {
        newModelMeanBiasCovariates = null;
    }
    return ImmutablePair.of(new CoverageModelParameters(mutualTargetList, newModelTargetMeanBias, newModelTargetUnexplainedVariance, newModelMeanBiasCovariates, model.getBiasCovariateARDCoefficients()), subsetReadCounts);
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) NDArrayIndex(org.nd4j.linalg.indexing.NDArrayIndex) Nd4jIOUtils(org.broadinstitute.hellbender.tools.coveragemodel.nd4jutils.Nd4jIOUtils) Nd4j(org.nd4j.linalg.factory.Nd4j) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Sets(com.google.cloud.dataflow.sdk.repackaged.com.google.common.collect.Sets) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Pair(org.apache.commons.lang3.tuple.Pair) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) Target(org.broadinstitute.hellbender.tools.exome.Target) TargetTableReader(org.broadinstitute.hellbender.tools.exome.TargetTableReader) INDArray(org.nd4j.linalg.api.ndarray.INDArray) TargetWriter(org.broadinstitute.hellbender.tools.exome.TargetWriter) Utils(org.broadinstitute.hellbender.utils.Utils) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Target(org.broadinstitute.hellbender.tools.exome.Target) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection)

Aggregations

Target (org.broadinstitute.hellbender.tools.exome.Target)110 Test (org.testng.annotations.Test)56 File (java.io.File)52 Collectors (java.util.stream.Collectors)42 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)42 ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)38 IOException (java.io.IOException)32 java.util (java.util)32 IntStream (java.util.stream.IntStream)32 Assert (org.testng.Assert)32 Pair (org.apache.commons.lang3.tuple.Pair)26 StandardArgumentDefinitions (org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions)26 UserException (org.broadinstitute.hellbender.exceptions.UserException)26 Genotype (htsjdk.variant.variantcontext.Genotype)22 List (java.util.List)22 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)22 CopyNumberTriState (org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState)22 DataProvider (org.testng.annotations.DataProvider)22 VariantContext (htsjdk.variant.variantcontext.VariantContext)20 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)20