
Example 1 with IndexRange

Use of org.broadinstitute.hellbender.utils.IndexRange in project gatk by broadinstitute.

From class TargetCollectionUnitTest, method testIndexRangeHashCode.

@Test
public void testIndexRangeHashCode() {
    final IndexRange r1 = new IndexRange(1, 2);
    final IndexRange r1_bis = new IndexRange(1, 2);
    Assert.assertEquals(r1.hashCode(), r1_bis.hashCode());
    Assert.assertEquals(r1.hashCode(), r1.hashCode());
}
Also used : IndexRange(org.broadinstitute.hellbender.utils.IndexRange) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)
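
The assertions above only make sense if IndexRange defines value-based equals and hashCode. As a minimal companion sketch (assuming GATK is on the classpath; the class name and the HashSet usage are illustrative and not taken from the project), equal ranges collapsing to one set entry is the practical consequence of the contract the test asserts:

import org.broadinstitute.hellbender.utils.IndexRange;

import java.util.HashSet;
import java.util.Set;

public class IndexRangeHashCodeSketch {
    public static void main(final String[] args) {
        // Two ranges built from the same endpoints are equal, so a hash-based set keeps only one.
        final Set<IndexRange> ranges = new HashSet<>();
        ranges.add(new IndexRange(1, 2));
        ranges.add(new IndexRange(1, 2));
        System.out.println(ranges.size()); // prints 1 when equals and hashCode agree, as the test requires
    }
}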

Example 2 with IndexRange

Use of org.broadinstitute.hellbender.utils.IndexRange in project gatk by broadinstitute.

From class TargetCollectionUnitTest, method testIndexRangeEqual.

@Test
public void testIndexRangeEqual() {
    final IndexRange r1 = new IndexRange(1, 2);
    final IndexRange r1_bis = new IndexRange(1, 2);
    final IndexRange r2 = new IndexRange(2, 2);
    final IndexRange r3 = new IndexRange(1, 10);
    Assert.assertEquals(r1, r1_bis);
    Assert.assertEquals(r1, r1);
    Assert.assertNotEquals(r1, r2);
    Assert.assertNotEquals(r1, null);
    Assert.assertNotEquals(r1, new Object());
    Assert.assertNotEquals(r1, r3);
}
Also used : IndexRange(org.broadinstitute.hellbender.utils.IndexRange) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)
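
For readers unfamiliar with the class, the two constructor arguments name a half-open index interval: the loop in Example 4 below walks from range.from inclusive to range.to exclusive. The following sketch is illustrative only and relies just on the constructor and the public from/to fields that appear in these examples; it shows why IndexRange(1, 2) and IndexRange(1, 10) are not equal even though they start at the same index:

import org.broadinstitute.hellbender.utils.IndexRange;

public class IndexRangeEqualitySketch {
    public static void main(final String[] args) {
        final IndexRange r1 = new IndexRange(1, 2);   // covers the single index 1
        final IndexRange r3 = new IndexRange(1, 10);  // covers indices 1 through 9
        System.out.println(r1.equals(r3));            // false: same start, different end
        System.out.println(r3.to - r3.from);          // 9, the number of indices covered
    }
}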

Example 3 with IndexRange

Use of org.broadinstitute.hellbender.utils.IndexRange in project gatk by broadinstitute.

From class MixingFractionUnitTest, method testIO.

@Test
public void testIO() throws IOException {
    Utils.resetRandomGenerator();
    final File file = File.createTempFile("mixing_fractions", ".table");
    final String sample1 = "SAMPLE1";
    final double fraction1 = 0.15;
    final String sample2 = "SAMPLE2";
    final double fraction2 = 0.17;
    final List<MixingFraction> original = Arrays.asList(new MixingFraction(sample1, fraction1), new MixingFraction(sample2, fraction2));
    MixingFraction.writeMixingFractions(original, file);
    final List<MixingFraction> copy = MixingFraction.readMixingFractions(file);
    Assert.assertEquals(original.size(), copy.size());
    new IndexRange(0, original.size()).forEach(n -> {
        Assert.assertEquals(original.get(n).getSample(), copy.get(n).getSample());
        Assert.assertEquals(original.get(n).getMixingFraction(), copy.get(n).getMixingFraction());
    });
}
Also used : IndexRange(org.broadinstitute.hellbender.utils.IndexRange) File(java.io.File) MixingFraction(org.broadinstitute.hellbender.tools.walkers.validation.MixingFraction) Test(org.testng.annotations.Test)
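
The index-range forEach call is the idiom to notice here: rather than a manual for loop, the test iterates the index range [0, size) and compares the two lists position by position. Below is a standalone sketch of the same pattern, with made-up sample strings standing in for MixingFraction objects:

import org.broadinstitute.hellbender.utils.IndexRange;

import java.util.Arrays;
import java.util.List;

public class IndexRangeForEachSketch {
    public static void main(final String[] args) {
        final List<String> original = Arrays.asList("SAMPLE1", "SAMPLE2");
        final List<String> copy = Arrays.asList("SAMPLE1", "SAMPLE2");
        // Iterate the half-open index range [0, size) and compare the lists element by element.
        new IndexRange(0, original.size()).forEach(n -> {
            if (!original.get(n).equals(copy.get(n))) {
                throw new AssertionError("mismatch at index " + n);
            }
        });
        System.out.println("round-trip matches");
    }
}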

Example 4 with IndexRange

Use of org.broadinstitute.hellbender.utils.IndexRange in project gatk by broadinstitute.

From class CoverageModelEMWorkspace, method getViterbiAsNDArray.

/**
 * Fetches the Viterbi copy ratio (or copy number) states as a target-sample matrix
 *
 * @return an {@link INDArray}
 */
protected INDArray getViterbiAsNDArray(final List<List<HiddenStateSegmentRecord<STATE, Target>>> segments) {
    final INDArray res = Nd4j.create(numSamples, numTargets);
    final TargetCollection<Target> targetCollection = new HashedListTargetCollection<>(processedTargetList);
    for (int si = 0; si < numSamples; si++) {
        final SexGenotypeData sampleSexGenotype = processedSampleSexGenotypeData.get(si);
        /* start with all ref */
        final double[] calls = IntStream.range(0, numTargets).mapToDouble(ti -> referenceStateFactory.apply(sampleSexGenotype, processedTargetList.get(ti)).getScalar()).toArray();
        /* go through segments and mutate ref calls as necessary */
        segments.get(si).forEach(seg -> {
            final IndexRange range = targetCollection.indexRange(seg.getSegment());
            final double copyRatio = seg.getSegment().getCall().getScalar();
            for (int ti = range.from; ti < range.to; ti++) {
                calls[ti] = copyRatio;
            }
        });
        res.get(NDArrayIndex.point(si), NDArrayIndex.all()).assign(Nd4j.create(calls, new int[] { 1, numTargets }));
    }
    return res.transpose();
}
Also used : ScalarProducer(org.broadinstitute.hellbender.utils.hmm.interfaces.ScalarProducer) Function2(org.apache.spark.api.java.function.Function2) HMMSegmentProcessor(org.broadinstitute.hellbender.utils.hmm.segmentation.HMMSegmentProcessor) GermlinePloidyAnnotatedTargetCollection(org.broadinstitute.hellbender.tools.exome.sexgenotyper.GermlinePloidyAnnotatedTargetCollection) HiddenStateSegmentRecordWriter(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecordWriter) BiFunction(java.util.function.BiFunction) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) SexGenotypeData(org.broadinstitute.hellbender.tools.exome.sexgenotyper.SexGenotypeData) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) CallStringProducer(org.broadinstitute.hellbender.utils.hmm.interfaces.CallStringProducer) StorageLevel(org.apache.spark.storage.StorageLevel) SynchronizedUnivariateSolver(org.broadinstitute.hellbender.tools.coveragemodel.math.SynchronizedUnivariateSolver) CopyRatioExpectationsCalculator(org.broadinstitute.hellbender.tools.coveragemodel.interfaces.CopyRatioExpectationsCalculator) UnivariateSolverSpecifications(org.broadinstitute.hellbender.tools.coveragemodel.math.UnivariateSolverSpecifications) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Broadcast(org.apache.spark.broadcast.Broadcast) ExitStatus(org.broadinstitute.hellbender.tools.coveragemodel.linalg.IterativeLinearSolverNDArray.ExitStatus) SexGenotypeDataCollection(org.broadinstitute.hellbender.tools.exome.sexgenotyper.SexGenotypeDataCollection) HashPartitioner(org.apache.spark.HashPartitioner) Predicate(java.util.function.Predicate) GeneralLinearOperator(org.broadinstitute.hellbender.tools.coveragemodel.linalg.GeneralLinearOperator) Nd4j(org.nd4j.linalg.factory.Nd4j) INDArrayIndex(org.nd4j.linalg.indexing.INDArrayIndex) FastMath(org.apache.commons.math3.util.FastMath) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) AbstractUnivariateSolver(org.apache.commons.math3.analysis.solvers.AbstractUnivariateSolver) FourierLinearOperatorNDArray(org.broadinstitute.hellbender.tools.coveragemodel.linalg.FourierLinearOperatorNDArray) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) UnivariateFunction(org.apache.commons.math3.analysis.UnivariateFunction) TooManyEvaluationsException(org.apache.commons.math3.exception.TooManyEvaluationsException) Utils(org.broadinstitute.hellbender.utils.Utils) Function(org.apache.spark.api.java.function.Function) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) IntStream(java.util.stream.IntStream) java.util(java.util) NDArrayIndex(org.nd4j.linalg.indexing.NDArrayIndex) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) AlleleMetadataProducer(org.broadinstitute.hellbender.utils.hmm.interfaces.AlleleMetadataProducer) EmissionCalculationStrategy(org.broadinstitute.hellbender.tools.coveragemodel.CoverageModelCopyRatioEmissionProbabilityCalculator.EmissionCalculationStrategy) RobustBrentSolver(org.broadinstitute.hellbender.tools.coveragemodel.math.RobustBrentSolver) IntervalUtils(org.broadinstitute.hellbender.utils.IntervalUtils) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) HiddenStateSegmentRecord(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord) 
ImmutableTriple(org.apache.commons.lang3.tuple.ImmutableTriple) IterativeLinearSolverNDArray(org.broadinstitute.hellbender.tools.coveragemodel.linalg.IterativeLinearSolverNDArray) GATKProtectedMathUtils(org.broadinstitute.hellbender.utils.GATKProtectedMathUtils) Nd4jIOUtils(org.broadinstitute.hellbender.tools.coveragemodel.nd4jutils.Nd4jIOUtils) IOException(java.io.IOException) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) INDArray(org.nd4j.linalg.api.ndarray.INDArray) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Transforms(org.nd4j.linalg.ops.transforms.Transforms) LogManager(org.apache.logging.log4j.LogManager) NoBracketingException(org.apache.commons.math3.exception.NoBracketingException)
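
The core use of IndexRange in this method is the segment loop: targetCollection.indexRange(seg.getSegment()) translates a genomic segment into the contiguous block of target indices it covers, and the from/to fields bound the overwrite of the per-target call array. Here is a stripped-down sketch of that overwrite step, with hypothetical index values and copy-ratio numbers standing in for the GATK segment and target objects:

import org.broadinstitute.hellbender.utils.IndexRange;

import java.util.Arrays;

public class SegmentOverwriteSketch {
    public static void main(final String[] args) {
        // Start with an all-reference call track, then overwrite the slice covered by one segment,
        // mirroring the "start with all ref / mutate per segment" loop in getViterbiAsNDArray.
        final double[] calls = new double[10];
        Arrays.fill(calls, 1.0);                               // 1.0 stands in for the reference copy ratio
        final IndexRange segmentRange = new IndexRange(3, 7);  // hypothetical target indices of the segment
        final double segmentCopyRatio = 0.5;                   // hypothetical Viterbi call for that segment
        for (int ti = segmentRange.from; ti < segmentRange.to; ti++) {
            calls[ti] = segmentCopyRatio;
        }
        System.out.println(Arrays.toString(calls));            // indices 3..6 now carry the segment's call
    }
}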

Example 5 with IndexRange

Use of org.broadinstitute.hellbender.utils.IndexRange in project gatk by broadinstitute.

From class AlleleFrequencyCalculator, method getLog10PNonRef.

//TODO: this should be a class of static methods once the old AFCalculator is gone.
/**
 * Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc
 *
 * @param vc the VariantContext holding the alleles and sample information.  The VariantContext
 *           must have at least 1 alternative allele
 * @param refSnpIndelPseudocounts a total hack.  A length-3 vector containing Dirichlet prior pseudocounts to
 *                                be given to ref, alt SNP, and alt indel alleles.  Hack won't be necessary when we destroy the old AF calculators
 * @return result (for programming convenience)
 */
@Override
public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] refSnpIndelPseudocounts) {
    Utils.nonNull(vc, "VariantContext cannot be null");
    final int numAlleles = vc.getNAlleles();
    final List<Allele> alleles = vc.getAlleles();
    Utils.validateArg(numAlleles > 1, () -> "VariantContext has only a single reference allele, but getLog10PNonRef requires at least one alt allele " + vc);
    final double[] priorPseudocounts = alleles.stream().mapToDouble(a -> a.isReference() ? refPseudocount : (a.length() > 1 ? snpPseudocount : indelPseudocount)).toArray();
    double[] alleleCounts = new double[numAlleles];
    // log10(1/numAlleles)
    final double flatLog10AlleleFrequency = -MathUtils.log10(numAlleles);
    double[] log10AlleleFrequencies = new IndexRange(0, numAlleles).mapToDouble(n -> flatLog10AlleleFrequency);
    double alleleCountsMaximumDifference = Double.POSITIVE_INFINITY;
    while (alleleCountsMaximumDifference > THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE) {
        final double[] newAlleleCounts = effectiveAlleleCounts(vc, log10AlleleFrequencies);
        alleleCountsMaximumDifference = Arrays.stream(MathArrays.ebeSubtract(alleleCounts, newAlleleCounts)).map(Math::abs).max().getAsDouble();
        alleleCounts = newAlleleCounts;
        final double[] posteriorPseudocounts = MathArrays.ebeAdd(priorPseudocounts, alleleCounts);
        // first iteration uses flat prior in order to avoid local minimum where the prior + no pseudocounts gives such a low
        // effective allele frequency that it overwhelms the genotype likelihood of a real variant
        // basically, we want a chance to get non-zero pseudocounts before using a prior that's biased against a variant
        log10AlleleFrequencies = new Dirichlet(posteriorPseudocounts).log10MeanWeights();
    }
    double[] log10POfZeroCountsByAllele = new double[numAlleles];
    double log10PNoVariant = 0;
    for (final Genotype g : vc.getGenotypes()) {
        if (!g.hasLikelihoods()) {
            continue;
        }
        final int ploidy = g.getPloidy() == 0 ? defaultPloidy : g.getPloidy();
        final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, numAlleles);
        final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies);
        //the total probability
        log10PNoVariant += log10GenotypePosteriors[HOM_REF_GENOTYPE_INDEX];
        // per allele non-log space probabilities of zero counts for this sample
        // for each allele calculate the total probability of genotypes containing at least one copy of the allele
        final double[] log10ProbabilityOfNonZeroAltAlleles = new double[numAlleles];
        Arrays.fill(log10ProbabilityOfNonZeroAltAlleles, Double.NEGATIVE_INFINITY);
        for (int genotype = 0; genotype < glCalc.genotypeCount(); genotype++) {
            final double log10GenotypePosterior = log10GenotypePosteriors[genotype];
            glCalc.genotypeAlleleCountsAt(genotype).forEachAlleleIndexAndCount((alleleIndex, count) -> log10ProbabilityOfNonZeroAltAlleles[alleleIndex] = MathUtils.log10SumLog10(log10ProbabilityOfNonZeroAltAlleles[alleleIndex], log10GenotypePosterior));
        }
        for (int allele = 0; allele < numAlleles; allele++) {
            // if prob of non hom ref == 1 up to numerical precision, short-circuit to avoid NaN
            if (log10ProbabilityOfNonZeroAltAlleles[allele] >= 0) {
                log10POfZeroCountsByAllele[allele] = Double.NEGATIVE_INFINITY;
            } else {
                log10POfZeroCountsByAllele[allele] += MathUtils.log10OneMinusPow10(log10ProbabilityOfNonZeroAltAlleles[allele]);
            }
        }
    }
    // unfortunately AFCalculationResult expects integers for the MLE.  We really should emit the EM no-integer values
    // which are valuable (eg in CombineGVCFs) as the sufficient statistics of the Dirichlet posterior on allele frequencies
    final int[] integerAlleleCounts = Arrays.stream(alleleCounts).mapToInt(x -> (int) Math.round(x)).toArray();
    final int[] integerAltAlleleCounts = Arrays.copyOfRange(integerAlleleCounts, 1, numAlleles);
    //skip the ref allele (index 0)
    final Map<Allele, Double> log10PRefByAllele = IntStream.range(1, numAlleles).boxed().collect(Collectors.toMap(alleles::get, a -> log10POfZeroCountsByAllele[a]));
    // we compute posteriors here and don't have the same prior that AFCalculationResult expects.  Therefore, we
    // give it our posterior as its "likelihood" along with a flat dummy prior
    //TODO: HACK must be negative for AFCalcResult
    final double[] dummyFlatPrior = { -1e-10, -1e-10 };
    final double[] log10PosteriorOfNoVariantYesVariant = { log10PNoVariant, MathUtils.log10OneMinusPow10(log10PNoVariant) };
    return new AFCalculationResult(integerAltAlleleCounts, alleles, log10PosteriorOfNoVariantYesVariant, dummyFlatPrior, log10PRefByAllele);
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) MathArrays(org.apache.commons.math3.util.MathArrays) Dirichlet(org.broadinstitute.hellbender.utils.Dirichlet) GenotypeAlleleCounts(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts) Collectors(java.util.stream.Collectors) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator) GenotypeLikelihoodCalculators(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators) List(java.util.List) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Map(java.util.Map) VariantContext(htsjdk.variant.variantcontext.VariantContext) Utils(org.broadinstitute.hellbender.utils.Utils) IndexRange(org.broadinstitute.hellbender.utils.IndexRange)
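
One small numerical detail worth unpacking: the EM iteration starts from a flat allele-frequency prior, so each of the numAlleles alleles receives probability 1/numAlleles, which in log10 space is -log10(numAlleles) (about -0.477 for three alleles). The sketch below reproduces just that initialization step with IndexRange.mapToDouble, using plain java.lang.Math in place of GATK's MathUtils; the allele count of 3 is only a worked example:

import org.broadinstitute.hellbender.utils.IndexRange;

import java.util.Arrays;

public class FlatPriorSketch {
    public static void main(final String[] args) {
        final int numAlleles = 3;
        // log10(1/numAlleles): every allele starts with the same flat log10 frequency.
        final double flatLog10AlleleFrequency = -Math.log10(numAlleles);
        final double[] log10AlleleFrequencies =
                new IndexRange(0, numAlleles).mapToDouble(n -> flatLog10AlleleFrequency);
        System.out.println(Arrays.toString(log10AlleleFrequencies)); // three entries, each about -0.477
    }
}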

Aggregations

IndexRange (org.broadinstitute.hellbender.utils.IndexRange): 32 uses
Test (org.testng.annotations.Test): 11 uses
BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest): 9 uses
IntStream (java.util.stream.IntStream): 7 uses
Target (org.broadinstitute.hellbender.tools.exome.Target): 7 uses
CopyNumberTriState (org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState): 7 uses
MathUtils (org.broadinstitute.hellbender.utils.MathUtils): 7 uses
Allele (htsjdk.variant.variantcontext.Allele): 5 uses
VariantContext (htsjdk.variant.variantcontext.VariantContext): 5 uses
Collectors (java.util.stream.Collectors): 5 uses
MathArrays (org.apache.commons.math3.util.MathArrays): 5 uses
SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval): 5 uses
Utils (org.broadinstitute.hellbender.utils.Utils): 5 uses
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 4 uses
File (java.io.File): 4 uses
java.util (java.util): 4 uses
Genotype (htsjdk.variant.variantcontext.Genotype): 3 uses
Arrays (java.util.Arrays): 3 uses
List (java.util.List): 3 uses
Map (java.util.Map): 3 uses