Search in sources :

Example 26 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class GeneDifferentialExpressionServiceImpl method getDifferentialExpressionMetaAnalysis.

@Override
public DifferentialExpressionMetaAnalysisValueObject getDifferentialExpressionMetaAnalysis(double threshold, Gene g, Map<Long, Long> eeFactorsMap, Collection<BioAssaySet> activeExperiments) {
    StopWatch timer = new StopWatch();
    timer.start();
    /*
         * Get results for each active experiment on given gene. Handling the threshold check below since we ignore this
         * for the meta analysis. The results returned are for all factors, not just the factors we are seeking.
         */
    Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> resultsMap = differentialExpressionResultService.find(g, EntityUtils.getIds(activeExperiments));
    GeneDifferentialExpressionServiceImpl.log.debug(resultsMap.size() + " results for " + g + " in " + activeExperiments);
    DifferentialExpressionMetaAnalysisValueObject mavo = new DifferentialExpressionMetaAnalysisValueObject();
    DoubleArrayList pvaluesToCombine = new DoubleArrayList();
    /* a gene can have multiple probes that map to it, so store one diff value object for each probe */
    Collection<DifferentialExpressionValueObject> devos = new ArrayList<>();
    Collection<Long> eesThatMetThreshold = new HashSet<>();
    for (ExpressionExperimentValueObject ee : resultsMap.keySet()) {
        ExpressionExperimentValueObject eevo = this.configExpressionExperimentValueObject(ee);
        Collection<DifferentialExpressionValueObject> proberesults = resultsMap.get(ee);
        Collection<DifferentialExpressionValueObject> filteredResults = new HashSet<>();
        for (DifferentialExpressionValueObject r : proberesults) {
            Collection<ExperimentalFactorValueObject> efs = r.getExperimentalFactors();
            assert efs.size() > 0;
            if (efs.size() > 1) {
                // We always ignore interaction effects.
                continue;
            }
            ExperimentalFactorValueObject ef = efs.iterator().next();
            /*
                 * note that we don't care about the reverse: the eefactorsmap can have stuff we don't need. We focus on
                 * the experiments because they are easy to select & secure. The eefactorsmap provides additional
                 * details.
                 */
            assert eeFactorsMap.containsKey(ee.getId()) : "eeFactorsMap does not contain ee=" + ee.getId();
            Long sfId = eeFactorsMap.get(ee.getId());
            if (!ef.getId().equals(sfId)) {
                /*
                     * Screen out factors we're not using.
                     */
                continue;
            }
            /* filtered result with chosen factor */
            filteredResults.add(r);
        }
        if (filteredResults.size() == 0) {
            GeneDifferentialExpressionServiceImpl.log.warn("No result for ee=" + ee);
            continue;
        }
        /*
             * For the diff expression meta analysis, ignore threshold. Select the 'best' penalized probe if multiple
             * probes map to the same gene.
             */
        DifferentialExpressionValueObject res = this.findMinPenalizedProbeResult(filteredResults);
        if (res == null)
            continue;
        Double p = res.getP();
        if (p == null)
            continue;
        /*
             * Moderate the pvalues by setting all values to be no smaller than PVALUE_CLIP_THRESHOLD
             */
        pvaluesToCombine.add(Math.max(p, GeneDifferentialExpressionService.PVALUE_CLIP_THRESHOLD));
        for (DifferentialExpressionValueObject r : filteredResults) {
            Collection<ExperimentalFactorValueObject> efs = r.getExperimentalFactors();
            if (efs == null) {
                // This should not happen any more, but just in case.
                GeneDifferentialExpressionServiceImpl.log.warn("No experimentalFactor(s) for DifferentialExpressionAnalysisResult: " + r.getId());
                continue;
            }
            Boolean metThreshold = r.getCorrP() != null && (r.getCorrP() <= threshold);
            r.setMetThreshold(metThreshold);
            if (metThreshold) {
                eesThatMetThreshold.add(eevo.getId());
            }
            Boolean fisherContribution = r.equals(res);
            r.setFisherContribution(fisherContribution);
        }
    }
    /*
         * Meta-analysis part.
         */
    double fisherPval = MetaAnalysis.fisherCombinePvalues(pvaluesToCombine);
    mavo.setFisherPValue(fisherPval);
    mavo.setGene(new GeneValueObject(g));
    mavo.setActiveExperiments(activeExperiments);
    mavo.setProbeResults(devos);
    mavo.setNumMetThreshold(eesThatMetThreshold.size());
    mavo.setSortKey();
    timer.stop();
    if (timer.getTime() > 1000) {
        GeneDifferentialExpressionServiceImpl.log.info("Meta-analysis results: " + timer.getTime() + " ms");
    }
    return mavo;
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList) DifferentialExpressionValueObject(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionValueObject) DoubleArrayList(cern.colt.list.DoubleArrayList) StopWatch(org.apache.commons.lang3.time.StopWatch) GeneValueObject(ubic.gemma.model.genome.gene.GeneValueObject) DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 27 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class AbstractDifferentialExpressionAnalyzer method computeRanks.

/**
 * @param pvalues pvalues
 * @return normalized ranks of the pvalues, or null if they were invalid/unusable.
 */
double[] computeRanks(double[] pvalues) {
    if (pvalues == null) {
        log.error("Null pvalues");
        return null;
    }
    if (pvalues.length == 0) {
        log.error("Empty pvalues array");
        return null;
    }
    DoubleArrayList ranks = Rank.rankTransform(new DoubleArrayList(pvalues));
    if (ranks == null) {
        log.error("Pvalue ranks could not be computed");
        return null;
    }
    double[] normalizedRanks = new double[ranks.size()];
    for (int i = 0; i < ranks.size(); i++) {
        normalizedRanks[i] = ranks.get(i) / ranks.size();
    }
    return normalizedRanks;
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 28 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class DiffExMetaAnalyzerServiceImpl method doMetaAnalysis.

private GeneDifferentialExpressionMetaAnalysis doMetaAnalysis(Collection<ExpressionAnalysisResultSet> updatedResultSets, Map<Gene, Collection<DifferentialExpressionAnalysisResult>> gene2result) {
    DiffExMetaAnalyzerServiceImpl.log.info("Computing pvalues ...");
    DoubleArrayList pvaluesUp = new DoubleArrayList();
    DoubleArrayList pvaluesDown = new DoubleArrayList();
    // third pass: collate to get p-values. First we have to aggregate within result set for genes which have more
    // than one probe
    List<GeneDifferentialExpressionMetaAnalysisResult> metaAnalysisResultsUp = new ArrayList<>();
    List<GeneDifferentialExpressionMetaAnalysisResult> metaAnalysisResultsDown = new ArrayList<>();
    for (Gene g : gene2result.keySet()) {
        Map<ExpressionAnalysisResultSet, Collection<DifferentialExpressionAnalysisResult>> resultSet2Results4Gene = this.getResults4GenePerResultSet(g, gene2result);
        if (g.getOfficialSymbol().equals("GUK1")) {
            DiffExMetaAnalyzerServiceImpl.log.info(g);
        }
        /*
             * Compute the pvalues for each resultset.
             */
        DoubleArrayList pvalues4geneUp = new DoubleArrayList();
        DoubleArrayList pvalues4geneDown = new DoubleArrayList();
        DoubleArrayList foldChanges4gene = new DoubleArrayList();
        Collection<DifferentialExpressionAnalysisResult> resultsUsed = new HashSet<>();
        for (ExpressionAnalysisResultSet rs : resultSet2Results4Gene.keySet()) {
            Collection<DifferentialExpressionAnalysisResult> res = resultSet2Results4Gene.get(rs);
            if (res.isEmpty()) {
                // shouldn't happen?
                DiffExMetaAnalyzerServiceImpl.log.warn("Unexpectedly no results in resultSet for gene " + g);
                continue;
            }
            Double foldChange4GeneInOneResultSet = this.aggregateFoldChangeForGeneWithinResultSet(res);
            if (foldChange4GeneInOneResultSet == null) {
                // we can't go on.
                continue;
            }
            // we use the pvalue for the 'best' direction, and set the other to be 1- that. An alternative would be
            // to use _only_ the best one.
            Double pvalue4GeneInOneResultSetUp;
            Double pvalue4GeneInOneResultSetDown;
            if (foldChange4GeneInOneResultSet < 0) {
                pvalue4GeneInOneResultSetDown = this.aggregatePvaluesForGeneWithinResultSet(res, false);
                assert pvalue4GeneInOneResultSetDown != null;
                pvalue4GeneInOneResultSetUp = 1.0 - pvalue4GeneInOneResultSetDown;
            } else {
                pvalue4GeneInOneResultSetUp = this.aggregatePvaluesForGeneWithinResultSet(res, true);
                assert pvalue4GeneInOneResultSetUp != null;
                pvalue4GeneInOneResultSetDown = 1.0 - pvalue4GeneInOneResultSetUp;
            }
            // If we have missing values, skip them. (this should be impossible!)
            if (Double.isNaN(pvalue4GeneInOneResultSetUp) || Double.isNaN(pvalue4GeneInOneResultSetDown)) {
                continue;
            }
            /*
                 * Now when we correct, we have to 1) bonferroni correct for multiple probes and 2) clip really small
                 * pvalues. We do this now, so that we don't skew the converse pvalues (up vs. down).
                 */
            pvalue4GeneInOneResultSetUp = this.correctAndClip(res, pvalue4GeneInOneResultSetUp);
            pvalue4GeneInOneResultSetDown = this.correctAndClip(res, pvalue4GeneInOneResultSetDown);
            // results used for this one gene's meta-analysis.
            boolean added = resultsUsed.addAll(res);
            assert added;
            pvalues4geneUp.add(pvalue4GeneInOneResultSetUp);
            pvalues4geneDown.add(pvalue4GeneInOneResultSetDown);
            foldChanges4gene.add(foldChange4GeneInOneResultSet);
            if (DiffExMetaAnalyzerServiceImpl.log.isDebugEnabled())
                DiffExMetaAnalyzerServiceImpl.log.debug(String.format("%s %.4f %.4f %.1f", g.getOfficialSymbol(), pvalue4GeneInOneResultSetUp, pvalue4GeneInOneResultSetDown, foldChange4GeneInOneResultSet));
        }
        // loop over results for one gene
        assert resultsUsed.size() >= pvalues4geneUp.size();
        if (pvalues4geneUp.size() < 2) {
            continue;
        }
        /*
             * Note that this value can be misleading. It should not be used to determine the change that was
             * "looked for". Use the 'upperTail' field instead.
             */
        assert !Double.isNaN(Descriptive.mean(foldChanges4gene));
        double fisherPvalueUp = MetaAnalysis.fisherCombinePvalues(pvalues4geneUp);
        double fisherPvalueDown = MetaAnalysis.fisherCombinePvalues(pvalues4geneDown);
        if (Double.isNaN(fisherPvalueUp) || Double.isNaN(fisherPvalueDown)) {
            continue;
        }
        pvaluesUp.add(fisherPvalueUp);
        GeneDifferentialExpressionMetaAnalysisResult metaAnalysisResultUp = this.makeMetaAnalysisResult(g, resultsUsed, fisherPvalueUp, Boolean.TRUE);
        metaAnalysisResultsUp.add(metaAnalysisResultUp);
        pvaluesDown.add(fisherPvalueDown);
        GeneDifferentialExpressionMetaAnalysisResult metaAnalysisResultDown = this.makeMetaAnalysisResult(g, resultsUsed, fisherPvalueDown, Boolean.FALSE);
        metaAnalysisResultsDown.add(metaAnalysisResultDown);
        this.debug(g, fisherPvalueUp, fisherPvalueDown);
    }
    assert metaAnalysisResultsUp.size() == metaAnalysisResultsDown.size();
    if (metaAnalysisResultsDown.isEmpty()) {
        // can happen if platforms don't have any genes that match etc.
        DiffExMetaAnalyzerServiceImpl.log.warn("No meta-analysis results were obtained");
        return null;
    }
    DiffExMetaAnalyzerServiceImpl.log.info(metaAnalysisResultsUp.size() + " initial meta-analysis results");
    DoubleArrayList qvaluesUp = MultipleTestCorrection.benjaminiHochberg(pvaluesUp);
    assert qvaluesUp.size() == metaAnalysisResultsUp.size();
    DoubleArrayList qvaluesDown = MultipleTestCorrection.benjaminiHochberg(pvaluesDown);
    assert qvaluesDown.size() == metaAnalysisResultsDown.size();
    return this.makeMetaAnalysisObject(updatedResultSets, metaAnalysisResultsUp, metaAnalysisResultsDown, qvaluesUp, qvaluesDown);
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList) DoubleArrayList(cern.colt.list.DoubleArrayList) Gene(ubic.gemma.model.genome.Gene)

Example 29 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class OutlierDetectionServiceImpl method identifyOutliersByMedianCorrelation.

@Override
public Collection<OutlierDetails> identifyOutliersByMedianCorrelation(DoubleMatrix<BioAssay, BioAssay> cormat) {
    List<OutlierDetails> allSamples = new ArrayList<>();
    OutlierDetails sample;
    /* Find the 1st, 2nd, and 3rd quartiles of each sample */
    for (int i = 0; i < cormat.rows(); i++) {
        DoubleArrayList cors = new DoubleArrayList();
        sample = new OutlierDetails(cormat.getRowName(i));
        for (int j = 0; j < cormat.columns(); j++) {
            if (j != i) {
                // get all sample correlations except correlation with self
                double d = cormat.get(i, j);
                cors.add(d);
            }
        }
        assert (cors.size() == cormat.rows() - 1);
        sample.setFirstQuartile(this.findValueAtDesiredQuantile(cors, 25));
        sample.setMedianCorrelation(this.findValueAtDesiredQuantile(cors, 50));
        sample.setThirdQuartile(this.findValueAtDesiredQuantile(cors, 75));
        if (sample.getFirstQuartile() == Double.MIN_VALUE || sample.getMedianCorrelation() == Double.MIN_VALUE || sample.getThirdQuartile() == Double.MIN_VALUE) {
            throw new IllegalStateException("Could not determine one or more quartiles for a sample; ");
        }
        allSamples.add(sample);
    }
    /* Sort all samples by median correlation */
    Collections.sort(allSamples, OutlierDetails.MedianComparator);
    int numOutliers = 0;
    /* Check for overlap of first quartile and median of consecutive samples */
    for (int k = 0; k < allSamples.size() - 1; k++) {
        // if ( allSamples.get( k ).getMedianCorrelation() < allSamples.get( k + 1 ).getFirstQuartile() ) {
        if (allSamples.get(k).getThirdQuartile() < allSamples.get(k + 1).getFirstQuartile()) {
            numOutliers = k + 1;
        }
    }
    List<OutlierDetails> outliers = new ArrayList<>();
    for (int m = 0; m < numOutliers; m++) {
        outliers.add(allSamples.get(m));
    }
    /*
         * Check that all outliers are legitimate (controls for situations where sorting by median does not give 'true'
         * order)
         */
    if (numOutliers > 0) {
        OutlierDetectionServiceImpl.log.info("Removing false positives; number of outliers before test: " + numOutliers);
        outliers = this.removeFalsePositives(allSamples, outliers, numOutliers);
        numOutliers = outliers.size();
        OutlierDetectionServiceImpl.log.info("Number of outliers after removing false positives: " + numOutliers);
    }
    OutlierDetectionServiceImpl.log.info("Found " + numOutliers + " outlier(s)");
    return outliers;
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList) DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 30 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class DataUpdater method addTotalCountInformation.

private void addTotalCountInformation(ExpressionExperiment ee, ExpressionDataDoubleMatrix countEEMatrix, Integer readLength, Boolean isPairedReads) {
    for (BioAssay ba : ee.getBioAssays()) {
        Double[] col = countEEMatrix.getColumn(ba);
        double librarySize = DescriptiveWithMissing.sum(new DoubleArrayList(ArrayUtils.toPrimitive(col)));
        DataUpdater.log.info(ba + " total library size=" + librarySize);
        ba.setSequenceReadLength(readLength);
        ba.setSequencePairedReads(isPairedReads);
        ba.setSequenceReadCount((int) Math.floor(librarySize));
        bioAssayService.update(ba);
    }
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Aggregations

DoubleArrayList (cern.colt.list.DoubleArrayList)82 RegressionResult (edu.cmu.tetrad.regression.RegressionResult)11 ArrayList (java.util.ArrayList)9 AndersonDarlingTest (edu.cmu.tetrad.data.AndersonDarlingTest)8 IntArrayList (cern.colt.list.IntArrayList)6 DenseDoubleMatrix2D (cern.colt.matrix.impl.DenseDoubleMatrix2D)5 TetradVector (edu.cmu.tetrad.util.TetradVector)5 Test (org.junit.Test)5 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)4 TetradMatrix (edu.cmu.tetrad.util.TetradMatrix)4 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)3 DenseDoubleMatrix1D (cern.colt.matrix.impl.DenseDoubleMatrix1D)3 Regression (edu.cmu.tetrad.regression.Regression)3 RegressionDataset (edu.cmu.tetrad.regression.RegressionDataset)3 StopWatch (org.apache.commons.lang3.time.StopWatch)2 CoordinatePoint (org.onebusaway.geospatial.model.CoordinatePoint)2 Record (org.onebusaway.transit_data.model.realtime.CurrentVehicleEstimateQueryBean.Record)2 ScheduledBlockLocation (org.onebusaway.transit_data_federation.services.blocks.ScheduledBlockLocation)2 BlockLocation (org.onebusaway.transit_data_federation.services.realtime.BlockLocation)2 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)2