Search in sources :

Example 41 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class RowLevelFilter method computeCriteria.

/**
 * Walks every row of {@code data}, copies it into a reusable {@link DoubleArrayList} and hands that list to
 * {@code addCriterion} together with the row's design element and index.
 * <p>
 * NaN entries are written into the list as 0 and are counted as negative.
 *
 * @param data     the matrix whose rows are examined
 * @param criteria passed through unchanged to {@code addCriterion} for each row
 * @return the number of rows in which every entry was either negative or NaN
 */
private int computeCriteria(ExpressionDataDoubleMatrix data, DoubleArrayList criteria) {
    final int rowCount = data.rows();
    final int columnCount = data.columns();

    // One scratch list, overwritten for each row, so the row can be passed on as a DoubleArrayList.
    final DoubleArrayList scratch = new DoubleArrayList(new double[columnCount]);
    int allNegativeRows = 0;

    for (int r = 0; r < rowCount; r++) {
        final Double[] values = data.getRow(r);
        int negatives = 0;

        for (int c = 0; c < columnCount; c++) {
            final double value = values[c];
            final boolean missing = Double.isNaN(value);

            // Missing values are replaced by zero in the copy.
            scratch.set(c, missing ? 0 : value);

            // Missing values count towards the "negative" tally as well.
            if (missing || value < 0.0) {
                negatives++;
            }
        }

        if (negatives == columnCount) {
            allNegativeRows++;
        }
        this.addCriterion(criteria, scratch, data.getDesignElementForRow(r), r);
    }
    return allNegativeRows;
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 42 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class LinkAnalysis method getCorrelationDistribution.

/**
 * Builds a new {@code CoexpCorrelationDistribution} from the histogram held by the metric matrix.
 *
 * @return a distribution whose bin count is the histogram size and whose bin counts are the histogram values
 *         converted to bytes by {@code ByteArrayConverter}
 */
public CoexpCorrelationDistribution getCorrelationDistribution() {
    CoexpCorrelationDistribution distribution = CoexpCorrelationDistribution.Factory.newInstance();

    DoubleArrayList histogram = this.metricMatrix.getHistogramArrayList();
    distribution.setNumBins(histogram.size());

    // The bin counts are stored in byte form.
    ByteArrayConverter converter = new ByteArrayConverter();
    byte[] packedCounts = converter.doubleArrayToBytes(MatrixUtil.fromList(histogram).toArray());
    distribution.setBinCounts(packedCounts);

    return distribution;
}
Also used : CoexpCorrelationDistribution(ubic.gemma.model.analysis.expression.coexpression.CoexpCorrelationDistribution) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 43 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class LinkAnalysis method chooseCutPoints.

/**
 * Compute the thresholds needed to choose links for storage in the system.
 * <p>
 * Two candidate thresholds are computed: one from the CDF of the score histogram ({@code config.getCdfCut()}) and
 * one from a Bonferroni-corrected family-wise error rate ({@code config.getFwe()}). Which one becomes the final
 * upper/lower tail cut depends on {@code config.getSingularThreshold()}:
 * <ul>
 * <li>{@code none}: the more stringent of the two (max for the upper tail, min for the lower tail);</li>
 * <li>{@code fwe}: only the significance-based score;</li>
 * <li>{@code cdfcut}: only the CDF-based score, and the p-value threshold is switched off on the matrix.</li>
 * </ul>
 * The chosen cuts are written to {@code config} and then pushed to {@code metricMatrix}. If the CDF cut is
 * {@code <= 0} or {@code >= 1} the tail cuts are set to fixed values on {@code config} and the method returns
 * without touching the matrix thresholds.
 */
private void chooseCutPoints() {
    DoubleArrayList cdf = Stats.cdf(metricMatrix.getHistogramArrayList());

    // Degenerate CDF cuts: set fixed tail cuts and stop.
    if (config.getCdfCut() <= 0.0) {
        config.setUpperTailCut(1.0);
        config.setLowerTailCut(-1.0);
        return;
    }
    if (config.getCdfCut() >= 1.0) {
        config.setUpperTailCut(0.0);
        config.setLowerTailCut(0.0);
        return;
    }

    double cdfTailCut = config.getCdfCut();
    double cdfUpperCutScore = 0.0;
    double cdfLowerCutScore = 0.0;

    // find the lower tail cutpoint, if we have to.
    if (!config.isAbsoluteValue()) {
        // The cut is halved when both tails are used (presumably split evenly between them - confirm).
        cdfTailCut /= 2.0;
        for (int i = 0; i < cdf.size(); i++) {
            if (1.0 - cdf.get(i) >= cdfTailCut) {
                // NOTE(review): i == cdf.size() is never true inside this loop, so i + 1 is always used;
                // confirm whether i == cdf.size() - 1 was intended.
                cdfLowerCutScore = metricMatrix.getScoreInBin(i == cdf.size() ? i : i + 1);
                break;
            }
        }
        LinkAnalysis.log.debug(form.format(cdfLowerCutScore) + " is the lower cdf cutpoint at " + cdfTailCut);
    }

    // find the upper cut point, scanning from the last bin downwards.
    for (int i = cdf.size() - 1; i >= 0; i--) {
        if (cdf.get(i) >= cdfTailCut) {
            // NOTE(review): same never-true condition as in the lower-tail loop above.
            cdfUpperCutScore = metricMatrix.getScoreInBin(i == cdf.size() ? i : i + 1);
            break;
        }
    }
    LinkAnalysis.log.debug(form.format(cdfUpperCutScore) + " is the upper cdf cutpoint at " + cdfTailCut);

    // get the cutpoint based on statistical significance.
    double maxP = 1.0;
    double scoreAtP = 0.0;
    if (config.getFwe() != 0.0) {
        double numUniqueGenes = metricMatrix.getNumUniqueGenes();
        // bonferroni.
        maxP = config.getFwe() / numUniqueGenes;
        scoreAtP = CorrelationStats.correlationForPvalue(maxP, this.dataMatrix.columns());
        LinkAnalysis.log.debug("Minimum correlation to get " + form.format(maxP) + " is about " + form.format(scoreAtP) + " for " + numUniqueGenes + " unique items (if all " + this.dataMatrix.columns() + " items are present)");
        if (scoreAtP > 0.9) {
            LinkAnalysis.log.warn("This data set has a very high threshold for statistical significance!");
        }
    }
    // this is the corrected p-value threshold (1.0 when no FWE is configured).
    this.metricMatrix.setPValueThreshold(maxP);

    // choose cut points, with one independent criterion or the most stringent criteria
    if (config.getSingularThreshold().equals(SingularThreshold.none)) {
        config.setUpperTailCut(Math.max(scoreAtP, cdfUpperCutScore));
        if (config.getUpperTailCut() == scoreAtP) {
            config.setUpperCdfCutUsed(false);
        } else if (config.getUpperTailCut() == cdfUpperCutScore) {
            config.setUpperCdfCutUsed(true);
        }
        if (!config.isAbsoluteValue()) {
            config.setLowerTailCut(Math.min(-scoreAtP, cdfLowerCutScore));
        }
        // The significance-based candidate for the lower tail is -scoreAtP (see the Math.min above), so that
        // is the value to compare against. Comparing against +scoreAtP could only match when scoreAtP is zero,
        // which meant the "CDF cut not used" flag was never set for a significance-driven lower cut.
        if (config.getLowerTailCut() == -scoreAtP) {
            config.setLowerCdfCutUsed(false);
        } else if (config.getLowerTailCut() == cdfLowerCutScore) {
            config.setLowerCdfCutUsed(true);
        }
    } else if (config.getSingularThreshold().equals(SingularThreshold.fwe)) {
        config.setUpperTailCut(scoreAtP);
        if (!config.isAbsoluteValue()) {
            config.setLowerTailCut(-scoreAtP);
        }
        config.setUpperCdfCutUsed(false);
        config.setLowerCdfCutUsed(false);
    } else if (config.getSingularThreshold().equals(SingularThreshold.cdfcut)) {
        config.setUpperTailCut(cdfUpperCutScore);
        if (!config.isAbsoluteValue()) {
            config.setLowerTailCut(cdfLowerCutScore);
        }
        // use only cdfCut exclusively to keep links
        metricMatrix.setUsePvalueThreshold(false);
        config.setUpperCdfCutUsed(true);
        config.setLowerCdfCutUsed(true);
    }

    LinkAnalysis.log.info("Final upper cut is " + form.format(config.getUpperTailCut()));
    LinkAnalysis.log.info("Final lower cut is " + form.format(config.getLowerTailCut()));

    // Push the final cuts to the matrix; in absolute-value mode the upper cut is used for both tails.
    metricMatrix.setUpperTailThreshold(config.getUpperTailCut());
    if (config.isAbsoluteValue()) {
        metricMatrix.setLowerTailThreshold(config.getUpperTailCut());
    } else {
        metricMatrix.setLowerTailThreshold(config.getLowerTailCut());
    }
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 44 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class SpearmanMetrics method getRankTransformedData.

/**
 * Rank-transforms every row of the data matrix into {@code rankTransformedData} and then calls
 * {@code rowStatistics()}.
 *
 * @param usedB if not null, {@code usedB[i][j]} is filled with {@code used.get(i, j)} for every row and column,
 *              so later code can read the flags from a plain array
 */
private void getRankTransformedData(boolean[][] usedB) {
    final int rowCount = this.dataMatrix.rows();
    final int columnCount = this.dataMatrix.columns();
    rankTransformedData = new double[rowCount][];

    for (int row = 0; row < rowCount; row++) {
        final Double[] boxed = this.dataMatrix.getRow(row);

        // Unbox into a fresh primitive array for the ranking call.
        final double[] raw = new double[boxed.length];
        int next = 0;
        for (Double value : boxed) {
            raw[next++] = value;
        }

        final DoubleArrayList ranks = Rank.rankTransform(new DoubleArrayList(raw));
        assert ranks != null;

        // Copy exactly size() ranks out of the list.
        final int rankCount = ranks.size();
        final double[] rankedRow = new double[rankCount];
        for (int k = 0; k < rankCount; k++) {
            rankedRow[k] = ranks.get(k);
        }
        rankTransformedData[row] = rankedRow;

        if (usedB == null) {
            continue;
        }
        // Cache the "used" flags in the caller's array; only needed when the caller asked for it.
        for (int col = 0; col < columnCount; col++) {
            usedB[row][col] = used.get(row, col);
        }
    }

    this.rowStatistics();
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 45 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class SpearmanMetrics method spearman.

/**
 * Computes the correlation of two rank vectors over the positions that are flagged as used in both, records it
 * via {@code setCorrel(i, j, ...)} and returns it.
 * <p>
 * When some positions are missing in either vector, the mutually present values are extracted and
 * rank-transformed again, because the ranks supplied by the caller were computed over all values.
 *
 * @param vectorA values (ranks) for the first row
 * @param vectorB values (ranks) for the second row
 * @param usedA   per-position flags for {@code vectorA}
 * @param usedB   per-position flags for {@code vectorB}
 * @param i       first index passed on to {@code setCorrel}
 * @param j       second index passed on to {@code setCorrel}
 * @return {@code NaN} if fewer than {@code minNumUsed} positions are mutually present, {@code 0.0} if the
 *         normalisation term is not positive, otherwise the correlation clamped to [-1, 1]
 */
protected double spearman(double[] vectorA, double[] vectorB, boolean[] usedA, boolean[] usedB, int i, int j) {
    final int length = vectorA.length;

    // Count positions present in both vectors.
    int mutuallyPresent = 0;
    for (int k = 0; k < length; k++) {
        if (usedA[k] && usedB[k]) {
            mutuallyPresent++;
        }
    }

    if (mutuallyPresent < minNumUsed) {
        this.setCorrel(i, j, Double.NaN, 0);
        return Double.NaN;
    }

    // With nothing missing the supplied vectors are used as they are.
    double[] ranksA = vectorA;
    double[] ranksB = vectorB;
    if (mutuallyPresent != length) {
        final double[] presentA = new double[mutuallyPresent];
        final double[] presentB = new double[mutuallyPresent];
        int next = 0;
        for (int k = 0; k < length; k++) {
            if (!(usedA[k] && usedB[k])) {
                continue;
            }
            presentA[next] = vectorA[k];
            presentB[next] = vectorB[k];
            next++;
        }
        // Re-rank the reduced vectors.
        // NOTE(review): elements() exposes the list's backing array; this relies on it being exactly sized.
        ranksA = Rank.rankTransform(new DoubleArrayList(presentA)).elements();
        ranksB = Rank.rankTransform(new DoubleArrayList(presentB)).elements();
    }

    // Accumulate the sums needed for the correlation.
    double sumA = 0.0;
    double sumB = 0.0;
    double sumAB = 0.0;
    double sumAA = 0.0;
    double sumBB = 0.0;
    for (int k = 0; k < ranksA.length; k++) {
        final double a = ranksA[k];
        final double b = ranksB[k];
        sumA += a;
        sumB += b;
        sumAB += a * b;
        sumAA += a * a;
        sumBB += b * b;
    }
    // The number of values actually summed is what gets reported and used for normalisation.
    final int counted = ranksA.length;

    final double denom = this.correlationNorm(counted, sumAA, sumA, sumBB, sumB);
    if (denom <= 0.0) {
        // means variance is zero for one of the vectors.
        this.setCorrel(i, j, 0.0, counted);
        return 0.0;
    }

    double correl = (sumAB - sumA * sumB / counted) / Math.sqrt(denom);

    // small range deviations (roundoff) are okay but shouldn't be big ones!
    assert correl < 1.0001 && correl > -1.0001;

    // roundoff protection.
    if (correl < -1.0) {
        correl = -1.0;
    } else if (correl > 1.0) {
        correl = 1.0;
    }

    this.setCorrel(i, j, correl, counted);
    return correl;
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList)

Aggregations

DoubleArrayList (cern.colt.list.DoubleArrayList)82 RegressionResult (edu.cmu.tetrad.regression.RegressionResult)11 ArrayList (java.util.ArrayList)9 AndersonDarlingTest (edu.cmu.tetrad.data.AndersonDarlingTest)8 IntArrayList (cern.colt.list.IntArrayList)6 DenseDoubleMatrix2D (cern.colt.matrix.impl.DenseDoubleMatrix2D)5 TetradVector (edu.cmu.tetrad.util.TetradVector)5 Test (org.junit.Test)5 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)4 TetradMatrix (edu.cmu.tetrad.util.TetradMatrix)4 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)3 DenseDoubleMatrix1D (cern.colt.matrix.impl.DenseDoubleMatrix1D)3 Regression (edu.cmu.tetrad.regression.Regression)3 RegressionDataset (edu.cmu.tetrad.regression.RegressionDataset)3 StopWatch (org.apache.commons.lang3.time.StopWatch)2 CoordinatePoint (org.onebusaway.geospatial.model.CoordinatePoint)2 Record (org.onebusaway.transit_data.model.realtime.CurrentVehicleEstimateQueryBean.Record)2 ScheduledBlockLocation (org.onebusaway.transit_data_federation.services.blocks.ScheduledBlockLocation)2 BlockLocation (org.onebusaway.transit_data_federation.services.realtime.BlockLocation)2 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)2