Search in sources :

Example 21 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class ComBat method aPrior.

private DoubleArrayList aPrior(DoubleMatrix2D d) {
    DoubleArrayList result = new DoubleArrayList();
    for (int i = 0; i < d.rows(); i++) {
        DoubleArrayList dd = new DoubleArrayList(d.viewRow(i).toArray());
        double mean = DescriptiveWithMissing.mean(dd);
        double var = DescriptiveWithMissing.sampleVariance(dd, mean);
        result.add((2.0 * var + Math.pow(mean, 2)) / var);
    }
    return result;
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 22 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class ComBat method stepSum.

private DoubleMatrix1D stepSum(DoubleMatrix2D matrix, DoubleMatrix1D gnew) {
    Algebra s = new Algebra();
    DoubleMatrix2D g = new DenseDoubleMatrix2D(1, gnew.size());
    for (int i = 0; i < gnew.size(); i++) {
        g.set(0, i, gnew.get(i));
    }
    DoubleMatrix2D a = new DenseDoubleMatrix2D(1, matrix.columns());
    a.assign(1.0);
    /*
         * subtract column gnew from each column of data; square; then sum over each row.
         */
    DoubleMatrix2D deltas = matrix.copy().assign((s.mult(s.transpose(g), a)), Functions.minus).assign(Functions.square);
    DoubleMatrix1D sumsq = new DenseDoubleMatrix1D(deltas.rows());
    sumsq.assign(0.0);
    for (int i = 0; i < deltas.rows(); i++) {
        sumsq.set(i, DescriptiveWithMissing.sum(new DoubleArrayList(deltas.viewRow(i).toArray())));
    }
    return sumsq;
}
Also used : Algebra(cern.colt.matrix.linalg.Algebra) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D) DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 23 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class RowLevelFilter method filter.

@Override
public ExpressionDataDoubleMatrix filter(ExpressionDataDoubleMatrix data) {
    if (lowCut == -Double.MAX_VALUE && highCut == Double.MAX_VALUE) {
        RowLevelFilter.log.info("No filtering requested");
        return data;
    }
    int numRows = data.rows();
    DoubleArrayList criteria = new DoubleArrayList(new double[numRows]);
    int numAllNeg = this.computeCriteria(data, criteria);
    DoubleArrayList sortedCriteria = criteria.copy();
    sortedCriteria.sort();
    int consideredRows = numRows;
    int startIndex = 0;
    if (removeAllNegative) {
        consideredRows = numRows - numAllNeg;
        startIndex = numAllNeg;
    }
    double realHighCut = this.getHighThreshold(sortedCriteria, consideredRows);
    double realLowCut = this.getLowThreshold(numRows, sortedCriteria, consideredRows, startIndex);
    if (Double.isNaN(realHighCut)) {
        throw new IllegalStateException("High threshold cut is NaN");
    }
    RowLevelFilter.log.debug("Low cut = " + realLowCut);
    RowLevelFilter.log.debug("High cut = " + realHighCut);
    if (realHighCut <= realLowCut) {
        throw new RuntimeException("High cut " + realHighCut + " is lower or same as low cut " + realLowCut);
    }
    List<CompositeSequence> kept = new ArrayList<>();
    for (int i = 0; i < numRows; i++) {
        // values, zeros should always be removed
        if (criteria.get(i) > realLowCut && criteria.get(i) <= realHighCut) {
            kept.add(data.getDesignElementForRow(i));
        }
    }
    this.logInfo(numRows, kept);
    return new ExpressionDataDoubleMatrix(data, kept);
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DoubleArrayList(cern.colt.list.DoubleArrayList) ArrayList(java.util.ArrayList) DoubleArrayList(cern.colt.list.DoubleArrayList) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 24 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class SVDServiceHelperImpl method analyzeComponent.

/**
 * Do the factor comparisons for one component.
 *
 * @param bioMaterialFactorMap Map of factors to biomaterials to the value we're going to use. Even for
 *                             non-continuous factors the value is a double.
 */
private void analyzeComponent(SVDValueObject svo, int componentNumber, DoubleMatrix<Long, Integer> vMatrix, Map<Long, Date> bioMaterialDates, Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap, Long[] svdBioMaterials) {
    DoubleArrayList eigenGene = new DoubleArrayList(vMatrix.getColumn(componentNumber));
    // since we use rank correlation/anova, we just use the casted ids (two-groups) or dates as the covariate
    int numWithDates = 0;
    for (Long id : bioMaterialDates.keySet()) {
        if (bioMaterialDates.get(id) != null) {
            numWithDates++;
        }
    }
    if (numWithDates > 2) {
        /*
             * Get the dates in order, - no rounding.
             */
        boolean initializingDates = svo.getDates().isEmpty();
        double[] dates = new double[svdBioMaterials.length];
        /*
             * If dates are all the same, skip.
             */
        Set<Date> uniqueDate = new HashSet<>();
        for (int j = 0; j < svdBioMaterials.length; j++) {
            Date date = bioMaterialDates.get(svdBioMaterials[j]);
            if (date == null) {
                SVDServiceHelperImpl.log.warn("Incomplete date information, missing for biomaterial " + svdBioMaterials[j]);
                dates[j] = Double.NaN;
            } else {
                Date roundDate = DateUtils.round(date, Calendar.MINUTE);
                uniqueDate.add(roundDate);
                // round to minute; make int, cast to
                dates[j] = roundDate.getTime();
            // double
            }
            if (initializingDates) {
                svo.getDates().add(date);
            }
        }
        if (uniqueDate.size() == 1) {
            SVDServiceHelperImpl.log.warn("All scan dates the same, skipping data analysis");
            svo.getDates().clear();
        }
        if (eigenGene.size() != dates.length) {
            SVDServiceHelperImpl.log.warn("Could not compute correlation, dates and eigenGene had different lengths.");
            return;
        }
        double dateCorrelation = Distance.spearmanRankCorrelation(eigenGene, new DoubleArrayList(dates));
        svo.setPCDateCorrelation(componentNumber, dateCorrelation);
        svo.setPCDateCorrelationPval(componentNumber, CorrelationStats.spearmanPvalue(dateCorrelation, eigenGene.size()));
    }
    /*
         * Compare each factor (including batch information that is somewhat redundant with the dates) to the
         * eigen-genes. Using rank statistics.
         */
    for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
        Map<Long, Double> bmToFv = bioMaterialFactorMap.get(ef);
        double[] fvs = new double[svdBioMaterials.length];
        assert fvs.length > 0;
        int numNotMissing = 0;
        boolean initializing = false;
        if (!svo.getFactors().containsKey(ef.getId())) {
            svo.getFactors().put(ef.getId(), new ArrayList<Double>());
            initializing = true;
        }
        for (int j = 0; j < svdBioMaterials.length; j++) {
            fvs[j] = bmToFv.get(svdBioMaterials[j]);
            if (!Double.isNaN(fvs[j])) {
                numNotMissing++;
            }
            // value.
            if (initializing) {
                if (SVDServiceHelperImpl.log.isDebugEnabled())
                    SVDServiceHelperImpl.log.debug("EF:" + ef.getId() + " fv=" + bmToFv.get(svdBioMaterials[j]));
                svo.getFactors().get(ef.getId()).add(bmToFv.get(svdBioMaterials[j]));
            }
        }
        if (fvs.length != eigenGene.size()) {
            SVDServiceHelperImpl.log.debug(fvs.length + " factor values (biomaterials) but " + eigenGene.size() + " values in the eigenGene");
            continue;
        }
        if (numNotMissing < SVDServiceHelperImpl.MINIMUM_POINTS_TO_COMPARE_TO_EIGEN_GENE) {
            SVDServiceHelperImpl.log.debug("Insufficient values to compare " + ef + " to eigenGenes");
            continue;
        }
        if (ExperimentalDesignUtils.isContinuous(ef)) {
            double factorCorrelation = Distance.spearmanRankCorrelation(eigenGene, new DoubleArrayList(fvs));
            svo.setPCFactorCorrelation(componentNumber, ef, factorCorrelation);
            svo.setPCFactorCorrelationPval(componentNumber, ef, CorrelationStats.spearmanPvalue(factorCorrelation, eigenGene.size()));
        } else {
            Collection<Integer> groups = new HashSet<>();
            IntArrayList groupings = new IntArrayList(fvs.length);
            int k = 0;
            DoubleArrayList eigenGeneWithoutMissing = new DoubleArrayList();
            for (double d : fvs) {
                if (Double.isNaN(d)) {
                    k++;
                    continue;
                }
                groupings.add((int) d);
                groups.add((int) d);
                eigenGeneWithoutMissing.add(eigenGene.get(k));
                k++;
            }
            if (groups.size() < 2) {
                SVDServiceHelperImpl.log.debug("Factor had less than two groups: " + ef + ", SVD comparison can't be done.");
                continue;
            }
            if (eigenGeneWithoutMissing.size() < SVDServiceHelperImpl.MINIMUM_POINTS_TO_COMPARE_TO_EIGEN_GENE) {
                SVDServiceHelperImpl.log.debug("Too few non-missing values for factor to compare to eigenGenes: " + ef);
                continue;
            }
            if (groups.size() == 2) {
                // use the one that still has missing values.
                double factorCorrelation = Distance.spearmanRankCorrelation(eigenGene, new DoubleArrayList(fvs));
                svo.setPCFactorCorrelation(componentNumber, ef, factorCorrelation);
                svo.setPCFactorCorrelationPval(componentNumber, ef, CorrelationStats.spearmanPvalue(factorCorrelation, eigenGeneWithoutMissing.size()));
            } else {
                // one-way ANOVA on ranks.
                double kwPVal = KruskalWallis.test(eigenGeneWithoutMissing, groupings);
                svo.setPCFactorCorrelationPval(componentNumber, ef, kwPVal);
                double factorCorrelation = Distance.spearmanRankCorrelation(eigenGene, new DoubleArrayList(fvs));
                double corrPvalue = CorrelationStats.spearmanPvalue(factorCorrelation, eigenGeneWithoutMissing.size());
                // sanity.
                assert Math.abs(factorCorrelation) < 1.0 + 1e-2;
                /*
                     * Avoid storing a pvalue, as it's hard to compare. If the regular linear correlation is strong,
                     * then we should just use that -- basically, it means the order we have the groups happens to be a
                     * good one. Of course we could just store pvalues, but that's not easy to use either.
                     */
                if (corrPvalue <= kwPVal) {
                    svo.setPCFactorCorrelation(componentNumber, ef, factorCorrelation);
                } else {
                    // hack. A bit like turning pvalues into prob it
                    double approxCorr = CorrelationStats.correlationForPvalue(kwPVal, eigenGeneWithoutMissing.size());
                    svo.setPCFactorCorrelation(componentNumber, ef, approxCorr);
                }
            }
        }
    }
}
Also used : ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) DoubleArrayList(cern.colt.list.DoubleArrayList) IntArrayList(cern.colt.list.IntArrayList)

Example 25 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class SpearmanMetricsTest method testCorrelB.

/**
 * This tests the same values as testCorrelWithMissing, different method than testCorrelC
 */
@Test
public void testCorrelB() {
    boolean[] usedA = new boolean[] { true, true, true, true, true, true, true, true, true };
    boolean[] usedB = new boolean[] { true, true, true, true, true, true, true, true, true };
    double[] a = new double[] { 400, 310, 20, 20, 688, 498, 533, 1409, 1500 };
    double[] b = new double[] { 1545, 2072, 1113, 676, 2648, 2478, 2574, 5155, 1624 };
    assertEquals(a.length, b.length);
    assertEquals(a.length, usedA.length);
    assertEquals(b.length, usedB.length);
    DoubleArrayList ranksIA = Rank.rankTransform(new DoubleArrayList(a));
    DoubleArrayList ranksIB = Rank.rankTransform(new DoubleArrayList(b));
    SpearmanMetrics test = new SpearmanMetrics(10);
    double actualValue = test.spearman(ranksIA.elements(), ranksIB.elements(), usedA, usedB, 0, 1);
    double expectedValue = 0.7113033;
    assertEquals(expectedValue, actualValue, 0.0001);
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList) Test(org.junit.Test)

Aggregations

DoubleArrayList (cern.colt.list.DoubleArrayList)82 RegressionResult (edu.cmu.tetrad.regression.RegressionResult)11 ArrayList (java.util.ArrayList)9 AndersonDarlingTest (edu.cmu.tetrad.data.AndersonDarlingTest)8 IntArrayList (cern.colt.list.IntArrayList)6 DenseDoubleMatrix2D (cern.colt.matrix.impl.DenseDoubleMatrix2D)5 TetradVector (edu.cmu.tetrad.util.TetradVector)5 Test (org.junit.Test)5 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)4 TetradMatrix (edu.cmu.tetrad.util.TetradMatrix)4 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)3 DenseDoubleMatrix1D (cern.colt.matrix.impl.DenseDoubleMatrix1D)3 Regression (edu.cmu.tetrad.regression.Regression)3 RegressionDataset (edu.cmu.tetrad.regression.RegressionDataset)3 StopWatch (org.apache.commons.lang3.time.StopWatch)2 CoordinatePoint (org.onebusaway.geospatial.model.CoordinatePoint)2 Record (org.onebusaway.transit_data.model.realtime.CurrentVehicleEstimateQueryBean.Record)2 ScheduledBlockLocation (org.onebusaway.transit_data_federation.services.blocks.ScheduledBlockLocation)2 BlockLocation (org.onebusaway.transit_data_federation.services.realtime.BlockLocation)2 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)2