Search in sources :

Example 36 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorCreateHelperServiceImpl method getRanks.

private DoubleArrayList getRanks(ExpressionDataDoubleMatrix intensities, ProcessedExpressionDataVectorDao.RankMethod method) {
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.debug("Getting ranks");
    assert intensities != null;
    DoubleArrayList result = new DoubleArrayList(intensities.rows());
    for (ExpressionDataMatrixRowElement de : intensities.getRowElements()) {
        double[] rowObj = ArrayUtils.toPrimitive(intensities.getRow(de.getDesignElement()));
        double valueForRank = Double.MIN_VALUE;
        if (rowObj != null) {
            DoubleArrayList row = new DoubleArrayList(rowObj);
            switch(method) {
                case max:
                    valueForRank = DescriptiveWithMissing.max(row);
                    break;
                case mean:
                    valueForRank = DescriptiveWithMissing.mean(row);
                    break;
                default:
                    throw new UnsupportedOperationException();
            }
        }
        result.add(valueForRank);
    }
    return Rank.rankTransform(result);
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 37 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class BatchConfound method factorBatchConfoundTest.

private static Collection<BatchConfoundValueObject> factorBatchConfoundTest(ExpressionExperiment ee, Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap) throws IllegalArgumentException {
    Map<Long, Long> batchMembership = new HashMap<>();
    ExperimentalFactor batchFactor = null;
    Map<Long, Integer> batchIndexes = new HashMap<>();
    for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
        if (ExperimentalDesignUtils.isBatch(ef)) {
            batchFactor = ef;
            Map<Long, Double> bmToFv = bioMaterialFactorMap.get(batchFactor);
            if (bmToFv == null) {
                log.warn("No biomaterial --> factor value map for batch factor: " + batchFactor);
                continue;
            }
            int index = 0;
            for (FactorValue fv : batchFactor.getFactorValues()) {
                batchIndexes.put(fv.getId(), index++);
            }
            for (Long bmId : bmToFv.keySet()) {
                batchMembership.put(bmId, bmToFv.get(bmId).longValue());
            }
            break;
        }
    }
    Set<BatchConfoundValueObject> result = new HashSet<>();
    if (batchFactor == null) {
        return result;
    }
    for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
        if (ef.equals(batchFactor))
            continue;
        Map<Long, Double> bmToFv = bioMaterialFactorMap.get(ef);
        int numBioMaterials = bmToFv.keySet().size();
        assert numBioMaterials > 0 : "No biomaterials for " + ef;
        double p = Double.NaN;
        double chiSquare;
        int df;
        int numBatches = batchFactor.getFactorValues().size();
        if (ExperimentalDesignUtils.isContinuous(ef)) {
            DoubleArrayList factorValues = new DoubleArrayList(numBioMaterials);
            factorValues.setSize(numBioMaterials);
            IntArrayList batches = new IntArrayList(numBioMaterials);
            batches.setSize(numBioMaterials);
            int j = 0;
            for (Long bmId : bmToFv.keySet()) {
                assert factorValues.size() > 0 : "Biomaterial to factorValue is empty for " + ef;
                factorValues.set(j, bmToFv.get(bmId));
                long batch = batchMembership.get(bmId);
                batches.set(j, batchIndexes.get(batch));
                j++;
            }
            p = KruskalWallis.test(factorValues, batches);
            df = KruskalWallis.dof(factorValues, batches);
            chiSquare = KruskalWallis.kwStatistic(factorValues, batches);
            log.debug("KWallis\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
        } else {
            Map<Long, Integer> factorValueIndexes = new HashMap<>();
            int index = 0;
            for (FactorValue fv : ef.getFactorValues()) {
                factorValueIndexes.put(fv.getId(), index++);
            }
            Map<Long, Long> factorValueMembership = new HashMap<>();
            for (Long bmId : bmToFv.keySet()) {
                factorValueMembership.put(bmId, bmToFv.get(bmId).longValue());
            }
            long[][] counts = new long[numBatches][ef.getFactorValues().size()];
            for (int i = 0; i < batchIndexes.size(); i++) {
                for (int j = 0; j < factorValueIndexes.size(); j++) {
                    counts[i][j] = 0;
                }
            }
            for (Long bm : bmToFv.keySet()) {
                long fv = factorValueMembership.get(bm);
                Long batch = batchMembership.get(bm);
                if (batch == null) {
                    log.warn("No batch membership for : " + bm);
                    continue;
                }
                int batchIndex = batchIndexes.get(batch);
                int factorIndex = factorValueIndexes.get(fv);
                counts[batchIndex][factorIndex]++;
            }
            ChiSquareTest cst = new ChiSquareTest();
            try {
                chiSquare = cst.chiSquare(counts);
            } catch (IllegalArgumentException e) {
                log.warn("IllegalArgumentException exception computing ChiSq for : " + ef + "; Error was: " + e.getMessage());
                chiSquare = Double.NaN;
            }
            df = (counts.length - 1) * (counts[0].length - 1);
            ChiSquaredDistribution distribution = new ChiSquaredDistribution(df);
            if (!Double.isNaN(chiSquare)) {
                p = 1.0 - distribution.cumulativeProbability(chiSquare);
            }
            log.debug("ChiSq\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
        }
        BatchConfoundValueObject summary = new BatchConfoundValueObject(ee, ef, chiSquare, df, p, numBatches);
        result.add(summary);
    }
    return result;
}
Also used : ChiSquaredDistribution(org.apache.commons.math3.distribution.ChiSquaredDistribution) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) DoubleArrayList(cern.colt.list.DoubleArrayList) IntArrayList(cern.colt.list.IntArrayList) ChiSquareTest(org.apache.commons.math3.stat.inference.ChiSquareTest)

Example 38 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class ComBat method run.

/**
 * @param parametric if false, use the non-parametric (slower) method for estimating the priors.
 * @return corrected data
 * @throws ComBatException combat problems
 */
public DoubleMatrix2D run(boolean parametric) throws ComBatException {
    StopWatch timer = new StopWatch();
    timer.start();
    final DoubleMatrix2D sdata = this.standardize(y, x);
    this.checkForProblems(sdata);
    if (timer.getTime() > 1000) {
        ComBat.log.info("Standardized");
    }
    timer.reset();
    timer.start();
    this.gammaHat(sdata);
    this.deltaHat(sdata);
    // assertEquals( 1.618, deltaHat.get( 0, 0 ), 0.001 );
    // gamma.bar <- apply(gamma.hat, 1, mean)
    gammaBar = new DoubleArrayList();
    t2 = new DoubleArrayList();
    for (int batchIndex = 0; batchIndex < gammaHat.rows(); batchIndex++) {
        double mean = DescriptiveWithMissing.mean(new DoubleArrayList(gammaHat.viewRow(batchIndex).toArray()));
        gammaBar.add(mean);
        t2.add(DescriptiveWithMissing.sampleVariance(new DoubleArrayList(gammaHat.viewRow(batchIndex).toArray()), mean));
    }
    // assertEquals( -0.092144, gammaBar.get( 0 ), 0.001 );
    // assertEquals( 0.2977, t2.get( 1 ), 0.001 );
    aPrior = this.aPrior(deltaHat);
    bPrior = this.bPrior(deltaHat);
    if (timer.getTime() > 1000) {
        ComBat.log.info("Computed priors");
    }
    // assertEquals( 17.4971, aPrior.get( 0 ), 0.0001 );
    // assertEquals( 4.514, bPrior.get( 1 ), 0.0001 );
    DoubleMatrix2D gammastar = new DenseDoubleMatrix2D(numBatches, numProbes);
    DoubleMatrix2D deltastar = new DenseDoubleMatrix2D(numBatches, numProbes);
    if (!parametric) {
        this.runNonParametric(sdata, gammastar, deltastar);
    } else {
        this.runParametric(sdata, gammastar, deltastar);
    }
    DoubleMatrix2D adjustedData = this.rawAdjust(sdata, gammastar, deltastar);
    // assertEquals( -0.95099, adjustedData.get( 18, 0 ), 0.0001 );
    // assertEquals( -0.30273984, adjustedData.get( 14, 6 ), 0.0001 );
    // assertEquals( 0.2097977, adjustedData.get( 7, 3 ), 0.0001 );
    // log.info( adjustedData );
    DoubleMatrix2D result = this.restoreScale(adjustedData);
    if (timer.getTime() > 1000) {
        ComBat.log.info("Done");
    }
    return result;
}
Also used : DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleArrayList(cern.colt.list.DoubleArrayList) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 39 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class ComBat method deltaHat.

private void deltaHat(DoubleMatrix2D sdata) {
    int batchIndex;
    deltaHat = new DenseDoubleMatrix2D(numBatches, numProbes);
    batchIndex = 0;
    for (String batchId : batches.keySet()) {
        DoubleMatrix2D batchData = this.getBatchData(sdata, batchId);
        for (int j = 0; j < batchData.rows(); j++) {
            DoubleArrayList row = new DoubleArrayList(batchData.viewRow(j).toArray());
            double variance = DescriptiveWithMissing.sampleVariance(row, DescriptiveWithMissing.mean(row));
            deltaHat.set(batchIndex, j, variance);
        }
        batchIndex++;
    }
}
Also used : DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 40 with DoubleArrayList

use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.

the class ComBat method standardize.

/**
 * Special standardization: partial regression of covariates
 *
 * @param b b
 * @param A A
 * @return double matrix 2d
 */
DoubleMatrix2D standardize(DoubleMatrix2D b, DoubleMatrix2D A) {
    DoubleMatrix2D beta = new LeastSquaresFit(A, b).getCoefficients();
    // assertEquals( 3.7805, beta.get( 0, 0 ), 0.001 );
    // assertEquals( 0.0541, beta.get( 2, 18 ), 0.001 );
    int batchIndex = 0;
    DoubleMatrix2D bba = new DenseDoubleMatrix2D(1, numBatches);
    for (String batchId : batches.keySet()) {
        bba.set(0, batchIndex++, (double) batches.get(batchId).size() / numSamples);
    }
    /*
         * Weight the non-batch coefficients by the batch sizes.
         */
    DoubleMatrix2D grandMeanM = solver.mult(bba, beta.viewPart(0, 0, numBatches, beta.columns()));
    if (hasMissing) {
        varpooled = y.copy().assign(solver.transpose(solver.mult(x, beta)), Functions.minus);
        DoubleMatrix2D var = new DenseDoubleMatrix2D(varpooled.rows(), 1);
        for (int i = 0; i < varpooled.rows(); i++) {
            DoubleMatrix1D row = varpooled.viewRow(i);
            double m = DescriptiveWithMissing.mean(new DoubleArrayList(row.toArray()));
            double v = DescriptiveWithMissing.sampleVariance(new DoubleArrayList(row.toArray()), m);
            var.set(i, 0, v);
        }
        varpooled = var;
    } else {
        varpooled = y.copy().assign(solver.transpose(solver.mult(x, beta)), Functions.minus).assign(Functions.pow(2));
        DoubleMatrix2D scale = new DenseDoubleMatrix2D(numSamples, 1);
        scale.assign(1.0 / numSamples);
        varpooled = solver.mult(varpooled, scale);
    }
    DoubleMatrix2D size = new DenseDoubleMatrix2D(numSamples, 1);
    size.assign(1.0);
    /*
         * The coefficients repeated for each sample.
         */
    standMean = solver.mult(solver.transpose(grandMeanM), solver.transpose(size));
    /*
         * Erase the batch factors from a copy of the design matrix
         */
    DoubleMatrix2D tmpX = x.copy();
    for (batchIndex = 0; batchIndex < numBatches; batchIndex++) {
        for (int j = 0; j < x.rows(); j++) {
            tmpX.set(j, batchIndex, 0.0);
        }
    }
    /*
         * row means, adjusted "per group", and ignoring batch effects.
         */
    standMean = standMean.assign(solver.transpose(solver.mult(tmpX, beta)), Functions.plus);
    DoubleMatrix2D varsq = solver.mult(varpooled.copy().assign(Functions.sqrt), solver.transpose(size));
    /*
         * Subtract the mean and divide by the standard deviations.
         */
    DoubleMatrix2D meansubtracted = y.copy().assign(standMean, Functions.minus);
    return meansubtracted.assign(varsq, Functions.div);
}
Also used : DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D) LeastSquaresFit(ubic.basecode.math.linearmodels.LeastSquaresFit) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleArrayList(cern.colt.list.DoubleArrayList)

Aggregations

DoubleArrayList (cern.colt.list.DoubleArrayList)82 RegressionResult (edu.cmu.tetrad.regression.RegressionResult)11 ArrayList (java.util.ArrayList)9 AndersonDarlingTest (edu.cmu.tetrad.data.AndersonDarlingTest)8 IntArrayList (cern.colt.list.IntArrayList)6 DenseDoubleMatrix2D (cern.colt.matrix.impl.DenseDoubleMatrix2D)5 TetradVector (edu.cmu.tetrad.util.TetradVector)5 Test (org.junit.Test)5 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)4 TetradMatrix (edu.cmu.tetrad.util.TetradMatrix)4 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)3 DenseDoubleMatrix1D (cern.colt.matrix.impl.DenseDoubleMatrix1D)3 Regression (edu.cmu.tetrad.regression.Regression)3 RegressionDataset (edu.cmu.tetrad.regression.RegressionDataset)3 StopWatch (org.apache.commons.lang3.time.StopWatch)2 CoordinatePoint (org.onebusaway.geospatial.model.CoordinatePoint)2 Record (org.onebusaway.transit_data.model.realtime.CurrentVehicleEstimateQueryBean.Record)2 ScheduledBlockLocation (org.onebusaway.transit_data_federation.services.blocks.ScheduledBlockLocation)2 BlockLocation (org.onebusaway.transit_data_federation.services.realtime.BlockLocation)2 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)2