Search in sources :

Example 66 with DoubleMatrix2D

use of cern.colt.matrix.DoubleMatrix2D in project Gemma by PavlidisLab.

the class ComBat method standardize.

/**
 * Special standardization: partial regression of covariates
 *
 * @param b b
 * @param A A
 * @return double matrix 2d
 */
DoubleMatrix2D standardize(DoubleMatrix2D b, DoubleMatrix2D A) {
    DoubleMatrix2D beta = new LeastSquaresFit(A, b).getCoefficients();
    // assertEquals( 3.7805, beta.get( 0, 0 ), 0.001 );
    // assertEquals( 0.0541, beta.get( 2, 18 ), 0.001 );
    int batchIndex = 0;
    DoubleMatrix2D bba = new DenseDoubleMatrix2D(1, numBatches);
    for (String batchId : batches.keySet()) {
        bba.set(0, batchIndex++, (double) batches.get(batchId).size() / numSamples);
    }
    /*
         * Weight the non-batch coefficients by the batch sizes.
         */
    DoubleMatrix2D grandMeanM = solver.mult(bba, beta.viewPart(0, 0, numBatches, beta.columns()));
    if (hasMissing) {
        varpooled = y.copy().assign(solver.transpose(solver.mult(x, beta)), Functions.minus);
        DoubleMatrix2D var = new DenseDoubleMatrix2D(varpooled.rows(), 1);
        for (int i = 0; i < varpooled.rows(); i++) {
            DoubleMatrix1D row = varpooled.viewRow(i);
            double m = DescriptiveWithMissing.mean(new DoubleArrayList(row.toArray()));
            double v = DescriptiveWithMissing.sampleVariance(new DoubleArrayList(row.toArray()), m);
            var.set(i, 0, v);
        }
        varpooled = var;
    } else {
        varpooled = y.copy().assign(solver.transpose(solver.mult(x, beta)), Functions.minus).assign(Functions.pow(2));
        DoubleMatrix2D scale = new DenseDoubleMatrix2D(numSamples, 1);
        scale.assign(1.0 / numSamples);
        varpooled = solver.mult(varpooled, scale);
    }
    DoubleMatrix2D size = new DenseDoubleMatrix2D(numSamples, 1);
    size.assign(1.0);
    /*
         * The coefficients repeated for each sample.
         */
    standMean = solver.mult(solver.transpose(grandMeanM), solver.transpose(size));
    /*
         * Erase the batch factors from a copy of the design matrix
         */
    DoubleMatrix2D tmpX = x.copy();
    for (batchIndex = 0; batchIndex < numBatches; batchIndex++) {
        for (int j = 0; j < x.rows(); j++) {
            tmpX.set(j, batchIndex, 0.0);
        }
    }
    /*
         * row means, adjusted "per group", and ignoring batch effects.
         */
    standMean = standMean.assign(solver.transpose(solver.mult(tmpX, beta)), Functions.plus);
    DoubleMatrix2D varsq = solver.mult(varpooled.copy().assign(Functions.sqrt), solver.transpose(size));
    /*
         * Subtract the mean and divide by the standard deviations.
         */
    DoubleMatrix2D meansubtracted = y.copy().assign(standMean, Functions.minus);
    return meansubtracted.assign(varsq, Functions.div);
}
Also used : DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D) LeastSquaresFit(ubic.basecode.math.linearmodels.LeastSquaresFit) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 67 with DoubleMatrix2D

use of cern.colt.matrix.DoubleMatrix2D in project Gemma by PavlidisLab.

the class ExpressionExperimentBatchCorrectionServiceImpl method doComBat.

private ExpressionDataDoubleMatrix doComBat(ExpressionExperiment ee, ExpressionDataDoubleMatrix originalDataMatrix, ObjectMatrix<BioMaterial, ExperimentalFactor, Object> design) {
    ObjectMatrix<BioMaterial, String, Object> designU = this.convertFactorValuesToStrings(design);
    DoubleMatrix<CompositeSequence, BioMaterial> matrix = originalDataMatrix.getMatrix();
    designU = this.orderMatrix(matrix, designU);
    ScaleType scale = originalDataMatrix.getQuantitationTypes().iterator().next().getScale();
    boolean transformed = false;
    if (!(scale.equals(ScaleType.LOG2) || scale.equals(ScaleType.LOG10) || scale.equals(ScaleType.LOGBASEUNKNOWN) || scale.equals(ScaleType.LN))) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.info(" *** COMBAT: LOG TRANSFORMING ***");
        transformed = true;
        MatrixStats.logTransform(matrix);
    }
    /*
         * Process
         */
    ComBat<CompositeSequence, BioMaterial> comBat = new ComBat<>(matrix, designU);
    // false: NONPARAMETRIC
    DoubleMatrix2D results = comBat.run(true);
    // note these plots always reflect the parametric setup.
    // TEMPORARY?
    comBat.plot(ee.getId() + "." + FileTools.cleanForFileName(ee.getShortName()));
    /*
         * Postprocess. Results is a raw matrix/
         */
    DoubleMatrix<CompositeSequence, BioMaterial> correctedDataMatrix = new DenseDoubleMatrix<>(results.toArray());
    correctedDataMatrix.setRowNames(matrix.getRowNames());
    correctedDataMatrix.setColumnNames(matrix.getColNames());
    if (transformed) {
        MatrixStats.unLogTransform(correctedDataMatrix);
    }
    ExpressionDataDoubleMatrix correctedExpressionDataMatrix = new ExpressionDataDoubleMatrix(originalDataMatrix, correctedDataMatrix);
    assert correctedExpressionDataMatrix.getQuantitationTypes().size() == 1;
    /*
         * It is easier if we make a new quantitationtype.
         */
    QuantitationType oldQt = correctedExpressionDataMatrix.getQuantitationTypes().iterator().next();
    QuantitationType newQt = this.makeNewQuantitationType(oldQt);
    correctedExpressionDataMatrix.getQuantitationTypes().clear();
    correctedExpressionDataMatrix.getQuantitationTypes().add(newQt);
    // Sanity check...
    for (int i = 0; i < correctedExpressionDataMatrix.columns(); i++) {
        assert correctedExpressionDataMatrix.getBioMaterialForColumn(i).equals(originalDataMatrix.getBioMaterialForColumn(i));
    }
    return correctedExpressionDataMatrix;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ScaleType(ubic.gemma.model.common.quantitationtype.ScaleType) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 68 with DoubleMatrix2D

use of cern.colt.matrix.DoubleMatrix2D in project Gemma by PavlidisLab.

the class ExpressionDataSVD method removeHighestComponents.

/**
 * Provide a reconstructed matrix removing the first N components (the most significant ones). If the matrix was
 * normalized first, removing the first component replicates the normalization approach taken by Nielsen et al.
 * (Lancet 359, 2002) and Alter et al. (PNAS 2000). Correction by ANOVA would yield similar results if the nuisance
 * variable is known.
 *
 * @param numComponentsToRemove The number of components to remove, starting from the largest eigenvalue.
 * @return the reconstructed matrix; values that were missing before are re-masked.
 */
public ExpressionDataDoubleMatrix removeHighestComponents(int numComponentsToRemove) {
    DoubleMatrix<Integer, Integer> copy = svd.getS().copy();
    for (int i = 0; i < numComponentsToRemove; i++) {
        copy.set(i, i, 0.0);
    }
    double[][] rawU = svd.getU().getRawMatrix();
    double[][] rawS = copy.getRawMatrix();
    double[][] rawV = svd.getV().getRawMatrix();
    DoubleMatrix2D u = new DenseDoubleMatrix2D(rawU);
    DoubleMatrix2D s = new DenseDoubleMatrix2D(rawS);
    DoubleMatrix2D v = new DenseDoubleMatrix2D(rawV);
    Algebra a = new Algebra();
    DoubleMatrix<CompositeSequence, BioMaterial> reconstructed = new DenseDoubleMatrix<>(a.mult(a.mult(u, s), a.transpose(v)).toArray());
    reconstructed.setRowNames(this.expressionData.getMatrix().getRowNames());
    reconstructed.setColumnNames(this.expressionData.getMatrix().getColNames());
    // re-mask the missing values.
    for (int i = 0; i < reconstructed.rows(); i++) {
        for (int j = 0; j < reconstructed.columns(); j++) {
            if (Double.isNaN(this.missingValueInfo.get(i, j))) {
                reconstructed.set(i, j, Double.NaN);
            }
        }
    }
    return new ExpressionDataDoubleMatrix(this.expressionData, reconstructed);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) Algebra(cern.colt.matrix.linalg.Algebra) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 69 with DoubleMatrix2D

use of cern.colt.matrix.DoubleMatrix2D in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method regressionResiduals.

/**
 * @param matrix      on which to perform regression.
 * @param config      containing configuration of factors to include. Any interactions or subset configuration is
 *                    ignored. Data are <em>NOT</em> log transformed unless they come in that way. (the qValueThreshold will be
 *                    ignored)
 * @param retainScale if true, the data retain the global mean (intercept)
 * @return residuals from the regression.
 */
@Override
public ExpressionDataDoubleMatrix regressionResiduals(ExpressionDataDoubleMatrix matrix, DifferentialExpressionAnalysisConfig config, boolean retainScale) {
    if (config.getFactorsToInclude().isEmpty()) {
        LinearModelAnalyzer.log.warn("No factors");
        return matrix;
    }
    /*
         * Note that this method relies on similar code to doAnalysis, for the setup stages.
         */
    List<ExperimentalFactor> factors = config.getFactorsToInclude();
    List<BioMaterial> samplesUsed = ExperimentalDesignUtils.getOrderedSamples(matrix, factors);
    Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils.getBaselineConditions(samplesUsed, factors);
    ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors, samplesUsed, baselineConditions);
    DesignMatrix properDesignMatrix = new DesignMatrix(designMatrix, true);
    ExpressionDataDoubleMatrix dmatrix = new ExpressionDataDoubleMatrix(samplesUsed, matrix);
    DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();
    DoubleMatrix<String, String> sNamedMatrix = this.makeDataMatrix(designMatrix, namedMatrix);
    // perform weighted least squares regression on COUNT data
    QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
    LeastSquaresFit fit;
    if (quantitationType.getScale().equals(ScaleType.COUNT)) {
        LinearModelAnalyzer.log.info("Calculating residuals of weighted least squares regression on COUNT data");
        // note: data is not log transformed
        DoubleMatrix1D librarySize = MatrixStats.colSums(sNamedMatrix);
        MeanVarianceEstimator mv = new MeanVarianceEstimator(properDesignMatrix, sNamedMatrix, librarySize);
        fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix, mv.getWeights());
    } else {
        fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix);
    }
    DoubleMatrix2D residuals = fit.getResiduals();
    if (retainScale) {
        DoubleMatrix1D intercept = fit.getCoefficients().viewRow(0);
        for (int i = 0; i < residuals.rows(); i++) {
            residuals.viewRow(i).assign(Functions.plus(intercept.get(i)));
        }
    }
    DoubleMatrix<CompositeSequence, BioMaterial> f = new DenseDoubleMatrix<>(residuals.toArray());
    f.setRowNames(dmatrix.getMatrix().getRowNames());
    f.setColumnNames(dmatrix.getMatrix().getColNames());
    return new ExpressionDataDoubleMatrix(dmatrix, f);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 70 with DoubleMatrix2D

use of cern.colt.matrix.DoubleMatrix2D in project Gemma by PavlidisLab.

the class ComBatTest method test2WithMissingValues.

@Test
public void test2WithMissingValues() throws Exception {
    DoubleMatrixReader f = new DoubleMatrixReader();
    DoubleMatrix<String, String> testMatrix = f.read(this.getClass().getResourceAsStream("/data/analysis/preprocess/batcheffects/example.madata.withmissing.small.txt"));
    StringMatrixReader of = new StringMatrixReader();
    StringMatrix<String, String> sampleInfo = of.read(this.getClass().getResourceAsStream("/data/analysis/preprocess/batcheffects/example.metadata.small.txt"));
    @SuppressWarnings({ "unchecked", "rawtypes" }) ComBat<String, String> comBat = new ComBat(testMatrix, sampleInfo);
    DoubleMatrix2D X = comBat.getDesignMatrix();
    assertEquals(1, X.get(0, 0), 0.001);
    assertEquals(0, X.get(3, 0), 0.001);
    assertEquals(1, X.get(4, 2), 0.001);
    DoubleMatrix2D y = new DenseDoubleMatrix2D(testMatrix.asArray());
    DoubleMatrix2D sdata = comBat.standardize(y, X);
    assertEquals(-0.23640626, sdata.get(17, 1), 0.0001);
    assertEquals(0.51027241, sdata.get(8, 2), 0.001);
    assertEquals(0.2107944, sdata.get(0, 8), 0.001);
    assertEquals(0.23769649, sdata.get(3, 7), 0.001);
    assertEquals(Double.NaN, sdata.get(7, 6), 0.001);
    DoubleMatrix2D finalResult = comBat.run();
    assertEquals(10.660466, finalResult.get(7, 0), 0.0001);
    assertEquals(11.733197, finalResult.get(7, 7), 0.0001);
    assertEquals(Double.NaN, finalResult.get(7, 6), 0.0001);
    assertEquals(6.802441, finalResult.get(10, 7), 0.0001);
// log.info( finalResult );
// X08.1 X54.1 X36.1 X23.1 X17.1 X40.1 X45.1 X55.1 X11.1
// 1553129_at 3.861661 3.656498 3.891722 3.969015 3.928164 3.859776 3.885422 3.831730 3.853814
// 213447_at 6.233625 5.400615 5.583825 6.034642 6.457188 6.173610 5.322877 4.591996 6.655735
// 242039_at 8.155451 8.487645 7.512280 7.043722 7.570154 7.928574 8.138381 8.538423 7.937447
// 223394_at 7.794531 8.178473 8.285406 8.316963 7.845536 8.255656 8.604694 8.184320 7.311231
// 227758_at 3.813320 3.474997 NA 3.663592 3.701014 NA 3.648964 3.618175 3.985569
// 207696_at 3.576939 3.525421 3.561366 3.506479 3.516473 3.593750 3.628095 3.676431 3.599589
// 241107_at 6.264194 5.926644 5.654168 5.730628 6.185137 5.587933 5.527347 5.895269 6.441413
// 228980_at 10.660466 11.090106 10.769495 10.990729 10.616753 11.819747 NA 11.733197 10.516411
// 204452_s_at 6.038281 5.403597 5.950596 6.443812 5.676120 5.238702 5.616082 5.290953 5.543041
// 1562443_at 4.618687 3.961298 4.671874 4.512624 4.829666 4.138126 4.232039 4.048561 4.696936
// 232018_at 6.221217 6.882512 6.093883 5.937127 5.987227 6.502522 6.940522 6.802441 5.673800
// 1561877_at 3.793029 3.751057 3.719922 3.866485 4.070190 3.658865 3.465794 3.854070 3.878497
// 221183_at 6.800233 5.559318 6.247321 6.566830 6.731457 5.701761 6.062595 5.097052 7.117171
// 206162_x_at 5.273091 5.238142 5.023724 4.886765 5.162352 5.564269 5.573007 5.980072 5.558662
// 214502_at 4.047844 NA 3.841319 4.006797 NA 4.504433 3.992359 4.192473 3.773261
// 234099_at 7.628902 6.875036 7.101699 6.929775 7.202759 6.431563 6.622195 6.751740 7.740300
// 237400_at 4.396190 NA 4.978136 4.775859 5.379108 5.809133 4.611809 4.853239 4.734252
// 240254_at 4.062600 3.851718 4.274175 4.153745 4.030111 6.324506 4.089158 3.739869 4.426321
// 209053_s_at 5.970077 6.378914 6.241240 6.450990 5.944027 6.702078 6.463590 6.372133 5.964286
}
Also used : StringMatrixReader(ubic.basecode.io.reader.StringMatrixReader) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) Test(org.junit.Test)

Aggregations

DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)136 DenseDoubleMatrix2D (cern.colt.matrix.impl.DenseDoubleMatrix2D)38 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)36 Algebra (cern.colt.matrix.linalg.Algebra)16 DoubleFactory2D (cern.colt.matrix.DoubleFactory2D)13 DenseDoubleMatrix1D (cern.colt.matrix.impl.DenseDoubleMatrix1D)13 Node (edu.cmu.tetrad.graph.Node)11 Graph (edu.cmu.tetrad.graph.Graph)8 Test (org.junit.Test)6 DoubleMatrixReader (ubic.basecode.io.reader.DoubleMatrixReader)6 StringMatrixReader (ubic.basecode.io.reader.StringMatrixReader)6 DataSet (edu.cmu.tetrad.data.DataSet)5 DoubleArrayList (cern.colt.list.DoubleArrayList)4 TetradMatrix (edu.cmu.tetrad.util.TetradMatrix)4 DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix)4 AbstractFormatter (cern.colt.matrix.impl.AbstractFormatter)3 Endpoint (edu.cmu.tetrad.graph.Endpoint)3 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)3 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)3 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)3