Search in sources :

Example 31 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class ExpressionDataSVD method removeHighestComponents.

/**
 * Provide a reconstructed matrix removing the first N components (the most significant ones). If the matrix was
 * normalized first, removing the first component replicates the normalization approach taken by Nielsen et al.
 * (Lancet 359, 2002) and Alter et al. (PNAS 2000). Correction by ANOVA would yield similar results if the nuisance
 * variable is known.
 *
 * @param numComponentsToRemove The number of components to remove, starting from the largest eigenvalue.
 * @return the reconstructed matrix; values that were missing before are re-masked.
 */
public ExpressionDataDoubleMatrix removeHighestComponents(int numComponentsToRemove) {
    DoubleMatrix<Integer, Integer> copy = svd.getS().copy();
    for (int i = 0; i < numComponentsToRemove; i++) {
        copy.set(i, i, 0.0);
    }
    double[][] rawU = svd.getU().getRawMatrix();
    double[][] rawS = copy.getRawMatrix();
    double[][] rawV = svd.getV().getRawMatrix();
    DoubleMatrix2D u = new DenseDoubleMatrix2D(rawU);
    DoubleMatrix2D s = new DenseDoubleMatrix2D(rawS);
    DoubleMatrix2D v = new DenseDoubleMatrix2D(rawV);
    Algebra a = new Algebra();
    DoubleMatrix<CompositeSequence, BioMaterial> reconstructed = new DenseDoubleMatrix<>(a.mult(a.mult(u, s), a.transpose(v)).toArray());
    reconstructed.setRowNames(this.expressionData.getMatrix().getRowNames());
    reconstructed.setColumnNames(this.expressionData.getMatrix().getColNames());
    // re-mask the missing values.
    for (int i = 0; i < reconstructed.rows(); i++) {
        for (int j = 0; j < reconstructed.columns(); j++) {
            if (Double.isNaN(this.missingValueInfo.get(i, j))) {
                reconstructed.set(i, j, Double.NaN);
            }
        }
    }
    return new ExpressionDataDoubleMatrix(this.expressionData, reconstructed);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) Algebra(cern.colt.matrix.linalg.Algebra) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 32 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method regressionResiduals.

/**
 * @param matrix      on which to perform regression.
 * @param config      containing configuration of factors to include. Any interactions or subset configuration is
 *                    ignored. Data are <em>NOT</em> log transformed unless they come in that way. (the qValueThreshold will be
 *                    ignored)
 * @param retainScale if true, the data retain the global mean (intercept)
 * @return residuals from the regression.
 */
@Override
public ExpressionDataDoubleMatrix regressionResiduals(ExpressionDataDoubleMatrix matrix, DifferentialExpressionAnalysisConfig config, boolean retainScale) {
    if (config.getFactorsToInclude().isEmpty()) {
        LinearModelAnalyzer.log.warn("No factors");
        return matrix;
    }
    /*
         * Note that this method relies on similar code to doAnalysis, for the setup stages.
         */
    List<ExperimentalFactor> factors = config.getFactorsToInclude();
    List<BioMaterial> samplesUsed = ExperimentalDesignUtils.getOrderedSamples(matrix, factors);
    Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils.getBaselineConditions(samplesUsed, factors);
    ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors, samplesUsed, baselineConditions);
    DesignMatrix properDesignMatrix = new DesignMatrix(designMatrix, true);
    ExpressionDataDoubleMatrix dmatrix = new ExpressionDataDoubleMatrix(samplesUsed, matrix);
    DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();
    DoubleMatrix<String, String> sNamedMatrix = this.makeDataMatrix(designMatrix, namedMatrix);
    // perform weighted least squares regression on COUNT data
    QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
    LeastSquaresFit fit;
    if (quantitationType.getScale().equals(ScaleType.COUNT)) {
        LinearModelAnalyzer.log.info("Calculating residuals of weighted least squares regression on COUNT data");
        // note: data is not log transformed
        DoubleMatrix1D librarySize = MatrixStats.colSums(sNamedMatrix);
        MeanVarianceEstimator mv = new MeanVarianceEstimator(properDesignMatrix, sNamedMatrix, librarySize);
        fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix, mv.getWeights());
    } else {
        fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix);
    }
    DoubleMatrix2D residuals = fit.getResiduals();
    if (retainScale) {
        DoubleMatrix1D intercept = fit.getCoefficients().viewRow(0);
        for (int i = 0; i < residuals.rows(); i++) {
            residuals.viewRow(i).assign(Functions.plus(intercept.get(i)));
        }
    }
    DoubleMatrix<CompositeSequence, BioMaterial> f = new DenseDoubleMatrix<>(residuals.toArray());
    f.setRowNames(dmatrix.getMatrix().getRowNames());
    f.setColumnNames(dmatrix.getMatrix().getColNames());
    return new ExpressionDataDoubleMatrix(dmatrix, f);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 33 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class PreprocessorServiceImpl method getCorrectedData.

private ExpressionDataDoubleMatrix getCorrectedData(ExpressionExperiment ee, Collection<ProcessedExpressionDataVector> vecs) throws PreprocessingException {
    /*
         * FIXME perhaps here we should remove rows that are going to be problematic?
         */
    ExpressionDataDoubleMatrix correctedData = expressionExperimentBatchCorrectionService.comBat(new ExpressionDataDoubleMatrix(vecs));
    if (correctedData == null || correctedData.rows() != vecs.size()) {
        throw new PreprocessingException(ee.getShortName() + " could not be batch-corrected (matrix returned by ComBat had wrong number of rows)");
    }
    this.checkQuantitationType(correctedData);
    return correctedData;
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)

Example 34 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class ProcessedExpressionDataCreateServiceTest method testComputeDevRankForExpressionExperimentMultiArrayWithGaps.

/**
 * Three platforms, one sample was not run on GPL81. It's 'Norm-1a', but the name we use for the sample is random.
 */
@SuppressWarnings("unchecked")
@Test
public void testComputeDevRankForExpressionExperimentMultiArrayWithGaps() throws Exception {
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse482short")));
        Collection<ExpressionExperiment> results = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE482", false, true, false);
        this.ee = results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        this.ee = ((Collection<ExpressionExperiment>) e.getData()).iterator().next();
    }
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    Collection<ProcessedExpressionDataVector> preferredVectors = this.processedExpressionDataVectorService.getProcessedDataVectors(ee);
    ee = eeService.load(ee.getId());
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.thaw(preferredVectors);
    ExpressionDataDoubleMatrix mat = new ExpressionDataDoubleMatrix(preferredVectors);
    assertEquals(10, mat.columns());
    boolean found = false;
    for (int i = 0; i < mat.rows(); i++) {
        Double[] row = mat.getRow(i);
        // debugging
        if (i == 0) {
            for (int j = 0; j < row.length; j++) {
                BioAssay ba = mat.getBioAssaysForColumn(j).iterator().next();
                System.err.println(ba.getName());
            }
        }
        System.err.print(mat.getRowElement(i).getDesignElement().getName() + "\t");
        for (double d : row) {
            System.err.print(String.format("%4.2f\t", d));
        }
        System.err.print("\n");
        CompositeSequence el = mat.getDesignElementForRow(i);
        for (int j = 0; j < row.length; j++) {
            BioAssay ba = mat.getBioAssaysForColumn(j).iterator().next();
            if (ba.getName().matches("PGA-MurLungHyper-Norm-1a[ABC]v2-s2") && (el.getName().equals("100001_at") || el.getName().equals("100002_at") || el.getName().equals("100003_at") || el.getName().equals("100004_at") || el.getName().equals("100005_at") || el.getName().equals("100006_at") || el.getName().equals("100007_at") || el.getName().equals("100009_r_at") || el.getName().equals("100010_at") || el.getName().equals("100011_at"))) {
                assertEquals(Double.NaN, row[j], 0.0001);
                found = true;
            } else {
                assertTrue("Got unexpected NA value for " + ba.getName() + " for " + el.getName(), !Double.isNaN(row[j]));
            }
        }
    }
    assertTrue(found);
    /*
         * Now do this through the processedExpressionDataVectorService
         */
    Collection<DoubleVectorValueObject> da = this.processedExpressionDataVectorService.getProcessedDataArrays(ee);
    assertEquals(30, da.size());
    found = false;
    boolean first = true;
    for (DoubleVectorValueObject v : da) {
        CompositeSequenceValueObject el = v.getDesignElement();
        double[] row = v.getData();
        // debugging
        if (first) {
            for (int j = 0; j < row.length; j++) {
                BioAssayValueObject ba = v.getBioAssays().get(j);
                System.err.println(ba.getName());
            }
            first = false;
        }
        System.err.print(el.getName() + "\t");
        for (double d : row) {
            System.err.print(String.format("%4.2f\t", d));
        }
        System.err.print("\n");
        assertEquals(10, row.length);
        for (int j = 0; j < row.length; j++) {
            assertNotNull(v.getBioAssays());
            BioAssayValueObject ba = v.getBioAssays().get(j);
            if (ba.getName().startsWith("Missing bioassay for biomaterial") && (el.getName().equals("100001_at") || el.getName().equals("100002_at") || el.getName().equals("100003_at") || el.getName().equals("100004_at") || el.getName().equals("100005_at") || el.getName().equals("100006_at") || el.getName().equals("100007_at") || el.getName().equals("100009_r_at") || el.getName().equals("100010_at") || el.getName().equals("100011_at"))) {
                assertEquals(Double.NaN, row[j], 0.0001);
                found = true;
            } else {
                assertTrue("Got unexpected NA value for " + ba.getName() + " for " + el.getName(), !Double.isNaN(row[j]));
            }
        }
    }
    assertTrue(found);
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) CompositeSequenceValueObject(ubic.gemma.model.expression.designElement.CompositeSequenceValueObject) BioAssayValueObject(ubic.gemma.model.expression.bioAssay.BioAssayValueObject) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 35 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class ExpressionExperimentBatchCorrectionServiceTest method testComBatOnEE.

@Test
public void testComBatOnEE() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse18162Short")));
    ExpressionExperiment newee;
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE18162", false, true, false);
        newee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        newee = (ExpressionExperiment) ((List<?>) e.getData()).iterator().next();
    }
    assertNotNull(newee);
    newee = expressionExperimentService.thawLite(newee);
    processedExpressionDataVectorService.computeProcessedExpressionData(newee);
    try (InputStream deis = this.getClass().getResourceAsStream("/data/loader/expression/geo/gse18162Short/design.txt")) {
        experimentalDesignImporter.importDesign(newee, deis);
    }
    ExpressionDataDoubleMatrix comBat = correctionService.comBat(newee);
    assertNotNull(comBat);
}
Also used : InputStream(java.io.InputStream) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) List(java.util.List) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Aggregations

ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)41 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)12 Test (org.junit.Test)9 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)7 ArrayList (java.util.ArrayList)6 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)6 DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix)5 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)5 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)5 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)4 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)4 InputStream (java.io.InputStream)4 DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)4 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)4 Algebra (cern.colt.matrix.linalg.Algebra)3 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)3 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)3 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)3