Search in sources :

Example 6 with DenseDoubleMatrix

use of ubic.basecode.dataStructure.matrix.DenseDoubleMatrix in project Gemma by PavlidisLab.

the class ExpressionDataSVD method removeHighestComponents.

/**
 * Rebuilds the data matrix from the decomposition after zeroing the N leading (most significant) singular values.
 * When the matrix was normalized beforehand, dropping the first component reproduces the normalization strategy
 * of Nielsen et al. (Lancet 359, 2002) and Alter et al. (PNAS 2000). If the nuisance variable is known, an
 * ANOVA-based correction would give similar results.
 *
 * @param numComponentsToRemove how many components to drop, counting from the largest eigenvalue.
 * @return the reconstructed matrix; cells flagged as NaN in the missing-value information are set back to NaN.
 */
public ExpressionDataDoubleMatrix removeHighestComponents(int numComponentsToRemove) {
    // Zero the leading diagonal entries on a copy of S rather than on the matrix returned by the SVD itself.
    DoubleMatrix<Integer, Integer> truncatedS = svd.getS().copy();
    int component = 0;
    while (component < numComponentsToRemove) {
        truncatedS.set(component, component, 0.0);
        component++;
    }

    DoubleMatrix2D left = new DenseDoubleMatrix2D(svd.getU().getRawMatrix());
    DoubleMatrix2D diagonal = new DenseDoubleMatrix2D(truncatedS.getRawMatrix());
    DoubleMatrix2D right = new DenseDoubleMatrix2D(svd.getV().getRawMatrix());

    // Reconstruct as U * S' * V^T.
    Algebra algebra = new Algebra();
    DoubleMatrix2D scaledLeft = algebra.mult(left, diagonal);
    DoubleMatrix2D product = algebra.mult(scaledLeft, algebra.transpose(right));

    DoubleMatrix<CompositeSequence, BioMaterial> result = new DenseDoubleMatrix<>(product.toArray());
    result.setRowNames(this.expressionData.getMatrix().getRowNames());
    result.setColumnNames(this.expressionData.getMatrix().getColNames());

    // Put the missing values back where they were.
    for (int row = 0; row < result.rows(); row++) {
        for (int col = 0; col < result.columns(); col++) {
            if (!Double.isNaN(this.missingValueInfo.get(row, col))) {
                continue;
            }
            result.set(row, col, Double.NaN);
        }
    }
    return new ExpressionDataDoubleMatrix(this.expressionData, result);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) Algebra(cern.colt.matrix.linalg.Algebra) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 7 with DenseDoubleMatrix

use of ubic.basecode.dataStructure.matrix.DenseDoubleMatrix in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method regressionResiduals.

/**
 * Fits a linear model of the data on the configured factors and returns the residuals.
 *
 * @param matrix      the data on which to perform the regression.
 * @param config      supplies the factors to include. Any interactions or subset configuration is ignored, as is
 *                    the qValueThreshold. Data are <em>NOT</em> log transformed unless they come in that way.
 * @param retainScale when true, the intercept (first row of the fitted coefficients) is added back to the
 *                    residuals so the data retain the global mean.
 * @return residuals from the regression; the input matrix itself when the configuration lists no factors.
 */
@Override
public ExpressionDataDoubleMatrix regressionResiduals(ExpressionDataDoubleMatrix matrix, DifferentialExpressionAnalysisConfig config, boolean retainScale) {
    if (config.getFactorsToInclude().isEmpty()) {
        LinearModelAnalyzer.log.warn("No factors");
        return matrix;
    }

    // The setup stages below rely on code similar to doAnalysis.
    List<ExperimentalFactor> includedFactors = config.getFactorsToInclude();
    List<BioMaterial> orderedSamples = ExperimentalDesignUtils.getOrderedSamples(matrix, includedFactors);
    Map<ExperimentalFactor, FactorValue> baselines = ExperimentalDesignUtils.getBaselineConditions(orderedSamples, includedFactors);
    ObjectMatrix<String, String, Object> rawDesign = ExperimentalDesignUtils.buildDesignMatrix(includedFactors, orderedSamples, baselines);
    DesignMatrix design = new DesignMatrix(rawDesign, true);

    ExpressionDataDoubleMatrix workingData = new ExpressionDataDoubleMatrix(orderedSamples, matrix);
    DoubleMatrix<String, String> data = this.makeDataMatrix(rawDesign, workingData.getMatrix());

    // The scale of the first quantitation type decides between ordinary and weighted least squares.
    QuantitationType firstType = workingData.getQuantitationTypes().iterator().next();
    boolean countData = firstType.getScale().equals(ScaleType.COUNT);

    LeastSquaresFit fit;
    if (!countData) {
        fit = new LeastSquaresFit(design, data);
    } else {
        LinearModelAnalyzer.log.info("Calculating residuals of weighted least squares regression on COUNT data");
        // note: data is not log transformed
        DoubleMatrix1D librarySize = MatrixStats.colSums(data);
        MeanVarianceEstimator meanVariance = new MeanVarianceEstimator(design, data, librarySize);
        fit = new LeastSquaresFit(design, data, meanVariance.getWeights());
    }

    DoubleMatrix2D resid = fit.getResiduals();
    if (retainScale) {
        // Add each row's intercept back onto that row of residuals.
        DoubleMatrix1D intercepts = fit.getCoefficients().viewRow(0);
        for (int row = 0; row < resid.rows(); row++) {
            double rowIntercept = intercepts.get(row);
            resid.viewRow(row).assign(Functions.plus(rowIntercept));
        }
    }

    DoubleMatrix<CompositeSequence, BioMaterial> named = new DenseDoubleMatrix<>(resid.toArray());
    named.setRowNames(workingData.getMatrix().getRowNames());
    named.setColumnNames(workingData.getMatrix().getColNames());
    return new ExpressionDataDoubleMatrix(workingData, named);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 8 with DenseDoubleMatrix

use of ubic.basecode.dataStructure.matrix.DenseDoubleMatrix in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorDaoImpl method renormalize.

/**
 * Copies the vector data into a matrix (one row per vector, one column per bioassay of the first vector's
 * dimension) and passes that matrix, together with the vectors, to {@code doQuantileNormalization}.
 *
 * @param vectors the vectors to renormalize, keyed by design element. Do not call this on ratiometric or count
 *                data. An empty map is a no-op.
 */
private void renormalize(Map<CompositeSequence, DoubleVectorValueObject> vectors) {
    if (vectors.isEmpty()) {
        // Nothing to normalize; this also avoids a NoSuchElementException from iterator().next() below.
        return;
    }
    // The column count comes from the first vector; every vector is asserted to carry that many values.
    int cols = vectors.values().iterator().next().getBioAssayDimension().getBioAssays().size();
    DoubleMatrix<CompositeSequence, Integer> mat = new DenseDoubleMatrix<>(vectors.size(), cols);
    // Columns are simply named by their index.
    for (int j = 0; j < cols; j++) {
        mat.setColumnName(j, j);
    }
    int row = 0;
    for (Map.Entry<CompositeSequence, DoubleVectorValueObject> entry : vectors.entrySet()) {
        double[] data = entry.getValue().getData();
        assert data.length == cols;
        for (int j = 0; j < cols; j++) {
            mat.set(row, j, data[j]);
        }
        mat.setRowName(entry.getKey(), row);
        row++;
    }
    this.doQuantileNormalization(mat, vectors);
    assert mat.rows() == vectors.size();
}
Also used : DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 9 with DenseDoubleMatrix

use of ubic.basecode.dataStructure.matrix.DenseDoubleMatrix in project Gemma by PavlidisLab.

the class ExpressionDataMatrixServiceImpl method getRankMatrix.

/**
 * Builds a gene-by-experiment matrix in which each cell holds the median of the ranks reported for that gene in
 * that experiment. Cells for which no ranks are available stay NaN.
 *
 * @param genes  the genes, used as row names.
 * @param ees    the experiments, used as column names.
 * @param method the rank method passed through to the vector service.
 * @return the matrix of median ranks.
 */
@Override
public DoubleMatrix<Gene, ExpressionExperiment> getRankMatrix(Collection<Gene> genes, Collection<ExpressionExperiment> ees, ProcessedExpressionDataVectorDao.RankMethod method) {
    DoubleMatrix<Gene, ExpressionExperiment> result = new DenseDoubleMatrix<>(genes.size(), ees.size());
    Map<ExpressionExperiment, Map<Gene, Collection<Double>>> ranksByExperiment = processedExpressionDataVectorService.getRanks(ees, genes, method);
    result.setRowNames(new ArrayList<>(genes));
    result.setColumnNames(new ArrayList<>(ees));

    // Start with every cell marked as missing.
    for (int row = 0; row < result.rows(); row++) {
        for (int col = 0; col < result.columns(); col++) {
            result.setByKeys(result.getRowName(row), result.getColName(col), Double.NaN);
        }
    }

    // Overwrite the cells for which ranks were returned.
    for (Gene gene : result.getRowNames()) {
        for (ExpressionExperiment experiment : result.getColNames()) {
            if (!ranksByExperiment.containsKey(experiment)) {
                continue;
            }
            Collection<Double> geneRanks = ranksByExperiment.get(experiment).get(gene);
            if (geneRanks != null) {
                // compute median of collection.
                Double[] boxed = geneRanks.toArray(new Double[geneRanks.size()]);
                double[] values = ArrayUtils.toPrimitive(boxed);
                double median = DescriptiveWithMissing.median(new DoubleArrayList(values));
                result.setByKeys(gene, experiment, median);
            }
        }
    }
    return result;
}
Also used : DoubleArrayList(cern.colt.list.DoubleArrayList) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) Gene(ubic.gemma.model.genome.Gene) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) Map(java.util.Map)

Example 10 with DenseDoubleMatrix

use of ubic.basecode.dataStructure.matrix.DenseDoubleMatrix in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrix method createMatrix.

/**
 * Builds the matrix of doubles for the given vectors. Columns are named by the biomaterial for each column index
 * and rows by the design element recorded for each row index. Cells that no vector filled in end up as NaN.
 *
 * @param vectors the vectors whose byte data is decoded into the matrix.
 * @param maxSize the number of columns of the matrix.
 * @return the populated, named matrix.
 * @throws IllegalStateException if a vector's number of values differs from the number of bioassays in its
 *                               dimension.
 */
private DoubleMatrix<CompositeSequence, BioMaterial> createMatrix(Collection<? extends DesignElementDataVector> vectors, int maxSize) {
    // Placeholder for cells that have not been written yet; swapped for NaN at the end.
    final double unset = Double.NEGATIVE_INFINITY;

    int rowCount = this.rowDesignElementMapByInteger.keySet().size();
    DoubleMatrix<CompositeSequence, BioMaterial> result = new DenseDoubleMatrix<>(rowCount, maxSize);
    for (int col = 0; col < result.columns(); col++) {
        result.addColumnName(this.getBioMaterialForColumn(col));
    }

    for (int row = 0; row < result.rows(); row++) {
        for (int col = 0; col < result.columns(); col++) {
            result.set(row, col, unset);
        }
    }

    ByteArrayConverter converter = new ByteArrayConverter();
    // Sorted by row index so the names can be attached in row order afterwards.
    Map<Integer, CompositeSequence> elementsByRow = new TreeMap<>();
    for (DesignElementDataVector vector : vectors) {
        BioAssayDimension dimension = vector.getBioAssayDimension();
        byte[] bytes = vector.getData();

        CompositeSequence designElement = vector.getDesignElement();
        assert designElement != null : "No design element for " + vector;

        Integer rowIndex = this.rowElementMap.get(designElement);
        assert rowIndex != null;
        elementsByRow.put(rowIndex, designElement);

        double[] vals = converter.byteArrayToDoubles(bytes);
        Collection<BioAssay> bioAssays = dimension.getBioAssays();
        if (bioAssays.size() != vals.length) {
            throw new IllegalStateException("Mismatch: " + vals.length + " values in vector ( " + bytes.length + " bytes) for " + designElement + " got " + bioAssays.size() + " bioassays in the bioAssayDimension");
        }

        Iterator<BioAssay> assayIterator = bioAssays.iterator();
        this.setMatBioAssayValues(result, rowIndex, ArrayUtils.toObject(vals), bioAssays, assayIterator);
    }

    // Note: these row names aren't that important unless we use the bare matrix.
    for (int row = 0; row < result.rows(); row++) {
        result.addRowName(elementsByRow.get(row));
    }
    assert result.getRowNames().size() == result.rows();

    // Whatever is still at the placeholder value is a missing value.
    for (int row = 0; row < result.rows(); row++) {
        for (int col = 0; col < result.columns(); col++) {
            if (result.get(row, col) == unset) {
                result.set(row, col, Double.NaN);
            }
        }
    }

    ExpressionDataDoubleMatrix.log.debug("Created a " + result.rows() + " x " + result.columns() + " matrix");
    return result;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Aggregations

DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix): 11; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 8; BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial): 7; ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix): 5; BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay): 5; DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D): 4; ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment): 3; DoubleArrayList (cern.colt.list.DoubleArrayList): 2; DenseDoubleMatrix2D (cern.colt.matrix.impl.DenseDoubleMatrix2D): 2; Algebra (cern.colt.matrix.linalg.Algebra): 2; QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType): 2; DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D): 1; DecimalFormat (java.text.DecimalFormat): 1; ArrayList (java.util.ArrayList): 1; Map (java.util.Map): 1; Test (org.junit.Test): 1; RequestMapping (org.springframework.web.bind.annotation.RequestMapping): 1; ModelAndView (org.springframework.web.servlet.ModelAndView): 1; ColorMatrix (ubic.basecode.graphics.ColorMatrix): 1; MatrixDisplay (ubic.basecode.graphics.MatrixDisplay): 1