Search in sources :

Example 26 with DoubleMatrix1D

use of cern.colt.matrix.DoubleMatrix1D in project Gemma by PavlidisLab.

the class ComBat method checkForProblems.

/**
 * Check sdata for problems. If the design is not of full rank, we get NaN in standardized data.
 *
 * @param sdata sdata
 * @throws ComBatException combat problem
 */
private void checkForProblems(DoubleMatrix2D sdata) throws ComBatException {
    int numMissing = 0;
    int total = 0;
    for (int i = 0; i < sdata.rows(); i++) {
        DoubleMatrix1D row = sdata.viewRow(i);
        for (int j = 0; j < sdata.columns(); j++) {
            if (Double.isNaN(row.getQuick(j))) {
                numMissing++;
            }
            total++;
        }
    }
    if (total == numMissing) {
        /*
             * Alternative that can help in some cases: back out and drop factors. There are definitely strategies for
             * doing this (drop factors that have no major PC loadings, for example), but it might be bad to do this
             * "automagically".
             */
        throw new ComBatException("Could not complete batch correction: model must not be of full rank.");
    }
}
Also used : DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D)

Example 27 with DoubleMatrix1D

use of cern.colt.matrix.DoubleMatrix1D in project Gemma by PavlidisLab.

the class ComBat method runNonParametric.

private void runNonParametric(final DoubleMatrix2D sdata, DoubleMatrix2D gammastar, DoubleMatrix2D deltastar) {
    final ConcurrentHashMap<String, DoubleMatrix1D[]> results = new ConcurrentHashMap<>();
    int numThreads = Math.min(batches.size(), Runtime.getRuntime().availableProcessors());
    ComBat.log.info("Runing nonparametric estimation on " + numThreads + " threads");
    Future<?>[] futures = new Future[numThreads];
    ExecutorService service = Executors.newCachedThreadPool();
    /*
         * Divvy up batches over threads.
         */
    int batchesPerThread = batches.size() / numThreads;
    final String[] batchIds = batches.keySet().toArray(new String[] {});
    for (int i = 0; i < numThreads; i++) {
        final int firstBatch = i * batchesPerThread;
        final int lastBatch = i == (numThreads - 1) ? batches.size() : firstBatch + batchesPerThread;
        futures[i] = service.submit(new Runnable() {

            @Override
            public void run() {
                for (int k = firstBatch; k < lastBatch; k++) {
                    String batchId = batchIds[k];
                    DoubleMatrix2D batchData = ComBat.this.getBatchData(sdata, batchId);
                    DoubleMatrix1D[] batchResults = ComBat.this.nonParametricFit(batchData, gammaHat.viewRow(k), deltaHat.viewRow(k));
                    results.put(batchId, batchResults);
                }
            }
        });
    }
    service.shutdown();
    boolean allDone = false;
    do {
        for (Future<?> f : futures) {
            allDone = true;
            if (!f.isDone() && !f.isCancelled()) {
                allDone = false;
                break;
            }
        }
    } while (!allDone);
    for (int i = 0; i < batchIds.length; i++) {
        String batchId = batchIds[i];
        DoubleMatrix1D[] batchResults = results.get(batchId);
        for (int j = 0; j < batchResults[0].size(); j++) {
            gammastar.set(i, j, batchResults[0].get(j));
        }
        for (int j = 0; j < batchResults[1].size(); j++) {
            deltastar.set(i, j, batchResults[1].get(j));
        }
    }
}
Also used : DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 28 with DoubleMatrix1D

use of cern.colt.matrix.DoubleMatrix1D in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrixUtil method filterAndLog2Transform.

/**
 * Log2 transform if necessary, do any required filtering prior to analysis. Count data is converted to log2CPM (but
 * we store log2cpm as the processed data, so that is what would generally be used).
 *
 * @param quantitationType QT
 * @param dmatrix          matrix
 * @return ee data double matrix
 */
public static ExpressionDataDoubleMatrix filterAndLog2Transform(QuantitationType quantitationType, ExpressionDataDoubleMatrix dmatrix) {
    ScaleType scaleType = ExpressionDataDoubleMatrixUtil.findScale(quantitationType, dmatrix.getMatrix());
    if (scaleType.equals(ScaleType.LOG2)) {
        ExpressionDataDoubleMatrixUtil.log.info("Data is already on a log2 scale");
    } else if (scaleType.equals(ScaleType.LN)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from ln to log2 **** ");
        MatrixStats.convertToLog2(dmatrix.getMatrix(), Math.E);
    } else if (scaleType.equals(ScaleType.LOG10)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from log10 to log2 **** ");
        MatrixStats.convertToLog2(dmatrix.getMatrix(), 10);
    } else if (scaleType.equals(ScaleType.LINEAR)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** LOG TRANSFORMING **** ");
        MatrixStats.logTransform(dmatrix.getMatrix());
    } else if (scaleType.equals(ScaleType.COUNT)) {
        /*
             * Since we store log2cpm this shouldn't be reached any more. We don't do it in place.
             */
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from count to log2 counts per million **** ");
        DoubleMatrix1D librarySize = MatrixStats.colSums(dmatrix.getMatrix());
        DoubleMatrix<CompositeSequence, BioMaterial> log2cpm = MatrixStats.convertToLog2Cpm(dmatrix.getMatrix(), librarySize);
        dmatrix = new ExpressionDataDoubleMatrix(dmatrix, log2cpm);
    } else {
        throw new UnknownLogScaleException("Can't figure out what scale the data are on");
    }
    /*
         * We do this second because doing it first causes some kind of subtle problem ... (round off? I could not
         * really track this down).
         *
         * Remove zero-variance rows, but also rows that have lots of equal values even if variance is non-zero. This
         * happens when data is "clipped" (e.g., all values under 10 set to 10).
         */
    int r = dmatrix.rows();
    dmatrix = ExpressionExperimentFilter.zeroVarianceFilter(dmatrix);
    if (dmatrix.rows() < r) {
        ExpressionDataDoubleMatrixUtil.log.info((r - dmatrix.rows()) + " rows removed due to low variance");
    }
    r = dmatrix.rows();
    if (dmatrix.columns() > ExpressionDataDoubleMatrixUtil.COLUMNS_LIMIT) {
        dmatrix = ExpressionExperimentFilter.tooFewDistinctValues(dmatrix, ExpressionDataDoubleMatrixUtil.VALUES_LIMIT);
        if (dmatrix.rows() < r) {
            ExpressionDataDoubleMatrixUtil.log.info((r - dmatrix.rows()) + " rows removed due to too many identical values");
        }
    }
    return dmatrix;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ScaleType(ubic.gemma.model.common.quantitationtype.ScaleType) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) UnknownLogScaleException(ubic.gemma.core.analysis.preprocess.UnknownLogScaleException)

Example 29 with DoubleMatrix1D

use of cern.colt.matrix.DoubleMatrix1D in project Gemma by PavlidisLab.

the class DataUpdater method log2cpmFromCounts.

/**
 * For back filling log2cpm when only counts are available. This wouldn't be used routinely, because new experiments
 * get log2cpm computed when loaded.
 *
 * @param ee ee
 * @param qt qt
 */
public void log2cpmFromCounts(ExpressionExperiment ee, QuantitationType qt) {
    ee = experimentService.thawLite(ee);
    /*
         * Get the count data; Make sure it is currently preferred (so we don't do this twice by accident)
         * We need to do this from the Raw data, not the data that has been normalized etc.
         */
    Collection<RawExpressionDataVector> counts = rawExpressionDataVectorService.find(qt);
    ExpressionDataDoubleMatrix countMatrix = new ExpressionDataDoubleMatrix(counts);
    try {
        /*
             * Get the count data quantitation type and make it non-preferred
             */
        qt.setIsPreferred(false);
        qtService.update(qt);
        // so updated QT is attached.
        ee = experimentService.thawLite(ee);
        QuantitationType log2cpmQt = this.makelog2cpmQt();
        DoubleMatrix1D librarySize = MatrixStats.colSums(countMatrix.getMatrix());
        DoubleMatrix<CompositeSequence, BioMaterial> log2cpmMatrix = MatrixStats.convertToLog2Cpm(countMatrix.getMatrix(), librarySize);
        ExpressionDataDoubleMatrix log2cpmEEMatrix = new ExpressionDataDoubleMatrix(ee, log2cpmQt, log2cpmMatrix);
        assert log2cpmEEMatrix.getQuantitationTypes().iterator().next().getIsPreferred();
        Collection<ArrayDesign> platforms = experimentService.getArrayDesignsUsed(ee);
        if (platforms.size() > 1)
            throw new IllegalArgumentException("Cannot apply to multiplatform data sets");
        this.addData(ee, platforms.iterator().next(), log2cpmEEMatrix);
    } catch (Exception e) {
        DataUpdater.log.error(e, e);
        // try to recover.
        qt.setIsPreferred(true);
        qtService.update(qt);
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ConfigurationException(org.apache.commons.configuration.ConfigurationException) PreprocessingException(ubic.gemma.core.analysis.preprocess.PreprocessingException) IOException(java.io.IOException)

Example 30 with DoubleMatrix1D

use of cern.colt.matrix.DoubleMatrix1D in project beast-mcmc by beast-dev.

the class ComplexSubstitutionModel method setupMatrix.

public void setupMatrix() {
    if (!eigenInitialised) {
        initialiseEigen();
        storedEvalImag = new double[stateCount];
    }
    int i = 0;
    storeIntoAmat();
    makeValid(amat, stateCount);
    // compute eigenvalues and eigenvectors
    // EigenvalueDecomposition eigenDecomp = new EigenvalueDecomposition(new DenseDoubleMatrix2D(amat));
    RobustEigenDecomposition eigenDecomp;
    try {
        eigenDecomp = new RobustEigenDecomposition(new DenseDoubleMatrix2D(amat), maxIterations);
    } catch (ArithmeticException ae) {
        System.err.println(ae.getMessage());
        wellConditioned = false;
        System.err.println("amat = \n" + new Matrix(amat));
        return;
    }
    DoubleMatrix2D eigenV = eigenDecomp.getV();
    DoubleMatrix1D eigenVReal = eigenDecomp.getRealEigenvalues();
    DoubleMatrix1D eigenVImag = eigenDecomp.getImagEigenvalues();
    DoubleMatrix2D eigenVInv;
    if (checkConditioning) {
        RobustSingularValueDecomposition svd;
        try {
            svd = new RobustSingularValueDecomposition(eigenV, maxIterations);
        } catch (ArithmeticException ae) {
            System.err.println(ae.getMessage());
            wellConditioned = false;
            return;
        }
        if (svd.cond() > maxConditionNumber) {
            wellConditioned = false;
            return;
        }
    }
    try {
        eigenVInv = alegbra.inverse(eigenV);
    } catch (IllegalArgumentException e) {
        wellConditioned = false;
        return;
    }
    Ievc = eigenVInv.toArray();
    Evec = eigenV.toArray();
    Eval = eigenVReal.toArray();
    EvalImag = eigenVImag.toArray();
    // Check for valid decomposition
    for (i = 0; i < stateCount; i++) {
        if (Double.isNaN(Eval[i]) || Double.isNaN(EvalImag[i]) || Double.isInfinite(Eval[i]) || Double.isInfinite(EvalImag[i])) {
            wellConditioned = false;
            return;
        } else if (Math.abs(Eval[i]) < 1e-10) {
            Eval[i] = 0.0;
        }
    }
    updateMatrix = false;
    wellConditioned = true;
    // compute normalization and rescale eigenvalues
    computeStationaryDistribution();
    if (doNormalization) {
        double subst = 0.0;
        for (i = 0; i < stateCount; i++) subst += -amat[i][i] * stationaryDistribution[i];
        for (i = 0; i < stateCount; i++) {
            Eval[i] /= subst;
            EvalImag[i] /= subst;
        }
    }
}
Also used : Matrix(dr.math.matrixAlgebra.Matrix) RobustSingularValueDecomposition(dr.math.matrixAlgebra.RobustSingularValueDecomposition) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) RobustEigenDecomposition(dr.math.matrixAlgebra.RobustEigenDecomposition)

Aggregations

DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)70 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)37 DenseDoubleMatrix1D (cern.colt.matrix.impl.DenseDoubleMatrix1D)25 DenseDoubleMatrix2D (cern.colt.matrix.impl.DenseDoubleMatrix2D)17 Algebra (cern.colt.matrix.linalg.Algebra)9 Node (edu.cmu.tetrad.graph.Node)8 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)6 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)5 IntComparator (cern.colt.function.IntComparator)4 DoubleArrayList (cern.colt.list.DoubleArrayList)4 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)4 DoubleFactory2D (cern.colt.matrix.DoubleFactory2D)3 Matrix (dr.math.matrixAlgebra.Matrix)2 RobustEigenDecomposition (dr.math.matrixAlgebra.RobustEigenDecomposition)2 ICovarianceMatrix (edu.cmu.tetrad.data.ICovarianceMatrix)2 Endpoint (edu.cmu.tetrad.graph.Endpoint)2 Graph (edu.cmu.tetrad.graph.Graph)2 SemGraph (edu.cmu.tetrad.graph.SemGraph)2 TetradMatrix (edu.cmu.tetrad.util.TetradMatrix)2 IOException (java.io.IOException)2