Search in sources :

Example 36 with DoubleMatrix1D

use of cern.colt.matrix.DoubleMatrix1D in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method regressionResiduals.

/**
 * @param matrix      on which to perform regression.
 * @param config      containing configuration of factors to include. Any interactions or subset configuration is
 *                    ignored. Data are <em>NOT</em> log transformed unless they come in that way. (the qValueThreshold will be
 *                    ignored)
 * @param retainScale if true, the data retain the global mean (intercept)
 * @return residuals from the regression.
 */
@Override
public ExpressionDataDoubleMatrix regressionResiduals(ExpressionDataDoubleMatrix matrix, DifferentialExpressionAnalysisConfig config, boolean retainScale) {
    if (config.getFactorsToInclude().isEmpty()) {
        LinearModelAnalyzer.log.warn("No factors");
        return matrix;
    }
    /*
         * Note that this method relies on similar code to doAnalysis, for the setup stages.
         */
    List<ExperimentalFactor> factors = config.getFactorsToInclude();
    List<BioMaterial> samplesUsed = ExperimentalDesignUtils.getOrderedSamples(matrix, factors);
    Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils.getBaselineConditions(samplesUsed, factors);
    ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors, samplesUsed, baselineConditions);
    DesignMatrix properDesignMatrix = new DesignMatrix(designMatrix, true);
    ExpressionDataDoubleMatrix dmatrix = new ExpressionDataDoubleMatrix(samplesUsed, matrix);
    DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();
    DoubleMatrix<String, String> sNamedMatrix = this.makeDataMatrix(designMatrix, namedMatrix);
    // perform weighted least squares regression on COUNT data
    QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
    LeastSquaresFit fit;
    if (quantitationType.getScale().equals(ScaleType.COUNT)) {
        LinearModelAnalyzer.log.info("Calculating residuals of weighted least squares regression on COUNT data");
        // note: data is not log transformed
        DoubleMatrix1D librarySize = MatrixStats.colSums(sNamedMatrix);
        MeanVarianceEstimator mv = new MeanVarianceEstimator(properDesignMatrix, sNamedMatrix, librarySize);
        fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix, mv.getWeights());
    } else {
        fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix);
    }
    DoubleMatrix2D residuals = fit.getResiduals();
    if (retainScale) {
        DoubleMatrix1D intercept = fit.getCoefficients().viewRow(0);
        for (int i = 0; i < residuals.rows(); i++) {
            residuals.viewRow(i).assign(Functions.plus(intercept.get(i)));
        }
    }
    DoubleMatrix<CompositeSequence, BioMaterial> f = new DenseDoubleMatrix<>(residuals.toArray());
    f.setRowNames(dmatrix.getMatrix().getRowNames());
    f.setColumnNames(dmatrix.getMatrix().getColNames());
    return new ExpressionDataDoubleMatrix(dmatrix, f);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 37 with DoubleMatrix1D

use of cern.colt.matrix.DoubleMatrix1D in project Gemma by PavlidisLab.

the class LinkAnalysisServiceImpl method diagnoseCorrelationDistribution.

/**
 * Check properties of the distribution
 */
// Better readability
@SuppressWarnings("StatementWithEmptyBody")
private void diagnoseCorrelationDistribution(ExpressionExperiment ee, CoexpCorrelationDistribution corrDist) throws UnsuitableForAnalysisException {
    /*
         * Find the median, etc.
         */
    ByteArrayConverter bac = new ByteArrayConverter();
    double[] binCounts = bac.byteArrayToDoubles(corrDist.getBinCounts());
    int numBins = binCounts.length;
    DoubleMatrix1D histogram = new DenseDoubleMatrix1D(binCounts);
    // QC parameters; quantile, not correlation
    double lowerLimitofMiddle = 0.45;
    double upperLimitofMiddle = 0.55;
    double tailFraction = 0.1;
    // normalize
    histogram.assign(Functions.div(histogram.zSum()));
    double lowerTailDensity = 0.0;
    double upperTailDensity = 0.0;
    double median = 0.0;
    // cumulative
    double s = 0.0;
    double middleDensity = 0.0;
    for (int bin = 0; bin < histogram.size(); bin++) {
        // cumulate
        s += histogram.get(bin);
        /*
             * Perhaps these should be adjusted based on the sample size; for smaller data sets, more of the data is
             * going to be above 0.9 etc. But in practice this can't have a very big effect.
             */
        if (bin == (int) Math.floor(numBins * tailFraction)) {
            lowerTailDensity = s;
        } else if (bin == (int) Math.floor(numBins * (1.0 - tailFraction))) {
            upperTailDensity = 1.0 - s;
        } else if (bin > (int) Math.floor(lowerLimitofMiddle * numBins) && bin < (int) Math.floor(upperLimitofMiddle * numBins)) {
            middleDensity += histogram.get(bin);
        }
        if (s >= 0.2) {
        // firstQuintile = binToCorrelation( i, numBins );
        } else if (s >= 0.5) {
            median = this.binToCorrelation(bin, numBins);
        } else if (s >= 0.8) {
        // lastQuintile = binToCorrelation( i, numBins );
        }
    }
    String message = "";
    boolean bad = false;
    if (median > 0.2 || median < -0.2) {
        bad = true;
        message = "Correlation distribution fails QC: median far from center (" + median + ")";
    } else if (lowerTailDensity + upperTailDensity > middleDensity) {
        bad = true;
        message = "Correlation distribution fails QC: tails too heavy";
    }
    if (bad) {
        throw new UnsuitableForAnalysisException(ee, message);
    }
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D)

Example 38 with DoubleMatrix1D

use of cern.colt.matrix.DoubleMatrix1D in project Gemma by PavlidisLab.

the class DataUpdater method addCountData.

/**
 * Replaces data. Starting with the count data, we compute the log2cpm, which is the preferred quantitation type we
 * use internally. Counts and FPKM (if provided) are stored in addition.
 *
 * @param ee                  ee
 * @param targetArrayDesign   - this should be one of the "Generic" gene-based platforms. The data set will be
 *                            switched to use it.
 * @param countMatrix         Representing 'raw' counts (added after rpkm, if provided).
 * @param rpkmMatrix          Representing per-gene normalized data, optional (RPKM or FPKM)
 * @param allowMissingSamples if true, samples that are missing data will be deleted from the experiment.
 */
public void addCountData(ExpressionExperiment ee, ArrayDesign targetArrayDesign, DoubleMatrix<String, String> countMatrix, DoubleMatrix<String, String> rpkmMatrix, Integer readLength, Boolean isPairedReads, boolean allowMissingSamples) {
    if (countMatrix == null)
        throw new IllegalArgumentException("You must provide count matrix (rpkm is optional)");
    targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
    ee = experimentService.thawLite(ee);
    ee = this.dealWithMissingSamples(ee, countMatrix, allowMissingSamples);
    DoubleMatrix<CompositeSequence, BioMaterial> properCountMatrix = this.matchElementsToRowNames(targetArrayDesign, countMatrix);
    this.matchBioMaterialsToColNames(ee, countMatrix, properCountMatrix);
    assert !properCountMatrix.getColNames().isEmpty();
    assert !properCountMatrix.getRowNames().isEmpty();
    QuantitationType countqt = this.makeCountQt();
    ExpressionDataDoubleMatrix countEEMatrix = new ExpressionDataDoubleMatrix(ee, countqt, properCountMatrix);
    QuantitationType log2cpmQt = this.makelog2cpmQt();
    DoubleMatrix1D librarySize = MatrixStats.colSums(countMatrix);
    DoubleMatrix<CompositeSequence, BioMaterial> log2cpmMatrix = MatrixStats.convertToLog2Cpm(properCountMatrix, librarySize);
    ExpressionDataDoubleMatrix log2cpmEEMatrix = new ExpressionDataDoubleMatrix(ee, log2cpmQt, log2cpmMatrix);
    ee = this.replaceData(ee, targetArrayDesign, log2cpmEEMatrix);
    ee = this.addData(ee, targetArrayDesign, countEEMatrix);
    this.addTotalCountInformation(ee, countEEMatrix, readLength, isPairedReads);
    if (rpkmMatrix != null) {
        DoubleMatrix<CompositeSequence, BioMaterial> properRPKMMatrix = this.matchElementsToRowNames(targetArrayDesign, rpkmMatrix);
        this.matchBioMaterialsToColNames(ee, rpkmMatrix, properRPKMMatrix);
        assert !properRPKMMatrix.getColNames().isEmpty();
        assert !properRPKMMatrix.getRowNames().isEmpty();
        QuantitationType rpkmqt = this.makeRPKMQt();
        ExpressionDataDoubleMatrix rpkmEEMatrix = new ExpressionDataDoubleMatrix(ee, rpkmqt, properRPKMMatrix);
        ee = this.addData(ee, targetArrayDesign, rpkmEEMatrix);
    }
    assert !processedExpressionDataVectorService.getProcessedDataVectors(ee).isEmpty();
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 39 with DoubleMatrix1D

use of cern.colt.matrix.DoubleMatrix1D in project tdq-studio-se by Talend.

the class Formatter method demo2.

/**
 * Demonstrates how to use this class.
 */
public static void demo2() {
    // parameters
    double[] values = { // 5, 0.0, -0.0, -Double.NaN, Double.NaN, 0.0/0.0, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.MIN_VALUE, Double.MAX_VALUE
    5, 0.0, -0.0, -Double.NaN, Double.NaN, 0.0 / 0.0, Double.MIN_VALUE, Double.MAX_VALUE, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY // Double.MIN_VALUE, Double.MAX_VALUE //, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY
    };
    // String[] formats =         {"%G", "%1.10G", "%f", "%1.2f", "%0.2e"};
    String[] formats = { "%G", "%1.19G" };
    // now the processing
    int size = formats.length;
    DoubleMatrix1D matrix = new DenseDoubleMatrix1D(values);
    String[] strings = new String[size];
    for (int i = 0; i < size; i++) {
        String format = formats[i];
        strings[i] = new Formatter(format).toString(matrix);
        for (int j = 0; j < matrix.size(); j++) {
            System.out.println(String.valueOf(matrix.get(j)));
        }
    }
    System.out.println("original:\n" + new Formatter().toString(matrix));
    for (int i = 0; i < size; i++) {
        System.out.println("\nstring(" + formats[i] + "):\n" + strings[i]);
    }
}
Also used : AbstractFormatter(cern.colt.matrix.impl.AbstractFormatter) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D)

Example 40 with DoubleMatrix1D

use of cern.colt.matrix.DoubleMatrix1D in project tdq-studio-se by Talend.

the class Partitioning method partition.

/**
 *Same as {@link cern.colt.Partitioning#partition(int[],int,int,int[],int,int,int[])}
 *except that it <i>synchronously</i> partitions the rows of the given matrix by the values of the given matrix column;
 *This is essentially the same as partitioning a list of composite objects by some instance variable;
 *In other words, two entire rows of the matrix are swapped, whenever two column values indicate so.
 *<p>
 *Let's say, a "row" is an "object" (tuple, d-dimensional point).
 *A "column" is the list of "object" values of a given variable (field, dimension).
 *A "matrix" is a list of "objects" (tuples, points).
 *<p>
 *Now, rows (objects, tuples) are partially sorted according to their values in one given variable (dimension).
 *Two entire rows of the matrix are swapped, whenever two column values indicate so.
 *<p>
 *Note that arguments are not checked for validity.
 *<p>
 *<b>Example:</b>
 *<table border="1" cellspacing="0">
 *  <tr nowrap>
 *	<td valign="top"><tt>8 x 3 matrix:<br>
 *	  23, 22, 21<br>
 *	  20, 19, 18<br>
 *	  17, 16, 15<br>
 *	  14, 13, 12<br>
 *	  11, 10, 9<br>
 *	  8,  7,  6<br>
 *	  5,  4,  3<br>
 *	  2,  1,  0 </tt></td>
 *	<td align="left" valign="top">
 *	  <p><tt>column = 0;<br>
 *	    rowIndexes = {0,1,2,..,matrix.rows()-1};
 *		rowFrom = 0;<br>
 *		rowTo = matrix.rows()-1;<br>
 *		splitters = {5,10,12}<br>
 *		c = 0; <br>
 *		d = splitters.length-1;<br>
 *		partition(matrix,rowIndexes,rowFrom,rowTo,column,splitters,c,d,splitIndexes);<br>
 *		==><br>
 *		splitIndexes == {0, 2, 3}<br>
 *		rowIndexes == {7, 6, 5, 4, 0, 1, 2, 3}</tt></p>
 *	  </td>
 *	<td valign="top">
 *	  The matrix IS NOT REORDERED.<br>
 *	  Here is how it would look<br>
 *	  like, if it would be reordered<br>
 *	  accoring to <tt>rowIndexes</tt>.<br>
 *	  <tt>8 x 3 matrix:<br>
 *	  2,  1,  0<br>
 *	  5,  4,  3<br>
 *	  8,  7,  6<br>
 *	  11, 10, 9<br>
 *	  23, 22, 21<br>
 *	  20, 19, 18<br>
 *	  17, 16, 15<br>
 *	  14, 13, 12 </tt></td>
 *  </tr>
 *</table>
 *@param matrix the matrix to be partitioned.
 *@param rowIndexes the index of the i-th row; is modified by this method to reflect partitioned indexes.
 *@param rowFrom the index of the first row (inclusive).
 *@param rowTo the index of the last row (inclusive).
 *@param column the index of the column to partition on.
 *@param splitters the values at which the rows shall be split into intervals.
 *	Must be sorted ascending and must not contain multiple identical values.
 *	These preconditions are not checked; be sure that they are met.
 *
 *@param splitFrom the index of the first splitter element to be considered.
 *@param splitTo the index of the last splitter element to be considered.
 *	The method considers the splitter elements <tt>splitters[splitFrom] .. splitters[splitTo]</tt>.
 *
 *@param splitIndexes a list into which this method fills the indexes of rows delimiting intervals.
 *Upon return <tt>splitIndexes[splitFrom..splitTo]</tt> will be set accordingly.
 *Therefore, must satisfy <tt>splitIndexes.length >= splitters.length</tt>.
 */
public static void partition(DoubleMatrix2D matrix, int[] rowIndexes, int rowFrom, int rowTo, int column, final double[] splitters, int splitFrom, int splitTo, int[] splitIndexes) {
    if (rowFrom < 0 || rowTo >= matrix.rows() || rowTo >= rowIndexes.length)
        throw new IllegalArgumentException();
    if (column < 0 || column >= matrix.columns())
        throw new IllegalArgumentException();
    if (splitFrom < 0 || splitTo >= splitters.length)
        throw new IllegalArgumentException();
    if (splitIndexes.length < splitters.length)
        throw new IllegalArgumentException();
    // this one knows how to swap two row indexes (a,b)
    final int[] g = rowIndexes;
    Swapper swapper = new Swapper() {

        public void swap(int b, int c) {
            int tmp = g[b];
            g[b] = g[c];
            g[c] = tmp;
        }
    };
    // compare splitter[a] with columnView[rowIndexes[b]]
    final DoubleMatrix1D columnView = matrix.viewColumn(column);
    IntComparator comp = new IntComparator() {

        public int compare(int a, int b) {
            double av = splitters[a];
            double bv = columnView.getQuick(g[b]);
            return av < bv ? -1 : (av == bv ? 0 : 1);
        }
    };
    // compare columnView[rowIndexes[a]] with columnView[rowIndexes[b]]
    IntComparator comp2 = new IntComparator() {

        public int compare(int a, int b) {
            double av = columnView.getQuick(g[a]);
            double bv = columnView.getQuick(g[b]);
            return av < bv ? -1 : (av == bv ? 0 : 1);
        }
    };
    // compare splitter[a] with splitter[b]
    IntComparator comp3 = new IntComparator() {

        public int compare(int a, int b) {
            double av = splitters[a];
            double bv = splitters[b];
            return av < bv ? -1 : (av == bv ? 0 : 1);
        }
    };
    // generic partitioning does the main work of reordering row indexes
    cern.colt.Partitioning.genericPartition(rowFrom, rowTo, splitFrom, splitTo, splitIndexes, comp, comp2, comp3, swapper);
}
Also used : DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) IntComparator(cern.colt.function.IntComparator) Swapper(cern.colt.Swapper)

Aggregations

DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)70 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)37 DenseDoubleMatrix1D (cern.colt.matrix.impl.DenseDoubleMatrix1D)25 DenseDoubleMatrix2D (cern.colt.matrix.impl.DenseDoubleMatrix2D)17 Algebra (cern.colt.matrix.linalg.Algebra)9 Node (edu.cmu.tetrad.graph.Node)8 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)6 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)5 IntComparator (cern.colt.function.IntComparator)4 DoubleArrayList (cern.colt.list.DoubleArrayList)4 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)4 DoubleFactory2D (cern.colt.matrix.DoubleFactory2D)3 Matrix (dr.math.matrixAlgebra.Matrix)2 RobustEigenDecomposition (dr.math.matrixAlgebra.RobustEigenDecomposition)2 ICovarianceMatrix (edu.cmu.tetrad.data.ICovarianceMatrix)2 Endpoint (edu.cmu.tetrad.graph.Endpoint)2 Graph (edu.cmu.tetrad.graph.Graph)2 SemGraph (edu.cmu.tetrad.graph.SemGraph)2 TetradMatrix (edu.cmu.tetrad.util.TetradMatrix)2 IOException (java.io.IOException)2