Search in sources :

Example 6 with ExpressionDataMatrixRowElement

use of ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesImpl method computeMissingValues.

/**
 * Attempt to compute 'missing value' information for a two-channel data set. We attempt to do this even if we are
 * missing background intensity information or one intensity channel, though obviously it is better to have all four
 * sets of values.
 *
 * @param bkgChannelA                 background channel A
 * @param bkgChannelB                 background channel B
 * @param extraMissingValueIndicators extra missing value indicators
 * @param preferred                   preferred matrix
 * @param signalChannelA              signal channel A
 * @param signalChannelB              signal channel B
 * @param signalToNoiseThreshold      noise threshold
 * @param source                      the source
 * @return DesignElementDataVectors corresponding to a new PRESENTCALL quantitation type for the design elements and
 * biomaterial dimension represented in the inputs.
 */
private Collection<RawExpressionDataVector> computeMissingValues(ExpressionExperiment source, ExpressionDataDoubleMatrix preferred, ExpressionDataDoubleMatrix signalChannelA, ExpressionDataDoubleMatrix signalChannelB, ExpressionDataDoubleMatrix bkgChannelA, ExpressionDataDoubleMatrix bkgChannelB, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators) {
    boolean okToProceed = this.validate(preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB, signalToNoiseThreshold);
    Collection<RawExpressionDataVector> results = new HashSet<>();
    if (!okToProceed) {
        TwoChannelMissingValuesImpl.log.warn("Missing value computation cannot proceed");
        return results;
    }
    ByteArrayConverter converter = new ByteArrayConverter();
    int count = 0;
    ExpressionDataDoubleMatrix baseChannel = signalChannelA == null ? signalChannelB : signalChannelA;
    Double signalThreshold = Double.NaN;
    if (bkgChannelA == null && bkgChannelB == null) {
        signalThreshold = this.computeSignalThreshold(preferred, signalChannelA, signalChannelB, baseChannel);
    }
    QuantitationType present = this.getMissingDataQuantitationType(signalToNoiseThreshold, signalThreshold);
    source.getQuantitationTypes().add(present);
    for (ExpressionDataMatrixRowElement element : baseChannel.getRowElements()) {
        count = this.examineVector(source, preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB, signalToNoiseThreshold, extraMissingValueIndicators, results, converter, count, baseChannel, signalThreshold, present, element);
    }
    TwoChannelMissingValuesImpl.log.info("Finished: " + count + " vectors examined for missing values");
    results = twoChannelMissingValueHelperService.persist(source, results);
    return results;
}
Also used : ExpressionDataMatrixRowElement(ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) HashSet(java.util.HashSet)

Example 7 with ExpressionDataMatrixRowElement

use of ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement in project Gemma by PavlidisLab.

the class PearsonMetrics method calculateMetrics.

/**
 * Calculate the linear correlation matrix of a matrix, allowing missing values. If there are no missing values,
 * this calls PearsonFast.
 */
@Override
public void calculateMetrics() {
    if (this.numMissing == 0) {
        this.calculateMetricsFast();
        return;
    }
    int numused;
    int numrows = this.dataMatrix.rows();
    int numcols = this.dataMatrix.columns();
    if (numcols < this.minNumUsed) {
        throw new IllegalArgumentException("Sorry, correlations will not be computed unless there are at least " + this.minNumUsed + " mutually present data points per vector pair, current data has only " + numcols + " columns.");
    }
    boolean docalcs = this.needToCalculateMetrics();
    boolean[][] usedB = new boolean[][] {};
    double[][] data = new double[][] {};
    if (docalcs) {
        // Temporarily copy the data in this matrix, for performance.
        usedB = new boolean[numrows][numcols];
        data = new double[numrows][numcols];
        for (int i = 0; i < numrows; i++) {
            // first vector
            for (int j = 0; j < numcols; j++) {
                // second vector
                // this is only needed if we use it below, speeds things up
                usedB[i][j] = used.get(i, j);
                // slightly.
                data[i][j] = this.dataMatrix.get(i, j);
            }
        }
        this.rowStatistics();
    }
    /* for each vector, compare it to all other vectors */
    ExpressionDataMatrixRowElement itemA;
    StopWatch timer = new StopWatch();
    timer.start();
    double[] vectorA = new double[] {};
    double syy, sxy, sxx, sx, sy, xj, yj;
    int skipped = 0;
    int numComputed = 0;
    for (int i = 0; i < numrows; i++) {
        // first vector
        itemA = this.dataMatrix.getRowElement(i);
        if (!this.hasGene(itemA)) {
            skipped++;
            continue;
        }
        if (docalcs) {
            vectorA = data[i];
        }
        boolean thisRowHasMissing = hasMissing[i];
        for (int j = i + 1; j < numrows; j++) {
            // second vector
            ExpressionDataMatrixRowElement itemB = this.dataMatrix.getRowElement(j);
            if (!this.hasGene(itemB))
                continue;
            // second pass over matrix? Don't calculate it if we already have it. Just do the requisite checks.
            if (!docalcs || results.get(i, j) != 0.0) {
                this.keepCorrellation(i, j, results.get(i, j), numcols);
                continue;
            }
            double[] vectorB = data[j];
            /* if there are no missing values, use the faster method of calculation */
            if (!thisRowHasMissing && !hasMissing[j]) {
                this.setCorrel(i, j, this.correlFast(vectorA, vectorB, i, j), numcols);
                continue;
            }
            /* do it the old fashioned way */
            numused = 0;
            sxy = 0.0;
            sxx = 0.0;
            syy = 0.0;
            sx = 0.0;
            sy = 0.0;
            for (int k = 0; k < numcols; k++) {
                xj = vectorA[k];
                yj = vectorB[k];
                if (usedB[i][k] && usedB[j][k]) {
                    /* this is a bit faster than calling Double.isNan */
                    sx += xj;
                    sy += yj;
                    sxy += xj * yj;
                    sxx += xj * xj;
                    syy += yj * yj;
                    numused++;
                }
            }
            // of freedom isn't too low.
            if (numused < this.minNumUsed)
                this.setCorrel(i, j, Double.NaN, 0);
            else {
                double denom = this.correlationNorm(numused, sxx, sx, syy, sy);
                if (denom <= 0.0) {
                    // means variance is zero for one of the vectors.
                    this.setCorrel(i, j, 0.0, numused);
                } else {
                    double correl = (sxy - sx * sy / numused) / Math.sqrt(denom);
                    this.setCorrel(i, j, correl, numused);
                }
            }
            ++numComputed;
        }
        this.computeRow(timer, itemA, numComputed, i);
    }
    AbstractMatrixRowPairAnalysis.log.info(skipped + " rows skipped, where probe lacks a gene annotation");
    this.finishMetrics();
}
Also used : ExpressionDataMatrixRowElement(ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 8 with ExpressionDataMatrixRowElement

use of ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement in project Gemma by PavlidisLab.

the class SpearmanMetrics method calculateMetrics.

/**
 * Compute correlations.
 */
@Override
public void calculateMetrics() {
    if (this.numMissing == 0) {
        this.calculateMetricsFast();
        return;
    }
    // int numused;
    int numrows = this.dataMatrix.rows();
    int numcols = this.dataMatrix.columns();
    if (numcols < this.minNumUsed) {
        throw new IllegalArgumentException("Sorry, Spearman correlations will not be computed unless there are at least " + this.minNumUsed + " data points per vector, current data has only " + numcols + " columns.");
    }
    boolean doCalcs = this.needToCalculateMetrics();
    boolean[][] usedB = null;
    if (doCalcs) {
        // Temporarily copy the data in this matrix, for performance, and rank transform.
        usedB = new boolean[numrows][numcols];
        this.getRankTransformedData(usedB);
    }
    /* for each vector, compare it to all other vectors */
    ExpressionDataMatrixRowElement itemA;
    double[] vectorA = null;
    int skipped = 0;
    int numComputed = 0;
    for (int i = 0; i < numrows; i++) {
        // first vector
        itemA = this.dataMatrix.getRowElement(i);
        if (!this.hasGene(itemA)) {
            skipped++;
            continue;
        }
        if (doCalcs) {
            vectorA = rankTransformedData[i];
        }
        boolean thisRowHasMissing = hasMissing[i];
        for (int j = i + 1; j < numrows; j++) {
            // second vector
            ExpressionDataMatrixRowElement itemB = this.dataMatrix.getRowElement(j);
            if (!this.hasGene(itemB))
                continue;
            // second pass over matrix? Don't calculate it if we already have it. Just do the requisite checks.
            if (!doCalcs || results.get(i, j) != 0.0) {
                this.keepCorrellation(i, j, results.get(i, j), numcols);
                continue;
            }
            double[] vectorB = rankTransformedData[j];
            /* if there are no missing values, use the faster method of calculation */
            if (!thisRowHasMissing && !hasMissing[j]) {
                this.setCorrel(i, j, this.correlFast(vectorA, vectorB, i, j), numcols);
                continue;
            }
            this.spearman(vectorA, vectorB, usedB[i], usedB[j], i, j);
            ++numComputed;
        }
        ++numComputed;
        if ((i + 1) % 2000 == 0) {
            AbstractMatrixRowPairAnalysis.log.info((i + 1) + " rows done, " + numComputed + " correlations computed, last row was " + itemA + " " + (keepers.size() > 0 ? keepers.size() + " scores retained" : ""));
        }
    }
    AbstractMatrixRowPairAnalysis.log.info(skipped + " rows skipped, due to no BLAT association");
    this.finishMetrics();
}
Also used : ExpressionDataMatrixRowElement(ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement)

Aggregations

ExpressionDataMatrixRowElement (ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement)8 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)2 Gene (ubic.gemma.model.genome.Gene)2 HashSet (java.util.HashSet)1 StopWatch (org.apache.commons.lang3.time.StopWatch)1 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)1 Histogram (ubic.basecode.math.distribution.Histogram)1 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)1 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)1