use of ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement in project Gemma by PavlidisLab.
the class TwoChannelMissingValuesImpl method computeMissingValues.
/**
* Attempt to compute 'missing value' information for a two-channel data set. We attempt to do this even if we are
* missing background intensity information or one intensity channel, though obviously it is better to have all four
* sets of values.
*
* @param bkgChannelA background channel A
* @param bkgChannelB background channel B
* @param extraMissingValueIndicators extra missing value indicators
* @param preferred preferred matrix
* @param signalChannelA signal channel A
* @param signalChannelB signal channel B
* @param signalToNoiseThreshold noise threshold
* @param source the source
* @return DesignElementDataVectors corresponding to a new PRESENTCALL quantitation type for the design elements and
* biomaterial dimension represented in the inputs.
*/
private Collection<RawExpressionDataVector> computeMissingValues(ExpressionExperiment source, ExpressionDataDoubleMatrix preferred, ExpressionDataDoubleMatrix signalChannelA, ExpressionDataDoubleMatrix signalChannelB, ExpressionDataDoubleMatrix bkgChannelA, ExpressionDataDoubleMatrix bkgChannelB, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators) {
boolean okToProceed = this.validate(preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB, signalToNoiseThreshold);
Collection<RawExpressionDataVector> results = new HashSet<>();
if (!okToProceed) {
TwoChannelMissingValuesImpl.log.warn("Missing value computation cannot proceed");
return results;
}
ByteArrayConverter converter = new ByteArrayConverter();
int count = 0;
ExpressionDataDoubleMatrix baseChannel = signalChannelA == null ? signalChannelB : signalChannelA;
Double signalThreshold = Double.NaN;
if (bkgChannelA == null && bkgChannelB == null) {
signalThreshold = this.computeSignalThreshold(preferred, signalChannelA, signalChannelB, baseChannel);
}
QuantitationType present = this.getMissingDataQuantitationType(signalToNoiseThreshold, signalThreshold);
source.getQuantitationTypes().add(present);
for (ExpressionDataMatrixRowElement element : baseChannel.getRowElements()) {
count = this.examineVector(source, preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB, signalToNoiseThreshold, extraMissingValueIndicators, results, converter, count, baseChannel, signalThreshold, present, element);
}
TwoChannelMissingValuesImpl.log.info("Finished: " + count + " vectors examined for missing values");
results = twoChannelMissingValueHelperService.persist(source, results);
return results;
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement in project Gemma by PavlidisLab.
the class PearsonMetrics method calculateMetrics.
/**
* Calculate the linear correlation matrix of a matrix, allowing missing values. If there are no missing values,
* this calls PearsonFast.
*/
@Override
public void calculateMetrics() {
if (this.numMissing == 0) {
this.calculateMetricsFast();
return;
}
int numused;
int numrows = this.dataMatrix.rows();
int numcols = this.dataMatrix.columns();
if (numcols < this.minNumUsed) {
throw new IllegalArgumentException("Sorry, correlations will not be computed unless there are at least " + this.minNumUsed + " mutually present data points per vector pair, current data has only " + numcols + " columns.");
}
boolean docalcs = this.needToCalculateMetrics();
boolean[][] usedB = new boolean[][] {};
double[][] data = new double[][] {};
if (docalcs) {
// Temporarily copy the data in this matrix, for performance.
usedB = new boolean[numrows][numcols];
data = new double[numrows][numcols];
for (int i = 0; i < numrows; i++) {
// first vector
for (int j = 0; j < numcols; j++) {
// second vector
// this is only needed if we use it below, speeds things up
usedB[i][j] = used.get(i, j);
// slightly.
data[i][j] = this.dataMatrix.get(i, j);
}
}
this.rowStatistics();
}
/* for each vector, compare it to all other vectors */
ExpressionDataMatrixRowElement itemA;
StopWatch timer = new StopWatch();
timer.start();
double[] vectorA = new double[] {};
double syy, sxy, sxx, sx, sy, xj, yj;
int skipped = 0;
int numComputed = 0;
for (int i = 0; i < numrows; i++) {
// first vector
itemA = this.dataMatrix.getRowElement(i);
if (!this.hasGene(itemA)) {
skipped++;
continue;
}
if (docalcs) {
vectorA = data[i];
}
boolean thisRowHasMissing = hasMissing[i];
for (int j = i + 1; j < numrows; j++) {
// second vector
ExpressionDataMatrixRowElement itemB = this.dataMatrix.getRowElement(j);
if (!this.hasGene(itemB))
continue;
// second pass over matrix? Don't calculate it if we already have it. Just do the requisite checks.
if (!docalcs || results.get(i, j) != 0.0) {
this.keepCorrellation(i, j, results.get(i, j), numcols);
continue;
}
double[] vectorB = data[j];
/* if there are no missing values, use the faster method of calculation */
if (!thisRowHasMissing && !hasMissing[j]) {
this.setCorrel(i, j, this.correlFast(vectorA, vectorB, i, j), numcols);
continue;
}
/* do it the old fashioned way */
numused = 0;
sxy = 0.0;
sxx = 0.0;
syy = 0.0;
sx = 0.0;
sy = 0.0;
for (int k = 0; k < numcols; k++) {
xj = vectorA[k];
yj = vectorB[k];
if (usedB[i][k] && usedB[j][k]) {
/* this is a bit faster than calling Double.isNan */
sx += xj;
sy += yj;
sxy += xj * yj;
sxx += xj * xj;
syy += yj * yj;
numused++;
}
}
// of freedom isn't too low.
if (numused < this.minNumUsed)
this.setCorrel(i, j, Double.NaN, 0);
else {
double denom = this.correlationNorm(numused, sxx, sx, syy, sy);
if (denom <= 0.0) {
// means variance is zero for one of the vectors.
this.setCorrel(i, j, 0.0, numused);
} else {
double correl = (sxy - sx * sy / numused) / Math.sqrt(denom);
this.setCorrel(i, j, correl, numused);
}
}
++numComputed;
}
this.computeRow(timer, itemA, numComputed, i);
}
AbstractMatrixRowPairAnalysis.log.info(skipped + " rows skipped, where probe lacks a gene annotation");
this.finishMetrics();
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement in project Gemma by PavlidisLab.
the class SpearmanMetrics method calculateMetrics.
/**
* Compute correlations.
*/
@Override
public void calculateMetrics() {
if (this.numMissing == 0) {
this.calculateMetricsFast();
return;
}
// int numused;
int numrows = this.dataMatrix.rows();
int numcols = this.dataMatrix.columns();
if (numcols < this.minNumUsed) {
throw new IllegalArgumentException("Sorry, Spearman correlations will not be computed unless there are at least " + this.minNumUsed + " data points per vector, current data has only " + numcols + " columns.");
}
boolean doCalcs = this.needToCalculateMetrics();
boolean[][] usedB = null;
if (doCalcs) {
// Temporarily copy the data in this matrix, for performance, and rank transform.
usedB = new boolean[numrows][numcols];
this.getRankTransformedData(usedB);
}
/* for each vector, compare it to all other vectors */
ExpressionDataMatrixRowElement itemA;
double[] vectorA = null;
int skipped = 0;
int numComputed = 0;
for (int i = 0; i < numrows; i++) {
// first vector
itemA = this.dataMatrix.getRowElement(i);
if (!this.hasGene(itemA)) {
skipped++;
continue;
}
if (doCalcs) {
vectorA = rankTransformedData[i];
}
boolean thisRowHasMissing = hasMissing[i];
for (int j = i + 1; j < numrows; j++) {
// second vector
ExpressionDataMatrixRowElement itemB = this.dataMatrix.getRowElement(j);
if (!this.hasGene(itemB))
continue;
// second pass over matrix? Don't calculate it if we already have it. Just do the requisite checks.
if (!doCalcs || results.get(i, j) != 0.0) {
this.keepCorrellation(i, j, results.get(i, j), numcols);
continue;
}
double[] vectorB = rankTransformedData[j];
/* if there are no missing values, use the faster method of calculation */
if (!thisRowHasMissing && !hasMissing[j]) {
this.setCorrel(i, j, this.correlFast(vectorA, vectorB, i, j), numcols);
continue;
}
this.spearman(vectorA, vectorB, usedB[i], usedB[j], i, j);
++numComputed;
}
++numComputed;
if ((i + 1) % 2000 == 0) {
AbstractMatrixRowPairAnalysis.log.info((i + 1) + " rows done, " + numComputed + " correlations computed, last row was " + itemA + " " + (keepers.size() > 0 ? keepers.size() + " scores retained" : ""));
}
}
AbstractMatrixRowPairAnalysis.log.info(skipped + " rows skipped, due to no BLAT association");
this.finishMetrics();
}
Aggregations