Use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.
Class RowLevelFilter, method computeCriteria:
private int computeCriteria(ExpressionDataDoubleMatrix data, DoubleArrayList criteria) {
    int numRows = data.rows();
    int numCols = data.columns();
    /*
     * compute criteria.
     */
    DoubleArrayList rowAsList = new DoubleArrayList(new double[numCols]);
    int numAllNeg = 0;
    for (int i = 0; i < numRows; i++) {
        Double[] row = data.getRow(i);
        int numNeg = 0;
        /* stupid, copy into a DoubleArrayList so we can do stats */
        for (int j = 0; j < numCols; j++) {
            Double item = row[j];
            if (Double.isNaN(item))
                rowAsList.set(j, 0);
            else
                rowAsList.set(j, item);
            if (item < 0.0 || Double.isNaN(item)) {
                numNeg++;
            }
        }
        if (numNeg == numCols) {
            numAllNeg++;
        }
        this.addCriterion(criteria, rowAsList, data.getDesignElementForRow(i), i);
    }
    return numAllNeg;
}
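The criterion computed for each row is not shown here; addCriterion lives elsewhere in RowLevelFilter. As a rough illustration of the same pattern, here is a minimal sketch that copies a row into a DoubleArrayList (treating NaN as 0, as above) and computes a simple summary statistic with Colt's Descriptive class. The criterion chosen (mean, then sample variance) and the method name computeRowCriterion are assumptions for illustration only, not the project's actual filter statistic.

import cern.colt.list.DoubleArrayList;
import cern.jet.stat.Descriptive;

public class RowCriterionSketch {
    /** Hypothetical stand-in for addCriterion: summarize one row as a single number. */
    static double computeRowCriterion(Double[] row) {
        DoubleArrayList rowAsList = new DoubleArrayList(new double[row.length]);
        for (int j = 0; j < row.length; j++) {
            // same NaN handling as computeCriteria above
            rowAsList.set(j, Double.isNaN(row[j]) ? 0.0 : row[j]);
        }
        double mean = Descriptive.mean(rowAsList);
        // sample variance as an example criterion (assumption: any row statistic could be used here)
        return Descriptive.sampleVariance(rowAsList, mean);
    }
}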
Use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.
Class LinkAnalysis, method getCorrelationDistribution:
public CoexpCorrelationDistribution getCorrelationDistribution() {
    CoexpCorrelationDistribution result = CoexpCorrelationDistribution.Factory.newInstance();
    DoubleArrayList histogramArrayList = this.metricMatrix.getHistogramArrayList();
    result.setNumBins(histogramArrayList.size());
    ByteArrayConverter bac = new ByteArrayConverter();
    result.setBinCounts(bac.doubleArrayToBytes(MatrixUtil.fromList(histogramArrayList).toArray()));
    return result;
}
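The histogram bin counts are serialized to a byte array before being stored on the CoexpCorrelationDistribution. A minimal round-trip sketch of that serialization, assuming ByteArrayConverter comes from baseCode (ubic.basecode.io) and that byteArrayToDoubles is its matching deserializer; both of those import details are assumptions, only doubleArrayToBytes appears in the snippet above.

import cern.colt.list.DoubleArrayList;
import ubic.basecode.io.ByteArrayConverter;

public class HistogramSerializationSketch {
    public static void main(String[] args) {
        // a tiny histogram of bin counts
        DoubleArrayList histogram = new DoubleArrayList(new double[] { 3, 10, 42, 7 });

        ByteArrayConverter bac = new ByteArrayConverter();
        // serialize the bin counts, as the getter above does before storing them
        // (elements() returns the backing double[], which here has exactly size() entries)
        byte[] binCounts = bac.doubleArrayToBytes(histogram.elements());

        // assumption: byteArrayToDoubles reverses doubleArrayToBytes
        double[] restored = bac.byteArrayToDoubles(binCounts);
        System.out.println(restored.length + " bins restored, first bin = " + restored[0]);
    }
}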
Use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.
Class LinkAnalysis, method chooseCutPoints:
/**
 * Compute the thresholds needed to choose links for storage in the system.
 */
private void chooseCutPoints() {
    DoubleArrayList cdf = Stats.cdf(metricMatrix.getHistogramArrayList());
    if (config.getCdfCut() <= 0.0) {
        config.setUpperTailCut(1.0);
        config.setLowerTailCut(-1.0);
        return;
    }
    if (config.getCdfCut() >= 1.0) {
        config.setUpperTailCut(0.0);
        config.setLowerTailCut(0.0);
        return;
    }
    double cdfTailCut = config.getCdfCut();
    double cdfUpperCutScore = 0.0;
    double cdfLowerCutScore = 0.0;
    // find the lower tail cutpoint, if we have to.
    if (!config.isAbsoluteValue()) {
        cdfTailCut /= 2.0;
        // walk the cdf of the histogram from the bottom bin.
        for (int i = 0; i < cdf.size(); i++) {
            if (1.0 - cdf.get(i) >= cdfTailCut) {
                cdfLowerCutScore = metricMatrix.getScoreInBin(i == cdf.size() ? i : i + 1);
                break;
            }
        }
        LinkAnalysis.log.debug(form.format(cdfLowerCutScore) + " is the lower cdf cutpoint at " + cdfTailCut);
    }
    // find the upper cut point.
    for (int i = cdf.size() - 1; i >= 0; i--) {
        if (cdf.get(i) >= cdfTailCut) {
            cdfUpperCutScore = metricMatrix.getScoreInBin(i == cdf.size() ? i : i + 1);
            break;
        }
    }
    LinkAnalysis.log.debug(form.format(cdfUpperCutScore) + " is the upper cdf cutpoint at " + cdfTailCut);
    // get the cutpoint based on statistical significance.
    double maxP = 1.0;
    double scoreAtP = 0.0;
    if (config.getFwe() != 0.0) {
        double numUniqueGenes = metricMatrix.getNumUniqueGenes();
        // Bonferroni correction.
        maxP = config.getFwe() / numUniqueGenes;
        scoreAtP = CorrelationStats.correlationForPvalue(maxP, this.dataMatrix.columns());
        LinkAnalysis.log.debug("Minimum correlation to get " + form.format(maxP) + " is about " + form.format(scoreAtP)
                + " for " + numUniqueGenes + " unique items (if all " + this.dataMatrix.columns() + " items are present)");
        if (scoreAtP > 0.9) {
            LinkAnalysis.log.warn("This data set has a very high threshold for statistical significance!");
        }
    }
    // this is the corrected p-value threshold.
    this.metricMatrix.setPValueThreshold(maxP);
    // choose cut points, using either a single criterion or the more stringent of the two.
    if (config.getSingularThreshold().equals(SingularThreshold.none)) {
        config.setUpperTailCut(Math.max(scoreAtP, cdfUpperCutScore));
        if (config.getUpperTailCut() == scoreAtP) {
            config.setUpperCdfCutUsed(false);
        } else if (config.getUpperTailCut() == cdfUpperCutScore) {
            config.setUpperCdfCutUsed(true);
        }
        if (!config.isAbsoluteValue()) {
            config.setLowerTailCut(Math.min(-scoreAtP, cdfLowerCutScore));
        }
        if (config.getLowerTailCut() == scoreAtP) {
            config.setLowerCdfCutUsed(false);
        } else if (config.getLowerTailCut() == cdfLowerCutScore) {
            config.setLowerCdfCutUsed(true);
        }
    } else if (config.getSingularThreshold().equals(SingularThreshold.fwe)) {
        config.setUpperTailCut(scoreAtP);
        if (!config.isAbsoluteValue()) {
            config.setLowerTailCut(-scoreAtP);
        }
        config.setUpperCdfCutUsed(false);
        config.setLowerCdfCutUsed(false);
    } else if (config.getSingularThreshold().equals(SingularThreshold.cdfcut)) {
        config.setUpperTailCut(cdfUpperCutScore);
        if (!config.isAbsoluteValue()) {
            config.setLowerTailCut(cdfLowerCutScore);
        }
        // use only cdfCut to decide which links to keep
        metricMatrix.setUsePvalueThreshold(false);
        config.setUpperCdfCutUsed(true);
        config.setLowerCdfCutUsed(true);
    }
    LinkAnalysis.log.info("Final upper cut is " + form.format(config.getUpperTailCut()));
    LinkAnalysis.log.info("Final lower cut is " + form.format(config.getLowerTailCut()));
    metricMatrix.setUpperTailThreshold(config.getUpperTailCut());
    if (config.isAbsoluteValue()) {
        metricMatrix.setLowerTailThreshold(config.getUpperTailCut());
    } else {
        metricMatrix.setLowerTailThreshold(config.getLowerTailCut());
    }
}
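Two thresholds compete in this method: a cut taken from the tail of the correlation histogram's cdf, and the correlation needed to reach a Bonferroni-corrected p-value (the family-wise error rate divided by the number of unique genes, converted to a correlation by CorrelationStats.correlationForPvalue). A minimal sketch of both ideas follows; it assumes the cdf list holds the upper-tail mass per bin (the convention used by Stats.cdf is not shown above) and uses a made-up bin-to-score mapping in place of getScoreInBin, so treat it as an illustration rather than the project's logic.

import cern.colt.list.DoubleArrayList;

public class CutPointSketch {

    // hypothetical stand-in for metricMatrix.getScoreInBin(): map a bin index onto [-1, 1]
    static double scoreInBin(int bin, int numBins) {
        return -1.0 + 2.0 * bin / (numBins - 1);
    }

    public static void main(String[] args) {
        // toy upper-tail mass per bin: fraction of correlations at or above each bin's score (assumption)
        DoubleArrayList cdf = new DoubleArrayList(
                new double[] { 1.0, 0.85, 0.60, 0.35, 0.18, 0.08, 0.03, 0.01 });
        double cdfTailCut = 0.05;

        // scan from the top bin down, as chooseCutPoints does for the upper cut;
        // the index is bounded at the top bin for this sketch
        double upperCutScore = 0.0;
        for (int i = cdf.size() - 1; i >= 0; i--) {
            if (cdf.get(i) >= cdfTailCut) {
                upperCutScore = scoreInBin(Math.min(i + 1, cdf.size() - 1), cdf.size());
                break;
            }
        }
        System.out.println("cdf-based upper cut score: " + upperCutScore);

        // Bonferroni-style family-wise error correction, as in the FWE branch above
        double fwe = 0.01;
        double numUniqueGenes = 20000;
        double maxP = fwe / numUniqueGenes; // per-test p-value threshold
        System.out.println("corrected p-value threshold: " + maxP);
    }
}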
Use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.
Class SpearmanMetrics, method getRankTransformedData:
/**
 * @param usedB will be filled in, if not null. This also precomputes the row statistics (row means and sumsq
 *              deviations).
 */
private void getRankTransformedData(boolean[][] usedB) {
    int numRows = this.dataMatrix.rows();
    int numCols = this.dataMatrix.columns();
    rankTransformedData = new double[numRows][];
    for (int i = 0; i < numRows; i++) {
        Double[] row = this.dataMatrix.getRow(i);
        // make a copy.
        double[] r = new double[row.length];
        for (int m = 0, v = row.length; m < v; m++) {
            r[m] = row[m];
        }
        DoubleArrayList ranksIA = Rank.rankTransform(new DoubleArrayList(r));
        assert ranksIA != null;
        double[] ri = new double[ranksIA.size()];
        for (int n = 0, w = ranksIA.size(); n < w; n++) {
            ri[n] = ranksIA.get(n);
        }
        rankTransformedData[i] = ri;
        if (usedB != null) {
            for (int j = 0; j < numCols; j++) {
                // this is only needed if we use it below; speeds things up slightly.
                usedB[i][j] = used.get(i, j);
            }
        }
    }
    this.rowStatistics();
}
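The heavy lifting above is Rank.rankTransform, which turns a row of expression values into a DoubleArrayList of ranks. A minimal usage sketch, assuming Rank comes from baseCode's math package (the import path is an assumption); the exact tie-handling policy is Rank's to decide, so the example simply prints whatever ranks come back.

import cern.colt.list.DoubleArrayList;
import ubic.basecode.math.Rank;

public class RankTransformSketch {
    public static void main(String[] args) {
        // a short expression row with a tie between the two 5.0 values
        double[] row = new double[] { 2.0, 5.0, 5.0, 1.0, 9.0 };

        DoubleArrayList ranks = Rank.rankTransform(new DoubleArrayList(row));

        // copy the ranks back out into a plain double[], as getRankTransformedData does
        double[] ri = new double[ranks.size()];
        for (int n = 0; n < ranks.size(); n++) {
            ri[n] = ranks.get(n);
        }
        System.out.println(java.util.Arrays.toString(ri));
    }
}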
Use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.
Class SpearmanMetrics, method spearman:
protected double spearman(double[] vectorA, double[] vectorB, boolean[] usedA, boolean[] usedB, int i, int j) {
    /* because we assume there might be ties, we compute the correlation of the ranks. */
    /*
     * Note that if there are missing values, the precomputed ranks will be wrong. Strictly, the ranks need to be
     * recomputed.
     */
    // first count the number of mutually present values
    int numused = 0;
    for (int k = 0; k < vectorA.length; k++) {
        if (usedA[k] && usedB[k]) {
            numused++;
        }
    }
    if (numused < minNumUsed) {
        this.setCorrel(i, j, Double.NaN, 0);
        return Double.NaN;
    }
    double[] xjc;
    double[] yjc;
    if (numused == vectorA.length) {
        xjc = vectorA;
        yjc = vectorB;
    } else {
        xjc = new double[numused];
        yjc = new double[numused];
        int v = 0;
        for (int k = 0; k < vectorA.length; k++) {
            if (usedA[k] && usedB[k]) {
                xjc[v] = vectorA[k];
                yjc[v] = vectorB[k];
                v++;
            }
        }
        /*
         * Retransform: re-rank the values restricted to the mutually present entries.
         */
        xjc = Rank.rankTransform(new DoubleArrayList(xjc)).elements();
        yjc = Rank.rankTransform(new DoubleArrayList(yjc)).elements();
    }
    double correl;
    double sxy = 0.0;
    double sxx = 0.0;
    double syy = 0.0;
    double sx = 0.0;
    double sy = 0.0;
    numused = 0;
    for (int k = 0; k < xjc.length; k++) {
        double xj = xjc[k];
        double yj = yjc[k];
        sx += xj;
        sy += yj;
        sxy += xj * yj;
        sxx += xj * xj;
        syy += yj * yj;
        numused++;
    }
    double denom = this.correlationNorm(numused, sxx, sx, syy, sy);
    if (denom <= 0.0) {
        // means variance is zero for one of the vectors.
        this.setCorrel(i, j, 0.0, numused);
        return 0.0;
    }
    correl = (sxy - sx * sy / numused) / Math.sqrt(denom);
    // small deviations outside [-1, 1] (roundoff) are okay, but big ones shouldn't happen!
    assert correl < 1.0001 && correl > -1.0001;
    // roundoff protection.
    if (correl < -1.0)
        correl = -1.0;
    else if (correl > 1.0)
        correl = 1.0;
    this.setCorrel(i, j, correl, numused);
    return correl;
}
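The final computation is just a Pearson correlation, expressed through running sums, applied to rank vectors. The body of correlationNorm is not shown above; the self-contained sketch below assumes it is the usual product of the two sum-of-squared-deviations terms, (sxx - sx*sx/n) * (syy - sy*sy/n). The class and method names are hypothetical.

public class RankCorrelationSketch {
    /**
     * Pearson correlation from running sums, applied to rank vectors; mirrors the final
     * computation in spearman() above, with the assumed correlationNorm inlined.
     */
    static double correlationOfRanks(double[] x, double[] y) {
        double sx = 0, sy = 0, sxy = 0, sxx = 0, syy = 0;
        int n = x.length;
        for (int k = 0; k < n; k++) {
            sx += x[k];
            sy += y[k];
            sxy += x[k] * y[k];
            sxx += x[k] * x[k];
            syy += y[k] * y[k];
        }
        double denom = (sxx - sx * sx / n) * (syy - sy * sy / n);
        if (denom <= 0.0) {
            return 0.0; // zero variance in one of the vectors
        }
        double correl = (sxy - sx * sy / n) / Math.sqrt(denom);
        // clamp roundoff just outside [-1, 1], as the original does
        return Math.max(-1.0, Math.min(1.0, correl));
    }

    public static void main(String[] args) {
        // ranks of two perfectly monotonically related vectors: Spearman correlation is 1
        double[] ranksA = { 1, 2, 3, 4, 5 };
        double[] ranksB = { 1, 2, 3, 4, 5 };
        System.out.println(correlationOfRanks(ranksA, ranksB)); // prints 1.0
        // reversed ranks give a Spearman correlation of -1
        double[] ranksC = { 5, 4, 3, 2, 1 };
        System.out.println(correlationOfRanks(ranksA, ranksC)); // prints -1.0
    }
}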