Use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.
The class ProcessedExpressionDataVectorCreateHelperServiceImpl, method getRanks.
private DoubleArrayList getRanks(ExpressionDataDoubleMatrix intensities,
        ProcessedExpressionDataVectorDao.RankMethod method) {
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.debug("Getting ranks");
    assert intensities != null;
    DoubleArrayList result = new DoubleArrayList(intensities.rows());
    for (ExpressionDataMatrixRowElement de : intensities.getRowElements()) {
        double[] rowObj = ArrayUtils.toPrimitive(intensities.getRow(de.getDesignElement()));
        // Sentinel for rows with no data. Note that Double.MIN_VALUE is the smallest
        // positive double, not the most negative value.
        double valueForRank = Double.MIN_VALUE;
        if (rowObj != null) {
            DoubleArrayList row = new DoubleArrayList(rowObj);
            switch (method) {
                case max:
                    valueForRank = DescriptiveWithMissing.max(row);
                    break;
                case mean:
                    valueForRank = DescriptiveWithMissing.mean(row);
                    break;
                default:
                    throw new UnsupportedOperationException();
            }
        }
        result.add(valueForRank);
    }
    return Rank.rankTransform(result);
}
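The method collapses each matrix row to one summary value (max or mean, ignoring missing values) and then rank-transforms the whole list. A minimal sketch of that final step in isolation, assuming baseCode's Rank.rankTransform(DoubleArrayList) as used above; the class name and toy values are hypothetical:

import cern.colt.list.DoubleArrayList;
import ubic.basecode.math.Rank;

public class RankSketch {
    public static void main(String[] args) {
        // One summary value per matrix row, e.g. row means (made-up data).
        DoubleArrayList summaries = new DoubleArrayList(new double[] { 5.2, 1.1, 3.3 });
        // Replace each value by its rank: 1.1 gets the lowest rank, 5.2 the highest.
        DoubleArrayList ranks = Rank.rankTransform(summaries);
        System.out.println(ranks);
    }
}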
Use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.
The class BatchConfound, method factorBatchConfoundTest.
private static Collection<BatchConfoundValueObject> factorBatchConfoundTest(ExpressionExperiment ee,
        Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap) throws IllegalArgumentException {
    Map<Long, Long> batchMembership = new HashMap<>();
    ExperimentalFactor batchFactor = null;
    Map<Long, Integer> batchIndexes = new HashMap<>();
    // Locate the batch factor and record which batch each biomaterial belongs to.
    for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
        if (ExperimentalDesignUtils.isBatch(ef)) {
            batchFactor = ef;
            Map<Long, Double> bmToFv = bioMaterialFactorMap.get(batchFactor);
            if (bmToFv == null) {
                log.warn("No biomaterial --> factor value map for batch factor: " + batchFactor);
                continue;
            }
            int index = 0;
            for (FactorValue fv : batchFactor.getFactorValues()) {
                batchIndexes.put(fv.getId(), index++);
            }
            for (Long bmId : bmToFv.keySet()) {
                batchMembership.put(bmId, bmToFv.get(bmId).longValue());
            }
            break;
        }
    }
    Set<BatchConfoundValueObject> result = new HashSet<>();
    if (batchFactor == null) {
        return result;
    }
    // Test every other factor for association with batch.
    for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
        if (ef.equals(batchFactor))
            continue;
        Map<Long, Double> bmToFv = bioMaterialFactorMap.get(ef);
        int numBioMaterials = bmToFv.keySet().size();
        assert numBioMaterials > 0 : "No biomaterials for " + ef;
        double p = Double.NaN;
        double chiSquare;
        int df;
        int numBatches = batchFactor.getFactorValues().size();
        if (ExperimentalDesignUtils.isContinuous(ef)) {
            // Continuous factor: Kruskal-Wallis rank test of factor values grouped by batch.
            DoubleArrayList factorValues = new DoubleArrayList(numBioMaterials);
            factorValues.setSize(numBioMaterials);
            IntArrayList batches = new IntArrayList(numBioMaterials);
            batches.setSize(numBioMaterials);
            int j = 0;
            for (Long bmId : bmToFv.keySet()) {
                assert factorValues.size() > 0 : "Biomaterial to factorValue is empty for " + ef;
                factorValues.set(j, bmToFv.get(bmId));
                long batch = batchMembership.get(bmId);
                batches.set(j, batchIndexes.get(batch));
                j++;
            }
            p = KruskalWallis.test(factorValues, batches);
            df = KruskalWallis.dof(factorValues, batches);
            chiSquare = KruskalWallis.kwStatistic(factorValues, batches);
            log.debug("KWallis\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName()
                    + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p)
                    + "\t" + numBatches);
        } else {
            // Categorical factor: chi-square test on the batch-by-factor-level contingency table.
            Map<Long, Integer> factorValueIndexes = new HashMap<>();
            int index = 0;
            for (FactorValue fv : ef.getFactorValues()) {
                factorValueIndexes.put(fv.getId(), index++);
            }
            Map<Long, Long> factorValueMembership = new HashMap<>();
            for (Long bmId : bmToFv.keySet()) {
                factorValueMembership.put(bmId, bmToFv.get(bmId).longValue());
            }
            long[][] counts = new long[numBatches][ef.getFactorValues().size()];
            // Java zero-initializes arrays, so this explicit reset is redundant but harmless.
            for (int i = 0; i < batchIndexes.size(); i++) {
                for (int j = 0; j < factorValueIndexes.size(); j++) {
                    counts[i][j] = 0;
                }
            }
            for (Long bm : bmToFv.keySet()) {
                long fv = factorValueMembership.get(bm);
                Long batch = batchMembership.get(bm);
                if (batch == null) {
                    log.warn("No batch membership for : " + bm);
                    continue;
                }
                int batchIndex = batchIndexes.get(batch);
                int factorIndex = factorValueIndexes.get(fv);
                counts[batchIndex][factorIndex]++;
            }
            ChiSquareTest cst = new ChiSquareTest();
            try {
                chiSquare = cst.chiSquare(counts);
            } catch (IllegalArgumentException e) {
                log.warn("IllegalArgumentException computing ChiSq for : " + ef + "; Error was: " + e.getMessage());
                chiSquare = Double.NaN;
            }
            df = (counts.length - 1) * (counts[0].length - 1);
            ChiSquaredDistribution distribution = new ChiSquaredDistribution(df);
            if (!Double.isNaN(chiSquare)) {
                p = 1.0 - distribution.cumulativeProbability(chiSquare);
            }
            log.debug("ChiSq\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName()
                    + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p)
                    + "\t" + numBatches);
        }
        BatchConfoundValueObject summary = new BatchConfoundValueObject(ee, ef, chiSquare, df, p, numBatches);
        result.add(summary);
    }
    return result;
}
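The categorical branch boils down to a standard contingency-table test with Apache Commons Math (the continuous branch does the analogous thing with baseCode's KruskalWallis). A self-contained sketch of just the chi-square step; the class name and the 2x3 batch-by-level table are made up:

import org.apache.commons.math3.distribution.ChiSquaredDistribution;
import org.apache.commons.math3.stat.inference.ChiSquareTest;

public class ConfoundSketch {
    public static void main(String[] args) {
        // Rows are batches, columns are levels of the candidate factor (toy counts).
        long[][] counts = { { 10, 2, 1 }, { 1, 9, 8 } };
        double chiSquare = new ChiSquareTest().chiSquare(counts);
        int df = (counts.length - 1) * (counts[0].length - 1);
        double p = 1.0 - new ChiSquaredDistribution(df).cumulativeProbability(chiSquare);
        // A small p suggests the factor is confounded with batch.
        System.out.printf("chi2=%.2f df=%d p=%.3g%n", chiSquare, df, p);
    }
}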
Use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.
The class ComBat, method run.
/**
 * @param parametric if false, use the non-parametric (slower) method for estimating the priors.
 * @return the batch-corrected data
 * @throws ComBatException if the correction cannot be computed
 */
public DoubleMatrix2D run(boolean parametric) throws ComBatException {
    StopWatch timer = new StopWatch();
    timer.start();
    final DoubleMatrix2D sdata = this.standardize(y, x);
    this.checkForProblems(sdata);
    if (timer.getTime() > 1000) {
        ComBat.log.info("Standardized");
    }
    timer.reset();
    timer.start();
    this.gammaHat(sdata);
    this.deltaHat(sdata);
    // assertEquals( 1.618, deltaHat.get( 0, 0 ), 0.001 );
    // Per-batch means and variances of the batch-effect estimates:
    // gamma.bar <- apply(gamma.hat, 1, mean)
    gammaBar = new DoubleArrayList();
    t2 = new DoubleArrayList();
    for (int batchIndex = 0; batchIndex < gammaHat.rows(); batchIndex++) {
        double mean = DescriptiveWithMissing.mean(new DoubleArrayList(gammaHat.viewRow(batchIndex).toArray()));
        gammaBar.add(mean);
        t2.add(DescriptiveWithMissing.sampleVariance(new DoubleArrayList(gammaHat.viewRow(batchIndex).toArray()),
                mean));
    }
    // assertEquals( -0.092144, gammaBar.get( 0 ), 0.001 );
    // assertEquals( 0.2977, t2.get( 1 ), 0.001 );
    aPrior = this.aPrior(deltaHat);
    bPrior = this.bPrior(deltaHat);
    if (timer.getTime() > 1000) {
        ComBat.log.info("Computed priors");
    }
    // assertEquals( 17.4971, aPrior.get( 0 ), 0.0001 );
    // assertEquals( 4.514, bPrior.get( 1 ), 0.0001 );
    DoubleMatrix2D gammastar = new DenseDoubleMatrix2D(numBatches, numProbes);
    DoubleMatrix2D deltastar = new DenseDoubleMatrix2D(numBatches, numProbes);
    if (!parametric) {
        this.runNonParametric(sdata, gammastar, deltastar);
    } else {
        this.runParametric(sdata, gammastar, deltastar);
    }
    DoubleMatrix2D adjustedData = this.rawAdjust(sdata, gammastar, deltastar);
    // assertEquals( -0.95099, adjustedData.get( 18, 0 ), 0.0001 );
    // assertEquals( -0.30273984, adjustedData.get( 14, 6 ), 0.0001 );
    // assertEquals( 0.2097977, adjustedData.get( 7, 3 ), 0.0001 );
    // log.info( adjustedData );
    DoubleMatrix2D result = this.restoreScale(adjustedData);
    if (timer.getTime() > 1000) {
        ComBat.log.info("Done");
    }
    return result;
}
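The aPrior and bPrior helpers are not part of this excerpt; in the reference (sva) ComBat implementation they are method-of-moments estimates of inverse-gamma hyperparameters from each batch's row of deltaHat, namely aprior = (2*s2 + m^2)/s2 and bprior = (m*s2 + m^3)/s2, where m and s2 are the row mean and sample variance. A sketch of that computation under those assumed formulas, using colt's Descriptive; this is a hypothetical helper, not Gemma's own method:

import cern.colt.list.DoubleArrayList;
import cern.colt.matrix.DoubleMatrix2D;
import cern.jet.stat.Descriptive;

public class PriorSketch {
    // Inverse-gamma hyperpriors for one batch (one row of deltaHat), by moment matching.
    static double[] invGammaPriors(DoubleMatrix2D deltaHat, int batch) {
        DoubleArrayList row = new DoubleArrayList(deltaHat.viewRow(batch).toArray());
        double m = Descriptive.mean(row);
        double s2 = Descriptive.sampleVariance(row, m);
        double aPrior = (2 * s2 + m * m) / s2;
        double bPrior = (m * s2 + m * m * m) / s2;
        return new double[] { aPrior, bPrior };
    }
}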
Use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.
The class ComBat, method deltaHat.
private void deltaHat(DoubleMatrix2D sdata) {
    int batchIndex;
    deltaHat = new DenseDoubleMatrix2D(numBatches, numProbes);
    batchIndex = 0;
    // Per-batch, per-probe sample variance of the standardized data.
    for (String batchId : batches.keySet()) {
        DoubleMatrix2D batchData = this.getBatchData(sdata, batchId);
        for (int j = 0; j < batchData.rows(); j++) {
            DoubleArrayList row = new DoubleArrayList(batchData.viewRow(j).toArray());
            double variance = DescriptiveWithMissing.sampleVariance(row, DescriptiveWithMissing.mean(row));
            deltaHat.set(batchIndex, j, variance);
        }
        batchIndex++;
    }
}
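DescriptiveWithMissing, from the baseCode library Gemma builds on, mirrors colt's Descriptive but tolerates missing observations (encoded as NaN, which is an assumption about its convention), which is why it appears wherever expression rows may be incomplete. A minimal sketch under that assumption; the class name and values are made up:

import cern.colt.list.DoubleArrayList;
import ubic.basecode.math.DescriptiveWithMissing;

public class MissingSketch {
    public static void main(String[] args) {
        // NaN marks a missing observation (assumed convention).
        DoubleArrayList row = new DoubleArrayList(new double[] { 1.0, Double.NaN, 3.0 });
        double mean = DescriptiveWithMissing.mean(row); // expected 2.0, NaN skipped
        double var = DescriptiveWithMissing.sampleVariance(row, mean); // variance of { 1.0, 3.0 }
        System.out.println(mean + " " + var);
    }
}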
Use of cern.colt.list.DoubleArrayList in project Gemma by PavlidisLab.
The class ComBat, method standardize.
/**
 * Special standardization: partial regression of covariates.
 *
 * @param b the data matrix to standardize
 * @param A the design matrix
 * @return the standardized data
 */
DoubleMatrix2D standardize(DoubleMatrix2D b, DoubleMatrix2D A) {
    DoubleMatrix2D beta = new LeastSquaresFit(A, b).getCoefficients();
    // assertEquals( 3.7805, beta.get( 0, 0 ), 0.001 );
    // assertEquals( 0.0541, beta.get( 2, 18 ), 0.001 );
    int batchIndex = 0;
    DoubleMatrix2D bba = new DenseDoubleMatrix2D(1, numBatches);
    for (String batchId : batches.keySet()) {
        bba.set(0, batchIndex++, (double) batches.get(batchId).size() / numSamples);
    }
    /*
     * Weight the batch coefficients by the batch sizes to get the grand mean.
     */
    DoubleMatrix2D grandMeanM = solver.mult(bba, beta.viewPart(0, 0, numBatches, beta.columns()));
    if (hasMissing) {
        varpooled = y.copy().assign(solver.transpose(solver.mult(x, beta)), Functions.minus);
        DoubleMatrix2D var = new DenseDoubleMatrix2D(varpooled.rows(), 1);
        for (int i = 0; i < varpooled.rows(); i++) {
            DoubleMatrix1D row = varpooled.viewRow(i);
            double m = DescriptiveWithMissing.mean(new DoubleArrayList(row.toArray()));
            double v = DescriptiveWithMissing.sampleVariance(new DoubleArrayList(row.toArray()), m);
            var.set(i, 0, v);
        }
        varpooled = var;
    } else {
        varpooled = y.copy().assign(solver.transpose(solver.mult(x, beta)), Functions.minus)
                .assign(Functions.pow(2));
        DoubleMatrix2D scale = new DenseDoubleMatrix2D(numSamples, 1);
        scale.assign(1.0 / numSamples);
        varpooled = solver.mult(varpooled, scale);
    }
    DoubleMatrix2D size = new DenseDoubleMatrix2D(numSamples, 1);
    size.assign(1.0);
    /*
     * The grand mean repeated for each sample.
     */
    standMean = solver.mult(solver.transpose(grandMeanM), solver.transpose(size));
    /*
     * Erase the batch factors from a copy of the design matrix.
     */
    DoubleMatrix2D tmpX = x.copy();
    for (batchIndex = 0; batchIndex < numBatches; batchIndex++) {
        for (int j = 0; j < x.rows(); j++) {
            tmpX.set(j, batchIndex, 0.0);
        }
    }
    /*
     * Row means, adjusted "per group", ignoring batch effects.
     */
    standMean = standMean.assign(solver.transpose(solver.mult(tmpX, beta)), Functions.plus);
    DoubleMatrix2D varsq = solver.mult(varpooled.copy().assign(Functions.sqrt), solver.transpose(size));
    /*
     * Subtract the mean and divide by the standard deviations.
     */
    DoubleMatrix2D meansubtracted = y.copy().assign(standMean, Functions.minus);
    return meansubtracted.assign(varsq, Functions.div);
}
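The closing lines do an elementwise (y - mean)/sd using colt's Functions. The same pattern in isolation, with hypothetical toy matrices for the data, the per-cell means, and the per-cell standard deviations:

import cern.colt.matrix.DoubleMatrix2D;
import cern.colt.matrix.impl.DenseDoubleMatrix2D;
import cern.jet.math.Functions;

public class StandardizeSketch {
    public static void main(String[] args) {
        DoubleMatrix2D y = new DenseDoubleMatrix2D(new double[][] { { 1, 2 }, { 3, 4 } });
        DoubleMatrix2D mean = new DenseDoubleMatrix2D(new double[][] { { 1.5, 1.5 }, { 3.5, 3.5 } });
        DoubleMatrix2D sd = new DenseDoubleMatrix2D(new double[][] { { 0.5, 0.5 }, { 0.5, 0.5 } });
        // Elementwise (y - mean) / sd, as in the tail of ComBat.standardize().
        DoubleMatrix2D z = y.copy().assign(mean, Functions.minus).assign(sd, Functions.div);
        System.out.println(z);
    }
}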