use of cern.colt.matrix.DoubleMatrix2D in project Gemma by PavlidisLab.
the class ComBat method standardize.
/**
* Special standardization: partial regression of covariates
*
* @param b b
* @param A A
* @return double matrix 2d
*/
DoubleMatrix2D standardize(DoubleMatrix2D b, DoubleMatrix2D A) {
DoubleMatrix2D beta = new LeastSquaresFit(A, b).getCoefficients();
// assertEquals( 3.7805, beta.get( 0, 0 ), 0.001 );
// assertEquals( 0.0541, beta.get( 2, 18 ), 0.001 );
int batchIndex = 0;
DoubleMatrix2D bba = new DenseDoubleMatrix2D(1, numBatches);
for (String batchId : batches.keySet()) {
bba.set(0, batchIndex++, (double) batches.get(batchId).size() / numSamples);
}
/*
* Weight the non-batch coefficients by the batch sizes.
*/
DoubleMatrix2D grandMeanM = solver.mult(bba, beta.viewPart(0, 0, numBatches, beta.columns()));
if (hasMissing) {
varpooled = y.copy().assign(solver.transpose(solver.mult(x, beta)), Functions.minus);
DoubleMatrix2D var = new DenseDoubleMatrix2D(varpooled.rows(), 1);
for (int i = 0; i < varpooled.rows(); i++) {
DoubleMatrix1D row = varpooled.viewRow(i);
double m = DescriptiveWithMissing.mean(new DoubleArrayList(row.toArray()));
double v = DescriptiveWithMissing.sampleVariance(new DoubleArrayList(row.toArray()), m);
var.set(i, 0, v);
}
varpooled = var;
} else {
varpooled = y.copy().assign(solver.transpose(solver.mult(x, beta)), Functions.minus).assign(Functions.pow(2));
DoubleMatrix2D scale = new DenseDoubleMatrix2D(numSamples, 1);
scale.assign(1.0 / numSamples);
varpooled = solver.mult(varpooled, scale);
}
DoubleMatrix2D size = new DenseDoubleMatrix2D(numSamples, 1);
size.assign(1.0);
/*
* The coefficients repeated for each sample.
*/
standMean = solver.mult(solver.transpose(grandMeanM), solver.transpose(size));
/*
* Erase the batch factors from a copy of the design matrix
*/
DoubleMatrix2D tmpX = x.copy();
for (batchIndex = 0; batchIndex < numBatches; batchIndex++) {
for (int j = 0; j < x.rows(); j++) {
tmpX.set(j, batchIndex, 0.0);
}
}
/*
* row means, adjusted "per group", and ignoring batch effects.
*/
standMean = standMean.assign(solver.transpose(solver.mult(tmpX, beta)), Functions.plus);
DoubleMatrix2D varsq = solver.mult(varpooled.copy().assign(Functions.sqrt), solver.transpose(size));
/*
* Subtract the mean and divide by the standard deviations.
*/
DoubleMatrix2D meansubtracted = y.copy().assign(standMean, Functions.minus);
return meansubtracted.assign(varsq, Functions.div);
}
use of cern.colt.matrix.DoubleMatrix2D in project Gemma by PavlidisLab.
the class ExpressionExperimentBatchCorrectionServiceImpl method doComBat.
private ExpressionDataDoubleMatrix doComBat(ExpressionExperiment ee, ExpressionDataDoubleMatrix originalDataMatrix, ObjectMatrix<BioMaterial, ExperimentalFactor, Object> design) {
ObjectMatrix<BioMaterial, String, Object> designU = this.convertFactorValuesToStrings(design);
DoubleMatrix<CompositeSequence, BioMaterial> matrix = originalDataMatrix.getMatrix();
designU = this.orderMatrix(matrix, designU);
ScaleType scale = originalDataMatrix.getQuantitationTypes().iterator().next().getScale();
boolean transformed = false;
if (!(scale.equals(ScaleType.LOG2) || scale.equals(ScaleType.LOG10) || scale.equals(ScaleType.LOGBASEUNKNOWN) || scale.equals(ScaleType.LN))) {
ExpressionExperimentBatchCorrectionServiceImpl.log.info(" *** COMBAT: LOG TRANSFORMING ***");
transformed = true;
MatrixStats.logTransform(matrix);
}
/*
* Process
*/
ComBat<CompositeSequence, BioMaterial> comBat = new ComBat<>(matrix, designU);
// false: NONPARAMETRIC
DoubleMatrix2D results = comBat.run(true);
// note these plots always reflect the parametric setup.
// TEMPORARY?
comBat.plot(ee.getId() + "." + FileTools.cleanForFileName(ee.getShortName()));
/*
* Postprocess. Results is a raw matrix/
*/
DoubleMatrix<CompositeSequence, BioMaterial> correctedDataMatrix = new DenseDoubleMatrix<>(results.toArray());
correctedDataMatrix.setRowNames(matrix.getRowNames());
correctedDataMatrix.setColumnNames(matrix.getColNames());
if (transformed) {
MatrixStats.unLogTransform(correctedDataMatrix);
}
ExpressionDataDoubleMatrix correctedExpressionDataMatrix = new ExpressionDataDoubleMatrix(originalDataMatrix, correctedDataMatrix);
assert correctedExpressionDataMatrix.getQuantitationTypes().size() == 1;
/*
* It is easier if we make a new quantitationtype.
*/
QuantitationType oldQt = correctedExpressionDataMatrix.getQuantitationTypes().iterator().next();
QuantitationType newQt = this.makeNewQuantitationType(oldQt);
correctedExpressionDataMatrix.getQuantitationTypes().clear();
correctedExpressionDataMatrix.getQuantitationTypes().add(newQt);
// Sanity check...
for (int i = 0; i < correctedExpressionDataMatrix.columns(); i++) {
assert correctedExpressionDataMatrix.getBioMaterialForColumn(i).equals(originalDataMatrix.getBioMaterialForColumn(i));
}
return correctedExpressionDataMatrix;
}
use of cern.colt.matrix.DoubleMatrix2D in project Gemma by PavlidisLab.
the class ExpressionDataSVD method removeHighestComponents.
/**
* Provide a reconstructed matrix removing the first N components (the most significant ones). If the matrix was
* normalized first, removing the first component replicates the normalization approach taken by Nielsen et al.
* (Lancet 359, 2002) and Alter et al. (PNAS 2000). Correction by ANOVA would yield similar results if the nuisance
* variable is known.
*
* @param numComponentsToRemove The number of components to remove, starting from the largest eigenvalue.
* @return the reconstructed matrix; values that were missing before are re-masked.
*/
public ExpressionDataDoubleMatrix removeHighestComponents(int numComponentsToRemove) {
DoubleMatrix<Integer, Integer> copy = svd.getS().copy();
for (int i = 0; i < numComponentsToRemove; i++) {
copy.set(i, i, 0.0);
}
double[][] rawU = svd.getU().getRawMatrix();
double[][] rawS = copy.getRawMatrix();
double[][] rawV = svd.getV().getRawMatrix();
DoubleMatrix2D u = new DenseDoubleMatrix2D(rawU);
DoubleMatrix2D s = new DenseDoubleMatrix2D(rawS);
DoubleMatrix2D v = new DenseDoubleMatrix2D(rawV);
Algebra a = new Algebra();
DoubleMatrix<CompositeSequence, BioMaterial> reconstructed = new DenseDoubleMatrix<>(a.mult(a.mult(u, s), a.transpose(v)).toArray());
reconstructed.setRowNames(this.expressionData.getMatrix().getRowNames());
reconstructed.setColumnNames(this.expressionData.getMatrix().getColNames());
// re-mask the missing values.
for (int i = 0; i < reconstructed.rows(); i++) {
for (int j = 0; j < reconstructed.columns(); j++) {
if (Double.isNaN(this.missingValueInfo.get(i, j))) {
reconstructed.set(i, j, Double.NaN);
}
}
}
return new ExpressionDataDoubleMatrix(this.expressionData, reconstructed);
}
use of cern.colt.matrix.DoubleMatrix2D in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method regressionResiduals.
/**
* @param matrix on which to perform regression.
* @param config containing configuration of factors to include. Any interactions or subset configuration is
* ignored. Data are <em>NOT</em> log transformed unless they come in that way. (the qValueThreshold will be
* ignored)
* @param retainScale if true, the data retain the global mean (intercept)
* @return residuals from the regression.
*/
@Override
public ExpressionDataDoubleMatrix regressionResiduals(ExpressionDataDoubleMatrix matrix, DifferentialExpressionAnalysisConfig config, boolean retainScale) {
if (config.getFactorsToInclude().isEmpty()) {
LinearModelAnalyzer.log.warn("No factors");
return matrix;
}
/*
* Note that this method relies on similar code to doAnalysis, for the setup stages.
*/
List<ExperimentalFactor> factors = config.getFactorsToInclude();
List<BioMaterial> samplesUsed = ExperimentalDesignUtils.getOrderedSamples(matrix, factors);
Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils.getBaselineConditions(samplesUsed, factors);
ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors, samplesUsed, baselineConditions);
DesignMatrix properDesignMatrix = new DesignMatrix(designMatrix, true);
ExpressionDataDoubleMatrix dmatrix = new ExpressionDataDoubleMatrix(samplesUsed, matrix);
DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();
DoubleMatrix<String, String> sNamedMatrix = this.makeDataMatrix(designMatrix, namedMatrix);
// perform weighted least squares regression on COUNT data
QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
LeastSquaresFit fit;
if (quantitationType.getScale().equals(ScaleType.COUNT)) {
LinearModelAnalyzer.log.info("Calculating residuals of weighted least squares regression on COUNT data");
// note: data is not log transformed
DoubleMatrix1D librarySize = MatrixStats.colSums(sNamedMatrix);
MeanVarianceEstimator mv = new MeanVarianceEstimator(properDesignMatrix, sNamedMatrix, librarySize);
fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix, mv.getWeights());
} else {
fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix);
}
DoubleMatrix2D residuals = fit.getResiduals();
if (retainScale) {
DoubleMatrix1D intercept = fit.getCoefficients().viewRow(0);
for (int i = 0; i < residuals.rows(); i++) {
residuals.viewRow(i).assign(Functions.plus(intercept.get(i)));
}
}
DoubleMatrix<CompositeSequence, BioMaterial> f = new DenseDoubleMatrix<>(residuals.toArray());
f.setRowNames(dmatrix.getMatrix().getRowNames());
f.setColumnNames(dmatrix.getMatrix().getColNames());
return new ExpressionDataDoubleMatrix(dmatrix, f);
}
use of cern.colt.matrix.DoubleMatrix2D in project Gemma by PavlidisLab.
the class ComBatTest method test2WithMissingValues.
@Test
public void test2WithMissingValues() throws Exception {
DoubleMatrixReader f = new DoubleMatrixReader();
DoubleMatrix<String, String> testMatrix = f.read(this.getClass().getResourceAsStream("/data/analysis/preprocess/batcheffects/example.madata.withmissing.small.txt"));
StringMatrixReader of = new StringMatrixReader();
StringMatrix<String, String> sampleInfo = of.read(this.getClass().getResourceAsStream("/data/analysis/preprocess/batcheffects/example.metadata.small.txt"));
@SuppressWarnings({ "unchecked", "rawtypes" }) ComBat<String, String> comBat = new ComBat(testMatrix, sampleInfo);
DoubleMatrix2D X = comBat.getDesignMatrix();
assertEquals(1, X.get(0, 0), 0.001);
assertEquals(0, X.get(3, 0), 0.001);
assertEquals(1, X.get(4, 2), 0.001);
DoubleMatrix2D y = new DenseDoubleMatrix2D(testMatrix.asArray());
DoubleMatrix2D sdata = comBat.standardize(y, X);
assertEquals(-0.23640626, sdata.get(17, 1), 0.0001);
assertEquals(0.51027241, sdata.get(8, 2), 0.001);
assertEquals(0.2107944, sdata.get(0, 8), 0.001);
assertEquals(0.23769649, sdata.get(3, 7), 0.001);
assertEquals(Double.NaN, sdata.get(7, 6), 0.001);
DoubleMatrix2D finalResult = comBat.run();
assertEquals(10.660466, finalResult.get(7, 0), 0.0001);
assertEquals(11.733197, finalResult.get(7, 7), 0.0001);
assertEquals(Double.NaN, finalResult.get(7, 6), 0.0001);
assertEquals(6.802441, finalResult.get(10, 7), 0.0001);
// log.info( finalResult );
// X08.1 X54.1 X36.1 X23.1 X17.1 X40.1 X45.1 X55.1 X11.1
// 1553129_at 3.861661 3.656498 3.891722 3.969015 3.928164 3.859776 3.885422 3.831730 3.853814
// 213447_at 6.233625 5.400615 5.583825 6.034642 6.457188 6.173610 5.322877 4.591996 6.655735
// 242039_at 8.155451 8.487645 7.512280 7.043722 7.570154 7.928574 8.138381 8.538423 7.937447
// 223394_at 7.794531 8.178473 8.285406 8.316963 7.845536 8.255656 8.604694 8.184320 7.311231
// 227758_at 3.813320 3.474997 NA 3.663592 3.701014 NA 3.648964 3.618175 3.985569
// 207696_at 3.576939 3.525421 3.561366 3.506479 3.516473 3.593750 3.628095 3.676431 3.599589
// 241107_at 6.264194 5.926644 5.654168 5.730628 6.185137 5.587933 5.527347 5.895269 6.441413
// 228980_at 10.660466 11.090106 10.769495 10.990729 10.616753 11.819747 NA 11.733197 10.516411
// 204452_s_at 6.038281 5.403597 5.950596 6.443812 5.676120 5.238702 5.616082 5.290953 5.543041
// 1562443_at 4.618687 3.961298 4.671874 4.512624 4.829666 4.138126 4.232039 4.048561 4.696936
// 232018_at 6.221217 6.882512 6.093883 5.937127 5.987227 6.502522 6.940522 6.802441 5.673800
// 1561877_at 3.793029 3.751057 3.719922 3.866485 4.070190 3.658865 3.465794 3.854070 3.878497
// 221183_at 6.800233 5.559318 6.247321 6.566830 6.731457 5.701761 6.062595 5.097052 7.117171
// 206162_x_at 5.273091 5.238142 5.023724 4.886765 5.162352 5.564269 5.573007 5.980072 5.558662
// 214502_at 4.047844 NA 3.841319 4.006797 NA 4.504433 3.992359 4.192473 3.773261
// 234099_at 7.628902 6.875036 7.101699 6.929775 7.202759 6.431563 6.622195 6.751740 7.740300
// 237400_at 4.396190 NA 4.978136 4.775859 5.379108 5.809133 4.611809 4.853239 4.734252
// 240254_at 4.062600 3.851718 4.274175 4.153745 4.030111 6.324506 4.089158 3.739869 4.426321
// 209053_s_at 5.970077 6.378914 6.241240 6.450990 5.944027 6.702078 6.463590 6.372133 5.964286
}
Aggregations