use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class ExpressionDataSVD method removeHighestComponents.
/**
* Provide a reconstructed matrix removing the first N components (the most significant ones). If the matrix was
* normalized first, removing the first component replicates the normalization approach taken by Nielsen et al.
* (Lancet 359, 2002) and Alter et al. (PNAS 2000). Correction by ANOVA would yield similar results if the nuisance
* variable is known.
*
* @param numComponentsToRemove The number of components to remove, starting from the largest eigenvalue.
* @return the reconstructed matrix; values that were missing before are re-masked.
*/
public ExpressionDataDoubleMatrix removeHighestComponents(int numComponentsToRemove) {
DoubleMatrix<Integer, Integer> copy = svd.getS().copy();
for (int i = 0; i < numComponentsToRemove; i++) {
copy.set(i, i, 0.0);
}
double[][] rawU = svd.getU().getRawMatrix();
double[][] rawS = copy.getRawMatrix();
double[][] rawV = svd.getV().getRawMatrix();
DoubleMatrix2D u = new DenseDoubleMatrix2D(rawU);
DoubleMatrix2D s = new DenseDoubleMatrix2D(rawS);
DoubleMatrix2D v = new DenseDoubleMatrix2D(rawV);
Algebra a = new Algebra();
DoubleMatrix<CompositeSequence, BioMaterial> reconstructed = new DenseDoubleMatrix<>(a.mult(a.mult(u, s), a.transpose(v)).toArray());
reconstructed.setRowNames(this.expressionData.getMatrix().getRowNames());
reconstructed.setColumnNames(this.expressionData.getMatrix().getColNames());
// re-mask the missing values.
for (int i = 0; i < reconstructed.rows(); i++) {
for (int j = 0; j < reconstructed.columns(); j++) {
if (Double.isNaN(this.missingValueInfo.get(i, j))) {
reconstructed.set(i, j, Double.NaN);
}
}
}
return new ExpressionDataDoubleMatrix(this.expressionData, reconstructed);
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method regressionResiduals.
/**
* @param matrix on which to perform regression.
* @param config containing configuration of factors to include. Any interactions or subset configuration is
* ignored. Data are <em>NOT</em> log transformed unless they come in that way. (the qValueThreshold will be
* ignored)
* @param retainScale if true, the data retain the global mean (intercept)
* @return residuals from the regression.
*/
@Override
public ExpressionDataDoubleMatrix regressionResiduals(ExpressionDataDoubleMatrix matrix, DifferentialExpressionAnalysisConfig config, boolean retainScale) {
if (config.getFactorsToInclude().isEmpty()) {
LinearModelAnalyzer.log.warn("No factors");
return matrix;
}
/*
* Note that this method relies on similar code to doAnalysis, for the setup stages.
*/
List<ExperimentalFactor> factors = config.getFactorsToInclude();
List<BioMaterial> samplesUsed = ExperimentalDesignUtils.getOrderedSamples(matrix, factors);
Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils.getBaselineConditions(samplesUsed, factors);
ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors, samplesUsed, baselineConditions);
DesignMatrix properDesignMatrix = new DesignMatrix(designMatrix, true);
ExpressionDataDoubleMatrix dmatrix = new ExpressionDataDoubleMatrix(samplesUsed, matrix);
DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();
DoubleMatrix<String, String> sNamedMatrix = this.makeDataMatrix(designMatrix, namedMatrix);
// perform weighted least squares regression on COUNT data
QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
LeastSquaresFit fit;
if (quantitationType.getScale().equals(ScaleType.COUNT)) {
LinearModelAnalyzer.log.info("Calculating residuals of weighted least squares regression on COUNT data");
// note: data is not log transformed
DoubleMatrix1D librarySize = MatrixStats.colSums(sNamedMatrix);
MeanVarianceEstimator mv = new MeanVarianceEstimator(properDesignMatrix, sNamedMatrix, librarySize);
fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix, mv.getWeights());
} else {
fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix);
}
DoubleMatrix2D residuals = fit.getResiduals();
if (retainScale) {
DoubleMatrix1D intercept = fit.getCoefficients().viewRow(0);
for (int i = 0; i < residuals.rows(); i++) {
residuals.viewRow(i).assign(Functions.plus(intercept.get(i)));
}
}
DoubleMatrix<CompositeSequence, BioMaterial> f = new DenseDoubleMatrix<>(residuals.toArray());
f.setRowNames(dmatrix.getMatrix().getRowNames());
f.setColumnNames(dmatrix.getMatrix().getColNames());
return new ExpressionDataDoubleMatrix(dmatrix, f);
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class PreprocessorServiceImpl method getCorrectedData.
private ExpressionDataDoubleMatrix getCorrectedData(ExpressionExperiment ee, Collection<ProcessedExpressionDataVector> vecs) throws PreprocessingException {
/*
* FIXME perhaps here we should remove rows that are going to be problematic?
*/
ExpressionDataDoubleMatrix correctedData = expressionExperimentBatchCorrectionService.comBat(new ExpressionDataDoubleMatrix(vecs));
if (correctedData == null || correctedData.rows() != vecs.size()) {
throw new PreprocessingException(ee.getShortName() + " could not be batch-corrected (matrix returned by ComBat had wrong number of rows)");
}
this.checkQuantitationType(correctedData);
return correctedData;
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class ProcessedExpressionDataCreateServiceTest method testComputeDevRankForExpressionExperimentMultiArrayWithGaps.
/**
* Three platforms, one sample was not run on GPL81. It's 'Norm-1a', but the name we use for the sample is random.
*/
@SuppressWarnings("unchecked")
@Test
public void testComputeDevRankForExpressionExperimentMultiArrayWithGaps() throws Exception {
try {
geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse482short")));
Collection<ExpressionExperiment> results = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE482", false, true, false);
this.ee = results.iterator().next();
} catch (AlreadyExistsInSystemException e) {
this.ee = ((Collection<ExpressionExperiment>) e.getData()).iterator().next();
}
ee = this.eeService.thawLite(ee);
processedExpressionDataVectorService.computeProcessedExpressionData(ee);
Collection<ProcessedExpressionDataVector> preferredVectors = this.processedExpressionDataVectorService.getProcessedDataVectors(ee);
ee = eeService.load(ee.getId());
ee = this.eeService.thawLite(ee);
processedExpressionDataVectorService.thaw(preferredVectors);
ExpressionDataDoubleMatrix mat = new ExpressionDataDoubleMatrix(preferredVectors);
assertEquals(10, mat.columns());
boolean found = false;
for (int i = 0; i < mat.rows(); i++) {
Double[] row = mat.getRow(i);
// debugging
if (i == 0) {
for (int j = 0; j < row.length; j++) {
BioAssay ba = mat.getBioAssaysForColumn(j).iterator().next();
System.err.println(ba.getName());
}
}
System.err.print(mat.getRowElement(i).getDesignElement().getName() + "\t");
for (double d : row) {
System.err.print(String.format("%4.2f\t", d));
}
System.err.print("\n");
CompositeSequence el = mat.getDesignElementForRow(i);
for (int j = 0; j < row.length; j++) {
BioAssay ba = mat.getBioAssaysForColumn(j).iterator().next();
if (ba.getName().matches("PGA-MurLungHyper-Norm-1a[ABC]v2-s2") && (el.getName().equals("100001_at") || el.getName().equals("100002_at") || el.getName().equals("100003_at") || el.getName().equals("100004_at") || el.getName().equals("100005_at") || el.getName().equals("100006_at") || el.getName().equals("100007_at") || el.getName().equals("100009_r_at") || el.getName().equals("100010_at") || el.getName().equals("100011_at"))) {
assertEquals(Double.NaN, row[j], 0.0001);
found = true;
} else {
assertTrue("Got unexpected NA value for " + ba.getName() + " for " + el.getName(), !Double.isNaN(row[j]));
}
}
}
assertTrue(found);
/*
* Now do this through the processedExpressionDataVectorService
*/
Collection<DoubleVectorValueObject> da = this.processedExpressionDataVectorService.getProcessedDataArrays(ee);
assertEquals(30, da.size());
found = false;
boolean first = true;
for (DoubleVectorValueObject v : da) {
CompositeSequenceValueObject el = v.getDesignElement();
double[] row = v.getData();
// debugging
if (first) {
for (int j = 0; j < row.length; j++) {
BioAssayValueObject ba = v.getBioAssays().get(j);
System.err.println(ba.getName());
}
first = false;
}
System.err.print(el.getName() + "\t");
for (double d : row) {
System.err.print(String.format("%4.2f\t", d));
}
System.err.print("\n");
assertEquals(10, row.length);
for (int j = 0; j < row.length; j++) {
assertNotNull(v.getBioAssays());
BioAssayValueObject ba = v.getBioAssays().get(j);
if (ba.getName().startsWith("Missing bioassay for biomaterial") && (el.getName().equals("100001_at") || el.getName().equals("100002_at") || el.getName().equals("100003_at") || el.getName().equals("100004_at") || el.getName().equals("100005_at") || el.getName().equals("100006_at") || el.getName().equals("100007_at") || el.getName().equals("100009_r_at") || el.getName().equals("100010_at") || el.getName().equals("100011_at"))) {
assertEquals(Double.NaN, row[j], 0.0001);
found = true;
} else {
assertTrue("Got unexpected NA value for " + ba.getName() + " for " + el.getName(), !Double.isNaN(row[j]));
}
}
}
assertTrue(found);
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class ExpressionExperimentBatchCorrectionServiceTest method testComBatOnEE.
@Test
public void testComBatOnEE() throws Exception {
geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse18162Short")));
ExpressionExperiment newee;
try {
Collection<?> results = geoService.fetchAndLoad("GSE18162", false, true, false);
newee = (ExpressionExperiment) results.iterator().next();
} catch (AlreadyExistsInSystemException e) {
newee = (ExpressionExperiment) ((List<?>) e.getData()).iterator().next();
}
assertNotNull(newee);
newee = expressionExperimentService.thawLite(newee);
processedExpressionDataVectorService.computeProcessedExpressionData(newee);
try (InputStream deis = this.getClass().getResourceAsStream("/data/loader/expression/geo/gse18162Short/design.txt")) {
experimentalDesignImporter.importDesign(newee, deis);
}
ExpressionDataDoubleMatrix comBat = correctionService.comBat(newee);
assertNotNull(comBat);
}
Aggregations