use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class ExpressionDataSVD method removeHighestComponents.
/**
 * Rebuild the data matrix from the decomposition after zeroing the leading diagonal entries of S, i.e. with the
 * first N (most significant) components taken out. When the input was normalized beforehand, dropping the first
 * component mirrors the normalization used by Nielsen et al. (Lancet 359, 2002) and Alter et al. (PNAS 2000). If
 * the nuisance variable is known, an ANOVA-based correction would give similar results.
 *
 * @param numComponentsToRemove how many components to drop, counted from the largest eigenvalue downwards.
 * @return the reconstructed matrix; cells flagged NaN in the missing value info are set back to NaN.
 */
public ExpressionDataDoubleMatrix removeHighestComponents(int numComponentsToRemove) {
    // Zero the leading diagonal entries on the matrix returned by copy(), not on svd.getS() itself.
    DoubleMatrix<Integer, Integer> truncatedS = svd.getS().copy();
    int component = 0;
    while (component < numComponentsToRemove) {
        truncatedS.set(component, component, 0.0);
        component++;
    }

    // Wrap the raw arrays so the product can be computed with the Algebra helper.
    DoubleMatrix2D left = new DenseDoubleMatrix2D(svd.getU().getRawMatrix());
    DoubleMatrix2D diagonal = new DenseDoubleMatrix2D(truncatedS.getRawMatrix());
    DoubleMatrix2D right = new DenseDoubleMatrix2D(svd.getV().getRawMatrix());

    // U * S' * V^T
    Algebra algebra = new Algebra();
    DoubleMatrix2D product = algebra.mult(algebra.mult(left, diagonal), algebra.transpose(right));

    DoubleMatrix<CompositeSequence, BioMaterial> result = new DenseDoubleMatrix<>(product.toArray());
    result.setRowNames(this.expressionData.getMatrix().getRowNames());
    result.setColumnNames(this.expressionData.getMatrix().getColNames());

    // Put the missing values back: the reconstruction fills every cell.
    for (int row = 0; row < result.rows(); row++) {
        for (int col = 0; col < result.columns(); col++) {
            if (!Double.isNaN(this.missingValueInfo.get(row, col))) {
                continue;
            }
            result.set(row, col, Double.NaN);
        }
    }

    return new ExpressionDataDoubleMatrix(this.expressionData, result);
}
use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method regressionResiduals.
/**
 * Fit a linear model for the configured factors and return what is left over.
 *
 * @param matrix      on which to perform regression.
 * @param config      supplies the factors to include. Any interactions or subset configuration is ignored, as is
 *                    the qValueThreshold. Data are <em>NOT</em> log transformed unless they come in that way.
 * @param retainScale if true, the intercept (first row of the fitted coefficients) is added back to each row of
 *                    residuals, so the data retain the global mean.
 * @return residuals from the regression; the input matrix itself when no factors are configured.
 */
@Override
public ExpressionDataDoubleMatrix regressionResiduals(ExpressionDataDoubleMatrix matrix,
        DifferentialExpressionAnalysisConfig config, boolean retainScale) {
    List<ExperimentalFactor> factors = config.getFactorsToInclude();
    if (factors.isEmpty()) {
        LinearModelAnalyzer.log.warn("No factors");
        return matrix;
    }

    /*
     * The setup stages below rely on code similar to doAnalysis.
     */
    List<BioMaterial> orderedSamples = ExperimentalDesignUtils.getOrderedSamples(matrix, factors);
    Map<ExperimentalFactor, FactorValue> baselines = ExperimentalDesignUtils
            .getBaselineConditions(orderedSamples, factors);
    ObjectMatrix<String, String, Object> rawDesign = ExperimentalDesignUtils
            .buildDesignMatrix(factors, orderedSamples, baselines);
    DesignMatrix design = new DesignMatrix(rawDesign, true);

    ExpressionDataDoubleMatrix reordered = new ExpressionDataDoubleMatrix(orderedSamples, matrix);
    DoubleMatrix<CompositeSequence, BioMaterial> reorderedValues = reordered.getMatrix();
    DoubleMatrix<String, String> data = this.makeDataMatrix(rawDesign, reorderedValues);

    // The scale of the first quantitation type decides between ordinary and weighted least squares.
    QuantitationType firstType = reordered.getQuantitationTypes().iterator().next();
    boolean countData = firstType.getScale().equals(ScaleType.COUNT);

    LeastSquaresFit fit;
    if (!countData) {
        fit = new LeastSquaresFit(design, data);
    } else {
        LinearModelAnalyzer.log.info("Calculating residuals of weighted least squares regression on COUNT data");
        // note: data is not log transformed
        DoubleMatrix1D librarySize = MatrixStats.colSums(data);
        MeanVarianceEstimator meanVariance = new MeanVarianceEstimator(design, data, librarySize);
        fit = new LeastSquaresFit(design, data, meanVariance.getWeights());
    }

    DoubleMatrix2D residuals = fit.getResiduals();
    if (retainScale) {
        // Shift every residual row by the matching entry of the first coefficient row.
        DoubleMatrix1D intercepts = fit.getCoefficients().viewRow(0);
        int row = 0;
        while (row < residuals.rows()) {
            residuals.viewRow(row).assign(Functions.plus(intercepts.get(row)));
            row++;
        }
    }

    DoubleMatrix<CompositeSequence, BioMaterial> named = new DenseDoubleMatrix<>(residuals.toArray());
    named.setRowNames(reordered.getMatrix().getRowNames());
    named.setColumnNames(reordered.getMatrix().getColNames());
    return new ExpressionDataDoubleMatrix(reordered, named);
}
use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class TTestAnalyzerTest method testOneSampleTtest.
/**
 * Runs the analyzer with a single factor that has been reduced to one level and compares the p-values of a few
 * probes against fixed expected values. Returns early (with a warning) when no R connection is available.
 */
@Test
public void testOneSampleTtest() throws Exception {
    if (!connected) {
        log.warn("Could not establish R connection. Skipping test ...");
        return;
    }

    this.configureVectors(super.biomaterials, "/data/stat-tests/onesample-ttest-data.txt");
    this.configureMocks();

    Collection<ExperimentalFactor> factors = new HashSet<>();
    factors.add(super.experimentalFactorA_Area);

    /*
     * Collapse the factor to a single level: drop the second factor value from the factor and from every sample,
     * and give every sample the first one instead.
     */
    Iterator<FactorValue> levels = experimentalFactorA_Area.getFactorValues().iterator();
    FactorValue kept = levels.next();
    FactorValue dropped = levels.next();
    experimentalFactorA_Area.getFactorValues().remove(dropped);
    for (BioMaterial sample : super.biomaterials) {
        sample.getFactorValues().remove(dropped);
        sample.getFactorValues().add(kept);
    }

    // must be for one-sample to make sense.
    quantitationType.setIsRatio(true);
    quantitationType.setScale(ScaleType.LOG2);

    DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
    config.setFactorsToInclude(factors);

    DifferentialExpressionAnalysis analysis = analyzer.run(expressionExperiment, config).iterator().next();
    ExpressionAnalysisResultSet resultSet = analysis.getResultSets().iterator().next();

    assertEquals(null, resultSet.getBaselineGroup());
    assertEquals(BaseAnalyzerConfigurationTest.NUM_DESIGN_ELEMENTS - 4, resultSet.getResults().size());

    // Spot-check p-values for selected probes; probes not listed only need a non-null p-value.
    // (Statistics previously noted but not checked here: probe_75 -0.1507, probe_94 6.6087.)
    for (DifferentialExpressionAnalysisResult result : resultSet.getResults()) {
        CompositeSequence probe = result.getProbe();
        Double pvalue = result.getPvalue();
        log.debug("probe: " + probe + "; p-value: " + pvalue);
        assertNotNull(pvalue);

        String probeName = probe.getName();
        switch (probeName) {
            case "probe_0":
                assertEquals(0.03505, pvalue, 0.00001);
                break;
            case "probe_16":
                assertEquals(0.03476, pvalue, 0.0001);
                break;
            case "probe_17":
                assertEquals(0.03578, pvalue, 0.0001);
                break;
            case "probe_75":
                assertEquals(0.8897, pvalue, 0.0001);
                break;
            case "probe_94":
                assertEquals(0.002717, pvalue, 0.0001);
                break;
        }
    }
}
use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class ProcessedExpressionDataCreateServiceTest method testReorder.
/**
 * Loads the GSE404 test data set, attaches a two-level categorical factor to its samples (alternating levels in
 * bioassay id order), then checks that {@code reorderByDesign} places the baseline samples first and that a
 * spot-checked vector still holds the expected values.
 */
@Test
public void testReorder() throws Exception {
    // Remove any copy of the experiment that is already present before loading.
    ExpressionExperiment old = eeService.findByShortName("GSE404");
    if (old != null) {
        eeService.remove(old);
    }
    try {
        geoService.setGeoDomainObjectGenerator(
                new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse404Short")));
        @SuppressWarnings("unchecked")
        Collection<ExpressionExperiment> results = (Collection<ExpressionExperiment>) geoService
                .fetchAndLoad("GSE404", false, true, false);
        this.ee = results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Fall back to the instance carried by the exception.
        this.ee = (ExpressionExperiment) e.getData();
    }
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);

    // Create a categorical factor with two values; "bar" is flagged as the baseline.
    ExperimentalFactor factor = ExperimentalFactor.Factory.newInstance();
    factor.setType(FactorType.CATEGORICAL);
    factor.setName(ee.getShortName() + " design");
    factor.setExperimentalDesign(ee.getExperimentalDesign());
    factor = eeService.addFactor(ee, factor);
    FactorValue fv1 = FactorValue.Factory.newInstance();
    FactorValue fv2 = FactorValue.Factory.newInstance();
    fv1.setValue("foo");
    fv1.setExperimentalFactor(factor);
    fv2.setValue("bar");
    fv2.setIsBaseline(true);
    fv2.setExperimentalFactor(factor);
    eeService.addFactorValue(ee, fv1);
    eeService.addFactorValue(ee, fv2);

    // Precondition for the assignments below. A JUnit assertion is used (rather than the Java assert keyword)
    // so the check runs regardless of whether the JVM was started with -ea.
    assertNotNull("fv1 has no id after addFactorValue", fv1.getId());

    // Walk the bioassays in id order so the alternating assignment is reproducible.
    List<BioAssay> basInOrder = new ArrayList<>(ee.getBioAssays());
    Collections.sort(basInOrder, new Comparator<BioAssay>() {
        @Override
        public int compare(BioAssay o1, BioAssay o2) {
            return o1.getId().compareTo(o2.getId());
        }
    });
    int assigned = 0;
    for (BioAssay ba : basInOrder) {
        BioMaterial bm = ba.getSampleUsed();
        if (!bm.getFactorValues().isEmpty()) {
            // Sample already has a factor value; leave it alone and do not advance the alternation.
            continue;
        }
        if (assigned % 2 == 0) {
            bm.getFactorValues().add(fv1);
        } else {
            bm.getFactorValues().add(fv2);
        }
        bioMaterialService.update(bm);
        assigned++;
    }
    factor = this.experimentalFactorService.load(factor.getId());
    assertEquals(2, factor.getFactorValues().size());

    /*
     * All that was setup. Now do the interesting bit
     */
    processedExpressionDataVectorService.reorderByDesign(ee.getId());

    /*
     * Now check the vectors...
     */
    Collection<ProcessedExpressionDataVector> resortedVectors = processedExpressionDataVectorService
            .getProcessedDataVectors(ee);
    boolean foundVector = false;
    assertTrue(resortedVectors.size() > 0);
    for (ProcessedExpressionDataVector vector : resortedVectors) {
        log.debug(vector.getDesignElement().getName() + " .........................");
        // Thawing to avoid a lazy-loading error, because this test runs outside of a transaction. All references
        // in the code under test run inside a transaction.
        BioAssayDimension bioAssayDimension = vector.getBioAssayDimension();
        bioAssayDimensionService.thawLite(bioAssayDimension);
        Collection<BioAssay> bioAssays = bioAssayDimension.getBioAssays();
        int position = 0;
        for (BioAssay ba : bioAssays) {
            BioMaterial bm = ba.getSampleUsed();
            assertEquals(1, bm.getFactorValues().size());
            FactorValue fv = bm.getFactorValues().iterator().next();
            assertNotNull(fv.getId());
            log.debug(ba.getId() + " " + fv.getId() + " " + fv);
            if (position < 10) {
                // first because it is baseline;
                assertEquals(fv2, fv);
            }
            position++;
        }

        /*
         * spot check the data, same place as before.
         */
        if (vector.getDesignElement().getName().equals("40")) {
            foundVector = true;
            // Work on the primitive array directly; boxing to Double[] only to unbox again is unnecessary.
            double[] values = new ByteArrayConverter().byteArrayToDoubles(vector.getData());
            assertEquals(20, values.length);
            assertEquals(-0.08, values[1], 0.001);
            assertEquals(0.45, values[10], 0.001);
            assertEquals(Double.NaN, values[19], 0.001);
        }
    }
    assertTrue("test vector not found", foundVector);
}
use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class CharacteristicServiceTest method setup.
/**
 * Builds the fixture: a test experiment with two characteristics, one characteristic on the sample of one of its
 * bioassays, and experimental factors and factor values, with one characteristic on one factor value.
 */
@Before
public void setup() {
    // Experiment-level characteristics; the two are kept in fields.
    ee = this.getTestPersistentBasicExpressionExperiment();
    ee.setCharacteristics(this.getTestPersistentCharacteristics(2));
    Characteristic[] experimentCharacteristics = ee.getCharacteristics().toArray(new Characteristic[0]);
    eeChar1 = experimentCharacteristics[0];
    eeChar2 = experimentCharacteristics[1];
    eeService.update(ee);

    // One characteristic on the sample of the first bioassay in the array.
    BioAssay[] assays = ee.getBioAssays().toArray(new BioAssay[0]);
    BioMaterial sample = assays[0].getSampleUsed();
    sample.setCharacteristics(this.getTestPersistentCharacteristics(1));
    bmService.update(sample);

    // Factors from the test helper, then factor values for the first factor the design returns.
    for (ExperimentalFactor candidate : testHelper.getExperimentalFactors(ee.getExperimentalDesign())) {
        eeService.addFactor(ee, candidate);
    }
    ExperimentalFactor firstFactor = ee.getExperimentalDesign().getExperimentalFactors().iterator().next();
    for (FactorValue value : testHelper.getFactorValues(firstFactor)) {
        eeService.addFactorValue(ee, value);
    }

    // One characteristic on the first factor value of that factor.
    FactorValue firstValue = firstFactor.getFactorValues().iterator().next();
    firstValue.setCharacteristics(this.getTestPersistentCharacteristics(1));
    fvService.update(firstValue);
}
Aggregations