use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class DataUpdaterTest method testLoadRNASeqDataWithMissingSamples.
/*
 * Test case where some samples cannot be used.
 *
 * Outline:
 * 1. Make sure GSE29006 is not in the system, then fetch and load it through the GEO service.
 * 2. Read the count and RPKM matrices from the test resources and build a platform from the count
 *    matrix row names.
 * 3. Call addCountData twice: the first call (last flag false) must fail with an
 *    IllegalArgumentException, the second (last flag true) must succeed.
 *    NOTE(review): the last flag presumably means "allow missing samples" -- confirm against
 *    DataUpdater.addCountData.
 * 4. Check the resulting bioassays, quantitation type and vector counts.
 */
@Test
public void testLoadRNASeqDataWithMissingSamples() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());

    // Start from a clean slate: drop any copy of the data set left over from an earlier run.
    ExpressionExperiment ee = experimentService.findByShortName("GSE29006");
    if (ee != null) {
        experimentService.remove(ee);
    }
    assertTrue(experimentService.findByShortName("GSE29006") == null);

    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = experimentService.thaw(ee);

    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt");
            InputStream rpkmData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_RPKM.test.txt")) {
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        DoubleMatrix<String, String> rpkmMatrix = reader.read(rpkmData);
        List<String> probeNames = countMatrix.getRowNames();

        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);

        // With the last flag off the update is expected to be rejected.
        try {
            dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, false);
            fail("Should have gotten an exception");
        } catch (IllegalArgumentException e) {
            // Expected
        }

        // With the last flag on the same update must go through.
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, true);
    }

    /*
     * Check
     */
    ee = experimentService.thaw(ee);

    // Every bioassay must now be on the target platform.
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, ba.getArrayDesignUsed());
    }

    ExpressionDataDoubleMatrix mat = dataMatrixService.getProcessedExpressionDataMatrix(ee);
    assertEquals(199, mat.rows());
    assertTrue(mat.getQuantitationTypes().iterator().next().getName().startsWith("log2cpm"));

    assertEquals(4, ee.getBioAssays().size());
    assertEquals(199 * 3, ee.getRawExpressionDataVectors().size());
    assertEquals(199, ee.getProcessedExpressionDataVectors().size());

    Collection<DoubleVectorValueObject> processedDataArrays = dataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());

    TestUtils.assertBAs(ee, targetArrayDesign, "GSM718709", 320383);

    // Each processed vector is expected to span all four bioassays.
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(4, v.getBioAssays().size());
    }
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class SampleCoexpressionMatrixServiceTest method test.
/*
 * Builds the sample coexpression matrix for a test experiment via create() twice and then via
 * findOrCreate(), running the same check on each result.
 */
@Test
public void test() {
    ExpressionExperiment experiment = super.getTestPersistentCompleteExpressionExperiment(false);
    processedExpressionDataVectorService.computeProcessedExpressionData(experiment);

    // first computation
    DoubleMatrix<BioAssay, BioAssay> created = sampleCoexpressionMatrixService.create(experiment);
    this.check(created);

    // recompute ...
    DoubleMatrix<BioAssay, BioAssay> recreated = sampleCoexpressionMatrixService.create(experiment);
    this.check(recreated);

    // and finally go through the find-or-create entry point
    DoubleMatrix<BioAssay, BioAssay> found = sampleCoexpressionMatrixService.findOrCreate(experiment);
    this.check(found);
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class ExperimentalDesignImportDuplicateValueTest method testParse.
/*
 * Imports the experimental design from the test resource into the experiment, then passes the
 * samples of all its bioassays to checkResults.
 *
 * Caveat: this test is not repeatable on a dirty DB; a second run against the same database
 * will fail. Sorry!
 */
@Test
public final void testParse() throws Exception {
    try (InputStream designStream = this.getClass().getResourceAsStream("/data/loader/expression/expdesign.import.testfull.txt")) {
        experimentalDesignImporter.importDesign(ee, designStream);
    }

    // Gather the distinct samples used by the experiment's bioassays.
    Collection<BioMaterial> samples = new HashSet<>();
    for (BioAssay assay : ee.getBioAssays()) {
        samples.add(assay.getSampleUsed());
    }

    checkResults(samples);
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method run.
/**
 * Runs the differential expression analysis on an experiment, either as a whole or, when the
 * configuration names a subset factor, separately for each subset.
 *
 * @param expressionExperiment the experiment being analyzed; for subset analyses it becomes the
 *                             source experiment of each generated subset
 * @param dmatrix              the expression data; it is re-ordered to follow the ordered samples
 *                             before any analysis is run
 * @param config               supplies the factors to include and, optionally, the subset factor
 * @return the analyses that produced results: at most one when no subset factor is configured,
 *         otherwise at most one per subset; never null, possibly empty
 * @throws IllegalStateException if the subset factor is also among the factors to include
 */
@Override
public Collection<DifferentialExpressionAnalysis> run(ExpressionExperiment expressionExperiment, ExpressionDataDoubleMatrix dmatrix, DifferentialExpressionAnalysisConfig config) {
    /*
     * I apologize for this being so complicated. Basically there are four phases:
     *
     * 1. Get the data matrix and factors
     *
     * 2. Determine baseline groups; build model and contrasts
     *
     * 3. Run the analysis
     *
     * 4. Postprocess the analysis
     *
     * By far the most complex is #2 -- it depends on which factors and what kind they are.
     */

    /*
     * Initialize our matrix and factor lists...
     */
    List<ExperimentalFactor> factors = config.getFactorsToInclude();

    /*
     * FIXME this is the place to strip out the outliers.
     */
    List<BioMaterial> samplesUsed = ExperimentalDesignUtils.getOrderedSamples(dmatrix, factors);

    // enforce ordering
    dmatrix = new ExpressionDataDoubleMatrix(samplesUsed, dmatrix);

    /*
     * Do the analysis, by subsets if requested
     */
    Collection<DifferentialExpressionAnalysis> results = new HashSet<>();
    ExperimentalFactor subsetFactor = config.getSubsetFactor();

    if (subsetFactor != null) {
        if (factors.contains(subsetFactor)) {
            throw new IllegalStateException("Subset factor cannot also be included in the analysis [ Factor was: " + subsetFactor + "]");
        }

        Map<FactorValue, ExpressionDataDoubleMatrix> subsets = this.makeSubSets(config, dmatrix, samplesUsed, subsetFactor);
        LinearModelAnalyzer.log.info("Total number of subsets: " + subsets.size());

        /*
         * Now analyze each subset. Iterating the entries gives the key and its matrix together,
         * avoiding a map lookup for every use of the matrix.
         */
        for (Map.Entry<FactorValue, ExpressionDataDoubleMatrix> subsetEntry : subsets.entrySet()) {
            FactorValue subsetFactorValue = subsetEntry.getKey();
            ExpressionDataDoubleMatrix subsetMatrix = subsetEntry.getValue();

            LinearModelAnalyzer.log.info("Analyzing subset: " + subsetFactorValue);

            /*
             * Checking for DE_Exclude characteristics, which should not be included in the analysis.
             * As requested in issue #4458 (bugzilla)
             */
            boolean include = true;
            for (Characteristic c : subsetFactorValue.getCharacteristics()) {
                if (LinearModelAnalyzer.EXCLUDE_CHARACTERISTICS_VALUES.contains(c.getValue())) {
                    include = false;
                    break;
                }
            }
            if (!include) {
                LinearModelAnalyzer.log.warn(LinearModelAnalyzer.EXCLUDE_WARNING);
                continue;
            }

            List<BioMaterial> bioMaterials = ExperimentalDesignUtils.getOrderedSamples(subsetMatrix, factors);

            /*
             * make a EESubSet
             */
            ExpressionExperimentSubSet eeSubSet = ExpressionExperimentSubSet.Factory.newInstance();
            eeSubSet.setSourceExperiment(expressionExperiment);
            eeSubSet.setName("Subset for " + subsetFactorValue);

            // The subset consists of every bioassay that used one of the subset's samples.
            Collection<BioAssay> bioAssays = new HashSet<>();
            for (BioMaterial bm : bioMaterials) {
                bioAssays.addAll(bm.getBioAssaysUsedIn());
            }
            eeSubSet.getBioAssays().addAll(bioAssays);

            Collection<ExperimentalFactor> subsetFactors = this.fixFactorsForSubset(subsetMatrix, eeSubSet, factors);
            DifferentialExpressionAnalysisConfig subsetConfig = this.fixConfigForSubset(factors, config, subsetFactorValue);

            if (subsetFactors.isEmpty()) {
                LinearModelAnalyzer.log.warn("Experimental design is not valid for subset: " + subsetFactorValue + "; skipping");
                continue;
            }

            /*
             * Run analysis on the subset.
             */
            DifferentialExpressionAnalysis analysis = this.doAnalysis(eeSubSet, subsetConfig, subsetMatrix, bioMaterials, new ArrayList<>(subsetFactors), subsetFactorValue);
            if (analysis == null) {
                LinearModelAnalyzer.log.warn("No analysis results were obtained for subset: " + subsetFactorValue);
                continue;
            }
            results.add(analysis);
        }
    } else {
        /*
         * Analyze the whole thing as one
         */
        DifferentialExpressionAnalysis analysis = this.doAnalysis(expressionExperiment, config, dmatrix, samplesUsed, factors, null);
        if (analysis == null) {
            LinearModelAnalyzer.log.warn("No analysis results were obtained");
        } else {
            results.add(analysis);
        }
    }

    return results;
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method run.
/**
 * Runs the differential expression analysis on a single, already defined subset of an experiment.
 *
 * @param subset the subset to analyze; its source experiment supplies the processed data matrix
 * @param config supplies the subset factor and the factors to include
 * @return the analysis for the subset, or null when no usable factor remains for the subset
 * @throws IllegalStateException if the samples of the subset carry more than one value of the
 *                               subset factor, or if the analysis yields no result
 */
@Override
public DifferentialExpressionAnalysis run(ExpressionExperimentSubSet subset, DifferentialExpressionAnalysisConfig config) {
    /*
     * Begin as for a full experiment: fetch the processed data of the source experiment.
     */
    ExpressionDataDoubleMatrix fullMatrix = expressionDataMatrixService.getProcessedExpressionDataMatrix(subset.getSourceExperiment());
    ExperimentalFactor subsetFactor = config.getSubsetFactor();

    // Distinct samples behind the bioassays of the subset.
    Collection<BioMaterial> distinctSamples = new HashSet<>();
    for (BioAssay assay : subset.getBioAssays()) {
        distinctSamples.add(assay.getSampleUsed());
    }
    List<BioMaterial> samplesInSubset = new ArrayList<>(distinctSamples);

    // Find the single value of the subset factor shared by all samples of the subset.
    FactorValue subsetFactorValue = null;
    for (BioMaterial sample : samplesInSubset) {
        for (FactorValue candidate : sample.getFactorValues()) {
            if (!candidate.getExperimentalFactor().equals(subsetFactor)) {
                continue;
            }
            if (subsetFactorValue == null) {
                subsetFactorValue = candidate;
                continue;
            }
            if (!subsetFactorValue.equals(candidate)) {
                throw new IllegalStateException("This subset has more than one factor value for the supposed subset factor: " + candidate + " and " + subsetFactorValue);
            }
        }
    }

    samplesInSubset = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(samplesInSubset, config.getFactorsToInclude());

    // Slice the full matrix down to the ordered samples of the subset.
    ExpressionDataDoubleMatrix subsetMatrix = new ExpressionDataDoubleMatrix(samplesInSubset, fullMatrix);

    // NOTE(review): the filtered factors are only tested for emptiness here; doAnalysis below is
    // given the unfiltered config.getFactorsToInclude(). The other run overload passes the
    // filtered list instead -- confirm which is intended.
    Collection<ExperimentalFactor> usableFactors = this.fixFactorsForSubset(fullMatrix, subset, config.getFactorsToInclude());
    if (usableFactors.isEmpty()) {
        LinearModelAnalyzer.log.warn("Experimental design is not valid for subset: " + subsetFactorValue + "; skipping");
        return null;
    }

    DifferentialExpressionAnalysisConfig subsetConfig = this.fixConfigForSubset(config.getFactorsToInclude(), config, subsetFactorValue);
    DifferentialExpressionAnalysis result = this.doAnalysis(subset, subsetConfig, subsetMatrix, samplesInSubset, config.getFactorsToInclude(), subsetFactorValue);
    if (result != null) {
        return result;
    }
    throw new IllegalStateException("Subset could not be analyzed with config: " + config);
}
Aggregations