use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class ExperimentDEDVEndpoint method invokeInternal.
/**
 * Reads the given <code>requestElement</code> and sends the response back.
 * <p>
 * The experiment id is taken from the <code>ee_id</code> node of the request. If a report for that id can
 * be read back via <code>readReport</code>, its document element is returned as-is. Otherwise the processed
 * expression data matrix of the experiment is fetched and the response is built manually: for every matrix
 * row one <code>dedv</code> element (the encoded row) followed by one <code>geneIdist</code> element (the
 * ids of the genes for that row's design element, joined with <code>DELIMITER</code>). The freshly built
 * response is handed to <code>writeReport</code> before it is returned.
 *
 * @param requestElement the contents of the SOAP message as DOM elements
 * @param document a DOM document to be used for constructing <code>Node</code>s
 * @return the response element
 * @throws NumberFormatException if the requested <code>ee_id</code> is not a parsable long
 * @throws IllegalArgumentException if no expression experiment is loaded for the requested id
 */
@Override
protected Element invokeInternal(Element requestElement, Document document) throws Exception {
    StopWatch watch = new StopWatch();
    watch.start();
    setLocalName(EXPERIMENT_LOCAL_NAME);

    // If more than one value comes back, the last one wins.
    String eeid = "";
    Collection<String> eeResults = getSingleNodeValue(requestElement, "ee_id");
    for (String id : eeResults) {
        eeid = id;
    }

    // Validate the id BEFORE it is spliced into a report file name: the value comes straight from the
    // request, and a parsable long cannot contain path separators.
    long experimentId = Long.parseLong(eeid);

    // Check to make sure we haven't already generated this EE report.
    Document doc = readReport(DEFAULT_FILENAME + eeid + DEFAULT_EXTENSION);
    if (doc != null) {
        // Successfully got report from disk
        watch.stop();
        long time = watch.getTime();
        log.info("XML response for ee" + eeid + " retrieved from disk in " + time + "ms.");
        return doc.getDocumentElement();
    }

    // Build the matrix
    ExpressionExperiment ee = expressionExperimentService.load(experimentId);
    if (ee == null) {
        // Fail with a clear message instead of passing null on to thawLite / the matrix service.
        throw new IllegalArgumentException("No expression experiment with id " + eeid + " can be found.");
    }
    ee = expressionExperimentService.thawLite(ee);
    ExpressionDataDoubleMatrix dmatrix = expressionDataMatrixService.getProcessedExpressionDataMatrix(ee);

    // start building the wrapper
    // build xml manually rather than use buildWrapper inherited from AbstractGemmeEndpoint
    String elementName1 = "dedv";
    String elementName2 = "geneIdist";
    Element responseWrapper = document.createElementNS(NAMESPACE_URI, EXPERIMENT_LOCAL_NAME);
    Element responseElement = document.createElementNS(NAMESPACE_URI, EXPERIMENT_LOCAL_NAME + RESPONSE);
    responseWrapper.appendChild(responseElement);

    if (dmatrix == null || (dmatrix.rows() == 0)) {
        responseElement.appendChild(document.createTextNode("No " + elementName1 + " result"));
    } else {
        for (int rowNum = 0; rowNum < dmatrix.rows(); rowNum++) {
            // data vector string for output
            String elementString1 = encode(dmatrix.getRow(rowNum));

            // DELIMITER-separated ids of the genes attached to this row's design element
            CompositeSequence de = dmatrix.getDesignElementForRow(rowNum);
            Collection<Gene> geneCol = compositeSequenceService.getGenes(de);
            StringBuilder geneIds = new StringBuilder();
            for (Gene gene : geneCol) {
                if (geneIds.length() > 0) {
                    geneIds.append(DELIMITER);
                }
                geneIds.append(gene.getId().toString());
            }
            String elementString2 = geneIds.toString();

            Element e1 = document.createElement(elementName1);
            e1.appendChild(document.createTextNode(elementString1));
            responseElement.appendChild(e1);
            Element e2 = document.createElement(elementName2);
            e2.appendChild(document.createTextNode(elementString2));
            responseElement.appendChild(e2);
        }
    }

    watch.stop();
    long time = watch.getTime();
    log.info("XML response for ee:" + eeid + " created from scratch in " + time + "ms.");
    writeReport(responseWrapper, document, DEFAULT_FILENAME + eeid);
    return responseWrapper;
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class DataUpdaterTest method testAddData.
/**
 * Loads GSE37646 through the local GEO generator, replaces its data with made-up values on a test
 * platform, and checks the resulting vectors; then adds a second (non-preferred) data set and verifies
 * that adding the preferred data a second time is rejected.
 */
@Test
public void testAddData() throws Exception {
    // Start from a regular data set that has no data. Platform is (basically) irrelevant.
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
    ExpressionExperiment ee;
    try {
        // RNA-seq data.
        Collection<?> loaded = geoService.fetchAndLoad("GSE37646", false, true, false);
        ee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        // The experiment was left over from an earlier run: reuse the instance carried by the exception.
        List<?> existing = (List<?>) alreadyLoaded.getData();
        ee = (ExpressionExperiment) existing.get(0);
    }
    ee = experimentService.thawLite(ee);

    // Collect the sample behind every bioassay; these become the matrix columns.
    List<BioAssay> assays = new ArrayList<>(ee.getBioAssays());
    assertEquals(31, assays.size());
    List<BioMaterial> samples = new ArrayList<>();
    for (BioAssay assay : assays) {
        samples.add(assay.getSampleUsed());
    }

    targetArrayDesign = this.getTestPersistentArrayDesign(100, true);
    int probeCount = targetArrayDesign.getCompositeSequences().size();
    DoubleMatrix<CompositeSequence, BioMaterial> rawMatrix = new DenseDoubleMatrix<>(probeCount, samples.size());

    // Make up some fake data on another platform, and match it to those samples.
    for (int row = 0; row < rawMatrix.rows(); row++) {
        for (int col = 0; col < rawMatrix.columns(); col++) {
            double value = (row + 1) * (col + 1) * Math.random() / 100.0;
            rawMatrix.set(row, col, value);
        }
    }
    List<CompositeSequence> probes = new ArrayList<>(targetArrayDesign.getCompositeSequences());
    rawMatrix.setRowNames(probes);
    rawMatrix.setColumnNames(samples);

    QuantitationType qt = this.makeQt(true);
    ExpressionDataDoubleMatrix preferredData = new ExpressionDataDoubleMatrix(ee, qt, rawMatrix);
    assertNotNull(preferredData.getBestBioAssayDimension());
    assertEquals(rawMatrix.columns(), preferredData.getBestBioAssayDimension().getBioAssays().size());
    assertEquals(probes.size(), preferredData.getMatrix().rows());

    // Replace the experiment's data; every bioassay must now point at the new platform,
    // both on the returned instance and after a full thaw.
    ee = dataUpdater.replaceData(ee, targetArrayDesign, preferredData);
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }
    ee = experimentService.thaw(ee);
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }

    assertEquals(100, ee.getRawExpressionDataVectors().size());
    for (RawExpressionDataVector rawVector : ee.getRawExpressionDataVectors()) {
        assertTrue(rawVector.getQuantitationType().getIsPreferred());
    }
    assertEquals(100, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedVectors = dataVectorService.getProcessedDataArrays(ee);
    for (DoubleVectorValueObject processedVector : processedVectors) {
        assertEquals(31, processedVector.getBioAssays().size());
    }

    // Test adding data (non-preferred).
    qt = this.makeQt(false);
    ExpressionDataDoubleMatrix nonPreferredData = new ExpressionDataDoubleMatrix(ee, qt, rawMatrix);
    ee = dataUpdater.addData(ee, targetArrayDesign, nonPreferredData);
    ee = experimentService.thaw(ee);

    try {
        // Adding the preferred data a second time must be refused.
        dataUpdater.addData(ee, targetArrayDesign, preferredData);
        fail("Should have gotten an exception");
    } catch (IllegalArgumentException expected) {
        // okay.
    }

    // Remove the data for the non-preferred quantitation type again.
    dataUpdater.deleteData(ee, qt);
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class DataUpdaterTest method testLoadRNASeqDataWithMissingSamples.
/*
 * Test case where some samples cannot be used.
 *
 * GSE29006 is removed if present, fetched again, and count/RPKM matrices are read from test resources.
 * The first addCountData call (last argument false) is expected to fail with an IllegalArgumentException;
 * the second call (last argument true) is expected to succeed, after which the stored vectors are checked.
 */
@Test
public void testLoadRNASeqDataWithMissingSamples() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());

    // Make sure we start without the experiment in the system.
    ExpressionExperiment ee = experimentService.findByShortName("GSE29006");
    if (ee != null) {
        experimentService.remove(ee);
    }
    assertTrue(experimentService.findByShortName("GSE29006") == null);

    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = experimentService.thaw(ee);

    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt");
            InputStream rpkmData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_RPKM.test.txt")) {
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        DoubleMatrix<String, String> rpkmMatrix = reader.read(rpkmData);
        List<String> probeNames = countMatrix.getRowNames();

        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);

        try {
            // With the final flag set to false this call is expected to be rejected.
            dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, false);
            fail("Should have gotten an exception");
        } catch (IllegalArgumentException e) {
            // Expected
        }
        // Same call with the final flag set to true is expected to go through.
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, true);
    }

    /*
     * Check
     */
    ee = experimentService.thaw(ee);
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, ba.getArrayDesignUsed());
    }
    ExpressionDataDoubleMatrix mat = dataMatrixService.getProcessedExpressionDataMatrix(ee);
    assertEquals(199, mat.rows());
    assertTrue(mat.getQuantitationTypes().iterator().next().getName().startsWith("log2cpm"));
    assertEquals(4, ee.getBioAssays().size());
    assertEquals(199 * 3, ee.getRawExpressionDataVectors().size());
    assertEquals(199, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedDataArrays = dataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM718709", 320383);
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(4, v.getBioAssays().size());
    }
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method run.
/**
 * Runs the analysis on the given matrix, either once for the whole experiment or once per subset when
 * the configuration names a subset factor.
 * <p>
 * Phases: (1) get the data matrix and factors, (2) determine baseline groups and build model and
 * contrasts, (3) run the analysis, (4) postprocess. Phases 2-4 happen inside <code>doAnalysis</code>.
 *
 * @param expressionExperiment the experiment being analyzed
 * @param dmatrix the data to analyze; it is re-ordered to follow the sample order derived from the factors
 * @param config the analysis configuration (factors to include, optional subset factor)
 * @return the analyses that produced a result; empty if none did
 * @throws IllegalStateException if the subset factor is also among the factors to include
 */
@Override
public Collection<DifferentialExpressionAnalysis> run(ExpressionExperiment expressionExperiment, ExpressionDataDoubleMatrix dmatrix, DifferentialExpressionAnalysisConfig config) {
    // Initialize our matrix and factor lists...
    List<ExperimentalFactor> factors = config.getFactorsToInclude();

    // FIXME this is the place to strip out the outliers.
    List<BioMaterial> samplesUsed = ExperimentalDesignUtils.getOrderedSamples(dmatrix, factors);

    // enforce ordering
    ExpressionDataDoubleMatrix orderedMatrix = new ExpressionDataDoubleMatrix(samplesUsed, dmatrix);

    Collection<DifferentialExpressionAnalysis> results = new HashSet<>();
    ExperimentalFactor subsetFactor = config.getSubsetFactor();

    if (subsetFactor == null) {
        // No subsetting requested: analyze the whole thing as one.
        DifferentialExpressionAnalysis wholeAnalysis = this.doAnalysis(expressionExperiment, config, orderedMatrix, samplesUsed, factors, null);
        if (wholeAnalysis != null) {
            results.add(wholeAnalysis);
        } else {
            LinearModelAnalyzer.log.warn("No analysis results were obtained");
        }
        return results;
    }

    if (factors.contains(subsetFactor)) {
        throw new IllegalStateException("Subset factor cannot also be included in the analysis [ Factor was: " + subsetFactor + "]");
    }

    Map<FactorValue, ExpressionDataDoubleMatrix> subsets = this.makeSubSets(config, orderedMatrix, samplesUsed, subsetFactor);
    LinearModelAnalyzer.log.info("Total number of subsets: " + subsets.size());

    // Now analyze each subset.
    for (Map.Entry<FactorValue, ExpressionDataDoubleMatrix> subsetEntry : subsets.entrySet()) {
        FactorValue subsetFactorValue = subsetEntry.getKey();
        ExpressionDataDoubleMatrix subsetMatrix = subsetEntry.getValue();
        LinearModelAnalyzer.log.info("Analyzing subset: " + subsetFactorValue);

        // Subsets carrying a DE_Exclude characteristic should not be included in the analysis.
        // As requested in issue #4458 (bugzilla)
        boolean excluded = false;
        for (Characteristic characteristic : subsetFactorValue.getCharacteristics()) {
            if (LinearModelAnalyzer.EXCLUDE_CHARACTERISTICS_VALUES.contains(characteristic.getValue())) {
                excluded = true;
                break;
            }
        }
        if (excluded) {
            LinearModelAnalyzer.log.warn(LinearModelAnalyzer.EXCLUDE_WARNING);
            continue;
        }

        List<BioMaterial> bioMaterials = ExperimentalDesignUtils.getOrderedSamples(subsetMatrix, factors);

        // Make an EESubSet holding every bioassay the subset's samples were used in.
        ExpressionExperimentSubSet eeSubSet = ExpressionExperimentSubSet.Factory.newInstance();
        eeSubSet.setSourceExperiment(expressionExperiment);
        eeSubSet.setName("Subset for " + subsetFactorValue);
        Collection<BioAssay> bioAssays = new HashSet<>();
        for (BioMaterial sample : bioMaterials) {
            bioAssays.addAll(sample.getBioAssaysUsedIn());
        }
        eeSubSet.getBioAssays().addAll(bioAssays);

        Collection<ExperimentalFactor> subsetFactors = this.fixFactorsForSubset(subsetMatrix, eeSubSet, factors);
        DifferentialExpressionAnalysisConfig subsetConfig = this.fixConfigForSubset(factors, config, subsetFactorValue);
        if (subsetFactors.isEmpty()) {
            LinearModelAnalyzer.log.warn("Experimental design is not valid for subset: " + subsetFactorValue + "; skipping");
            continue;
        }

        // Run analysis on the subset.
        DifferentialExpressionAnalysis subsetAnalysis = this.doAnalysis(eeSubSet, subsetConfig, subsetMatrix, bioMaterials, new ArrayList<>(subsetFactors), subsetFactorValue);
        if (subsetAnalysis == null) {
            LinearModelAnalyzer.log.warn("No analysis results were obtained for subset: " + subsetFactorValue);
            continue;
        }
        results.add(subsetAnalysis);
    }
    return results;
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method run.
/**
 * Runs the analysis for a single, already existing subset of an experiment.
 * <p>
 * The processed data matrix of the subset's source experiment is fetched and sliced down to the samples
 * of the subset's bioassays. The subset's factor value is the one value of the configured subset factor
 * found on those samples.
 *
 * @param subset the subset to analyze
 * @param config the analysis configuration (factors to include, subset factor)
 * @return the analysis, or <code>null</code> if no usable factors remain for this subset
 * @throws IllegalStateException if the samples carry more than one value of the subset factor, or if
 *         the analysis produced no result
 */
@Override
public DifferentialExpressionAnalysis run(ExpressionExperimentSubSet subset, DifferentialExpressionAnalysisConfig config) {
    // Start by setting it up like the full experiment.
    ExpressionDataDoubleMatrix fullMatrix = expressionDataMatrixService.getProcessedExpressionDataMatrix(subset.getSourceExperiment());
    ExperimentalFactor subsetFactor = config.getSubsetFactor();

    // Distinct samples behind the subset's bioassays.
    Collection<BioMaterial> distinctSamples = new HashSet<>();
    for (BioAssay assay : subset.getBioAssays()) {
        distinctSamples.add(assay.getSampleUsed());
    }
    List<BioMaterial> samplesInSubset = new ArrayList<>(distinctSamples);

    // All samples must agree on a single value of the subset factor.
    FactorValue subsetFactorValue = null;
    for (BioMaterial sample : samplesInSubset) {
        for (FactorValue candidate : sample.getFactorValues()) {
            if (!candidate.getExperimentalFactor().equals(subsetFactor)) {
                continue;
            }
            if (subsetFactorValue == null) {
                subsetFactorValue = candidate;
                continue;
            }
            if (!subsetFactorValue.equals(candidate)) {
                throw new IllegalStateException("This subset has more than one factor value for the supposed subset factor: " + candidate + " and " + subsetFactorValue);
            }
        }
    }

    samplesInSubset = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(samplesInSubset, config.getFactorsToInclude());

    // slice.
    ExpressionDataDoubleMatrix subsetMatrix = new ExpressionDataDoubleMatrix(samplesInSubset, fullMatrix);

    // NOTE(review): the un-sliced matrix is passed here, and the resulting factors are only used for the
    // emptiness check below (doAnalysis receives config.getFactorsToInclude()) - confirm this is intended.
    Collection<ExperimentalFactor> subsetFactors = this.fixFactorsForSubset(fullMatrix, subset, config.getFactorsToInclude());
    if (subsetFactors.isEmpty()) {
        LinearModelAnalyzer.log.warn("Experimental design is not valid for subset: " + subsetFactorValue + "; skipping");
        return null;
    }

    DifferentialExpressionAnalysisConfig subsetConfig = this.fixConfigForSubset(config.getFactorsToInclude(), config, subsetFactorValue);
    DifferentialExpressionAnalysis result = this.doAnalysis(subset, subsetConfig, subsetMatrix, samplesInSubset, config.getFactorsToInclude(), subsetFactorValue);
    if (result == null) {
        throw new IllegalStateException("Subset could not be analyzed with config: " + config);
    }
    return result;
}
Aggregations