use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class DataUpdaterTest method testLoadRNASeqData.
/**
 * More realistic test of RNA-seq loading, using GSE19166.
 * <p>
 * Loads the experiment through {@code geoService} (or reuses the copy reported by
 * {@link AlreadyExistsInSystemException}), reads count and RPKM matrices from test resources, attaches them
 * with {@code dataUpdater.addCountData} and then verifies quantitation types, the platform used by each
 * bioassay, and the number of raw and processed vectors.
 *
 * @throws Exception if loading the experiment or reading the fixtures fails
 */
@Test
public void testLoadRNASeqData() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    ExpressionExperiment ee;
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE19166", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // The data set is already present: reuse the first object carried by the exception.
        ee = (ExpressionExperiment) ((List<?>) e.getData()).get(0);
    }
    ee = experimentService.thaw(ee);
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE19166_expression_count.test.txt");
            InputStream rpkmData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE19166_expression_RPKM.test.txt")) {
        // getResourceAsStream returns null for a missing resource; fail here rather than inside the reader.
        assertNotNull(countData);
        assertNotNull(rpkmData);
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        DoubleMatrix<String, String> rpkmMatrix = reader.read(rpkmData);
        List<String> probeNames = countMatrix.getRowNames();
        assertEquals(199, probeNames.size());
        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        assertEquals(199, targetArrayDesign.getCompositeSequences().size());
        // Main step.
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, false);
    }
    // Thaw again so the assertions below see the state after addCountData.
    ee = experimentService.thaw(ee);
    // should have: log2cpm, counts, rpkm, and counts-masked ('preferred')
    assertEquals(4, ee.getQuantitationTypes().size());
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, ba.getArrayDesignUsed());
    }
    assertNotNull(ee.getNumberOfDataVectors());
    assertEquals(199, ee.getNumberOfDataVectors().intValue());
    // GSM475204 GSM475205 GSM475206 GSM475207 GSM475208 GSM475209
    // 3949585 3929008 3712314 3693219 3574068 3579631
    ExpressionDataDoubleMatrix mat = dataMatrixService.getProcessedExpressionDataMatrix(ee);
    assertEquals(199, mat.rows());
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM475204", 3949585);
    // Three raw vector sets of 199 rows each, plus one processed set.
    assertEquals(3 * 199, ee.getRawExpressionDataVectors().size());
    assertEquals(199, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedDataArrays = dataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(6, v.getBioAssays().size());
    }
    // Processed vectors must also be retrievable for a freshly loaded (not thawed) experiment.
    assertTrue(!dataVectorService.getProcessedDataVectors(experimentService.load(ee.getId())).isEmpty());
}
use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorServiceImpl method getExpressionLevelsDiffEx.
/**
 * Builds expression-level value objects for the given experiments from the vectors of one differential
 * expression result set.
 *
 * @param ees                 experiments to produce value objects for
 * @param diffExResultSetId   ID of the differential expression result set to take vectors from
 * @param threshold           threshold forwarded to {@link #getDiffExVectors(Long, Double, int)}
 * @param max                 maximum number of results forwarded to {@link #getDiffExVectors(Long, Double, int)}
 * @param keepGeneNonSpecific forwarded unchanged to {@code addExperimentGeneVectors}
 * @param consolidateMode     forwarded unchanged to {@code addExperimentGeneVectors}
 * @return the collected value objects; empty when {@code ees} is empty
 */
@Override
@Transactional(readOnly = true)
public Collection<ExperimentExpressionLevelsValueObject> getExpressionLevelsDiffEx(Collection<ExpressionExperiment> ees, Long diffExResultSetId, double threshold, int max, boolean keepGeneNonSpecific, String consolidateMode) {
    Collection<ExperimentExpressionLevelsValueObject> vos = new ArrayList<>(ees.size());
    if (ees.isEmpty()) {
        // Nothing to do; also avoids a result-set lookup that the per-experiment loop would never have made.
        return vos;
    }
    // Adapted from DEDV controller
    // The lookup depends only on the result set, threshold and max, not on the experiment, so do it once
    // instead of once per experiment.
    // NOTE(review): assumes addExperimentGeneVectors only reads the vectors collection - confirm.
    Collection<DoubleVectorValueObject> vectors = this.getDiffExVectors(diffExResultSetId, threshold, max);
    for (ExpressionExperiment ee : ees) {
        this.addExperimentGeneVectors(vos, ee, vectors, keepGeneNonSpecific, consolidateMode);
    }
    return vos;
}
use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorServiceImpl method getExpressionLevelsPca.
/**
 * Builds expression-level value objects for the given experiments from the top loaded vectors of one
 * component, as returned by {@code svdService.getTopLoadedVectors}.
 *
 * @param ees                 experiments to produce value objects for
 * @param limit               limit passed to {@code svdService.getTopLoadedVectors}
 * @param component           component passed to {@code svdService.getTopLoadedVectors}
 * @param keepGeneNonSpecific forwarded unchanged to {@code addExperimentGeneVectors}
 * @param consolidateMode     forwarded unchanged to {@code addExperimentGeneVectors}
 * @return the collected value objects
 */
@Override
@Transactional(readOnly = true)
public Collection<ExperimentExpressionLevelsValueObject> getExpressionLevelsPca(Collection<ExpressionExperiment> ees, int limit, int component, boolean keepGeneNonSpecific, String consolidateMode) {
    // Adapted from DEDV controller
    final Collection<ExperimentExpressionLevelsValueObject> levels = new ArrayList<>(ees.size());
    for (final ExpressionExperiment experiment : ees) {
        // The top loaded vectors are looked up per experiment, by its ID.
        final Collection<DoubleVectorValueObject> topLoaded = svdService
                .getTopLoadedVectors(experiment.getId(), component, limit)
                .values();
        this.addExperimentGeneVectors(levels, experiment, topLoaded, keepGeneNonSpecific, consolidateMode);
    }
    return levels;
}
use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorServiceImpl method getDiffExVectors.
/**
 * Retrieves the processed data vectors for the probes found in a differential expression result set,
 * annotates each vector with the p-value of its probe and returns them sorted by ascending p-value.
 * Vectors without a p-value sort first.
 *
 * @param resultSetId        ID of the differential expression result set
 * @param threshold          threshold passed to {@code differentialExpressionResultService.findInResultSet}
 * @param maxNumberOfResults maximum number of results passed to the same call
 * @return the sorted vectors, or {@code null} if no result set with the given ID exists
 */
@Override
public List<DoubleVectorValueObject> getDiffExVectors(Long resultSetId, Double threshold, int maxNumberOfResults) {
    ExpressionAnalysisResultSet ar = differentialExpressionResultService.loadAnalysisResultSet(resultSetId);
    if (ar == null) {
        Log.warn(this.getClass(), "No diff ex result set with ID=" + resultSetId);
        // Kept as null (not an empty list) so existing callers that test for null keep working.
        return null;
    }
    differentialExpressionResultService.thawLite(ar);
    BioAssaySet analyzedSet = ar.getAnalysis().getExperimentAnalyzed();
    List<DifferentialExpressionValueObject> ee2probeResults = differentialExpressionResultService.findInResultSet(ar, threshold, maxNumberOfResults, ProcessedExpressionDataVectorServiceImpl.DIFFEX_MIN_NUMBER_OF_RESULTS);
    Collection<Long> probes = new HashSet<>();
    // Map<CompositeSequenceId, pValue>
    // using id instead of entity for map key because want to use a value object for retrieval later
    Map<Long, Double> pvalues = new HashMap<>();
    for (DifferentialExpressionValueObject par : ee2probeResults) {
        probes.add(par.getProbeId());
        pvalues.put(par.getProbeId(), par.getP());
    }
    Collection<DoubleVectorValueObject> processedDataArraysByProbe = this.getProcessedDataArraysByProbeIds(analyzedSet, probes);
    List<DoubleVectorValueObject> dedvs = new ArrayList<>(processedDataArraysByProbe);
    /*
     * Re-sort: attach the p-value to each vector, then order by it.
     */
    for (DoubleVectorValueObject v : dedvs) {
        v.setPvalue(pvalues.get(v.getDesignElement().getId()));
    }
    Collections.sort(dedvs, new Comparator<DoubleVectorValueObject>() {
        @Override
        public int compare(DoubleVectorValueObject o1, DoubleVectorValueObject o2) {
            Double p1 = o1.getPvalue();
            Double p2 = o2.getPvalue();
            if (p1 == null) {
                // Two missing p-values must compare as equal; returning -1 in both directions
                // violates the Comparator contract and can make the sort throw.
                return p2 == null ? 0 : -1;
            }
            if (p2 == null) {
                return 1;
            }
            return p1.compareTo(p2);
        }
    });
    return dedvs;
}
use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class DiffExTest method testCountData.
/**
 * Test differential expression analysis on RNA-seq data. See bug 3383. R code in voomtest.R
 * <p>
 * Loads GSE29006 from scratch, imports its experimental design, adds count data from a test resource and
 * runs the analysis twice (without and with weights), checking the results for probe ENSG00000000938.
 *
 * @throws Exception if loading the experiment, the design or the count data fails
 */
@Test
public void testCountData() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    // Start from a clean slate: remove any copy left over from a previous run.
    ExpressionExperiment ee = eeService.findByShortName("GSE29006");
    if (ee != null) {
        eeService.remove(ee);
    }
    assertTrue(eeService.findByShortName("GSE29006") == null);
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = eeService.thaw(ee);
    try (InputStream is = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_design.txt")) {
        assertNotNull(is);
        experimentalDesignImporter.importDesign(ee, is);
    }
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    ArrayDesign targetArrayDesign;
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt")) {
        // getResourceAsStream returns null for a missing resource; fail here rather than inside the reader.
        assertNotNull(countData);
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        Collection<ExperimentalFactor> experimentalFactors = ee.getExperimentalDesign().getExperimentalFactors();
        assertEquals(1, experimentalFactors.size());
        List<String> probeNames = countMatrix.getRowNames();
        assertEquals(199, probeNames.size());
        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        // the experiment has 8 samples but the data has 4 columns so allow missing samples
        // GSM718707 GSM718708 GSM718709 GSM718710
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, null, 36, true, true);
    }
    // Thaw again to get the addCountData() updates.
    // NOTE(review): the earlier comment here referred to thawRawAndProcessed(); confirm thaw() is sufficient.
    ee = eeService.thaw(ee);
    // verify rows and columns
    Collection<DoubleVectorValueObject> processedDataArrays = processedExpressionDataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(4, v.getBioAssays().size());
    }
    // I confirmed that log2cpm is working same as voom here; not bothering to test directly.
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM718709", 320383);
    // DE analysis without weights to assist comparison to R
    DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
    config.setUseWeights(false);
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(ee, config);
    assertNotNull(analyses);
    assertEquals(1, analyses.size());
    DifferentialExpressionAnalysis results = analyses.iterator().next();
    boolean found = false;
    ExpressionAnalysisResultSet resultSet = results.getResultSets().iterator().next();
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        if (r.getProbe().getName().equals("ENSG00000000938")) {
            found = true;
            ContrastResult contrast = r.getContrasts().iterator().next();
            // R: 0.006190738; coeff = 2.2695215; t=12.650422; R with our weights: 0.009858270, 2.2317534; t=9.997007
            assertEquals(0.007055717, r.getPvalue(), 0.00001);
            // up to sign
            assertEquals(2.2300049, Math.abs(contrast.getCoefficient()), 0.001);
            break;
        }
    }
    assertTrue(found);
    // With weights
    config = new DifferentialExpressionAnalysisConfig();
    // This flag is the only difference from the first run.
    config.setUseWeights(true);
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    analyses = analyzer.run(ee, config);
    results = analyses.iterator().next();
    resultSet = results.getResultSets().iterator().next();
    // Tracked separately from the first run so the weighted assertions cannot be skipped silently.
    boolean foundWeighted = false;
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        if (r.getProbe().getName().equals("ENSG00000000938")) {
            foundWeighted = true;
            assertEquals(1, r.getContrasts().size());
            ContrastResult contrast = r.getContrasts().iterator().next();
            // yes!
            assertEquals(2.232816, Math.abs(contrast.getCoefficient()), 0.001);
            assertEquals(0.000311, contrast.getPvalue(), 0.00001);
            assertEquals(56.66342, Math.abs(contrast.getTstat()), 0.001);
            assertEquals(0.007068, r.getPvalue(), 0.00001);
            break;
        }
    }
    assertTrue(foundWeighted);
}
Aggregations