use of ubic.gemma.model.analysis.expression.diff.ContrastResult in project Gemma by PavlidisLab.
the class ExpressionDataFileServiceImpl method analysisResultSetWithContrastsToString.
private String analysisResultSetWithContrastsToString(ExpressionAnalysisResultSet resultSet, Map<Long, String[]> geneAnnotations) {
StringBuilder buf = new StringBuilder();
ExperimentalFactor ef = resultSet.getExperimentalFactors().iterator().next();
// This is a bit risky, we're only looking at the first one. But this is how we do it for the header.
boolean hasNCBIIDs = !geneAnnotations.isEmpty() && geneAnnotations.values().iterator().next().length > 4;
if (ef.getType().equals(FactorType.CONTINUOUS)) {
buf.append("\tCoefficient_").append(StringUtil.makeValidForR(ef.getName())).append("\tPValue_").append(StringUtil.makeValidForR(ef.getName())).append("\n");
for (DifferentialExpressionAnalysisResult dear : resultSet.getResults()) {
StringBuilder rowBuffer = new StringBuilder();
if (geneAnnotations.isEmpty()) {
rowBuffer.append(dear.getProbe().getName());
} else {
this.addGeneAnnotationsToLine(rowBuffer, dear, hasNCBIIDs, geneAnnotations);
}
assert dear.getContrasts().size() == 1;
ContrastResult contrast = dear.getContrasts().iterator().next();
Double coefficient = contrast.getCoefficient();
Double pValue = contrast.getPvalue();
String formattedPvalue = pValue == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, pValue);
String formattedCoefficient = coefficient == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, coefficient);
String contrastData = "\t" + formattedCoefficient + "\t" + formattedPvalue;
rowBuffer.append(contrastData);
buf.append(rowBuffer.toString()).append('\n');
}
} else {
Long baselineId = resultSet.getBaselineGroup().getId();
List<Long> factorValueIdOrder = new ArrayList<>();
for (FactorValue factorValue : ef.getFactorValues()) {
if (Objects.equals(factorValue.getId(), baselineId)) {
continue;
}
factorValueIdOrder.add(factorValue.getId());
// Generate column headers, try to be R-friendly
buf.append("\tFoldChange_").append(this.getFactorValueString(factorValue));
buf.append("\tTstat_").append(this.getFactorValueString(factorValue));
buf.append("\tPValue_").append(this.getFactorValueString(factorValue));
}
buf.append('\n');
// Generate element details
for (DifferentialExpressionAnalysisResult dear : resultSet.getResults()) {
StringBuilder rowBuffer = new StringBuilder();
this.addGeneAnnotationsToLine(rowBuffer, dear, hasNCBIIDs, geneAnnotations);
Map<Long, String> factorValueIdToData = new HashMap<>();
// I don't think we can expect them in the same order.
for (ContrastResult contrast : dear.getContrasts()) {
Double foldChange = contrast.getLogFoldChange();
Double pValue = contrast.getPvalue();
Double tStat = contrast.getTstat();
String formattedPvalue = pValue == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, pValue);
String formattedFoldChange = foldChange == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, foldChange);
String formattedTState = tStat == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, tStat);
String contrastData = "\t" + formattedFoldChange + "\t" + formattedTState + "\t" + formattedPvalue;
assert contrast.getFactorValue() != null;
factorValueIdToData.put(contrast.getFactorValue().getId(), contrastData);
}
// Get them in the right order.
for (Long factorValueId : factorValueIdOrder) {
String s = factorValueIdToData.get(factorValueId);
if (s == null)
s = "";
rowBuffer.append(s);
}
buf.append(rowBuffer.toString()).append('\n');
}
// resultSet.getResults() loop
}
return buf.toString();
}
use of ubic.gemma.model.analysis.expression.diff.ContrastResult in project Gemma by PavlidisLab.
the class SubsettedAnalysis3Test method test.
@Test
public void test() {
ee = expressionExperimentService.thawLite(ee);
Collection<ExperimentalFactor> factors = ee.getExperimentalDesign().getExperimentalFactors();
assertEquals(3, factors.size());
for (BioAssay ba : ee.getBioAssays()) {
assertEquals(3, ba.getSampleUsed().getFactorValues().size());
}
ExperimentalFactor organismpart = null;
ExperimentalFactor disease = null;
ExperimentalFactor diseasegroup = null;
for (ExperimentalFactor ef : factors) {
switch(ef.getCategory().getValue()) {
case "study design":
diseasegroup = ef;
break;
case "disease":
disease = ef;
break;
case "organism part":
organismpart = ef;
break;
}
}
assertNotNull(diseasegroup);
assertNotNull(disease);
assertNotNull(organismpart);
DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
config.getFactorsToInclude().add(disease);
config.getFactorsToInclude().add(organismpart);
config.setSubsetFactor(diseasegroup);
Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(ee, config);
// a subset for each disease: SZ, PD, HD, ALS, ALZ, MS
assertEquals(6, analyses.size());
/*
* Now, within each we should have only one disease contrast,
*/
for (DifferentialExpressionAnalysis analysis : analyses) {
// there should be one for disease - tissue isn't used.
assertEquals(1, analysis.getResultSets().size());
for (ExpressionAnalysisResultSet rs : analysis.getResultSets()) {
ExperimentalFactor factor = rs.getExperimentalFactors().iterator().next();
// noinspection LoopStatementThatDoesntLoop
for (DifferentialExpressionAnalysisResult res : rs.getResults()) {
Collection<ContrastResult> contrasts = res.getContrasts();
for (ContrastResult cr : contrasts) {
log.info(analysis + " " + factor + " " + cr + " " + res);
}
break;
}
}
}
}
use of ubic.gemma.model.analysis.expression.diff.ContrastResult in project Gemma by PavlidisLab.
the class DiffExTest method testCountData.
/**
* Test differential expression analysis on RNA-seq data. See bug 3383. R code in voomtest.R
*/
@Test
public void testCountData() throws Exception {
geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
ExpressionExperiment ee = eeService.findByShortName("GSE29006");
if (ee != null) {
eeService.remove(ee);
}
assertTrue(eeService.findByShortName("GSE29006") == null);
try {
Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
ee = (ExpressionExperiment) results.iterator().next();
} catch (AlreadyExistsInSystemException e) {
throw new IllegalStateException("Need to remove this data set before test is run");
}
ee = eeService.thaw(ee);
try (InputStream is = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_design.txt")) {
assertNotNull(is);
experimentalDesignImporter.importDesign(ee, is);
}
// Load the data from a text file.
DoubleMatrixReader reader = new DoubleMatrixReader();
ArrayDesign targetArrayDesign;
try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt")) {
DoubleMatrix<String, String> countMatrix = reader.read(countData);
Collection<ExperimentalFactor> experimentalFactors = ee.getExperimentalDesign().getExperimentalFactors();
assertEquals(1, experimentalFactors.size());
List<String> probeNames = countMatrix.getRowNames();
assertEquals(199, probeNames.size());
// we have to find the right generic platform to use.
targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
// the experiment has 8 samples but the data has 4 columns so allow missing samples
// GSM718707 GSM718708 GSM718709 GSM718710
dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, null, 36, true, true);
}
// make sure to do a thawRawAndProcessed() to get the addCountData() updates
ee = eeService.thaw(ee);
// verify rows and columns
Collection<DoubleVectorValueObject> processedDataArrays = processedExpressionDataVectorService.getProcessedDataArrays(ee);
assertEquals(199, processedDataArrays.size());
for (DoubleVectorValueObject v : processedDataArrays) {
assertEquals(4, v.getBioAssays().size());
}
// I confirmed that log2cpm is working same as voom here; not bothering to test directly.
TestUtils.assertBAs(ee, targetArrayDesign, "GSM718709", 320383);
// DE analysis without weights to assist comparison to R
DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
config.setUseWeights(false);
config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(ee, config);
assertNotNull(analyses);
assertEquals(1, analyses.size());
DifferentialExpressionAnalysis results = analyses.iterator().next();
boolean found = false;
ExpressionAnalysisResultSet resultSet = results.getResultSets().iterator().next();
for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
if (r.getProbe().getName().equals("ENSG00000000938")) {
found = true;
ContrastResult contrast = r.getContrasts().iterator().next();
assertEquals(0.007055717, r.getPvalue(), // R: 0.006190738; coeff = 2.2695215; t=12.650422; R with our weights: 0.009858270, 2.2317534; t=9.997007
0.00001);
// up to sign
assertEquals(2.2300049, Math.abs(contrast.getCoefficient()), 0.001);
break;
}
}
assertTrue(found);
// With weights
config = new DifferentialExpressionAnalysisConfig();
// <----
config.setUseWeights(true);
config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
analyses = analyzer.run(ee, config);
results = analyses.iterator().next();
resultSet = results.getResultSets().iterator().next();
for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
if (r.getProbe().getName().equals("ENSG00000000938")) {
assertEquals(1, r.getContrasts().size());
ContrastResult contrast = r.getContrasts().iterator().next();
// yes!
assertEquals(2.232816, Math.abs(contrast.getCoefficient()), 0.001);
assertEquals(0.000311, contrast.getPvalue(), 0.00001);
assertEquals(56.66342, Math.abs(contrast.getTstat()), 0.001);
assertEquals(0.007068, r.getPvalue(), 0.00001);
break;
}
}
}
Aggregations