use of ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult in project Gemma by PavlidisLab.
the class ExpressionDataFileServiceImpl method analysisResultSetToString.
/**
 * Adds the columns of one result set to a tab-delimited table that is being assembled across
 * several result sets.
 * <p>
 * Two header cells ({@code QValue_<factors>} and {@code PValue_<factors>}) are appended to
 * {@code buf}. For every result in {@code ears}, the formatted corrected p-value and p-value are
 * appended to that probe's row in {@code probe2String}. A row is created the first time a probe
 * is seen and then starts with the probe name followed by three annotation cells.
 *
 * @param ears                       result set whose results are added
 * @param geneAnnotations            annotation fields keyed by probe id; indices 1, 2 and 4 are
 *                                   written (gene symbols, gene name and NCBI id according to
 *                                   the field notes that accompanied the original code)
 * @param buf                        receives the header cells for this result set
 * @param probe2String               per-probe row buffers, keyed by probe id; updated in place
 * @param sortedFirstColumnOfResults row order established by an earlier call, or {@code null}
 *                                   if this is the first result set
 * @return {@code sortedFirstColumnOfResults} if it was non-null, otherwise the results of
 *         {@code ears} sorted with {@code DifferentialExpressionAnalysisResultComparator}
 */
@Override
public List<DifferentialExpressionAnalysisResult> analysisResultSetToString(ExpressionAnalysisResultSet ears, Map<Long, String[]> geneAnnotations, StringBuilder buf, Map<Long, StringBuilder> probe2String, List<DifferentialExpressionAnalysisResult> sortedFirstColumnOfResults) {
    if (sortedFirstColumnOfResults == null) {
        // The first result set processed decides the row order of the whole table.
        sortedFirstColumnOfResults = new ArrayList<>(ears.getResults());
        Collections.sort(sortedFirstColumnOfResults, DifferentialExpressionAnalysisResultComparator.Factory.newInstance());
    }

    // Build a description of the factors involved ("factor1_factor2"), trying to be R-friendly.
    StringBuilder rawFactorNames = new StringBuilder();
    for (ExperimentalFactor ef : ears.getExperimentalFactors()) {
        rawFactorNames.append(ef.getName().replaceAll("\\s+", "_")).append("_");
    }
    StringBuilder factorColumnName = new StringBuilder(StringUtil.makeValidForR(StringUtils.removeEnd(rawFactorNames.toString(), "_")));

    // Headers for this result set.
    buf.append("\tQValue_").append(factorColumnName);
    buf.append("\tPValue_").append(factorColumnName);

    // Rows are cached per probe because each result set contributes columns to the same row.
    for (DifferentialExpressionAnalysisResult dear : ears.getResults()) {
        CompositeSequence cs = dear.getProbe();
        Long csid = cs.getId();

        StringBuilder probeBuffer = probe2String.get(csid);
        if (probeBuffer == null) {
            // No entry for this probe yet: start the row with its name and annotations.
            probeBuffer = new StringBuilder();
            probeBuffer.append(cs.getName());

            String[] annotationStrings = geneAnnotations.get(csid);
            if (annotationStrings != null) {
                // A short annotation row must not throw; a missing field becomes an empty cell so
                // that exactly three annotation columns are always written.
                probeBuffer.append("\t").append(annotationStrings.length > 1 ? annotationStrings[1] : "");
                probeBuffer.append("\t").append(annotationStrings.length > 2 ? annotationStrings[2] : "");
                probeBuffer.append("\t").append(annotationStrings.length > 4 ? annotationStrings[4] : "");
            } else {
                probeBuffer.append("\t\t\t");
            }
            probe2String.put(csid, probeBuffer);
        }

        Double correctedPvalue = dear.getCorrectedPvalue();
        Double pvalue = dear.getPvalue();
        // Missing values are written as empty cells.
        String formattedCP = correctedPvalue == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, correctedPvalue);
        String formattedP = pvalue == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, pvalue);
        probeBuffer.append("\t").append(formattedCP).append("\t").append(formattedP);
    }
    return sortedFirstColumnOfResults;
}
use of ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult in project Gemma by PavlidisLab.
the class DifferentialExpressionAnalyzerServiceImpl method extendResultSet.
/**
 * Copies into {@code oldrs} those results of {@code temprs} whose probe does not already have a
 * result in {@code oldrs}.
 * <p>
 * Every result of {@code temprs} is re-pointed at {@code oldrs} via {@code setResultSet}, whether
 * or not it ends up being added.
 *
 * @param oldrs  result set to extend; its id must be non-null (checked by assertion)
 * @param temprs result set supplying the candidate results
 */
private void extendResultSet(ExpressionAnalysisResultSet oldrs, ExpressionAnalysisResultSet temprs) {
    assert oldrs.getId() != null;

    // Index the existing results by probe so candidates can be checked quickly.
    Map<CompositeSequence, DifferentialExpressionAnalysisResult> p2der = new HashMap<>();
    for (DifferentialExpressionAnalysisResult der : oldrs.getResults()) {
        p2der.put(der.getProbe(), der);
    }

    Collection<DifferentialExpressionAnalysisResult> toAdd = new ArrayList<>();
    for (DifferentialExpressionAnalysisResult newr : temprs.getResults()) {
        if (!p2der.containsKey(newr.getProbe())) {
            toAdd.add(newr);
        }
        // NOTE(review): results that are not added are re-parented as well; kept as in the original.
        newr.setResultSet(oldrs);
    }

    if (toAdd.isEmpty()) {
        DifferentialExpressionAnalyzerServiceImpl.log.warn("Somewhat surprisingly, no new results were added");
        // Nothing to copy. Returning here avoids asserting on addAll(), which reports "unchanged"
        // for an empty argument and would turn this tolerated case into an AssertionError.
        return;
    }

    DifferentialExpressionAnalyzerServiceImpl.log.info(toAdd.size() + " transient results added to the old analysis result set: " + oldrs.getId());
    boolean added = oldrs.getResults().addAll(toAdd);
    assert added;
    assert oldrs.getResults().size() >= toAdd.size();
}
use of ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult in project Gemma by PavlidisLab.
the class DifferentialExpressionAnalyzerServiceImpl method addPvalueDistribution.
/**
 * Builds a 100-bin histogram over [0, 1] of the non-null p-values in the result set and attaches
 * it to the result set as a {@code PvalueDistribution}. Nothing is persisted here.
 *
 * @param resultSet result set whose p-values are binned and which receives the distribution
 */
private void addPvalueDistribution(ExpressionAnalysisResultSet resultSet) {
    final int numBins = 100;

    Histogram histogram = new Histogram("", numBins, 0.0, 1.0);
    for (DifferentialExpressionAnalysisResult der : resultSet.getResults()) {
        Double p = der.getPvalue();
        if (p == null) {
            // Results without a p-value do not contribute to the histogram.
            continue;
        }
        histogram.fill(p);
    }

    PvalueDistribution distribution = PvalueDistribution.Factory.newInstance();
    distribution.setNumBins(numBins);
    ByteArrayConverter converter = new ByteArrayConverter();
    // The bin counts are stored in their byte-encoded form.
    distribution.setBinCounts(converter.doubleArrayToBytes(histogram.getArray()));

    // Do not save yet; only attach to the result set.
    resultSet.setPvalueDistribution(distribution);
}
use of ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult in project Gemma by PavlidisLab.
the class ExpressionDataFileServiceImpl method analysisResultSetWithContrastsToString.
/**
 * Renders the contrast details of a result set as tab-delimited text: one header line followed
 * by one line per result.
 * <p>
 * Only the first experimental factor of the result set is considered. For a continuous factor
 * each row carries the coefficient and p-value of the result's single contrast. Otherwise each
 * non-baseline factor value gets three columns (fold change, t statistic, p-value), written in
 * the iteration order of the factor's values.
 *
 * @param resultSet       result set to render; must have at least one experimental factor
 * @param geneAnnotations annotation fields keyed by probe id; may be empty
 * @return the header line and data lines, each terminated by a newline
 */
private String analysisResultSetWithContrastsToString(ExpressionAnalysisResultSet resultSet, Map<Long, String[]> geneAnnotations) {
    StringBuilder buf = new StringBuilder();
    ExperimentalFactor ef = resultSet.getExperimentalFactors().iterator().next();

    // This is a bit risky, we're only looking at the first one. But this is how we do it for the header.
    boolean hasNCBIIDs = !geneAnnotations.isEmpty() && geneAnnotations.values().iterator().next().length > 4;

    if (ef.getType().equals(FactorType.CONTINUOUS)) {
        buf.append("\tCoefficient_").append(StringUtil.makeValidForR(ef.getName())).append("\tPValue_").append(StringUtil.makeValidForR(ef.getName())).append("\n");

        for (DifferentialExpressionAnalysisResult dear : resultSet.getResults()) {
            StringBuilder rowBuffer = new StringBuilder();
            if (geneAnnotations.isEmpty()) {
                rowBuffer.append(dear.getProbe().getName());
            } else {
                this.addGeneAnnotationsToLine(rowBuffer, dear, hasNCBIIDs, geneAnnotations);
            }

            // A continuous factor is expected to yield exactly one contrast per result.
            assert dear.getContrasts().size() == 1;
            ContrastResult contrast = dear.getContrasts().iterator().next();
            Double coefficient = contrast.getCoefficient();
            Double pValue = contrast.getPvalue();
            String formattedPvalue = pValue == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, pValue);
            String formattedCoefficient = coefficient == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, coefficient);

            rowBuffer.append("\t").append(formattedCoefficient).append("\t").append(formattedPvalue);
            buf.append(rowBuffer).append('\n');
        }
    } else {
        // Three empty cells: keeps later columns aligned when a factor value has no contrast.
        final String missingContrastData = "\t\t\t";

        Long baselineId = resultSet.getBaselineGroup().getId();
        List<Long> factorValueIdOrder = new ArrayList<>();
        for (FactorValue factorValue : ef.getFactorValues()) {
            if (Objects.equals(factorValue.getId(), baselineId)) {
                // The baseline is what the others are compared against; it has no columns.
                continue;
            }
            factorValueIdOrder.add(factorValue.getId());
            // Generate column headers, try to be R-friendly
            buf.append("\tFoldChange_").append(this.getFactorValueString(factorValue));
            buf.append("\tTstat_").append(this.getFactorValueString(factorValue));
            buf.append("\tPValue_").append(this.getFactorValueString(factorValue));
        }
        buf.append('\n');

        // Generate element details
        for (DifferentialExpressionAnalysisResult dear : resultSet.getResults()) {
            StringBuilder rowBuffer = new StringBuilder();
            this.addGeneAnnotationsToLine(rowBuffer, dear, hasNCBIIDs, geneAnnotations);

            // Contrasts are not expected in header order, so collect them by factor value id first.
            Map<Long, String> factorValueIdToData = new HashMap<>();
            for (ContrastResult contrast : dear.getContrasts()) {
                Double foldChange = contrast.getLogFoldChange();
                Double pValue = contrast.getPvalue();
                Double tStat = contrast.getTstat();
                String formattedPvalue = pValue == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, pValue);
                String formattedFoldChange = foldChange == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, foldChange);
                String formattedTStat = tStat == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, tStat);
                String contrastData = "\t" + formattedFoldChange + "\t" + formattedTStat + "\t" + formattedPvalue;
                assert contrast.getFactorValue() != null;
                factorValueIdToData.put(contrast.getFactorValue().getId(), contrastData);
            }

            // Emit them in header order, padding factor values that have no contrast.
            for (Long factorValueId : factorValueIdOrder) {
                String contrastData = factorValueIdToData.get(factorValueId);
                rowBuffer.append(contrastData == null ? missingContrastData : contrastData);
            }
            buf.append(rowBuffer).append('\n');
        }
    }
    return buf.toString();
}
use of ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult in project Gemma by PavlidisLab.
the class ExpressionDataFileServiceImpl method analysisResultSetsToString.
/**
 * Writes several result sets into {@code buf} as a single tab-delimited table: the header cells
 * of every result set on the first line, then one line per probe.
 * <p>
 * Rows are written in the order returned by {@code analysisResultSetToString} for the first
 * result set processed.
 *
 * @param results         result sets to write; must contain at least one
 * @param geneAnnotations annotation fields keyed by probe id
 * @param buf             receives the table text
 * @throws IllegalStateException if no row order could be established, which happens when
 *                               {@code results} is empty
 */
@Override
public void analysisResultSetsToString(Collection<ExpressionAnalysisResultSet> results, Map<Long, String[]> geneAnnotations, StringBuilder buf) {
    Map<Long, StringBuilder> probe2String = new HashMap<>();
    List<DifferentialExpressionAnalysisResult> sortedFirstColumnOfResults = null;

    // Each call adds header cells to buf and columns to the per-probe rows.
    for (ExpressionAnalysisResultSet ears : results) {
        sortedFirstColumnOfResults = this.analysisResultSetToString(ears, geneAnnotations, buf, probe2String, sortedFirstColumnOfResults);
    }

    // Fail before touching buf any further, so the caller's buffer is not altered on error.
    if (sortedFirstColumnOfResults == null) {
        throw new IllegalStateException("No result sets were provided, so there are no results to write");
    }

    // End of the header line.
    buf.append("\n");

    // Dump the probe data in the sorted order of the 1st column that we originally sorted
    for (DifferentialExpressionAnalysisResult sortedResult : sortedFirstColumnOfResults) {
        CompositeSequence cs = sortedResult.getProbe();
        StringBuilder sb = probe2String.get(cs.getId());
        if (sb == null) {
            ExpressionDataFileServiceImpl.log.warn("Unable to find element " + cs.getId() + " in map");
            // Skip only this element; stopping here would silently drop every remaining row.
            continue;
        }
        buf.append(sb);
        buf.append("\n");
    }
}
Aggregations