Search in sources :

Example 11 with DifferentialExpressionAnalysisResult

use of ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult in project Gemma by PavlidisLab.

the class ExpressionDataFileServiceImpl method analysisResultSetToString.

/**
 * Adds the columns for one result set to the tab-delimited output being assembled.
 * <p>
 * Two header cells ({@code QValue_<factors>} and {@code PValue_<factors>}) are appended to {@code buf}, and for
 * every result in {@code ears} the corrected p-value and p-value are appended to that probe's row buffer in
 * {@code probe2String}. A row buffer is created (probe name plus three annotation cells) the first time a probe
 * is seen.
 *
 * @param ears                       the result set whose values are appended
 * @param geneAnnotations            annotation fields keyed by probe id; indices 1, 2 and 4 are written. Rows
 *                                   that are missing or too short yield empty cells rather than an exception.
 * @param buf                        receives the header cells for this result set
 * @param probe2String               per-probe row buffers keyed by probe id, shared across calls
 * @param sortedFirstColumnOfResults the ordering established by an earlier call, or {@code null} on the first call
 * @return {@code sortedFirstColumnOfResults} if it was non-null, otherwise a newly sorted copy of the results of
 *         {@code ears}
 */
@Override
public List<DifferentialExpressionAnalysisResult> analysisResultSetToString(ExpressionAnalysisResultSet ears, Map<Long, String[]> geneAnnotations, StringBuilder buf, Map<Long, StringBuilder> probe2String, List<DifferentialExpressionAnalysisResult> sortedFirstColumnOfResults) {
    if (sortedFirstColumnOfResults == null) {
        // First result set seen: its sorted results define the row order of the final output.
        sortedFirstColumnOfResults = new ArrayList<>(ears.getResults());
        Collections.sort(sortedFirstColumnOfResults, DifferentialExpressionAnalysisResultComparator.Factory.newInstance());
    }
    // Generate a description of the factors involved "factor1_factor2", trying to be R-friendly
    StringBuilder factorColumnName = new StringBuilder();
    for (ExperimentalFactor ef : ears.getExperimentalFactors()) {
        factorColumnName.append(ef.getName().replaceAll("\\s+", "_")).append("_");
    }
    factorColumnName = new StringBuilder(StringUtil.makeValidForR(StringUtils.removeEnd(factorColumnName.toString(), "_")));
    // Generate headers
    buf.append("\tQValue_").append(factorColumnName);
    buf.append("\tPValue_").append(factorColumnName);
    // Generate probe details. Rows are cached per probe so that factors end up as columns.
    for (DifferentialExpressionAnalysisResult dear : ears.getResults()) {
        CompositeSequence cs = dear.getProbe();
        Long csid = cs.getId();
        StringBuilder probeBuffer = probe2String.get(csid);
        if (probeBuffer == null) {
            // no entry for probe yet: start the row with the probe name and its annotation cells
            probeBuffer = new StringBuilder();
            probeBuffer.append(cs.getName());
            String[] annotationStrings = geneAnnotations.get(csid);
            if (annotationStrings != null) {
                /*
                 * Fields:
                 *
                 * 1: gene symbols
                 * 2: gene name
                 * 4: ncbi ID
                 *
                 * A field beyond the end of the array is written as an empty cell so the row stays aligned.
                 */
                for (int field : new int[] { 1, 2, 4 }) {
                    probeBuffer.append("\t");
                    if (field < annotationStrings.length) {
                        probeBuffer.append(annotationStrings[field]);
                    }
                }
            } else {
                probeBuffer.append("\t\t\t");
            }
            probe2String.put(csid, probeBuffer);
        }
        Double correctedPvalue = dear.getCorrectedPvalue();
        Double pvalue = dear.getPvalue();
        String formattedCP = correctedPvalue == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, correctedPvalue);
        String formattedP = pvalue == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, pvalue);
        probeBuffer.append("\t").append(formattedCP).append("\t").append(formattedP);
    }
    return sortedFirstColumnOfResults;
}
Also used : DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 12 with DifferentialExpressionAnalysisResult

use of ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult in project Gemma by PavlidisLab.

the class DifferentialExpressionAnalyzerServiceImpl method extendResultSet.

/**
 * Copies into {@code oldrs} every result of {@code temprs} whose probe has no result in {@code oldrs} yet.
 * <p>
 * Every result of {@code temprs} is re-pointed at {@code oldrs} via {@code setResultSet}, whether or not it is
 * added. If there is nothing new to add, a warning is logged and the method returns without touching the
 * result collection of {@code oldrs}.
 *
 * @param oldrs  the existing result set to extend; its id must not be null
 * @param temprs the result set supplying candidate results
 */
private void extendResultSet(ExpressionAnalysisResultSet oldrs, ExpressionAnalysisResultSet temprs) {
    assert oldrs.getId() != null;
    /*
     * Index the existing results by probe so new results can be checked against them.
     */
    Map<CompositeSequence, DifferentialExpressionAnalysisResult> p2der = new HashMap<>();
    for (DifferentialExpressionAnalysisResult der : oldrs.getResults()) {
        p2der.put(der.getProbe(), der);
    }
    Collection<DifferentialExpressionAnalysisResult> toAdd = new ArrayList<>();
    for (DifferentialExpressionAnalysisResult newr : temprs.getResults()) {
        if (!p2der.containsKey(newr.getProbe())) {
            toAdd.add(newr);
        }
        newr.setResultSet(oldrs);
    }
    if (toAdd.isEmpty()) {
        DifferentialExpressionAnalyzerServiceImpl.log.warn("Somewhat surprisingly, no new results were added");
        // Nothing to copy. Returning here avoids asserting on addAll(), which returns false for an empty
        // collection and would otherwise fail when assertions are enabled.
        return;
    }
    DifferentialExpressionAnalyzerServiceImpl.log.info(toAdd.size() + " transient results added to the old analysis result set: " + oldrs.getId());
    boolean added = oldrs.getResults().addAll(toAdd);
    assert added;
    assert oldrs.getResults().size() >= toAdd.size();
}
Also used : DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 13 with DifferentialExpressionAnalysisResult

use of ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult in project Gemma by PavlidisLab.

the class DifferentialExpressionAnalyzerServiceImpl method addPvalueDistribution.

/**
 * Builds a 100-bin histogram over [0.0, 1.0] from the non-null p-values of the given result set and attaches it
 * to the result set as a {@code PvalueDistribution}. Nothing is saved here.
 *
 * @param resultSet the result set whose p-values are binned and which receives the distribution
 */
private void addPvalueDistribution(ExpressionAnalysisResultSet resultSet) {
    final int numBins = 100;
    Histogram histogram = new Histogram("", numBins, 0.0, 1.0);
    for (DifferentialExpressionAnalysisResult analysisResult : resultSet.getResults()) {
        Double p = analysisResult.getPvalue();
        if (p == null) {
            // results without a p-value do not contribute to the histogram
            continue;
        }
        histogram.fill(p);
    }
    ByteArrayConverter converter = new ByteArrayConverter();
    PvalueDistribution distribution = PvalueDistribution.Factory.newInstance();
    distribution.setNumBins(numBins);
    distribution.setBinCounts(converter.doubleArrayToBytes(histogram.getArray()));
    // do not save yet.
    resultSet.setPvalueDistribution(distribution);
}
Also used : Histogram(ubic.basecode.math.distribution.Histogram) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) PvalueDistribution(ubic.gemma.model.analysis.expression.diff.PvalueDistribution)

Example 14 with DifferentialExpressionAnalysisResult

use of ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult in project Gemma by PavlidisLab.

the class ExpressionDataFileServiceImpl method analysisResultSetWithContrastsToString.

/**
 * Renders a result set, including per-contrast statistics, as tab-delimited text.
 * <p>
 * Only the first experimental factor of the result set is considered. For a continuous factor each row carries
 * a coefficient and a p-value. Otherwise each row carries a fold change, t statistic and p-value for every
 * factor value other than the baseline, in the order the header columns were written.
 *
 * @param resultSet       the result set to render; must have at least one experimental factor
 * @param geneAnnotations annotation fields keyed by probe id; may be empty
 * @return the header line followed by one line per result
 */
private String analysisResultSetWithContrastsToString(ExpressionAnalysisResultSet resultSet, Map<Long, String[]> geneAnnotations) {
    StringBuilder buf = new StringBuilder();
    ExperimentalFactor ef = resultSet.getExperimentalFactors().iterator().next();
    // This is a bit risky, we're only looking at the first one. But this is how we do it for the header.
    boolean hasNCBIIDs = !geneAnnotations.isEmpty() && geneAnnotations.values().iterator().next().length > 4;
    if (ef.getType().equals(FactorType.CONTINUOUS)) {
        buf.append("\tCoefficient_").append(StringUtil.makeValidForR(ef.getName())).append("\tPValue_").append(StringUtil.makeValidForR(ef.getName())).append("\n");
        for (DifferentialExpressionAnalysisResult dear : resultSet.getResults()) {
            StringBuilder rowBuffer = new StringBuilder();
            if (geneAnnotations.isEmpty()) {
                rowBuffer.append(dear.getProbe().getName());
            } else {
                this.addGeneAnnotationsToLine(rowBuffer, dear, hasNCBIIDs, geneAnnotations);
            }
            // A continuous factor is expected to yield exactly one contrast per result.
            assert dear.getContrasts().size() == 1;
            ContrastResult contrast = dear.getContrasts().iterator().next();
            Double coefficient = contrast.getCoefficient();
            Double pValue = contrast.getPvalue();
            String formattedPvalue = pValue == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, pValue);
            String formattedCoefficient = coefficient == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, coefficient);
            String contrastData = "\t" + formattedCoefficient + "\t" + formattedPvalue;
            rowBuffer.append(contrastData);
            buf.append(rowBuffer.toString()).append('\n');
        }
    } else {
        // Three empty cells (fold change, t statistic, p-value) used when a result has no contrast for a
        // factor value, so that the remaining columns of the row stay aligned with the header.
        final String emptyContrastData = "\t\t\t";
        Long baselineId = resultSet.getBaselineGroup().getId();
        List<Long> factorValueIdOrder = new ArrayList<>();
        for (FactorValue factorValue : ef.getFactorValues()) {
            if (Objects.equals(factorValue.getId(), baselineId)) {
                // the baseline gets no columns of its own
                continue;
            }
            factorValueIdOrder.add(factorValue.getId());
            // Generate column headers, try to be R-friendly
            buf.append("\tFoldChange_").append(this.getFactorValueString(factorValue));
            buf.append("\tTstat_").append(this.getFactorValueString(factorValue));
            buf.append("\tPValue_").append(this.getFactorValueString(factorValue));
        }
        buf.append('\n');
        // Generate element details
        for (DifferentialExpressionAnalysisResult dear : resultSet.getResults()) {
            StringBuilder rowBuffer = new StringBuilder();
            this.addGeneAnnotationsToLine(rowBuffer, dear, hasNCBIIDs, geneAnnotations);
            Map<Long, String> factorValueIdToData = new HashMap<>();
            // I don't think we can expect them in the same order.
            for (ContrastResult contrast : dear.getContrasts()) {
                Double foldChange = contrast.getLogFoldChange();
                Double pValue = contrast.getPvalue();
                Double tStat = contrast.getTstat();
                String formattedPvalue = pValue == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, pValue);
                String formattedFoldChange = foldChange == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, foldChange);
                String formattedTState = tStat == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, tStat);
                String contrastData = "\t" + formattedFoldChange + "\t" + formattedTState + "\t" + formattedPvalue;
                assert contrast.getFactorValue() != null;
                factorValueIdToData.put(contrast.getFactorValue().getId(), contrastData);
            }
            // Get them in the right order.
            for (Long factorValueId : factorValueIdOrder) {
                String s = factorValueIdToData.get(factorValueId);
                if (s == null) {
                    s = emptyContrastData;
                }
                rowBuffer.append(s);
            }
            buf.append(rowBuffer.toString()).append('\n');
        }
    }
    return buf.toString();
}
Also used : DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) ContrastResult(ubic.gemma.model.analysis.expression.diff.ContrastResult)

Example 15 with DifferentialExpressionAnalysisResult

use of ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult in project Gemma by PavlidisLab.

the class ExpressionDataFileServiceImpl method analysisResultSetsToString.

/**
 * Writes several result sets side by side into {@code buf} as tab-delimited text.
 * <p>
 * Each result set contributes its header cells and per-probe values through
 * {@code analysisResultSetToString}. The header line is then terminated and the cached per-probe rows are
 * written in the order returned by those calls.
 *
 * @param results         the result sets to write
 * @param geneAnnotations annotation fields keyed by probe id
 * @param buf             receives the header and data rows
 * @throws IllegalStateException if {@code results} is empty, so that no row ordering could be established
 */
@Override
public void analysisResultSetsToString(Collection<ExpressionAnalysisResultSet> results, Map<Long, String[]> geneAnnotations, StringBuilder buf) {
    Map<Long, StringBuilder> probe2String = new HashMap<>();
    List<DifferentialExpressionAnalysisResult> sortedFirstColumnOfResults = null;
    for (ExpressionAnalysisResultSet ears : results) {
        sortedFirstColumnOfResults = this.analysisResultSetToString(ears, geneAnnotations, buf, probe2String, sortedFirstColumnOfResults);
    }
    // end of the header line
    buf.append("\n");
    if (sortedFirstColumnOfResults == null) {
        throw new IllegalStateException("No results were available to write: the collection of result sets was empty");
    }
    // Dump the probe data in the sorted order of the 1st column that we originally sorted
    for (DifferentialExpressionAnalysisResult sortedResult : sortedFirstColumnOfResults) {
        CompositeSequence cs = sortedResult.getProbe();
        StringBuilder sb = probe2String.get(cs.getId());
        if (sb == null) {
            ExpressionDataFileServiceImpl.log.warn("Unable to find element " + cs.getId() + " in map");
            // NOTE(review): this stops writing all remaining rows, not just this one - confirm that is intended.
            break;
        }
        buf.append(sb);
        buf.append("\n");
    }
}
Also used : DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ExpressionAnalysisResultSet(ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet)

Aggregations

DifferentialExpressionAnalysisResult (ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult): 20; ExpressionAnalysisResultSet (ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet): 14; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 12; ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor): 12; DifferentialExpressionAnalysis (ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis): 11; Test (org.junit.Test): 9; ContrastResult (ubic.gemma.model.analysis.expression.diff.ContrastResult): 7; AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest): 3; ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment): 3; FactorValue (ubic.gemma.model.expression.experiment.FactorValue): 3; InputStream (java.io.InputStream): 2; ArrayList (java.util.ArrayList): 2; AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException): 2; IOException (java.io.IOException): 1; HashSet (java.util.HashSet): 1; List (java.util.List): 1; GZIPOutputStream (java.util.zip.GZIPOutputStream): 1; StopWatch (org.apache.commons.lang3.time.StopWatch): 1; ByteArrayConverter (ubic.basecode.io.ByteArrayConverter): 1; DoubleMatrixReader (ubic.basecode.io.reader.DoubleMatrixReader): 1