Search in sources :

Example 6 with ContrastResult

use of ubic.gemma.model.analysis.expression.diff.ContrastResult in project Gemma by PavlidisLab.

the class ExpressionDataFileServiceImpl method analysisResultSetWithContrastsToString.

private String analysisResultSetWithContrastsToString(ExpressionAnalysisResultSet resultSet, Map<Long, String[]> geneAnnotations) {
    StringBuilder buf = new StringBuilder();
    ExperimentalFactor ef = resultSet.getExperimentalFactors().iterator().next();
    // This is a bit risky, we're only looking at the first one. But this is how we do it for the header.
    boolean hasNCBIIDs = !geneAnnotations.isEmpty() && geneAnnotations.values().iterator().next().length > 4;
    if (ef.getType().equals(FactorType.CONTINUOUS)) {
        buf.append("\tCoefficient_").append(StringUtil.makeValidForR(ef.getName())).append("\tPValue_").append(StringUtil.makeValidForR(ef.getName())).append("\n");
        for (DifferentialExpressionAnalysisResult dear : resultSet.getResults()) {
            StringBuilder rowBuffer = new StringBuilder();
            if (geneAnnotations.isEmpty()) {
                rowBuffer.append(dear.getProbe().getName());
            } else {
                this.addGeneAnnotationsToLine(rowBuffer, dear, hasNCBIIDs, geneAnnotations);
            }
            assert dear.getContrasts().size() == 1;
            ContrastResult contrast = dear.getContrasts().iterator().next();
            Double coefficient = contrast.getCoefficient();
            Double pValue = contrast.getPvalue();
            String formattedPvalue = pValue == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, pValue);
            String formattedCoefficient = coefficient == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, coefficient);
            String contrastData = "\t" + formattedCoefficient + "\t" + formattedPvalue;
            rowBuffer.append(contrastData);
            buf.append(rowBuffer.toString()).append('\n');
        }
    } else {
        Long baselineId = resultSet.getBaselineGroup().getId();
        List<Long> factorValueIdOrder = new ArrayList<>();
        for (FactorValue factorValue : ef.getFactorValues()) {
            if (Objects.equals(factorValue.getId(), baselineId)) {
                continue;
            }
            factorValueIdOrder.add(factorValue.getId());
            // Generate column headers, try to be R-friendly
            buf.append("\tFoldChange_").append(this.getFactorValueString(factorValue));
            buf.append("\tTstat_").append(this.getFactorValueString(factorValue));
            buf.append("\tPValue_").append(this.getFactorValueString(factorValue));
        }
        buf.append('\n');
        // Generate element details
        for (DifferentialExpressionAnalysisResult dear : resultSet.getResults()) {
            StringBuilder rowBuffer = new StringBuilder();
            this.addGeneAnnotationsToLine(rowBuffer, dear, hasNCBIIDs, geneAnnotations);
            Map<Long, String> factorValueIdToData = new HashMap<>();
            // I don't think we can expect them in the same order.
            for (ContrastResult contrast : dear.getContrasts()) {
                Double foldChange = contrast.getLogFoldChange();
                Double pValue = contrast.getPvalue();
                Double tStat = contrast.getTstat();
                String formattedPvalue = pValue == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, pValue);
                String formattedFoldChange = foldChange == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, foldChange);
                String formattedTState = tStat == null ? "" : String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, tStat);
                String contrastData = "\t" + formattedFoldChange + "\t" + formattedTState + "\t" + formattedPvalue;
                assert contrast.getFactorValue() != null;
                factorValueIdToData.put(contrast.getFactorValue().getId(), contrastData);
            }
            // Get them in the right order.
            for (Long factorValueId : factorValueIdOrder) {
                String s = factorValueIdToData.get(factorValueId);
                if (s == null)
                    s = "";
                rowBuffer.append(s);
            }
            buf.append(rowBuffer.toString()).append('\n');
        }
    // resultSet.getResults() loop
    }
    return buf.toString();
}
Also used : DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) ContrastResult(ubic.gemma.model.analysis.expression.diff.ContrastResult)

Example 7 with ContrastResult

use of ubic.gemma.model.analysis.expression.diff.ContrastResult in project Gemma by PavlidisLab.

the class SubsettedAnalysis3Test method test.

@Test
public void test() {
    ee = expressionExperimentService.thawLite(ee);
    Collection<ExperimentalFactor> factors = ee.getExperimentalDesign().getExperimentalFactors();
    assertEquals(3, factors.size());
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(3, ba.getSampleUsed().getFactorValues().size());
    }
    ExperimentalFactor organismpart = null;
    ExperimentalFactor disease = null;
    ExperimentalFactor diseasegroup = null;
    for (ExperimentalFactor ef : factors) {
        switch(ef.getCategory().getValue()) {
            case "study design":
                diseasegroup = ef;
                break;
            case "disease":
                disease = ef;
                break;
            case "organism part":
                organismpart = ef;
                break;
        }
    }
    assertNotNull(diseasegroup);
    assertNotNull(disease);
    assertNotNull(organismpart);
    DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
    config.getFactorsToInclude().add(disease);
    config.getFactorsToInclude().add(organismpart);
    config.setSubsetFactor(diseasegroup);
    Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(ee, config);
    // a subset for each disease: SZ, PD, HD, ALS, ALZ, MS
    assertEquals(6, analyses.size());
    /*
         * Now, within each we should have only one disease contrast,
         */
    for (DifferentialExpressionAnalysis analysis : analyses) {
        // there should be one for disease - tissue isn't used.
        assertEquals(1, analysis.getResultSets().size());
        for (ExpressionAnalysisResultSet rs : analysis.getResultSets()) {
            ExperimentalFactor factor = rs.getExperimentalFactors().iterator().next();
            // noinspection LoopStatementThatDoesntLoop
            for (DifferentialExpressionAnalysisResult res : rs.getResults()) {
                Collection<ContrastResult> contrasts = res.getContrasts();
                for (ContrastResult cr : contrasts) {
                    log.info(analysis + "   " + factor + " " + cr + " " + res);
                }
                break;
            }
        }
    }
}
Also used : ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) DifferentialExpressionAnalysis(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis) DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) ExpressionAnalysisResultSet(ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet) ContrastResult(ubic.gemma.model.analysis.expression.diff.ContrastResult) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 8 with ContrastResult

use of ubic.gemma.model.analysis.expression.diff.ContrastResult in project Gemma by PavlidisLab.

the class DiffExTest method testCountData.

/**
 * Test differential expression analysis on RNA-seq data. See bug 3383. R code in voomtest.R
 */
@Test
public void testCountData() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    ExpressionExperiment ee = eeService.findByShortName("GSE29006");
    if (ee != null) {
        eeService.remove(ee);
    }
    assertTrue(eeService.findByShortName("GSE29006") == null);
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        throw new IllegalStateException("Need to remove this data set before test is run");
    }
    ee = eeService.thaw(ee);
    try (InputStream is = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_design.txt")) {
        assertNotNull(is);
        experimentalDesignImporter.importDesign(ee, is);
    }
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    ArrayDesign targetArrayDesign;
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt")) {
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        Collection<ExperimentalFactor> experimentalFactors = ee.getExperimentalDesign().getExperimentalFactors();
        assertEquals(1, experimentalFactors.size());
        List<String> probeNames = countMatrix.getRowNames();
        assertEquals(199, probeNames.size());
        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        // the experiment has 8 samples but the data has 4 columns so allow missing samples
        // GSM718707 GSM718708 GSM718709 GSM718710
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, null, 36, true, true);
    }
    // make sure to do a thawRawAndProcessed() to get the addCountData() updates
    ee = eeService.thaw(ee);
    // verify rows and columns
    Collection<DoubleVectorValueObject> processedDataArrays = processedExpressionDataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(4, v.getBioAssays().size());
    }
    // I confirmed that log2cpm is working same as voom here; not bothering to test directly.
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM718709", 320383);
    // DE analysis without weights to assist comparison to R
    DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
    config.setUseWeights(false);
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(ee, config);
    assertNotNull(analyses);
    assertEquals(1, analyses.size());
    DifferentialExpressionAnalysis results = analyses.iterator().next();
    boolean found = false;
    ExpressionAnalysisResultSet resultSet = results.getResultSets().iterator().next();
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        if (r.getProbe().getName().equals("ENSG00000000938")) {
            found = true;
            ContrastResult contrast = r.getContrasts().iterator().next();
            assertEquals(0.007055717, r.getPvalue(), // R: 0.006190738; coeff = 2.2695215; t=12.650422; R with our weights: 0.009858270, 2.2317534; t=9.997007
            0.00001);
            // up to sign
            assertEquals(2.2300049, Math.abs(contrast.getCoefficient()), 0.001);
            break;
        }
    }
    assertTrue(found);
    // With weights
    config = new DifferentialExpressionAnalysisConfig();
    // <----
    config.setUseWeights(true);
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    analyses = analyzer.run(ee, config);
    results = analyses.iterator().next();
    resultSet = results.getResultSets().iterator().next();
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        if (r.getProbe().getName().equals("ENSG00000000938")) {
            assertEquals(1, r.getContrasts().size());
            ContrastResult contrast = r.getContrasts().iterator().next();
            // yes!
            assertEquals(2.232816, Math.abs(contrast.getCoefficient()), 0.001);
            assertEquals(0.000311, contrast.getPvalue(), 0.00001);
            assertEquals(56.66342, Math.abs(contrast.getTstat()), 0.001);
            assertEquals(0.007068, r.getPvalue(), 0.00001);
            break;
        }
    }
}
Also used : InputStream(java.io.InputStream) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) DifferentialExpressionAnalysis(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis) DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) ExpressionAnalysisResultSet(ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) ContrastResult(ubic.gemma.model.analysis.expression.diff.ContrastResult) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Aggregations

ContrastResult (ubic.gemma.model.analysis.expression.diff.ContrastResult)8 DifferentialExpressionAnalysisResult (ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult)7 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)6 Test (org.junit.Test)5 DifferentialExpressionAnalysis (ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis)5 ExpressionAnalysisResultSet (ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet)5 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)5 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)2 FactorValue (ubic.gemma.model.expression.experiment.FactorValue)2 InputStream (java.io.InputStream)1 ArrayList (java.util.ArrayList)1 DoubleMatrixReader (ubic.basecode.io.reader.DoubleMatrixReader)1 GeoDomainObjectGenerator (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator)1 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)1 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)1 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)1 DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)1 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)1 Gene (ubic.gemma.model.genome.Gene)1