Search in sources :

Example 16 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class DataUpdaterTest method testLoadRNASeqData.

/**
 * More realistic test of RNA-seq data loading, using GSE19166.
 * <p>
 * Fetches the experiment through the GEO service (or reuses the copy already in the system), reads count and RPKM
 * matrices from classpath text files, attaches them with {@code dataUpdater.addCountData} and then checks the
 * resulting quantitation types, platform assignment and vector counts.
 *
 * @throws Exception if fetching the experiment or reading the data files fails
 */
@Test
public void testLoadRNASeqData() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    ExpressionExperiment ee;
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE19166", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // The data set is already in the system: reuse the first element of the data carried by the exception.
        ee = (ExpressionExperiment) ((List<?>) e.getData()).get(0);
    }
    ee = experimentService.thaw(ee);
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE19166_expression_count.test.txt");
        InputStream rpkmData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE19166_expression_RPKM.test.txt")) {
        // getResourceAsStream returns null for a missing resource; fail with a clear message instead of
        // letting the reader trip over a null stream.
        assertNotNull("count data resource not found on the classpath", countData);
        assertNotNull("RPKM data resource not found on the classpath", rpkmData);
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        DoubleMatrix<String, String> rpkmMatrix = reader.read(rpkmData);
        List<String> probeNames = countMatrix.getRowNames();
        assertEquals(199, probeNames.size());
        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        assertEquals(199, targetArrayDesign.getCompositeSequences().size());
        // Main step.
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, false);
    }
    // Thaw again before inspecting the experiment that addCountData was applied to.
    ee = experimentService.thaw(ee);
    // should have: log2cpm, counts, rpkm, and counts-masked ('preferred')
    assertEquals(4, ee.getQuantitationTypes().size());
    // every assay must now point at the platform built from the probe names
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, ba.getArrayDesignUsed());
    }
    assertNotNull(ee.getNumberOfDataVectors());
    assertEquals(199, ee.getNumberOfDataVectors().intValue());
    // GSM475204 GSM475205 GSM475206 GSM475207 GSM475208 GSM475209
    // 3949585 3929008 3712314 3693219 3574068 3579631
    ExpressionDataDoubleMatrix mat = dataMatrixService.getProcessedExpressionDataMatrix(ee);
    assertEquals(199, mat.rows());
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM475204", 3949585);
    // three raw vectors and one processed vector are expected per probe
    assertEquals(3 * 199, ee.getRawExpressionDataVectors().size());
    assertEquals(199, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedDataArrays = dataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(6, v.getBioAssays().size());
    }
    // the processed vectors must also be retrievable for a freshly loaded instance of the experiment
    assertTrue(!dataVectorService.getProcessedDataVectors(experimentService.load(ee.getId())).isEmpty());
}
Also used : InputStream(java.io.InputStream) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 17 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorServiceImpl method getExpressionLevelsDiffEx.

/**
 * Collects expression level value objects for the given experiments, based on the vectors of a differential
 * expression result set.
 *
 * @param ees                 experiments to process, in iteration order
 * @param diffExResultSetId   ID of the differential expression result set, passed to {@code getDiffExVectors}
 * @param threshold           threshold passed to {@code getDiffExVectors}
 * @param max                 maximum number of results passed to {@code getDiffExVectors}
 * @param keepGeneNonSpecific passed through to {@code addExperimentGeneVectors}
 * @param consolidateMode     passed through to {@code addExperimentGeneVectors}
 * @return the collection that {@code addExperimentGeneVectors} was asked to fill for each experiment
 */
@Override
@Transactional(readOnly = true)
public Collection<ExperimentExpressionLevelsValueObject> getExpressionLevelsDiffEx(Collection<ExpressionExperiment> ees, Long diffExResultSetId, double threshold, int max, boolean keepGeneNonSpecific, String consolidateMode) {
    final Collection<ExperimentExpressionLevelsValueObject> levels = new ArrayList<>();
    // Adapted from DEDV controller
    for (ExpressionExperiment experiment : ees) {
        // NOTE(review): the lookup arguments do not depend on the experiment, yet the vectors are fetched
        // once per experiment - confirm whether this repetition is intended.
        final Collection<DoubleVectorValueObject> diffExVectors = this.getDiffExVectors(diffExResultSetId, threshold, max);
        this.addExperimentGeneVectors(levels, experiment, diffExVectors, keepGeneNonSpecific, consolidateMode);
    }
    return levels;
}
Also used : ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) ExperimentExpressionLevelsValueObject(ubic.gemma.model.expression.bioAssayData.ExperimentExpressionLevelsValueObject) Transactional(org.springframework.transaction.annotation.Transactional)

Example 18 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorServiceImpl method getExpressionLevelsPca.

/**
 * Collects expression level value objects for the given experiments, based on the top loaded vectors of a
 * principal component.
 *
 * @param ees                 experiments to process, in iteration order
 * @param limit               limit passed to {@code svdService.getTopLoadedVectors}
 * @param component           component passed to {@code svdService.getTopLoadedVectors}
 * @param keepGeneNonSpecific passed through to {@code addExperimentGeneVectors}
 * @param consolidateMode     passed through to {@code addExperimentGeneVectors}
 * @return the collection that {@code addExperimentGeneVectors} was asked to fill for each experiment
 */
@Override
@Transactional(readOnly = true)
public Collection<ExperimentExpressionLevelsValueObject> getExpressionLevelsPca(Collection<ExpressionExperiment> ees, int limit, int component, boolean keepGeneNonSpecific, String consolidateMode) {
    // initial capacity is the number of experiments
    final Collection<ExperimentExpressionLevelsValueObject> levels = new ArrayList<>(ees.size());
    // Adapted from DEDV controller
    for (ExpressionExperiment experiment : ees) {
        // only the values of the returned map are used
        final Collection<DoubleVectorValueObject> topLoaded = svdService.getTopLoadedVectors(experiment.getId(), component, limit).values();
        this.addExperimentGeneVectors(levels, experiment, topLoaded, keepGeneNonSpecific, consolidateMode);
    }
    return levels;
}
Also used : ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) ExperimentExpressionLevelsValueObject(ubic.gemma.model.expression.bioAssayData.ExperimentExpressionLevelsValueObject) Transactional(org.springframework.transaction.annotation.Transactional)

Example 19 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorServiceImpl method getDiffExVectors.

/**
 * Retrieves the processed data vectors for the probes of a differential expression result set, annotated with
 * their p-values and sorted by p-value.
 *
 * @param resultSetId        ID of the result set to load
 * @param threshold          threshold passed to {@code findInResultSet}
 * @param maxNumberOfResults maximum number of results passed to {@code findInResultSet}
 * @return the vectors in ascending p-value order, with vectors lacking a p-value first; {@code null} if no
 *         result set with the given ID could be loaded
 */
@Override
public List<DoubleVectorValueObject> getDiffExVectors(Long resultSetId, Double threshold, int maxNumberOfResults) {
    ExpressionAnalysisResultSet ar = differentialExpressionResultService.loadAnalysisResultSet(resultSetId);
    if (ar == null) {
        Log.warn(this.getClass(), "No diff ex result set with ID=" + resultSetId);
        // null (not an empty list) is kept for backward compatibility with existing callers
        return null;
    }
    differentialExpressionResultService.thawLite(ar);
    BioAssaySet analyzedSet = ar.getAnalysis().getExperimentAnalyzed();
    List<DifferentialExpressionValueObject> ee2probeResults = differentialExpressionResultService.findInResultSet(ar, threshold, maxNumberOfResults, ProcessedExpressionDataVectorServiceImpl.DIFFEX_MIN_NUMBER_OF_RESULTS);
    Collection<Long> probes = new HashSet<>();
    // Map<CompositeSequenceId, pValue>
    // using id instead of entity for map key because want to use a value object for retrieval later
    Map<Long, Double> pvalues = new HashMap<>();
    for (DifferentialExpressionValueObject par : ee2probeResults) {
        probes.add(par.getProbeId());
        pvalues.put(par.getProbeId(), par.getP());
    }
    Collection<DoubleVectorValueObject> processedDataArraysByProbe = this.getProcessedDataArraysByProbeIds(analyzedSet, probes);
    List<DoubleVectorValueObject> dedvs = new ArrayList<>(processedDataArraysByProbe);
    /*
     * Resort: attach the p-values, then order by them.
     */
    for (DoubleVectorValueObject v : dedvs) {
        v.setPvalue(pvalues.get(v.getDesignElement().getId()));
    }
    Collections.sort(dedvs, new Comparator<DoubleVectorValueObject>() {

        @Override
        public int compare(DoubleVectorValueObject o1, DoubleVectorValueObject o2) {
            Double p1 = o1.getPvalue();
            Double p2 = o2.getPvalue();
            if (p1 == null) {
                // Two missing p-values must compare as equal; answering -1 in both directions violates the
                // Comparator contract and can make the sort throw IllegalArgumentException.
                return p2 == null ? 0 : -1;
            }
            if (p2 == null) {
                return 1;
            }
            return p1.compareTo(p2);
        }
    });
    return dedvs;
}
Also used : DifferentialExpressionValueObject(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionValueObject) ExpressionAnalysisResultSet(ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet) BioAssaySet(ubic.gemma.model.expression.experiment.BioAssaySet) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)

Example 20 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class DiffExTest method testCountData.

/**
 * Test differential expression analysis on RNA-seq data. See bug 3383. R code in voomtest.R
 * <p>
 * Loads GSE29006 afresh, imports its experimental design and count data from classpath text files, and then runs
 * the analysis twice - without and with weights - checking the results for probe ENSG00000000938 each time.
 *
 * @throws Exception if fetching the experiment or reading the data files fails
 */
@Test
public void testCountData() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    // start from a clean slate: remove any copy of the experiment left in the system
    ExpressionExperiment ee = eeService.findByShortName("GSE29006");
    if (ee != null) {
        eeService.remove(ee);
    }
    assertTrue(eeService.findByShortName("GSE29006") == null);
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // keep the original exception as the cause so the failure can be diagnosed
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = eeService.thaw(ee);
    try (InputStream is = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_design.txt")) {
        assertNotNull(is);
        experimentalDesignImporter.importDesign(ee, is);
    }
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    ArrayDesign targetArrayDesign;
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt")) {
        // getResourceAsStream returns null for a missing resource; fail clearly, as for the design file above
        assertNotNull(countData);
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        Collection<ExperimentalFactor> experimentalFactors = ee.getExperimentalDesign().getExperimentalFactors();
        assertEquals(1, experimentalFactors.size());
        List<String> probeNames = countMatrix.getRowNames();
        assertEquals(199, probeNames.size());
        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        // the experiment has 8 samples but the data has 4 columns so allow missing samples
        // GSM718707 GSM718708 GSM718709 GSM718710
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, null, 36, true, true);
    }
    // re-thaw to pick up the addCountData() updates
    // NOTE(review): confirm that thaw() also covers the raw and processed vectors
    ee = eeService.thaw(ee);
    // verify rows and columns
    Collection<DoubleVectorValueObject> processedDataArrays = processedExpressionDataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(4, v.getBioAssays().size());
    }
    // I confirmed that log2cpm is working same as voom here; not bothering to test directly.
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM718709", 320383);
    // DE analysis without weights to assist comparison to R
    DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
    config.setUseWeights(false);
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(ee, config);
    assertNotNull(analyses);
    assertEquals(1, analyses.size());
    DifferentialExpressionAnalysis results = analyses.iterator().next();
    boolean found = false;
    ExpressionAnalysisResultSet resultSet = results.getResultSets().iterator().next();
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        if (r.getProbe().getName().equals("ENSG00000000938")) {
            found = true;
            ContrastResult contrast = r.getContrasts().iterator().next();
            // R: 0.006190738; coeff = 2.2695215; t=12.650422; R with our weights: 0.009858270, 2.2317534; t=9.997007
            assertEquals(0.007055717, r.getPvalue(), 0.00001);
            // up to sign
            assertEquals(2.2300049, Math.abs(contrast.getCoefficient()), 0.001);
            break;
        }
    }
    assertTrue(found);
    // With weights
    config = new DifferentialExpressionAnalysisConfig();
    // <----
    config.setUseWeights(true);
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    analyses = analyzer.run(ee, config);
    results = analyses.iterator().next();
    resultSet = results.getResultSets().iterator().next();
    // reset the flag so the weighted pass cannot succeed without actually checking the probe
    found = false;
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        if (r.getProbe().getName().equals("ENSG00000000938")) {
            found = true;
            assertEquals(1, r.getContrasts().size());
            ContrastResult contrast = r.getContrasts().iterator().next();
            // yes!
            assertEquals(2.232816, Math.abs(contrast.getCoefficient()), 0.001);
            assertEquals(0.000311, contrast.getPvalue(), 0.00001);
            assertEquals(56.66342, Math.abs(contrast.getTstat()), 0.001);
            assertEquals(0.007068, r.getPvalue(), 0.00001);
            break;
        }
    }
    assertTrue(found);
}
Also used : InputStream(java.io.InputStream) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) DifferentialExpressionAnalysis(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis) DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) ExpressionAnalysisResultSet(ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) ContrastResult(ubic.gemma.model.analysis.expression.diff.ContrastResult) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Aggregations

DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)31 StopWatch (org.apache.commons.lang3.time.StopWatch)13 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)9 Test (org.junit.Test)6 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)6 BioAssayValueObject (ubic.gemma.model.expression.bioAssay.BioAssayValueObject)6 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)5 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)5 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)5 Gene (ubic.gemma.model.genome.Gene)5 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)4 ExperimentExpressionLevelsValueObject (ubic.gemma.model.expression.bioAssayData.ExperimentExpressionLevelsValueObject)4 GeneValueObject (ubic.gemma.model.genome.gene.GeneValueObject)4 InputStream (java.io.InputStream)3 Transactional (org.springframework.transaction.annotation.Transactional)3 DoubleMatrixReader (ubic.basecode.io.reader.DoubleMatrixReader)3 GeoDomainObjectGenerator (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator)3 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)3 ProbeLoading (ubic.gemma.model.analysis.expression.pca.ProbeLoading)3 VisualizationValueObject (ubic.gemma.web.controller.visualization.VisualizationValueObject)3