Search in sources :

Example 36 with AlreadyExistsInSystemException

use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.

the class DiffExTest method testGSE35930.

/**
 * Test where probes have constant values. See bug 3177.
 */
@Test
public void testGSE35930() throws Exception {
    ExpressionExperiment ee;
    // eeService.remove( eeService.findByShortName( "GSE35930" ) );
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("GSE35930")));
        Collection<?> results = geoService.fetchAndLoad("GSE35930", false, true, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // OK.
        if (e.getData() instanceof List) {
            ee = (ExpressionExperiment) ((List<?>) e.getData()).iterator().next();
        } else {
            ee = (ExpressionExperiment) e.getData();
        }
    }
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    if (ee.getExperimentalDesign().getExperimentalFactors().isEmpty()) {
        ee = eeService.load(ee.getId());
        ee = this.eeService.thawLite(ee);
        try (InputStream is = this.getClass().getResourceAsStream("/data/loader/expression/geo/GSE35930/design.txt")) {
            experimentalDesignImporter.importDesign(ee, is);
        }
        ee = eeService.load(ee.getId());
        ee = this.eeService.thawLite(ee);
    }
    DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(ee, config);
    assertNotNull(analyses);
    assertEquals(1, analyses.size());
    DifferentialExpressionAnalysis results = analyses.iterator().next();
    boolean found = false;
    ExpressionAnalysisResultSet resultSet = results.getResultSets().iterator().next();
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        // this probe has a constant value
        if (r.getProbe().getName().equals("1622910_at")) {
            fail("Should not have found a result for constant probe");
        // found = true;
        // assertTrue( "Got: " + pvalue, pvalue == null || pvalue.equals( Double.NaN ) );
        } else {
            // got to have something...
            found = true;
        }
    }
    assertTrue(found);
}
Also used : InputStream(java.io.InputStream) DifferentialExpressionAnalysis(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis) DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) List(java.util.List) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) ExpressionAnalysisResultSet(ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 37 with AlreadyExistsInSystemException

use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.

the class DiffExTest method testCountData.

/**
 * Test differential expression analysis on RNA-seq data. See bug 3383. R code in voomtest.R
 */
@Test
public void testCountData() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    ExpressionExperiment ee = eeService.findByShortName("GSE29006");
    if (ee != null) {
        eeService.remove(ee);
    }
    assertTrue(eeService.findByShortName("GSE29006") == null);
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        throw new IllegalStateException("Need to remove this data set before test is run");
    }
    ee = eeService.thaw(ee);
    try (InputStream is = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_design.txt")) {
        assertNotNull(is);
        experimentalDesignImporter.importDesign(ee, is);
    }
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    ArrayDesign targetArrayDesign;
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt")) {
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        Collection<ExperimentalFactor> experimentalFactors = ee.getExperimentalDesign().getExperimentalFactors();
        assertEquals(1, experimentalFactors.size());
        List<String> probeNames = countMatrix.getRowNames();
        assertEquals(199, probeNames.size());
        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        // the experiment has 8 samples but the data has 4 columns so allow missing samples
        // GSM718707 GSM718708 GSM718709 GSM718710
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, null, 36, true, true);
    }
    // make sure to do a thawRawAndProcessed() to get the addCountData() updates
    ee = eeService.thaw(ee);
    // verify rows and columns
    Collection<DoubleVectorValueObject> processedDataArrays = processedExpressionDataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(4, v.getBioAssays().size());
    }
    // I confirmed that log2cpm is working same as voom here; not bothering to test directly.
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM718709", 320383);
    // DE analysis without weights to assist comparison to R
    DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
    config.setUseWeights(false);
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(ee, config);
    assertNotNull(analyses);
    assertEquals(1, analyses.size());
    DifferentialExpressionAnalysis results = analyses.iterator().next();
    boolean found = false;
    ExpressionAnalysisResultSet resultSet = results.getResultSets().iterator().next();
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        if (r.getProbe().getName().equals("ENSG00000000938")) {
            found = true;
            ContrastResult contrast = r.getContrasts().iterator().next();
            assertEquals(0.007055717, r.getPvalue(), // R: 0.006190738; coeff = 2.2695215; t=12.650422; R with our weights: 0.009858270, 2.2317534; t=9.997007
            0.00001);
            // up to sign
            assertEquals(2.2300049, Math.abs(contrast.getCoefficient()), 0.001);
            break;
        }
    }
    assertTrue(found);
    // With weights
    config = new DifferentialExpressionAnalysisConfig();
    // <----
    config.setUseWeights(true);
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    analyses = analyzer.run(ee, config);
    results = analyses.iterator().next();
    resultSet = results.getResultSets().iterator().next();
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        if (r.getProbe().getName().equals("ENSG00000000938")) {
            assertEquals(1, r.getContrasts().size());
            ContrastResult contrast = r.getContrasts().iterator().next();
            // yes!
            assertEquals(2.232816, Math.abs(contrast.getCoefficient()), 0.001);
            assertEquals(0.000311, contrast.getPvalue(), 0.00001);
            assertEquals(56.66342, Math.abs(contrast.getTstat()), 0.001);
            assertEquals(0.007068, r.getPvalue(), 0.00001);
            break;
        }
    }
}
Also used : InputStream(java.io.InputStream) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) DifferentialExpressionAnalysis(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis) DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) ExpressionAnalysisResultSet(ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) ContrastResult(ubic.gemma.model.analysis.expression.diff.ContrastResult) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 38 with AlreadyExistsInSystemException

use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.

the class ExonArrayDataAddIntegrationTest method testAddAffyExonArrayDataExpressionExperiment.

@Test
public void testAddAffyExonArrayDataExpressionExperiment() throws Exception {
    if (!hasApt) {
        log.warn("Test skipped due to lack of Affy Power Tools executable");
        return;
    }
    ExpressionExperiment ee;
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
        Collection<?> results = geoService.fetchAndLoad("GSE12135", false, true, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        ee = (ExpressionExperiment) ((List<?>) e.getData()).get(0);
    }
    /*
         * Add the raw data.
         */
    dataUpdater.addAffyExonArrayData(ee);
    experimentService.load(ee.getId());
}
Also used : AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 39 with AlreadyExistsInSystemException

use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.

the class GeoServiceImpl method checkSamplesAreNew.

/**
 * Another common case, typified by samples in GSE3193. We must confirm that all samples included in the data set
 * are not included in other data sets. In GEO this primarily occurs in 'superseries' that combine other series.
 */
private void checkSamplesAreNew(GeoSeries series) {
    Collection<GeoSample> toSkip = new HashSet<>();
    for (GeoSample sample : series.getSamples()) {
        if (!sample.appearsInMultipleSeries()) {
            // nothing to worry about: if this series is not loaded, then we're guaranteed to be new.
            continue;
        }
        Collection<BioAssay> existingBioAssays = bioAssayService.findByAccession(sample.getGeoAccession());
        for (BioAssay ba : existingBioAssays) {
            DatabaseEntry acc = ba.getAccession();
            if (acc == null)
                continue;
            String sampleId = sample.getGeoAccession();
            String existingAcc = acc.getAccession();
            if (existingAcc.equals(sampleId) && ba.getAccession().getExternalDatabase().getName().equals(GeoServiceImpl.GEO_DB_NAME)) {
                AbstractGeoService.log.debug(sampleId + " appears in an expression experiment already in the system, skipping");
                toSkip.add(sample);
            }
        }
    }
    if (!toSkip.isEmpty()) {
        AbstractGeoService.log.info("Found " + toSkip.size() + " samples that are already in the system; they will be removed from the new set (example: " + toSkip.iterator().next().getGeoAccession() + ")");
    }
    for (GeoSample gs : toSkip) {
        series.getSamples().remove(gs);
        series.getSampleCorrespondence().removeSample(gs.getGeoAccession());
    }
    for (GeoDataset gds : series.getDatasets()) {
        for (GeoSubset gSub : gds.getSubsets()) {
            for (GeoSample gs : toSkip) {
                gSub.getSamples().remove(gs);
            }
        }
    }
    // update the description, so we keep some kind of record.
    if (toSkip.size() > 0) {
        series.setSummaries(series.getSummaries() + "\nNote: " + toSkip.size() + " samples from this series, which appear in other Expression Experiments in Gemma, " + "were not imported from the GEO source. The following samples were removed: " + StringUtils.join(toSkip, ","));
    }
    if (series.getSamples().size() == 0) {
        throw new AlreadyExistsInSystemException("All the samples in " + series + " are in the system already (in other ExpressionExperiments)");
    }
    if (series.getSamples().size() < 2) /* we don't really have a lower limit set anywhere else */
    {
        throw new IllegalStateException("After removing samples already in the system, this data set is too small to load: " + series.getSamples().size() + " left (removed " + toSkip.size() + ")");
    }
    AbstractGeoService.log.info("Series now contains " + series.getSamples().size() + " (removed " + toSkip.size() + ")");
}
Also used : DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 40 with AlreadyExistsInSystemException

use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.

the class GeoServiceImpl method checkForExisting.

private void checkForExisting(Collection<DatabaseEntry> projectedAccessions) {
    if (projectedAccessions == null || projectedAccessions.size() == 0) {
        // that's okay, it might have been a GPL.
        return;
    }
    for (DatabaseEntry entry : projectedAccessions) {
        Collection<ExpressionExperiment> existing = expressionExperimentService.findByAccession(entry);
        if (!existing.isEmpty()) {
            String message = "There is already an expression experiment that matches " + entry.getAccession();
            AbstractGeoService.log.info(message);
            throw new AlreadyExistsInSystemException(message, existing);
        }
    }
}
Also used : DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment)

Aggregations

AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)41 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)33 Test (org.junit.Test)29 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)29 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)27 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)10 Collection (java.util.Collection)9 HashSet (java.util.HashSet)8 Before (org.junit.Before)8 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)8 InputStream (java.io.InputStream)7 List (java.util.List)7 GeoDomainObjectGenerator (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator)6 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)6 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)5 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)5 DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)5 DoubleMatrixReader (ubic.basecode.io.reader.DoubleMatrixReader)4 File (java.io.File)3 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)3