Search in sources :

Example 6 with GeoDomainObjectGenerator

use of ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator in project Gemma by PavlidisLab.

the class LoadExpressionDataCli method doWork.

/**
 * Loads the GEO accessions given on the command line and/or listed in a file.
 * <p>
 * Accessions supplied directly are comma-separated; accessions supplied via a file are read one per line.
 * Blank entries are skipped in both cases. When {@code platformOnly} is set, accessions from the command line
 * are loaded as platforms and each loaded platform is recorded in {@code successObjects}; otherwise each
 * accession is handed to {@code processAccession}.
 *
 * @param args the raw command line arguments
 * @return {@code null} on success, otherwise the exception that stopped processing
 */
@Override
protected Exception doWork(String[] args) {
    Exception err = this.processCommandLine(args);
    if (err != null) {
        return err;
    }
    // Fail fast, before any bean is looked up or reconfigured.
    if (accessions == null && accessionFile == null) {
        return new IllegalArgumentException("You must specify either a file or accessions on the command line");
    }
    try {
        GeoService geoService = this.getBean(GeoService.class);
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
        if (accessions != null) {
            AbstractCLI.log.info("Got accession(s) from command line " + accessions);
            String[] accsToRun = StringUtils.split(accessions, ',');
            for (String accession : accsToRun) {
                accession = StringUtils.strip(accession);
                if (StringUtils.isBlank(accession)) {
                    continue;
                }
                if (platformOnly) {
                    Collection<?> designs = geoService.fetchAndLoad(accession, true, true, false, true, true);
                    if (designs == null) {
                        // fetchAndLoad returns null when nothing could be generated for the accession.
                        AbstractCLI.log.warn("No platforms were loaded for " + accession);
                        continue;
                    }
                    ArrayDesignService ads = this.getBean(ArrayDesignService.class);
                    for (Object object : designs) {
                        assert object instanceof ArrayDesign;
                        ArrayDesign ad = (ArrayDesign) object;
                        ad = ads.thawLite(ad);
                        successObjects.add(ad.getName() + " (" + ad.getExternalReferences().iterator().next().getAccession() + ")");
                    }
                } else {
                    this.processAccession(geoService, accession);
                }
            }
        }
        if (accessionFile != null) {
            AbstractCLI.log.info("Loading accessions from " + accessionFile);
            // Both the raw stream and the reader are managed, so the file handle is always released.
            try (InputStream is = new FileInputStream(accessionFile);
                BufferedReader br = new BufferedReader(new InputStreamReader(is))) {
                String line;
                while ((line = br.readLine()) != null) {
                    // Strip surrounding whitespace, as is done for command-line accessions.
                    String accession = StringUtils.strip(line);
                    if (StringUtils.isBlank(accession)) {
                        continue;
                    }
                    this.processAccession(geoService, accession);
                }
            }
        }
        this.summarizeProcessing();
    } catch (Exception e) {
        AbstractCLI.log.error(e);
        return e;
    }
    return null;
}
Also used : InputStreamReader(java.io.InputStreamReader) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) PreprocessingException(ubic.gemma.core.analysis.preprocess.PreprocessingException) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) GeoService(ubic.gemma.core.loader.expression.geo.service.GeoService) BufferedReader(java.io.BufferedReader) ArrayDesignService(ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService)

Example 7 with GeoDomainObjectGenerator

use of ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator in project Gemma by PavlidisLab.

the class GeoServiceImpl method addElements.

/**
 * Fills in the elements (composite sequences) of a platform that currently has none.
 * <p>
 * The GEO accession is taken from the first external reference of the platform. The GEO data generated for
 * that accession is converted, every resulting element is re-pointed at {@code targetPlatform}, its biological
 * characteristic is persisted, and the platform is then updated and its report regenerated.
 *
 * @param targetPlatform the platform to fill in; must not have any composite sequences yet
 * @return the same platform instance, now holding the added elements
 * @throws IllegalArgumentException if the platform already has elements
 * @throws IllegalStateException    if no GEO data could be generated for the platform's accession
 */
@Override
public ArrayDesign addElements(ArrayDesign targetPlatform) {
    if (!targetPlatform.getCompositeSequences().isEmpty()) {
        throw new IllegalArgumentException("Only call this if you are filling in an empty platform");
    }
    String geoAccession = targetPlatform.getExternalReferences().iterator().next().getAccession();
    // Prepare the generator BEFORE it is used: make sure it exists, reset it, and restrict it to platforms
    // (same order as in fetchAndLoad).
    if (this.geoDomainObjectGenerator == null) {
        this.geoDomainObjectGenerator = new GeoDomainObjectGenerator();
    } else {
        this.geoDomainObjectGenerator.initialize();
    }
    geoDomainObjectGenerator.setProcessPlatformsOnly(true);
    Collection<? extends GeoData> platforms = geoDomainObjectGenerator.generate(geoAccession);
    if (platforms.isEmpty()) {
        throw new IllegalStateException("No GEO platform data could be generated for " + geoAccession);
    }
    /*
     * We do this to get a fresh instantiation of GeoConverter (prototype scope)
     */
    GeoConverter geoConverter = (GeoConverter) this.beanFactory.getBean("geoConverter");
    geoConverter.setForceConvertElements(true);
    Collection<Object> arrayDesigns = geoConverter.convert(platforms);
    // Only the first converted platform is used as the source of elements.
    Collection<CompositeSequence> els = ((ArrayDesign) arrayDesigns.iterator().next()).getCompositeSequences();
    for (CompositeSequence cs : els) {
        cs.setArrayDesign(targetPlatform);
        cs.setBiologicalCharacteristic((BioSequence) persisterHelper.persist(cs.getBiologicalCharacteristic()));
    }
    AbstractGeoService.log.info("Adding " + els.size() + " elements to " + targetPlatform);
    targetPlatform.getCompositeSequences().addAll(els);
    arrayDesignService.update(targetPlatform);
    this.arrayDesignReportService.generateArrayDesignReport(targetPlatform.getId());
    return targetPlatform;
}
Also used : GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) GeoConverter(ubic.gemma.core.loader.expression.geo.GeoConverter)

Example 8 with GeoDomainObjectGenerator

use of ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator in project Gemma by PavlidisLab.

the class GeoServiceImpl method fetchAndLoad.

/**
 * Given a GEO GSE or GDS (or GPL, but support might not be complete)
 * <ol>
 * <li>Check that it doesn't already exist in the system</li>
 * <li>Download and parse GDS files and GSE file needed</li>
 * <li>Convert the GDS and GSE into a ExpressionExperiment (or just the ArrayDesigns)</li>
 * <li>Load the resulting ExpressionExperiment and/or ArrayDesigns into Gemma</li>
 * </ol>
 *
 * @param geoAccession           the GEO accession to load; an accession starting with "GPL" makes the
 *                               generator process platforms only
 * @param loadPlatformOnly       if true, only the platforms are converted and persisted
 * @param doSampleMatching       requests sample matching; only in effect when {@code splitByPlatform} is false
 * @param splitByPlatform        passed on to the converter; also disables sample matching
 * @param allowSuperSeriesImport if false, a super series is rejected
 * @param allowSubSeriesImport   if false, a sub series is rejected
 * @return the persisted platforms or experiments, or {@code null} if nothing was generated for the accession
 * @throws IllegalStateException if a super series or sub series is found and the matching flag is false
 * @throws RuntimeException      if a full load was requested but the generated data is not a series
 */
@Override
public Collection<?> fetchAndLoad(String geoAccession, boolean loadPlatformOnly, boolean doSampleMatching, boolean splitByPlatform, boolean allowSuperSeriesImport, boolean allowSubSeriesImport) {
    /*
     * We do this to get a fresh instantiation of GeoConverter (prototype scope)
     */
    GeoConverter geoConverter = (GeoConverter) this.beanFactory.getBean("geoConverter");
    if (this.geoDomainObjectGenerator == null) {
        this.geoDomainObjectGenerator = new GeoDomainObjectGenerator();
    } else {
        this.geoDomainObjectGenerator.initialize();
    }
    // Sample matching is not done when the data set is split by platform.
    boolean matchSamples = doSampleMatching && !splitByPlatform;
    geoDomainObjectGenerator.setProcessPlatformsOnly(geoAccession.startsWith("GPL") || loadPlatformOnly);
    geoDomainObjectGenerator.setDoSampleMatching(matchSamples);
    Collection<DatabaseEntry> projectedAccessions = geoDomainObjectGenerator.getProjectedAccessions(geoAccession);
    this.checkForExisting(projectedAccessions);
    // Generation is the same for both modes; only the conversion and persistence differ.
    Collection<? extends GeoData> parseResult = geoDomainObjectGenerator.generate(geoAccession);
    if (parseResult.isEmpty()) {
        AbstractGeoService.log.warn("Got no results");
        return null;
    }
    if (loadPlatformOnly) {
        geoConverter.setForceConvertElements(true);
        Collection<Object> arrayDesigns = geoConverter.convert(parseResult);
        return persisterHelper.persist(arrayDesigns);
    }
    AbstractGeoService.log.debug("Generated GEO domain objects for " + geoAccession);
    Object obj = parseResult.iterator().next();
    if (!(obj instanceof GeoSeries)) {
        throw new RuntimeException("Got a " + obj.getClass().getName() + " instead of a " + GeoSeries.class.getName() + " (you may need to load platforms only).");
    }
    GeoSeries series = (GeoSeries) obj;
    String seriesAccession = series.getGeoAccession();
    if (series.isSuperSeries()) {
        if (allowSuperSeriesImport) {
            AbstractGeoService.log.info(" ========= SuperSeries Detected! =========");
            AbstractGeoService.log.info("Please make sure you want to import this as a superseries and not the individual subseries");
        } else {
            throw new IllegalStateException("SuperSeries detected, set 'allowSuperSeriesImport' to 'true' to allow this dataset to load");
        }
    }
    if (series.isSubSeries()) {
        if (allowSubSeriesImport) {
            AbstractGeoService.log.info(" ========= Subseries Detected! =========");
            AbstractGeoService.log.info("Please make sure you want to import this as a subseries and not the superseries");
        } else {
            throw new IllegalStateException("SubSeries detected, set 'allowSubSeriesImport' to 'true' to allow this dataset to load");
        }
    }
    this.confirmPlatformUniqueness(series, matchSamples);
    ArrayDesignsForExperimentCache c = new ArrayDesignsForExperimentCache();
    this.matchToExistingPlatforms(geoConverter, series, c);
    this.checkSamplesAreNew(series);
    this.getSubSeriesInformation(series);
    geoConverter.clear();
    geoConverter.setSplitByPlatform(splitByPlatform);
    // noinspection unchecked
    Collection<ExpressionExperiment> result = (Collection<ExpressionExperiment>) geoConverter.convert(series);
    this.check(result);
    this.getPubMedInfo(result);
    AbstractGeoService.log.debug("Converted " + seriesAccession);
    assert persisterHelper != null;
    Collection<ExpressionExperiment> persistedResult = new HashSet<>();
    for (ExpressionExperiment ee : result) {
        // The cache is threaded through preparation and persistence of each experiment.
        c = expressionExperimentPrePersistService.prepare(ee, c);
        ee = persisterHelper.persist(ee, c);
        persistedResult.add(ee);
        AbstractGeoService.log.debug("Persisted " + seriesAccession);
    }
    this.updateReports(persistedResult);
    return persistedResult;
}
Also used : DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) ArrayDesignsForExperimentCache(ubic.gemma.persistence.util.ArrayDesignsForExperimentCache) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) GeoConverter(ubic.gemma.core.loader.expression.geo.GeoConverter)

Example 9 with GeoDomainObjectGenerator

use of ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator in project Gemma by PavlidisLab.

the class MeanVarianceServiceTest method testServiceCreateCountData.

/**
 * Loads GSE29006, attaches count and RPKM data read from test resource files, and checks the mean-variance
 * relation created for the experiment.
 * <p>
 * Any existing copy of the experiment is removed first. Adding the count data is first expected to fail with
 * an {@link IllegalArgumentException}, and is then repeated with the last flag set to {@code true}.
 */
@Test
public final void testServiceCreateCountData() throws Exception {
    // so it doesn't look for soft files
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    ee = eeService.findByShortName("GSE29006");
    if (ee != null) {
        eeService.remove(ee);
    }
    assertNull(eeService.findByShortName("GSE29006"));
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = eeService.thaw(ee);
    qt = this.createOrUpdateQt(ScaleType.COUNT);
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt");
        InputStream rpkmData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_RPKM.test.txt")) {
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        DoubleMatrix<String, String> rpkmMatrix = reader.read(rpkmData);
        List<String> probeNames = countMatrix.getRowNames();
        // we have to find the right generic platform to use.
        ArrayDesign targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        try {
            dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, false);
            fail("Should have gotten an exception");
        } catch (IllegalArgumentException e) {
            // Expected
        }
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, true);
    }
    ee = eeService.thaw(this.ee);
    assertNotNull(ee.getId());
    MeanVarianceRelation mvr = meanVarianceService.create(ee, true);
    // convert byte[] to array[]
    double[] means = MeanVarianceServiceTest.bac.byteArrayToDoubles(mvr.getMeans());
    double[] variances = MeanVarianceServiceTest.bac.byteArrayToDoubles(mvr.getVariances());
    // Use JUnit assertions rather than the 'assert' keyword, which does nothing unless the JVM runs with -ea.
    assertNotNull(means);
    assertNotNull(variances);
    // warning: order may have changed, so compare sorted values
    Arrays.sort(means);
    Arrays.sort(variances);
    // check sizes
    int expectedMeanVarianceLength = 199;
    // NAs removed
    int expectedLowessLength = 197;
    assertEquals(expectedMeanVarianceLength, means.length);
    assertEquals(expectedMeanVarianceLength, variances.length);
    int idx = 0;
    assertEquals(1.037011, means[idx], 0.0001);
    assertEquals(0.00023724336, variances[idx], 0.000001);
    idx = expectedLowessLength - 1;
    assertEquals(15.23313, means[idx], 0.0001);
    assertEquals(4.84529, variances[idx], 0.0001);
}
Also used : InputStream(java.io.InputStream) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) MeanVarianceRelation(ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 10 with GeoDomainObjectGenerator

use of ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator in project Gemma by PavlidisLab.

the class DataUpdaterTest method testLoadRNASeqData.

/*
 * More realistic test of RNA seq, using GSE19166: the experiment is loaded (or reused if it already
 * exists), count and RPKM matrices are read from test resource files and added to it, and the resulting
 * quantitation types, platform assignment and data vectors are verified.
 */
@Test
public void testLoadRNASeqData() throws Exception {
    // Number of probes (rows) in the test matrices.
    final int expectedProbeCount = 199;
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    ExpressionExperiment experiment;
    try {
        Collection<?> loaded = geoService.fetchAndLoad("GSE19166", false, false, false);
        experiment = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        // Reuse the experiment carried by the exception.
        List<?> existing = (List<?>) alreadyLoaded.getData();
        experiment = (ExpressionExperiment) existing.get(0);
    }
    experiment = experimentService.thaw(experiment);
    // Read both matrices from the classpath.
    DoubleMatrixReader matrixReader = new DoubleMatrixReader();
    try (InputStream countStream = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE19166_expression_count.test.txt");
        InputStream rpkmStream = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE19166_expression_RPKM.test.txt")) {
        DoubleMatrix<String, String> counts = matrixReader.read(countStream);
        DoubleMatrix<String, String> rpkms = matrixReader.read(rpkmStream);
        List<String> probeIds = counts.getRowNames();
        assertEquals(expectedProbeCount, probeIds.size());
        // A generic platform matching the probes is needed as the target.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeIds, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        assertEquals(expectedProbeCount, targetArrayDesign.getCompositeSequences().size());
        // The step under test.
        dataUpdater.addCountData(experiment, targetArrayDesign, counts, rpkms, 36, true, false);
    }
    experiment = experimentService.thaw(experiment);
    // Expected quantitation types: log2cpm, counts, rpkm, and counts-masked ('preferred')
    assertEquals(4, experiment.getQuantitationTypes().size());
    for (BioAssay assay : experiment.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }
    assertNotNull(experiment.getNumberOfDataVectors());
    assertEquals(expectedProbeCount, experiment.getNumberOfDataVectors().intValue());
    // GSM475204 GSM475205 GSM475206 GSM475207 GSM475208 GSM475209
    // 3949585 3929008 3712314 3693219 3574068 3579631
    ExpressionDataDoubleMatrix processedMatrix = dataMatrixService.getProcessedExpressionDataMatrix(experiment);
    assertEquals(expectedProbeCount, processedMatrix.rows());
    TestUtils.assertBAs(experiment, targetArrayDesign, "GSM475204", 3949585);
    assertEquals(3 * expectedProbeCount, experiment.getRawExpressionDataVectors().size());
    assertEquals(expectedProbeCount, experiment.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedVectors = dataVectorService.getProcessedDataArrays(experiment);
    assertEquals(expectedProbeCount, processedVectors.size());
    for (DoubleVectorValueObject vector : processedVectors) {
        assertEquals(6, vector.getBioAssays().size());
    }
    // Processed vectors must also be retrievable for a freshly loaded copy of the experiment.
    boolean hasProcessedVectors = !dataVectorService.getProcessedDataVectors(experimentService.load(experiment.getId())).isEmpty();
    assertTrue(hasProcessedVectors);
}
Also used : InputStream(java.io.InputStream) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Aggregations

GeoDomainObjectGenerator (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator)12 Test (org.junit.Test)7 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)7 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)7 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)6 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)6 InputStream (java.io.InputStream)5 DoubleMatrixReader (ubic.basecode.io.reader.DoubleMatrixReader)4 DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)3 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)2 GeoConverter (ubic.gemma.core.loader.expression.geo.GeoConverter)2 GeoService (ubic.gemma.core.loader.expression.geo.service.GeoService)2 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)2 BufferedReader (java.io.BufferedReader)1 FileInputStream (java.io.FileInputStream)1 FileNotFoundException (java.io.FileNotFoundException)1 InputStreamReader (java.io.InputStreamReader)1 Collection (java.util.Collection)1 Before (org.junit.Before)1 PreprocessingException (ubic.gemma.core.analysis.preprocess.PreprocessingException)1