Search in sources :

Example 81 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class DataUpdater method addAffyExonArrayData.

/**
 * Replaces the raw data vectors of an experiment using an existing apt-probeset-summarize output file. Use when we
 * want to avoid downloading the CEL files etc., for example if GEO doesn't have them and we ran
 * apt-probeset-summarize ourselves. The experiment must use exactly one platform.
 *
 * @param ee                  the experiment whose raw vectors are replaced
 * @param pathToAptOutputFile path of the APT output file to read the data from
 * @throws IOException              declared for callers; presumably raised while reading the APT output file
 * @throws IllegalArgumentException if the experiment uses no platform, or more than one platform
 * @throws IllegalStateException    if processing the APT output produced no vectors
 */
public void addAffyExonArrayData(ExpressionExperiment ee, String pathToAptOutputFile) throws IOException {
    Collection<ArrayDesign> ads = experimentService.getArrayDesignsUsed(ee);
    if (ads.isEmpty()) {
        // Fail with a meaningful message instead of a bare NoSuchElementException from iterator().next().
        throw new IllegalArgumentException("Can't handle experiments with no platform when passing APT output file");
    }
    if (ads.size() > 1) {
        throw new IllegalArgumentException("Can't handle experiments with more than one platform when passing APT output file");
    }
    ArrayDesign ad = ads.iterator().next();
    ad = arrayDesignService.thaw(ad);
    ee = experimentService.thawLite(ee);
    Taxon primaryTaxon = ad.getPrimaryTaxon();
    ArrayDesign targetPlatform = this.prepareTargetPlatformForExonArrays(primaryTaxon);
    AffyPowerToolsProbesetSummarize apt = new AffyPowerToolsProbesetSummarize();
    Collection<RawExpressionDataVector> vectors = apt.processData(ee, pathToAptOutputFile, targetPlatform);
    if (vectors.isEmpty()) {
        throw new IllegalStateException("No vectors were returned for " + ee);
    }
    // Keep working with the instance returned by the service, as the other data-replacing methods of this
    // class do, so that the audit and postprocessing steps below see the experiment after replacement.
    ee = experimentService.replaceRawVectors(ee, vectors);
    if (!targetPlatform.equals(ad)) {
        // The data ended up on a different platform than the one originally used: record the switch.
        AuditEventType eventType = ExpressionExperimentPlatformSwitchEvent.Factory.newInstance();
        auditTrailService.addUpdateEvent(ee, eventType, "Switched in course of updating vectors using AffyPowerTools (from " + ad.getShortName() + " to " + targetPlatform.getShortName() + ")");
    }
    this.audit(ee, "Data vector input from APT output file " + pathToAptOutputFile + " on " + targetPlatform, true);
    this.postprocess(ee);
}
Also used : RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) AuditEventType(ubic.gemma.model.common.auditAndSecurity.eventType.AuditEventType) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Taxon(ubic.gemma.model.genome.Taxon) AffyPowerToolsProbesetSummarize(ubic.gemma.core.loader.expression.AffyPowerToolsProbesetSummarize)

Example 82 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class DataUpdater method log2cpmFromCounts.

/**
 * Back-fills log2cpm data for an experiment for which only counts are available. Not intended for routine use,
 * because new experiments get log2cpm computed when they are loaded.
 * <p>
 * The count quantitation type is made non-preferred before the log2cpm data is added. If anything fails after
 * that point, the error is logged (not rethrown) and the count quantitation type is marked preferred again.
 *
 * @param ee the experiment to add log2cpm data to
 * @param qt the quantitation type of the existing count data
 */
public void log2cpmFromCounts(ExpressionExperiment ee, QuantitationType qt) {
    ee = experimentService.thawLite(ee);

    // The counts must come from the raw vectors, not from data that has been normalized etc.
    Collection<RawExpressionDataVector> countVectors = rawExpressionDataVectorService.find(qt);
    ExpressionDataDoubleMatrix countMatrix = new ExpressionDataDoubleMatrix(countVectors);

    try {
        // Demote the count quantitation type: the log2cpm data is meant to become the preferred one.
        qt.setIsPreferred(false);
        qtService.update(qt);

        // Thaw again so that the updated QT is attached.
        ee = experimentService.thawLite(ee);

        QuantitationType cpmQt = this.makelog2cpmQt();
        DoubleMatrix1D columnTotals = MatrixStats.colSums(countMatrix.getMatrix());
        DoubleMatrix<CompositeSequence, BioMaterial> cpmValues = MatrixStats.convertToLog2Cpm(countMatrix.getMatrix(), columnTotals);
        ExpressionDataDoubleMatrix cpmMatrix = new ExpressionDataDoubleMatrix(ee, cpmQt, cpmValues);
        assert cpmMatrix.getQuantitationTypes().iterator().next().getIsPreferred();

        Collection<ArrayDesign> platforms = experimentService.getArrayDesignsUsed(ee);
        if (platforms.size() > 1) {
            // NOTE(review): thrown inside the try, so it is logged and swallowed by the handler below.
            throw new IllegalArgumentException("Cannot apply to multiplatform data sets");
        }
        this.addData(ee, platforms.iterator().next(), cpmMatrix);
    } catch (Exception e) {
        DataUpdater.log.error(e, e);
        // Try to recover: put the counts back as the preferred data.
        qt.setIsPreferred(true);
        qtService.update(qt);
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ConfigurationException(org.apache.commons.configuration.ConfigurationException) PreprocessingException(ubic.gemma.core.analysis.preprocess.PreprocessingException) IOException(java.io.IOException)

Example 83 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class DataUpdater method addAffyExonArrayData.

/**
 * Fetches the raw data files for the experiment's accession, summarizes them with AffyPowerTools and installs the
 * result as the experiment's raw vectors. Replaces any existing "preferred" data. Must be a single-platform study.
 *
 * @param ee the experiment to update
 * @param ad the platform currently associated with the experiment; compared with the target platform to decide
 *           whether a platform-switch audit event is recorded
 */
// Possible external use
@SuppressWarnings({ "unused", "WeakerAccess" })
public void addAffyExonArrayData(ExpressionExperiment ee, ArrayDesign ad) {
    Collection<LocalFile> rawFiles = new RawDataFetcher().fetch(ee.getAccession().getAccession());
    if (rawFiles.isEmpty()) {
        throw new RuntimeException("Data was apparently not available");
    }

    ad = arrayDesignService.thaw(ad);
    ee = experimentService.thawLite(ee);

    ArrayDesign targetPlatform = this.prepareTargetPlatformForExonArrays(ad.getPrimaryTaxon());
    assert !targetPlatform.getCompositeSequences().isEmpty();

    AffyPowerToolsProbesetSummarize summarizer = new AffyPowerToolsProbesetSummarize();
    Collection<RawExpressionDataVector> newVectors = summarizer.processExonArrayData(ee, targetPlatform, rawFiles);
    if (newVectors.isEmpty()) {
        throw new IllegalStateException("No vectors were returned for " + ee);
    }

    ee = experimentService.replaceRawVectors(ee, newVectors);

    boolean platformSwitched = !targetPlatform.equals(ad);
    if (platformSwitched) {
        // Record that the data now lives on a different platform than the one passed in.
        AuditEventType switchEvent = ExpressionExperimentPlatformSwitchEvent.Factory.newInstance();
        String switchNote = "Switched in course of updating vectors using AffyPowerTools (from " + ad.getShortName() + " to " + targetPlatform.getShortName() + ")";
        auditTrailService.addUpdateEvent(ee, switchEvent, switchNote);
    }

    this.audit(ee, "Data vector computation from CEL files using AffyPowerTools for " + targetPlatform, true);
    this.postprocess(ee);
}
Also used : LocalFile(ubic.gemma.model.common.description.LocalFile) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) AuditEventType(ubic.gemma.model.common.auditAndSecurity.eventType.AuditEventType) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Taxon(ubic.gemma.model.genome.Taxon) AffyPowerToolsProbesetSummarize(ubic.gemma.core.loader.expression.AffyPowerToolsProbesetSummarize) RawDataFetcher(ubic.gemma.core.loader.expression.geo.fetcher.RawDataFetcher)

Example 84 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class DataUpdater method replaceData.

/**
 * Replaces the data associated with the experiment (or adds it if there is none). The supplied data's
 * quantitation type is marked as 'preferred'. Note that this replaces the "raw" data, and that existing analyses
 * of the experiment are deleted first.
 * Similar to AffyPowerToolsProbesetSummarize.convertDesignElementDataVectors and code in
 * SimpleExpressionDataLoaderService.
 *
 * @param ee             the experiment to be modified
 * @param targetPlatform the platform for the new data (this can only be used for single-platform data sets)
 * @param data           the data to be used; must have at least one row and exactly one quantitation type
 * @return the experiment, as returned by postprocessing
 * @throws IllegalArgumentException if the experiment uses more than one platform, the data has no rows, or the
 *                                  data does not carry exactly one quantitation type
 * @throws IllegalStateException    if no vectors could be made from the data
 */
public ExpressionExperiment replaceData(ExpressionExperiment ee, ArrayDesign targetPlatform, ExpressionDataDoubleMatrix data) {
    Collection<ArrayDesign> platformsInUse = experimentService.getArrayDesignsUsed(ee);
    if (platformsInUse.size() > 1) {
        throw new IllegalArgumentException("Can only replace data for an experiment that uses one platform; you must switch/merge first and then provide appropriate replacement data.");
    }
    if (data.rows() == 0) {
        throw new IllegalArgumentException("Data had no rows");
    }
    ArrayDesign previousPlatform = platformsInUse.iterator().next();

    Collection<QuantitationType> suppliedQts = data.getQuantitationTypes();
    if (suppliedQts.size() > 1) {
        throw new IllegalArgumentException("Only supports a single quantitation type");
    }
    if (suppliedQts.isEmpty()) {
        throw new IllegalArgumentException("Please supply a quantitation type with the data");
    }
    QuantitationType preferredQt = suppliedQts.iterator().next();
    preferredQt.setIsPreferred(true);

    Collection<RawExpressionDataVector> replacementVectors = this.makeNewVectors(ee, targetPlatform, data, preferredQt);
    if (replacementVectors.isEmpty()) {
        throw new IllegalStateException("no vectors!");
    }

    // Remove all analyses, etc. before swapping in the new vectors.
    analysisUtilService.deleteOldAnalyses(ee);
    ee = experimentService.replaceRawVectors(ee, replacementVectors);

    // Audit if we switched platforms.
    if (!targetPlatform.equals(previousPlatform)) {
        AuditEventType switchEvent = ExpressionExperimentPlatformSwitchEvent.Factory.newInstance();
        String switchNote = "Switched in course of updating vectors using data input (from " + previousPlatform.getShortName() + " to " + targetPlatform.getShortName() + ")";
        auditTrailService.addUpdateEvent(ee, switchEvent, switchNote);
    }
    this.audit(ee, "Data vector replacement for " + targetPlatform, true);

    experimentService.update(ee);
    ee = this.postprocess(ee);
    assert ee.getNumberOfDataVectors() != null;

    // Debug code: every assay should now be on the target platform.
    for (BioAssay assay : ee.getBioAssays()) {
        assert assay.getArrayDesignUsed().equals(targetPlatform);
    }
    return ee;
}
Also used : RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) AuditEventType(ubic.gemma.model.common.auditAndSecurity.eventType.AuditEventType) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 85 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class TableMaintenanceUtilImpl method updateGene2CsEntries.

/**
 * Checks whether the GENE2CS entries need regenerating and, if so, regenerates them.
 * <p>
 * A refresh is triggered when there is no previous update status, when an ArrayDesign is new since the last
 * update, or when an ArrayDesign has an ArrayDesignGeneMappingEvent dated after the last update. After a refresh,
 * or after a failure, the update status is written and emailed. Returns immediately if the running flag is
 * already set.
 */
@Override
@Transactional
public synchronized void updateGene2CsEntries() {
    if (TableMaintenanceUtilImpl.running.get()) {
        return;
    }
    TableMaintenanceUtilImpl.log.debug("Running Gene2CS status check");
    String annotation = "";
    try {
        TableMaintenanceUtilImpl.running.set(true);
        Gene2CsStatus status = this.getLastGene2CsUpdateStatus();

        // With no recorded status there is nothing to compare against, so refresh unconditionally.
        boolean needToRefresh = (status == null);

        if (!needToRefresh) {
            // Any platform created since the last update forces a refresh.
            Collection<Auditable> createdSince = auditEventService.getNewSinceDate(status.getLastUpdate());
            for (Auditable candidate : createdSince) {
                if (!(candidate instanceof ArrayDesign)) {
                    continue;
                }
                needToRefresh = true;
                annotation = candidate + " is new since " + status.getLastUpdate();
                TableMaintenanceUtilImpl.log.debug(annotation);
                break;
            }
        }

        if (!needToRefresh) {
            // Otherwise look for a platform that had probe mapping done after the last update.
            Collection<Auditable> updatedSince = auditEventService.getUpdatedSinceDate(status.getLastUpdate());
            search:
            for (Auditable candidate : updatedSince) {
                if (!(candidate instanceof ArrayDesign)) {
                    continue;
                }
                for (AuditEvent event : auditEventService.getEvents(candidate)) {
                    if (event == null) {
                        // Legacy of ordered-list which could end up with gaps; should not be needed any more.
                        continue;
                    }
                    // instanceof is false for a null event type, so no separate null check is needed.
                    boolean isGeneMapping = event.getEventType() instanceof ArrayDesignGeneMappingEvent;
                    if (isGeneMapping && event.getDate().after(status.getLastUpdate())) {
                        needToRefresh = true;
                        annotation = candidate + " had probe mapping done since: " + status.getLastUpdate();
                        TableMaintenanceUtilImpl.log.debug(annotation);
                        break search;
                    }
                }
            }
        }

        if (!needToRefresh) {
            TableMaintenanceUtilImpl.log.debug("No update of GENE2CS needed");
        } else {
            TableMaintenanceUtilImpl.log.debug("Update of GENE2CS initiated");
            this.generateGene2CsEntries();
            Gene2CsStatus updatedStatus = this.writeUpdateStatus(annotation, null);
            this.sendEmail(updatedStatus);
        }
    } catch (Exception failure) {
        try {
            TableMaintenanceUtilImpl.log.info("Error during attempt to check status or update GENE2CS", failure);
            // Record the failure in the status and notify by email.
            Gene2CsStatus updatedStatus = this.writeUpdateStatus(annotation, failure);
            this.sendEmail(updatedStatus);
        } catch (IOException statusWriteFailure) {
            throw new RuntimeException(statusWriteFailure);
        }
    } finally {
        TableMaintenanceUtilImpl.running.set(false);
    }
}
Also used : Auditable(ubic.gemma.model.common.Auditable) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Gene2CsStatus(ubic.gemma.persistence.model.Gene2CsStatus) AuditEvent(ubic.gemma.model.common.auditAndSecurity.AuditEvent) ArrayDesignGeneMappingEvent(ubic.gemma.model.common.auditAndSecurity.eventType.ArrayDesignGeneMappingEvent) Transactional(org.springframework.transaction.annotation.Transactional)

Aggregations

ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 186; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 43; Test (org.junit.Test): 32; ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment): 26; InputStream (java.io.InputStream): 25; BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay): 24; BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 24; Taxon (ubic.gemma.model.genome.Taxon): 23; BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest): 19; HashSet (java.util.HashSet): 16; RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector): 16; Collection (java.util.Collection): 14; AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest): 13; StopWatch (org.apache.commons.lang3.time.StopWatch): 12; Before (org.junit.Before): 12; BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial): 12; BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension): 9; GZIPInputStream (java.util.zip.GZIPInputStream): 8; SimpleExpressionExperimentMetaData (ubic.gemma.core.loader.expression.simple.model.SimpleExpressionExperimentMetaData): 8; File (java.io.File): 7