Search in sources :

Example 41 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class ExpressionExperimentDaoImpl method remove.

/**
 * Deletes an expression experiment along with its raw and processed data vectors, the bioassay dimensions
 * and quantitation types gathered while removing those vectors, and its bioassays and biomaterials.
 * Investigators are not deleted; only the association to them is cleared.
 * <p>
 * The order of flush/clear/update/delete calls below is significant; do not reorder them casually.
 * <p>
 * Any exception raised during removal is logged (with stack trace) and NOT rethrown, so callers cannot
 * detect a failed removal from this method's outcome.
 *
 * @param ee the experiment to remove; must not be null
 * @throws IllegalArgumentException if {@code ee} is null
 */
@Override
public void remove(final ExpressionExperiment ee) {
    if (ee == null)
        throw new IllegalArgumentException();
    Session session = this.getSessionFactory().getCurrentSession();
    try {
        // Note that links and analyses are deleted separately - see the ExpressionExperimentService.
        // At this point, the ee is probably still in the session, as the service already has gotten it
        // in this transaction.
        session.flush();
        session.clear();
        // Associate ee with the freshly cleared session again, without acquiring a database lock.
        session.buildLockRequest(LockOptions.NONE).lock(ee);
        Hibernate.initialize(ee.getAuditTrail());
        // Filled in by removeDataVectors / removeProcessedVectors (presumably with the dimensions and
        // quantitation types referenced by the removed vectors); deleted further down.
        Set<BioAssayDimension> dims = new HashSet<>();
        Set<QuantitationType> qts = new HashSet<>();
        Collection<RawExpressionDataVector> designElementDataVectors = ee.getRawExpressionDataVectors();
        Hibernate.initialize(designElementDataVectors);
        // Detach the vectors from the experiment; the local reference is kept so they can be put back later.
        ee.setRawExpressionDataVectors(null);
        /*
         * We don't remove the investigators, just breaking the association.
         */
        ee.getInvestigators().clear();
        int count = 0;
        if (designElementDataVectors != null) {
            count = this.removeDataVectors(session, dims, qts, designElementDataVectors, count);
        }
        Collection<ProcessedExpressionDataVector> processedVectors = ee.getProcessedExpressionDataVectors();
        Hibernate.initialize(processedVectors);
        if (processedVectors != null && processedVectors.size() > 0) {
            ee.setProcessedExpressionDataVectors(null);
            this.removeProcessedVectors(session, dims, qts, count, processedVectors);
        }
        session.flush();
        session.clear();
        session.update(ee);
        AbstractDao.log.info("Removing BioAssay Dimensions ...");
        for (BioAssayDimension dim : dims) {
            // Break the link to the bioassays before deleting the dimension itself.
            dim.getBioAssays().clear();
            session.update(dim);
            session.delete(dim);
        }
        dims.clear();
        session.flush();
        AbstractDao.log.info("Removing Bioassays and biomaterials ...");
        // keep to put back in the object.
        Map<BioAssay, BioMaterial> copyOfRelations = new HashMap<>();
        Collection<BioMaterial> bioMaterialsToDelete = new HashSet<>();
        Collection<BioAssay> bioAssays = ee.getBioAssays();
        this.removeBioAssays(session, copyOfRelations, bioMaterialsToDelete, bioAssays);
        AbstractDao.log.info("Last bits ...");
        // We remove them here in case they are associated to more than one bioassay-- no cascade is possible.
        for (BioMaterial bm : bioMaterialsToDelete) {
            session.delete(bm);
        }
        for (QuantitationType qt : qts) {
            session.delete(qt);
        }
        session.flush();
        session.delete(ee);
        /*
         * Put transient instances back. This is possibly useful for clearing ACLS.
         */
        ee.setProcessedExpressionDataVectors(processedVectors);
        ee.setRawExpressionDataVectors(designElementDataVectors);
        for (BioAssay ba : ee.getBioAssays()) {
            ba.setSampleUsed(copyOfRelations.get(ba));
        }
        AbstractDao.log.info("Deleted " + ee);
    } catch (Exception e) {
        // Pass the throwable as the cause so the stack trace is logged; logging the exception object
        // alone records only its toString().
        // NOTE(review): the failure is still swallowed here, as before. Consider whether callers
        // (and the surrounding transaction) should see it instead.
        AbstractDao.log.error("Error while removing " + ee, e);
    } finally {
        AbstractDao.log.info("Finalising remove method.");
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) NotImplementedException(org.apache.commons.lang.NotImplementedException) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 42 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class GeoConverterImpl method convertVectorsForPlatform.

/**
 * For data coming from a single platform, create vectors.
 * <p>
 * Column names and descriptions are read from the first sample. The first column (the ID or ID_REF) is
 * skipped; every other column is treated as a quantitation type. A quantitation type is added to the
 * experiment only if at least one vector was created for it.
 *
 * @param values         A GeoValues object holding the parsed results.
 * @param expExp         experiment that receives the created vectors and quantitation types
 * @param datasetSamples samples of the dataset; must not be empty
 * @param geoPlatform    platform the data belongs to; nothing is converted if it reports that data from
 *                       GEO should not be used
 * @throws IllegalStateException if the bioassay dimension built from the samples has no bioassays
 */
private void convertVectorsForPlatform(GeoValues values, ExpressionExperiment expExp, List<GeoSample> datasetSamples, GeoPlatform geoPlatform) {
    assert datasetSamples.size() > 0 : "No samples in dataset";
    if (!geoPlatform.useDataFromGeo()) {
        // see bug 4181
        GeoConverterImpl.log.warn("Platform characteristics indicate data from GEO should be ignored or will not be present anyway (" + geoPlatform + ")");
        return;
    }
    GeoConverterImpl.log.info("Converting vectors for " + geoPlatform.getGeoAccession() + ", " + datasetSamples.size() + " samples.");
    BioAssayDimension bioAssayDimension = this.convertGeoSampleList(datasetSamples, expExp);
    if (bioAssayDimension.getBioAssays().size() == 0)
        throw new IllegalStateException("No bioAssays in the BioAssayDimension");
    this.sanityCheckQuantitationTypes(datasetSamples);
    List<String> quantitationTypes = datasetSamples.iterator().next().getColumnNames();
    List<String> quantitationTypeDescriptions = datasetSamples.iterator().next().getColumnDescriptions();
    boolean first = true;
    for (String quantitationType : quantitationTypes) {
        // skip the first quantitationType, it's the ID or ID_REF.
        if (first) {
            first = false;
            continue;
        }
        int columnAccordingToSample = quantitationTypes.indexOf(quantitationType);
        int quantitationTypeIndex = values.getQuantitationTypeIndex(geoPlatform, quantitationType);
        GeoConverterImpl.log.debug("Processing " + quantitationType + " (column=" + quantitationTypeIndex + " - according to sample, it's " + columnAccordingToSample + ")");
        Map<String, List<Object>> dataVectors = this.makeDataVectors(values, datasetSamples, quantitationTypeIndex);
        if (dataVectors == null || dataVectors.size() == 0) {
            GeoConverterImpl.log.debug("No data for " + quantitationType + " (column=" + quantitationTypeIndex + ")");
            continue;
        }
        GeoConverterImpl.log.info(dataVectors.size() + " data vectors for " + quantitationType);

        // Take the example value from the first non-empty vector. Null or empty vectors are tolerated by
        // the conversion loop below, so they must not make this lookup blow up either.
        Object exampleValue = null;
        boolean foundExample = false;
        for (List<Object> candidate : dataVectors.values()) {
            if (candidate != null && !candidate.isEmpty()) {
                exampleValue = candidate.get(0);
                foundExample = true;
                break;
            }
        }
        if (!foundExample) {
            // Every vector is null or empty: the loop below would retain nothing, so report and move on.
            GeoConverterImpl.log.info("No vectors were retained for " + quantitationType + " -- usually this is due to all values being missing.");
            continue;
        }

        QuantitationType qt = QuantitationType.Factory.newInstance();
        qt.setName(quantitationType);
        String description = quantitationTypeDescriptions.get(columnAccordingToSample);
        qt.setDescription(description);
        QuantitationTypeParameterGuesser.guessQuantitationTypeParameters(qt, quantitationType, description, exampleValue);
        int count = 0;
        int skipped = 0;
        for (Map.Entry<String, List<Object>> entry : dataVectors.entrySet()) {
            String designElementName = entry.getKey();
            List<Object> dataVector = entry.getValue();
            if (dataVector == null || dataVector.isEmpty())
                continue;
            RawExpressionDataVector vector = this.convertDesignElementDataVector(geoPlatform, expExp, bioAssayDimension, designElementName, dataVector, qt);
            if (vector == null) {
                // The converter returns null when the vector has no usable data.
                skipped++;
                if (GeoConverterImpl.log.isDebugEnabled())
                    GeoConverterImpl.log.debug("Null vector for DE=" + designElementName + " QT=" + quantitationType);
                continue;
            }
            if (GeoConverterImpl.log.isTraceEnabled()) {
                GeoConverterImpl.log.trace(designElementName + " " + qt.getName() + " " + qt.getRepresentation() + " " + dataVector.size() + " elements in vector");
            }
            expExp.getRawExpressionDataVectors().add(vector);
            if (++count % GeoConverterImpl.LOGGING_VECTOR_COUNT_UPDATE == 0 && GeoConverterImpl.log.isDebugEnabled()) {
                GeoConverterImpl.log.debug(count + " Data vectors added");
            }
        }
        if (count > 0) {
            // Only keep quantitation types that actually have data attached.
            expExp.getQuantitationTypes().add(qt);
            if (GeoConverterImpl.log.isDebugEnabled() && count > 1000) {
                GeoConverterImpl.log.debug(count + " Data vectors added for '" + quantitationType + "'");
            }
        } else {
            GeoConverterImpl.log.info("No vectors were retained for " + quantitationType + " -- usually this is due to all values being missing.");
        }
        if (skipped > 0) {
            GeoConverterImpl.log.info("Skipped " + skipped + " vectors");
        }
    }
    GeoConverterImpl.log.info("Total of " + expExp.getRawExpressionDataVectors().size() + " vectors on platform " + geoPlatform + ", " + expExp.getQuantitationTypes().size() + " quantitation types.");
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 43 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class GeoConverterImpl method convertDesignElementDataVector.

/**
 * Builds a raw expression data vector for a single design element.
 *
 * @param geoPlatform       platform the design element belongs to; also supplies the probe-name mapping
 * @param expExp            experiment the vector is attached to
 * @param bioAssayDimension dimension of the vector; its number of bioassays must equal the number of values
 * @param designElementName probe name as it appears in the data
 * @param dataVector        the values for this design element
 * @param qt                quantitation type of the values
 * @return the new vector, or null if there are no values or the values could not be converted to bytes
 * @throws IllegalArgumentException if the number of values does not match the bioassay dimension
 * @throws IllegalStateException    if no probe or composite sequence matches the name, or if the sequence's
 *                                  database entry refers to an external database without a name
 */
private RawExpressionDataVector convertDesignElementDataVector(GeoPlatform geoPlatform, ExpressionExperiment expExp, BioAssayDimension bioAssayDimension, String designElementName, List<Object> dataVector, QuantitationType qt) {
    if (dataVector == null || dataVector.isEmpty()) {
        return null;
    }

    final int expectedSize = bioAssayDimension.getBioAssays().size();
    if (expectedSize != dataVector.size()) {
        throw new IllegalArgumentException("Expected " + expectedSize + " in bioassaydimension, data contains " + dataVector.size());
    }

    final byte[] bytes = this.convertData(dataVector, qt);
    if (bytes == null) {
        // all missing etc.
        if (GeoConverterImpl.log.isDebugEnabled()) {
            GeoConverterImpl.log.debug("All missing values for DE=" + designElementName + " QT=" + qt);
        }
        return null;
    }
    if (GeoConverterImpl.log.isDebugEnabled()) {
        GeoConverterImpl.log.debug(bytes.length + " bytes for " + dataVector.size() + " raw elements");
    }

    final ArrayDesign platform = this.convertPlatform(geoPlatform);
    assert platform != null;
    final Map<String, CompositeSequence> probesByName = platformDesignElementMap.get(platform.getShortName());
    assert probesByName != null;

    /*
     * The array design may use a different probe name after conversion; that mapping is filled in
     * earlier in the conversion process (see GeoService), so translate the name before the lookup.
     */
    String nameInGemma = geoPlatform.getProbeNamesInGemma().get(designElementName);
    if (nameInGemma == null) {
        // Sigh..this is unlikely to work in general, but see bug 1709.
        nameInGemma = geoPlatform.getProbeNamesInGemma().get(designElementName.toUpperCase());
        if (nameInGemma == null) {
            throw new IllegalStateException("There is  no probe matching " + designElementName);
        }
    }

    final CompositeSequence probe = probesByName.get(nameInGemma);
    if (probe == null) {
        throw new IllegalStateException("No composite sequence " + designElementName);
    }

    // this is obscure: a sequence with a database entry must point at a named external database.
    if (probe.getBiologicalCharacteristic() != null) {
        if (probe.getBiologicalCharacteristic().getSequenceDatabaseEntry() != null) {
            if (probe.getBiologicalCharacteristic().getSequenceDatabaseEntry().getExternalDatabase().getName() == null) {
                throw new IllegalStateException(probe + " sequence accession external database lacks name");
            }
        }
    }

    if (GeoConverterImpl.log.isDebugEnabled()) {
        GeoConverterImpl.log.debug("Associating " + probe + " with dedv");
    }

    final RawExpressionDataVector result = RawExpressionDataVector.Factory.newInstance();
    result.setDesignElement(probe);
    result.setExpressionExperiment(expExp);
    result.setBioAssayDimension(bioAssayDimension);
    result.setQuantitationType(qt);
    result.setData(bytes);
    return result;
}
Also used : RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 44 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class DataUpdater method makeNewVectors.

/**
 * Creates one raw expression data vector per row of the given matrix. Rows whose row element has no
 * design element are skipped.
 *
 * @param ee             experiment the new vectors are attached to
 * @param targetPlatform platform every design element in the data must belong to
 * @param data           matrix supplying the row values, the row design elements and the bioassay dimension
 * @param qt             quantitation type set on every new vector
 * @return the new vectors; not persisted by this method
 * @throws IllegalArgumentException if a row's design element belongs to a platform other than
 *                                  {@code targetPlatform}
 */
private Collection<RawExpressionDataVector> makeNewVectors(ExpressionExperiment ee, ArrayDesign targetPlatform, ExpressionDataDoubleMatrix data, QuantitationType qt) {
    ByteArrayConverter bArrayConverter = new ByteArrayConverter();
    Collection<RawExpressionDataVector> vectors = new HashSet<>();
    BioAssayDimension bioAssayDimension = data.getBestBioAssayDimension();
    assert bioAssayDimension != null;
    assert !bioAssayDimension.getBioAssays().isEmpty();
    // Swap in the instance returned by the service (found or newly created) before using it on the vectors.
    bioAssayDimension = assayDimensionService.findOrCreate(bioAssayDimension);
    assert !bioAssayDimension.getBioAssays().isEmpty();
    for (int i = 0; i < data.rows(); i++) {
        // Validate the row first so no bytes are converted and no vector is allocated for rows that are
        // skipped or rejected.
        CompositeSequence cs = data.getRowElement(i).getDesignElement();
        if (cs == null) {
            continue;
        }
        if (!cs.getArrayDesign().equals(targetPlatform)) {
            throw new IllegalArgumentException("Input data must use the target platform (was: " + cs.getArrayDesign() + ", expected: " + targetPlatform + ")");
        }
        RawExpressionDataVector vector = RawExpressionDataVector.Factory.newInstance();
        vector.setData(bArrayConverter.doubleArrayToBytes(data.getRow(i)));
        vector.setDesignElement(cs);
        vector.setQuantitationType(qt);
        vector.setExpressionExperiment(ee);
        vector.setBioAssayDimension(bioAssayDimension);
        vectors.add(vector);
    }
    return vectors;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 45 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class DataUpdater method reprocessAffyThreePrimeArrayData.

/**
 * Replaces the existing raw data of an experiment with data computed from its CEL files. The CEL file(s)
 * must be found by configuration. Postprocessing runs only when the experiment uses exactly one platform.
 *
 * @param ee the experiment to reprocess
 * @return the experiment as returned by the raw-vector replacement
 * @throws RuntimeException      if no files could be fetched for the experiment's accession
 * @throws IllegalStateException if processing produced no vectors
 */
// Possible external use
@SuppressWarnings("UnusedReturnValue")
public ExpressionExperiment reprocessAffyThreePrimeArrayData(ExpressionExperiment ee) {
    // The platforms are looked up before the experiment is thawed.
    final Collection<ArrayDesign> platforms = this.experimentService.getArrayDesignsUsed(ee);
    ee = experimentService.thawLite(ee);

    final RawDataFetcher fetcher = new RawDataFetcher();
    final String accession = ee.getAccession().getAccession();
    final Collection<LocalFile> celFiles = fetcher.fetch(accession);
    if (celFiles.isEmpty()) {
        throw new RuntimeException("Data was apparently not available");
    }

    final Collection<RawExpressionDataVector> newVectors = new HashSet<>();

    // Use the same QT for each one
    final QuantitationType sharedQt = quantitationTypeService.create(AffyPowerToolsProbesetSummarize.makeAffyQuantitationType());

    for (ArrayDesign platform : platforms) {
        DataUpdater.log.info("Processing data for " + platform);
        // The CDF file is located using the platform as given, before it is thawed.
        final String cdfPath = this.findCdf(platform).getAbsolutePath();
        final ArrayDesign thawedPlatform = arrayDesignService.thaw(platform);
        final AffyPowerToolsProbesetSummarize summarizer = new AffyPowerToolsProbesetSummarize(sharedQt);
        newVectors.addAll(summarizer.processThreeprimeArrayData(ee, cdfPath, thawedPlatform, celFiles));
    }

    if (newVectors.isEmpty()) {
        throw new IllegalStateException("No vectors were returned for " + ee);
    }

    ee = experimentService.replaceRawVectors(ee, newVectors);

    final String auditNote = "Data vector computation from CEL files using AffyPowerTools for " + StringUtils.join(platforms, "; ");
    this.audit(ee, auditNote, true);

    if (platforms.size() != 1) {
        DataUpdater.log.warn("Skipping postprocessing for mult-platform experiment");
    } else {
        this.postprocess(ee);
    }
    return ee;
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) AffyPowerToolsProbesetSummarize(ubic.gemma.core.loader.expression.AffyPowerToolsProbesetSummarize) LocalFile(ubic.gemma.model.common.description.LocalFile) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) RawDataFetcher(ubic.gemma.core.loader.expression.geo.fetcher.RawDataFetcher)

Aggregations

RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)53 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)16 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)16 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)16 Test (org.junit.Test)15 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)13 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)12 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)9 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)7 InputStream (java.io.InputStream)6 Collection (java.util.Collection)6 HashSet (java.util.HashSet)6 GZIPInputStream (java.util.zip.GZIPInputStream)6 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)6 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)6 GeoSeries (ubic.gemma.core.loader.expression.geo.model.GeoSeries)6 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)6 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 Transactional (org.springframework.transaction.annotation.Transactional)4