Search in sources :

Example 41 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class VectorMergingServiceImpl method mergeVectors.

/**
 * Merges the raw expression data vectors of an experiment that are spread over several
 * bioAssayDimensions into vectors that all use one new, combined bioAssayDimension.
 * <p>
 * For every quantitation type that has more than one vector per design element, one merged vector is
 * built per design element, the merged vectors are persisted and the old vectors are removed. Afterwards
 * the experiment is updated, cleaned up, audited and handed to the preprocessor; a preprocessing failure
 * is logged but does not abort the merge.
 *
 * @param ee the experiment whose vectors should be merged; it must use at most one platform
 * @return the thawed experiment; when it already has only one unmerged bioAssayDimension it is returned
 *         without any merging being performed
 * @throws IllegalArgumentException if the experiment uses more than one platform
 * @throws IllegalStateException    if no unmerged bioAssayDimensions are found, if a merged vector has the
 *                                  wrong number of values, or if nothing was merged for any quantitation type
 */
@Override
public ExpressionExperiment mergeVectors(ExpressionExperiment ee) {
    Collection<ArrayDesign> arrayDesigns = expressionExperimentService.getArrayDesignsUsed(ee);
    if (arrayDesigns.size() > 1) {
        throw new IllegalArgumentException("Cannot cope with more than one platform; switch experiment to use a (merged) platform first");
    }
    ee = expressionExperimentService.thaw(ee);
    Collection<QuantitationType> qts = expressionExperimentService.getQuantitationTypes(ee);
    VectorMergingServiceImpl.log.info(qts.size() + " quantitation types for potential merge");
    /*
     * Load all the bioassay dimensions, which will be merged.
     */
    Collection<BioAssayDimension> allOldBioAssayDims = new HashSet<>();
    for (BioAssay ba : ee.getBioAssays()) {
        for (BioAssayDimension bioAssayDim : bioAssayService.findBioAssayDimensions(ba)) {
            // A dimension without a description cannot be a previously merged one, so it is kept.
            String description = bioAssayDim.getDescription();
            if (description != null && description.startsWith(VectorMergingServiceImpl.MERGED_DIM_DESC_PREFIX)) {
                // not foolproof, but avoids some artifacts - e.g. if there were previous failed attempts at this.
                continue;
            }
            allOldBioAssayDims.add(bioAssayDim);
        }
    }
    if (allOldBioAssayDims.isEmpty()) {
        throw new IllegalStateException("No bioAssayDimensions found to merge (previously merged ones are filtered, data may be corrupt?");
    }
    if (allOldBioAssayDims.size() == 1) {
        VectorMergingServiceImpl.log.warn("Experiment already has only a single bioAssayDimension, nothing seems to need merging. Bailing");
        return ee;
    }
    VectorMergingServiceImpl.log.info(allOldBioAssayDims.size() + " bioAssayDimensions to merge");
    // The sorted order fixes the order in which the old dimensions are joined into the new one.
    List<BioAssayDimension> sortedOldDims = this.sortedBioAssayDimensions(allOldBioAssayDims);
    BioAssayDimension newBioAd = this.getNewBioAssayDimension(sortedOldDims);
    int totalBioAssays = newBioAd.getBioAssays().size();
    assert totalBioAssays == ee.getBioAssays().size() : "experiment has " + ee.getBioAssays().size() + " but new bioAssayDimension has " + totalBioAssays;
    Map<QuantitationType, Collection<RawExpressionDataVector>> qt2Vec = this.getVectors(ee, qts, allOldBioAssayDims);
    /*
     * This will run into problems if there are excess quantitation types
     */
    int numSuccessfulMergers = 0;
    for (Map.Entry<QuantitationType, Collection<RawExpressionDataVector>> qtEntry : qt2Vec.entrySet()) {
        QuantitationType type = qtEntry.getKey();
        Collection<RawExpressionDataVector> oldVecs = qtEntry.getValue();
        if (oldVecs.isEmpty()) {
            VectorMergingServiceImpl.log.warn("No vectors for " + type);
            continue;
        }
        Map<CompositeSequence, Collection<RawExpressionDataVector>> deVMap = this.getDevMap(oldVecs);
        if (deVMap == null) {
            VectorMergingServiceImpl.log.info("Vector merging will not be done for " + type + " as there is only one vector per element already");
            continue;
        }
        VectorMergingServiceImpl.log.info("Processing " + oldVecs.size() + " vectors  for " + type);
        Collection<RawExpressionDataVector> newVectors = new HashSet<>();
        int numAllMissing = 0;
        int missingValuesForQt = 0;
        for (Map.Entry<CompositeSequence, Collection<RawExpressionDataVector>> deEntry : deVMap.entrySet()) {
            CompositeSequence de = deEntry.getKey();
            RawExpressionDataVector vector = this.initializeNewVector(ee, newBioAd, type, de);
            Collection<RawExpressionDataVector> dedvs = deEntry.getValue();
            /*
             * these ugly nested loops are to ENSURE that we get the vector reconstructed properly. For each of the
             * old bioassayDimensions, find the designElementDataVector that uses it. If there isn't one, fill in
             * the values for that dimension with missing data. We go through the dimensions in the same order that
             * we joined them up.
             */
            List<Object> data = new ArrayList<>();
            int totalMissingInVector = this.makeMergedData(sortedOldDims, newBioAd, type, de, dedvs, data);
            missingValuesForQt += totalMissingInVector;
            if (totalMissingInVector == totalBioAssays) {
                numAllMissing++;
                // we don't save data that is all missing.
                continue;
            }
            if (data.size() != totalBioAssays) {
                throw new IllegalStateException("Wrong number of values for " + de + " / " + type + ", expected " + totalBioAssays + ", got " + data.size());
            }
            byte[] newDataAr = converter.toBytes(data.toArray());
            vector.setData(newDataAr);
            newVectors.add(vector);
        }
        // TRANSACTION
        vectorMergingHelperService.persist(ee, type, newVectors);
        if (numAllMissing > 0) {
            VectorMergingServiceImpl.log.info(numAllMissing + " vectors had all missing values and were junked for " + type);
        }
        if (missingValuesForQt > 0) {
            VectorMergingServiceImpl.log.info(missingValuesForQt + " total missing values: " + type);
        }
        VectorMergingServiceImpl.log.info("Removing " + oldVecs.size() + " old vectors for " + type);
        rawExpressionDataVectorService.remove(oldVecs);
        ee.getRawExpressionDataVectors().removeAll(oldVecs);
        numSuccessfulMergers++;
    }
    if (numSuccessfulMergers == 0) {
        /*
         * Try to clean up
         */
        this.bioAssayDimensionService.remove(newBioAd);
        throw new IllegalStateException("Nothing was merged. Maybe all the vectors are effectively merged already");
    }
    expressionExperimentService.update(ee);
    // Several transactions
    this.cleanUp(ee, allOldBioAssayDims, newBioAd);
    // transaction; the audit note records how many dimensions were merged, not the dimensions themselves.
    this.audit(ee, "Vector merging performed, merged " + allOldBioAssayDims.size() + " old bioassay dimensions for " + qts.size() + " quantitation types.");
    // several transactions
    try {
        preprocessorService.process(ee);
    } catch (PreprocessingException e) {
        // Merging itself succeeded at this point, so a preprocessing failure is only logged.
        VectorMergingServiceImpl.log.error("Error during postprocessing", e);
    }
    return ee;
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 42 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ProcessedExpressionDataCreateServiceTest method testReorder.

/**
 * Loads the GSE404 test data set, attaches a two-level categorical factor to its samples, reorders the
 * processed vectors by design and then checks the sample order and a spot value of the reordered data.
 */
@Test
public void testReorder() throws Exception {
    // Remove any copy of the experiment that is already stored under this short name.
    ExpressionExperiment existing = eeService.findByShortName("GSE404");
    if (existing != null) {
        eeService.remove(existing);
    }
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse404Short")));
        @SuppressWarnings("unchecked") Collection<ExpressionExperiment> loaded = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE404", false, true, false);
        this.ee = loaded.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // The experiment is already in the system; use the instance carried by the exception.
        this.ee = (ExpressionExperiment) e.getData();
    }
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);

    // Build a categorical factor with two values; "bar" is flagged as the baseline.
    ExperimentalFactor factor = ExperimentalFactor.Factory.newInstance();
    factor.setType(FactorType.CATEGORICAL);
    factor.setName(ee.getShortName() + " design");
    factor.setExperimentalDesign(ee.getExperimentalDesign());
    factor = eeService.addFactor(ee, factor);

    FactorValue fv1 = FactorValue.Factory.newInstance();
    fv1.setValue("foo");
    fv1.setExperimentalFactor(factor);
    FactorValue fv2 = FactorValue.Factory.newInstance();
    fv2.setValue("bar");
    fv2.setIsBaseline(true);
    fv2.setExperimentalFactor(factor);
    eeService.addFactorValue(ee, fv1);
    eeService.addFactorValue(ee, fv2);

    // Walk the bioassays in id order and alternate the two factor values over the unassigned samples.
    List<BioAssay> assaysById = new ArrayList<>(ee.getBioAssays());
    Collections.sort(assaysById, new Comparator<BioAssay>() {

        @Override
        public int compare(BioAssay left, BioAssay right) {
            return left.getId().compareTo(right.getId());
        }
    });
    int assigned = 0;
    for (BioAssay assay : assaysById) {
        BioMaterial sample = assay.getSampleUsed();
        assert fv1.getId() != null;
        if (sample.getFactorValues().isEmpty()) {
            FactorValue chosen = (assigned % 2 == 0) ? fv1 : fv2;
            sample.getFactorValues().add(chosen);
            bioMaterialService.update(sample);
            assigned++;
        }
    }
    factor = this.experimentalFactorService.load(factor.getId());
    assertEquals(2, factor.getFactorValues().size());

    /*
     * Setup is complete; this is the operation under test.
     */
    processedExpressionDataVectorService.reorderByDesign(ee.getId());

    /*
     * Inspect the reordered vectors.
     */
    Collection<ProcessedExpressionDataVector> reordered = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    assertTrue(reordered.size() > 0);
    boolean sawSpotCheckVector = false;
    for (ProcessedExpressionDataVector vector : reordered) {
        log.debug(vector.getDesignElement().getName() + " .........................");
        // Thaw explicitly: the test runs outside a transaction, so lazy associations would otherwise fail.
        BioAssayDimension dimension = vector.getBioAssayDimension();
        bioAssayDimensionService.thawLite(dimension);
        int position = 0;
        for (BioAssay assay : dimension.getBioAssays()) {
            BioMaterial sample = assay.getSampleUsed();
            assertEquals(1, sample.getFactorValues().size());
            FactorValue observed = sample.getFactorValues().iterator().next();
            assertNotNull(observed.getId());
            log.debug(assay.getId() + " " + observed.getId() + " " + observed);
            if (position < 10) {
                // The baseline value is expected to come first after reordering.
                assertEquals(fv2, observed);
            }
            position++;
        }
        /*
         * Spot check the data of one known design element.
         */
        if (vector.getDesignElement().getName().equals("40")) {
            sawSpotCheckVector = true;
            ByteArrayConverter byteConverter = new ByteArrayConverter();
            Double[] values = ArrayUtils.toObject(byteConverter.byteArrayToDoubles(vector.getData()));
            assertEquals(20, values.length);
            assertEquals(-0.08, values[1], 0.001);
            assertEquals(0.45, values[10], 0.001);
            assertEquals(Double.NaN, values[19], 0.001);
        }
    }
    assertTrue("test vector not found", sawSpotCheckVector);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 43 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ExpressionExperimentDaoImpl method remove.

/**
 * Removes an expression experiment along with its raw and processed data vectors, its bioassays, and the
 * bioassay dimensions, quantitation types and biomaterials gathered by the removal helpers.
 * <p>
 * Investigators are only disassociated, not deleted. After the delete, the vector collections and the
 * bioassay-to-sample relations are put back on the (now transient) experiment instance.
 * <p>
 * Any exception raised during removal is logged with its stack trace and NOT rethrown.
 * NOTE(review): swallowing the exception means callers cannot tell that the removal failed - confirm
 * that this is intended.
 *
 * @param ee the experiment to remove; must not be null
 * @throws IllegalArgumentException if {@code ee} is null
 */
@Override
public void remove(final ExpressionExperiment ee) {
    if (ee == null) {
        throw new IllegalArgumentException("ExpressionExperiment to remove must not be null");
    }
    Session session = this.getSessionFactory().getCurrentSession();
    try {
        // Note that links and analyses are deleted separately - see the ExpressionExperimentService.
        // At this point, the ee is probably still in the session, as the service already has gotten it
        // in this transaction.
        session.flush();
        session.clear();
        // Reattach the experiment to the freshly cleared session.
        session.buildLockRequest(LockOptions.NONE).lock(ee);
        Hibernate.initialize(ee.getAuditTrail());
        // Filled in by the vector-removal helpers below and deleted further down.
        Set<BioAssayDimension> dims = new HashSet<>();
        Set<QuantitationType> qts = new HashSet<>();
        Collection<RawExpressionDataVector> designElementDataVectors = ee.getRawExpressionDataVectors();
        Hibernate.initialize(designElementDataVectors);
        ee.setRawExpressionDataVectors(null);
        /*
         * We don't remove the investigators, just breaking the association.
         */
        ee.getInvestigators().clear();
        int count = 0;
        if (designElementDataVectors != null) {
            count = this.removeDataVectors(session, dims, qts, designElementDataVectors, count);
        }
        Collection<ProcessedExpressionDataVector> processedVectors = ee.getProcessedExpressionDataVectors();
        Hibernate.initialize(processedVectors);
        if (processedVectors != null && processedVectors.size() > 0) {
            ee.setProcessedExpressionDataVectors(null);
            this.removeProcessedVectors(session, dims, qts, count, processedVectors);
        }
        session.flush();
        session.clear();
        session.update(ee);
        AbstractDao.log.info("Removing BioAssay Dimensions ...");
        for (BioAssayDimension dim : dims) {
            // Break the link to the bioassays first, then delete the dimension itself.
            dim.getBioAssays().clear();
            session.update(dim);
            session.delete(dim);
        }
        dims.clear();
        session.flush();
        AbstractDao.log.info("Removing Bioassays and biomaterials ...");
        // keep to put back in the object.
        Map<BioAssay, BioMaterial> copyOfRelations = new HashMap<>();
        Collection<BioMaterial> bioMaterialsToDelete = new HashSet<>();
        Collection<BioAssay> bioAssays = ee.getBioAssays();
        this.removeBioAssays(session, copyOfRelations, bioMaterialsToDelete, bioAssays);
        AbstractDao.log.info("Last bits ...");
        // We remove them here in case they are associated to more than one bioassay-- no cascade is possible.
        for (BioMaterial bm : bioMaterialsToDelete) {
            session.delete(bm);
        }
        for (QuantitationType qt : qts) {
            session.delete(qt);
        }
        session.flush();
        session.delete(ee);
        /*
         * Put transient instances back. This is possibly useful for clearing ACLS.
         */
        ee.setProcessedExpressionDataVectors(processedVectors);
        ee.setRawExpressionDataVectors(designElementDataVectors);
        for (BioAssay ba : ee.getBioAssays()) {
            ba.setSampleUsed(copyOfRelations.get(ba));
        }
        AbstractDao.log.info("Deleted " + ee);
    } catch (Exception e) {
        // Pass the exception as the Throwable argument so the stack trace is logged, not just toString().
        AbstractDao.log.error("Error while removing expression experiment", e);
    } finally {
        AbstractDao.log.info("Finalising remove method.");
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) NotImplementedException(org.apache.commons.lang.NotImplementedException) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 44 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class DesignElementDataVectorDaoImpl method thawRawAndProcessed.

/**
 * Initializes the given vectors and the objects they refer to (quantitation type, expression experiment,
 * bioassay dimension, design element sequence and array design) in four timed phases. A phase is only
 * reported in the log when it exceeds its time (or size) threshold.
 *
 * @param designElementDataVectors the vectors to thaw; a null collection is ignored
 */
@Override
public void thawRawAndProcessed(Collection<DesignElementDataVector> designElementDataVectors) {
    if (designElementDataVectors == null) {
        return;
    }
    Session session = this.getSessionFactory().getCurrentSession();
    Hibernate.initialize(designElementDataVectors);

    StopWatch timer = new StopWatch();
    timer.start();

    // Phase 1: initialize each vector and remember the experiments, dimensions and probes it refers to.
    Collection<ExpressionExperiment> experiments = new HashSet<>();
    Map<BioAssayDimension, Collection<DesignElementDataVector>> vectorsByDimension = new HashMap<>();
    Collection<CompositeSequence> probes = new HashSet<>();
    for (DesignElementDataVector vector : designElementDataVectors) {
        session.buildLockRequest(LockOptions.NONE).lock(vector);
        Hibernate.initialize(vector);
        Hibernate.initialize(vector.getQuantitationType());
        BioAssayDimension dimension = vector.getBioAssayDimension();
        Collection<DesignElementDataVector> sameDimension = vectorsByDimension.get(dimension);
        if (sameDimension == null) {
            sameDimension = new HashSet<>();
            vectorsByDimension.put(dimension, sameDimension);
        }
        sameDimension.add(vector);
        probes.add(vector.getDesignElement());
        experiments.add(vector.getExpressionExperiment());
        session.evict(vector.getQuantitationType());
        session.evict(vector);
    }
    if (timer.getTime() > designElementDataVectors.size()) {
        AbstractDao.log.info("Thaw phase 1, " + designElementDataVectors.size() + " vectors initialized in " + timer.getTime() + "ms ");
    }
    timer.reset();
    timer.start();

    // Phase 2: lightly initialize the expression experiments that were seen.
    for (ExpressionExperiment experiment : experiments) {
        Hibernate.initialize(experiment);
        session.evict(experiment);
    }
    if (timer.getTime() > 200) {
        AbstractDao.log.info("Thaw phase 2, " + experiments.size() + " vector-associated expression experiments in " + timer.getTime() + "ms ");
    }
    timer.reset();
    timer.start();

    // Phase 3: fetch each bioassay dimension that was seen (usually one, more rarely two) with its
    // associations and point every vector with a matching dimension id at the fetched instance.
    for (BioAssayDimension dimension : vectorsByDimension.keySet()) {
        BioAssayDimension fetched = (BioAssayDimension) this.getSessionFactory().getCurrentSession()
                .createQuery("select distinct bad from BioAssayDimension bad join fetch bad.bioAssays ba join fetch ba.sampleUsed " + "bm join fetch ba.arrayDesignUsed left join fetch bm.factorValues fetch all properties where bad.id= :bad ")
                .setParameter("bad", dimension.getId())
                .uniqueResult();
        assert fetched != null;
        assert !vectorsByDimension.get(fetched).isEmpty();
        for (DesignElementDataVector candidate : designElementDataVectors) {
            if (candidate.getBioAssayDimension().getId().equals(fetched.getId())) {
                candidate.setBioAssayDimension(fetched);
            }
        }
    }
    if (timer.getTime() > 1000) {
        AbstractDao.log.info("Thaw phase 3, " + vectorsByDimension.size() + " vector-associated bioassaydimensions in " + timer.getTime() + "ms ");
    }
    timer.reset();
    timer.start();

    // Phase 4: initialize the sequence and array design of each probe that has a sequence. SLOW
    long lastTime = 0;
    int thawedCount = 0;
    for (CompositeSequence probe : probes) {
        BioSequence sequence = probe.getBiologicalCharacteristic();
        if (sequence != null) {
            session.buildLockRequest(LockOptions.NONE).lock(sequence);
            Hibernate.initialize(sequence);
            // is this really necessary?
            ArrayDesign platform = probe.getArrayDesign();
            Hibernate.initialize(platform);
            thawedCount++;
            // Progress is checked every 10000 probes and reported only when that batch was slow.
            if (thawedCount % 10000 == 0) {
                if (timer.getTime() - lastTime > 1000) {
                    AbstractDao.log.info("Thawed " + thawedCount + " vector-associated probes " + timer.getTime() + " ms");
                }
                lastTime = timer.getTime();
            }
        }
    }
    timer.stop();
    if (designElementDataVectors.size() >= 2000 || timer.getTime() > 200) {
        AbstractDao.log.info("Thaw phase 4 " + probes.size() + " vector-associated probes thawed in " + timer.getTime() + "ms");
    }
}
Also used : HashMap(java.util.HashMap) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) StopWatch(org.apache.commons.lang3.time.StopWatch) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) Collection(java.util.Collection) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) HashSet(java.util.HashSet)

Example 45 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class GeoConverterImpl method convertGeoSampleList.

/**
 * Builds a BioAssayDimension for a list of GEO samples. The dimension's name and description are derived
 * from the experiment's short name followed by the comma-separated sample accessions.
 *
 * @param datasetSamples list of GeoSamples to be matched up with BioAssays; the list is sorted in place
 * @param expExp         the ExpressionExperiment whose short name prefixes the dimension name and which is
 *                       passed on for matching samples to bioassays
 * @return BioAssayDimension representing the samples.
 */
private BioAssayDimension convertGeoSampleList(List<GeoSample> datasetSamples, ExpressionExperiment expExp) {
    BioAssayDimension dimension = BioAssayDimension.Factory.newInstance();
    Collections.sort(datasetSamples);

    StringBuilder label = new StringBuilder();
    label.append(expExp.getShortName());
    label.append(": ");
    for (GeoSample geoSample : datasetSamples) {
        String accession = geoSample.getGeoAccession();
        label.append(accession);
        label.append(",");
        if (!this.matchSampleToBioAssay(expExp, dimension, accession)) {
            // Expected in normal operation: not every heading is a sample id.
            GeoConverterImpl.log.warn("No bioassay match for " + accession);
        }
    }
    GeoConverterImpl.log.debug(dimension.getBioAssays().size() + " Bioassays in biodimension");

    dimension.setName(this.formatName(label));
    dimension.setDescription(label.toString());
    return dimension;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension)

Aggregations

BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)59 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)29 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)20 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)16 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)15 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)15 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)15 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)11 StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType)10 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)9 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)9 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)6 HashSet (java.util.HashSet)4 Test (org.junit.Test)4 Transactional (org.springframework.transaction.annotation.Transactional)4 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)4 StopWatch (org.apache.commons.lang3.time.StopWatch)3 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)3 ExpressionExperimentValueObject (ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject)3 DoubleArrayList (cern.colt.list.DoubleArrayList)2