use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class VectorMergingServiceImpl method mergeVectors.
/**
 * Merges the raw expression data vectors of an experiment whose data is spread over several
 * BioAssayDimensions into vectors that all share one newly created BioAssayDimension.
 * <p>
 * For every quantitation type that has more than one vector per design element, a new vector is built per
 * design element, persisted, and the old vectors are removed. Vectors that would consist only of missing
 * values are not saved. Afterwards the experiment is updated, the old dimensions are cleaned up, an audit
 * event is recorded and the experiment is re-run through the preprocessor.
 *
 * @param ee the experiment to process; it must use at most one platform
 * @return the (thawed) experiment; returned unchanged if it already has a single BioAssayDimension
 * @throws IllegalArgumentException if the experiment uses more than one platform
 * @throws IllegalStateException    if no un-merged BioAssayDimensions are found, if a merged vector has the
 *                                  wrong number of values, or if nothing was merged for any quantitation type
 */
@Override
public ExpressionExperiment mergeVectors(ExpressionExperiment ee) {
    Collection<ArrayDesign> arrayDesigns = expressionExperimentService.getArrayDesignsUsed(ee);
    if (arrayDesigns.size() > 1) {
        throw new IllegalArgumentException("Cannot cope with more than one platform; switch experiment to use a (merged) platform first");
    }

    ee = expressionExperimentService.thaw(ee);
    Collection<QuantitationType> qts = expressionExperimentService.getQuantitationTypes(ee);
    VectorMergingServiceImpl.log.info(qts.size() + " quantitation types for potential merge");

    /*
     * Load all the bioassay dimensions, which will be merged.
     */
    Collection<BioAssayDimension> allOldBioAssayDims = new HashSet<>();
    for (BioAssay ba : ee.getBioAssays()) {
        for (BioAssayDimension bioAssayDim : bioAssayService.findBioAssayDimensions(ba)) {
            // A dimension without a description cannot be a previously merged one, so it is kept
            // (and must not cause a NullPointerException).
            String description = bioAssayDim.getDescription();
            if (description != null && description.startsWith(VectorMergingServiceImpl.MERGED_DIM_DESC_PREFIX)) {
                // not foolproof, but avoids some artifacts - e.g. if there were previous failed attempts at this.
                continue;
            }
            allOldBioAssayDims.add(bioAssayDim);
        }
    }

    if (allOldBioAssayDims.isEmpty()) {
        throw new IllegalStateException("No bioAssayDimensions found to merge (previously merged ones are filtered, data may be corrupt?");
    }
    if (allOldBioAssayDims.size() == 1) {
        VectorMergingServiceImpl.log.warn("Experiment already has only a single bioAssayDimension, nothing seems to need merging. Bailing");
        return ee;
    }

    // Captured before any clean-up so the audit message reports the number that was actually merged.
    int numOldDims = allOldBioAssayDims.size();
    VectorMergingServiceImpl.log.info(numOldDims + " bioAssayDimensions to merge");

    List<BioAssayDimension> sortedOldDims = this.sortedBioAssayDimensions(allOldBioAssayDims);
    BioAssayDimension newBioAd = this.getNewBioAssayDimension(sortedOldDims);
    int totalBioAssays = newBioAd.getBioAssays().size();
    assert totalBioAssays == ee.getBioAssays().size() : "experiment has " + ee.getBioAssays().size() + " but new bioAssayDimension has " + totalBioAssays;

    Map<QuantitationType, Collection<RawExpressionDataVector>> qt2Vec = this.getVectors(ee, qts, allOldBioAssayDims);

    /*
     * This will run into problems if there are excess quantitation types
     */
    int numSuccessfulMergers = 0;
    for (Map.Entry<QuantitationType, Collection<RawExpressionDataVector>> qtEntry : qt2Vec.entrySet()) {
        QuantitationType type = qtEntry.getKey();
        Collection<RawExpressionDataVector> oldVecs = qtEntry.getValue();
        if (oldVecs.isEmpty()) {
            VectorMergingServiceImpl.log.warn("No vectors for " + type);
            continue;
        }

        // A null map is the helper's signal that every element already has a single vector.
        Map<CompositeSequence, Collection<RawExpressionDataVector>> deVMap = this.getDevMap(oldVecs);
        if (deVMap == null) {
            VectorMergingServiceImpl.log.info("Vector merging will not be done for " + type + " as there is only one vector per element already");
            continue;
        }

        VectorMergingServiceImpl.log.info("Processing " + oldVecs.size() + " vectors for " + type);
        Collection<RawExpressionDataVector> newVectors = new HashSet<>();
        int numAllMissing = 0;
        int missingValuesForQt = 0;
        for (Map.Entry<CompositeSequence, Collection<RawExpressionDataVector>> deEntry : deVMap.entrySet()) {
            CompositeSequence de = deEntry.getKey();
            Collection<RawExpressionDataVector> dedvs = deEntry.getValue();
            RawExpressionDataVector vector = this.initializeNewVector(ee, newBioAd, type, de);

            /*
             * these ugly nested loops are to ENSURE that we get the vector reconstructed properly. For each of the
             * old bioassayDimensions, find the designElementDataVector that uses it. If there isn't one, fill in
             * the values for that dimension with missing data. We go through the dimensions in the same order that
             * we joined them up.
             */
            List<Object> data = new ArrayList<>();
            int totalMissingInVector = this.makeMergedData(sortedOldDims, newBioAd, type, de, dedvs, data);
            missingValuesForQt += totalMissingInVector;
            if (totalMissingInVector == totalBioAssays) {
                numAllMissing++;
                // we don't save data that is all missing.
                continue;
            }
            if (data.size() != totalBioAssays) {
                throw new IllegalStateException("Wrong number of values for " + de + " / " + type + ", expected " + totalBioAssays + ", got " + data.size());
            }
            byte[] newDataAr = converter.toBytes(data.toArray());
            vector.setData(newDataAr);
            newVectors.add(vector);
        }

        // TRANSACTION
        vectorMergingHelperService.persist(ee, type, newVectors);

        if (numAllMissing > 0) {
            VectorMergingServiceImpl.log.info(numAllMissing + " vectors had all missing values and were junked for " + type);
        }
        if (missingValuesForQt > 0) {
            VectorMergingServiceImpl.log.info(missingValuesForQt + " total missing values: " + type);
        }

        VectorMergingServiceImpl.log.info("Removing " + oldVecs.size() + " old vectors for " + type);
        rawExpressionDataVectorService.remove(oldVecs);
        ee.getRawExpressionDataVectors().removeAll(oldVecs);
        numSuccessfulMergers++;
    }

    if (numSuccessfulMergers == 0) {
        /*
         * Try to clean up
         */
        this.bioAssayDimensionService.remove(newBioAd);
        throw new IllegalStateException("Nothing was merged. Maybe all the vectors are effectively merged already");
    }

    expressionExperimentService.update(ee);

    // Several transactions
    this.cleanUp(ee, allOldBioAssayDims, newBioAd);

    // transaction
    // Report the count of merged dimensions rather than dumping the whole collection into the audit note.
    this.audit(ee, "Vector merging performed, merged " + numOldDims + " old bioassay dimensions for " + qts.size() + " quantitation types.");

    // several transactions
    try {
        preprocessorService.process(ee);
    } catch (PreprocessingException e) {
        // Postprocessing failure is logged but does not undo the merge that has already happened.
        VectorMergingServiceImpl.log.error("Error during postprocessing", e);
    }
    return ee;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class ProcessedExpressionDataCreateServiceTest method testReorder.
/**
 * Loads the GSE404 test data set, attaches a two-level categorical factor to its samples, asks the
 * processed-vector service to reorder by design, and then checks that baseline samples come first in
 * every vector and that a known vector still carries the expected values.
 */
@Test
public void testReorder() throws Exception {
    // Start from a clean slate if a previous run left the experiment behind.
    ExpressionExperiment existing = eeService.findByShortName("GSE404");
    if (existing != null) {
        eeService.remove(existing);
    }

    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse404Short")));
        @SuppressWarnings("unchecked") Collection<ExpressionExperiment> loaded = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE404", false, true, false);
        this.ee = loaded.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        this.ee = (ExpressionExperiment) e.getData();
    }

    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);

    // Build a categorical factor with two values; fv2 ("bar") is flagged as the baseline.
    ExperimentalFactor factor = ExperimentalFactor.Factory.newInstance();
    factor.setType(FactorType.CATEGORICAL);
    factor.setName(ee.getShortName() + " design");
    factor.setExperimentalDesign(ee.getExperimentalDesign());
    factor = eeService.addFactor(ee, factor);

    FactorValue fv1 = FactorValue.Factory.newInstance();
    FactorValue fv2 = FactorValue.Factory.newInstance();
    fv1.setValue("foo");
    fv1.setExperimentalFactor(factor);
    fv2.setValue("bar");
    fv2.setIsBaseline(true);
    fv2.setExperimentalFactor(factor);
    eeService.addFactorValue(ee, fv1);
    eeService.addFactorValue(ee, fv2);

    // Walk the bioassays in id order so the alternating assignment below is deterministic.
    Comparator<BioAssay> byId = new Comparator<BioAssay>() {
        @Override
        public int compare(BioAssay o1, BioAssay o2) {
            return o1.getId().compareTo(o2.getId());
        }
    };
    List<BioAssay> assaysById = new ArrayList<>(ee.getBioAssays());
    Collections.sort(assaysById, byId);

    int assigned = 0;
    for (BioAssay assay : assaysById) {
        BioMaterial material = assay.getSampleUsed();
        assert fv1.getId() != null;
        if (material.getFactorValues().isEmpty()) {
            // Alternate between the two factor values; samples that already have one are left alone.
            FactorValue chosen = (assigned % 2 == 0) ? fv1 : fv2;
            material.getFactorValues().add(chosen);
            bioMaterialService.update(material);
            assigned++;
        }
    }

    factor = this.experimentalFactorService.load(factor.getId());
    assertEquals(2, factor.getFactorValues().size());

    /*
     * Setup is complete; this is the call under test.
     */
    processedExpressionDataVectorService.reorderByDesign(ee.getId());

    /*
     * Inspect the reordered vectors.
     */
    Collection<ProcessedExpressionDataVector> resortedVectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    assertTrue(resortedVectors.size() > 0);

    boolean foundVector = false;
    for (ProcessedExpressionDataVector vector : resortedVectors) {
        log.debug(vector.getDesignElement().getName() + " .........................");

        // Thaw explicitly: the test runs outside a transaction, so lazy associations would otherwise fail.
        BioAssayDimension dimension = vector.getBioAssayDimension();
        bioAssayDimensionService.thawLite(dimension);

        int position = 0;
        for (BioAssay assay : dimension.getBioAssays()) {
            BioMaterial material = assay.getSampleUsed();
            assertEquals(1, material.getFactorValues().size());
            FactorValue fv = material.getFactorValues().iterator().next();
            assertNotNull(fv.getId());
            log.debug(assay.getId() + " " + fv.getId() + " " + fv);
            if (position < 10) {
                // The first ten positions must hold the baseline value.
                assertEquals(fv2, fv);
            }
            position++;
        }

        /*
         * Spot check the values of one known vector.
         */
        if (vector.getDesignElement().getName().equals("40")) {
            foundVector = true;
            ByteArrayConverter byteConverter = new ByteArrayConverter();
            Double[] values = ArrayUtils.toObject(byteConverter.byteArrayToDoubles(vector.getData()));
            assertEquals(20, values.length);
            assertEquals(-0.08, values[1], 0.001);
            assertEquals(0.45, values[10], 0.001);
            assertEquals(Double.NaN, values[19], 0.001);
        }
    }
    assertTrue("test vector not found", foundVector);
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class ExpressionExperimentDaoImpl method remove.
/**
 * Removes an expression experiment together with its raw and processed data vectors, the
 * BioAssayDimensions and QuantitationTypes collected while removing those vectors, its bioassays and
 * their biomaterials. Investigators are only disassociated, not deleted.
 * <p>
 * Any exception raised during removal is logged (with its stack trace) and NOT rethrown, so callers
 * cannot tell from this method alone whether the removal succeeded.
 * NOTE(review): consider propagating the failure so the surrounding transaction can roll back - confirm
 * with callers before changing.
 *
 * @param ee the experiment to remove; must not be null
 * @throws IllegalArgumentException if {@code ee} is null
 */
@Override
public void remove(final ExpressionExperiment ee) {
    if (ee == null)
        throw new IllegalArgumentException("Cannot remove a null expression experiment");
    Session session = this.getSessionFactory().getCurrentSession();
    try {
        // Note that links and analyses are deleted separately - see the ExpressionExperimentService.
        // At this point, the ee is probably still in the session, as the service already has gotten it
        // in this transaction.
        session.flush();
        session.clear();
        // Reattach the (now detached) experiment to the cleared session.
        session.buildLockRequest(LockOptions.NONE).lock(ee);
        Hibernate.initialize(ee.getAuditTrail());
        // Filled in by the vector-removal helpers below; deleted once the vectors are gone.
        Set<BioAssayDimension> dims = new HashSet<>();
        Set<QuantitationType> qts = new HashSet<>();
        Collection<RawExpressionDataVector> designElementDataVectors = ee.getRawExpressionDataVectors();
        Hibernate.initialize(designElementDataVectors);
        ee.setRawExpressionDataVectors(null);
        /*
         * We don't remove the investigators, just breaking the association.
         */
        ee.getInvestigators().clear();
        int count = 0;
        if (designElementDataVectors != null) {
            count = this.removeDataVectors(session, dims, qts, designElementDataVectors, count);
        }
        Collection<ProcessedExpressionDataVector> processedVectors = ee.getProcessedExpressionDataVectors();
        Hibernate.initialize(processedVectors);
        if (processedVectors != null && processedVectors.size() > 0) {
            ee.setProcessedExpressionDataVectors(null);
            this.removeProcessedVectors(session, dims, qts, count, processedVectors);
        }
        session.flush();
        session.clear();
        session.update(ee);
        AbstractDao.log.info("Removing BioAssay Dimensions ...");
        for (BioAssayDimension dim : dims) {
            // Break the link to the bioassays before deleting the dimension itself.
            dim.getBioAssays().clear();
            session.update(dim);
            session.delete(dim);
        }
        dims.clear();
        session.flush();
        AbstractDao.log.info("Removing Bioassays and biomaterials ...");
        // keep to put back in the object.
        Map<BioAssay, BioMaterial> copyOfRelations = new HashMap<>();
        Collection<BioMaterial> bioMaterialsToDelete = new HashSet<>();
        Collection<BioAssay> bioAssays = ee.getBioAssays();
        this.removeBioAssays(session, copyOfRelations, bioMaterialsToDelete, bioAssays);
        AbstractDao.log.info("Last bits ...");
        // We remove them here in case they are associated to more than one bioassay-- no cascade is possible.
        for (BioMaterial bm : bioMaterialsToDelete) {
            session.delete(bm);
        }
        for (QuantitationType qt : qts) {
            session.delete(qt);
        }
        session.flush();
        session.delete(ee);
        /*
         * Put transient instances back. This is possibly useful for clearing ACLS.
         */
        ee.setProcessedExpressionDataVectors(processedVectors);
        ee.setRawExpressionDataVectors(designElementDataVectors);
        for (BioAssay ba : ee.getBioAssays()) {
            ba.setSampleUsed(copyOfRelations.get(ba));
        }
        AbstractDao.log.info("Deleted " + ee);
    } catch (Exception e) {
        // Pass the exception as the throwable argument so the stack trace ends up in the log;
        // logging it as the message object would record only its toString().
        AbstractDao.log.error("Failed to remove expression experiment with id=" + ee.getId(), e);
    } finally {
        AbstractDao.log.info("Finalising remove method.");
    }
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class DesignElementDataVectorDaoImpl method thawRawAndProcessed.
/**
 * Initializes ("thaws") the given data vectors and the objects hanging off them, in four timed phases:
 * <ol>
 * <li>each vector and its quantitation type (both are evicted from the session afterwards);</li>
 * <li>the expression experiments the vectors belong to (also evicted);</li>
 * <li>the BioAssayDimensions, re-fetched with their bioassays, samples, array designs and factor values,
 * and set back on every vector that referenced a dimension with the same id;</li>
 * <li>the biosequence and array design of each design element.</li>
 * </ol>
 * Timing information is logged when a phase exceeds its threshold.
 *
 * @param designElementDataVectors the vectors to thaw; a null collection is ignored
 */
@Override
public void thawRawAndProcessed(Collection<DesignElementDataVector> designElementDataVectors) {
    if (designElementDataVectors == null) {
        return;
    }

    Session session = this.getSessionFactory().getCurrentSession();
    Hibernate.initialize(designElementDataVectors);

    StopWatch timer = new StopWatch();
    timer.start();

    Collection<ExpressionExperiment> experiments = new HashSet<>();
    Map<BioAssayDimension, Collection<DesignElementDataVector>> vectorsByDimension = new HashMap<>();
    Collection<CompositeSequence> probes = new HashSet<>();

    // Phase 1: the vectors themselves, collecting what has to be thawed in the later phases.
    for (DesignElementDataVector vector : designElementDataVectors) {
        session.buildLockRequest(LockOptions.NONE).lock(vector);
        Hibernate.initialize(vector);
        Hibernate.initialize(vector.getQuantitationType());

        BioAssayDimension dimension = vector.getBioAssayDimension();
        Collection<DesignElementDataVector> group = vectorsByDimension.get(dimension);
        if (group == null) {
            group = new HashSet<DesignElementDataVector>();
            vectorsByDimension.put(dimension, group);
        }
        group.add(vector);

        probes.add(vector.getDesignElement());
        experiments.add(vector.getExpressionExperiment());

        session.evict(vector.getQuantitationType());
        session.evict(vector);
    }
    if (timer.getTime() > designElementDataVectors.size()) {
        AbstractDao.log.info("Thaw phase 1, " + designElementDataVectors.size() + " vectors initialized in " + timer.getTime() + "ms ");
    }

    timer.reset();
    timer.start();

    // Phase 2: a light thaw of the experiments seen above.
    for (ExpressionExperiment experiment : experiments) {
        Hibernate.initialize(experiment);
        session.evict(experiment);
    }
    if (timer.getTime() > 200) {
        AbstractDao.log.info("Thaw phase 2, " + experiments.size() + " vector-associated expression experiments in " + timer.getTime() + "ms ");
    }

    timer.reset();
    timer.start();

    // Phase 3: the bioassay dimensions seen above -- usually one, more rarely two.
    for (BioAssayDimension dimension : vectorsByDimension.keySet()) {
        BioAssayDimension thawed = (BioAssayDimension) this.getSessionFactory().getCurrentSession()
                .createQuery("select distinct bad from BioAssayDimension bad join fetch bad.bioAssays ba join fetch ba.sampleUsed " + "bm join fetch ba.arrayDesignUsed left join fetch bm.factorValues fetch all properties where bad.id= :bad ")
                .setParameter("bad", dimension.getId())
                .uniqueResult();
        assert thawed != null;
        assert !vectorsByDimension.get(thawed).isEmpty();

        // Point every vector that used this dimension (matched by id) at the fully fetched instance.
        for (DesignElementDataVector candidate : designElementDataVectors) {
            if (candidate.getBioAssayDimension().getId().equals(thawed.getId())) {
                candidate.setBioAssayDimension(thawed);
            }
        }
    }
    if (timer.getTime() > 1000) {
        AbstractDao.log.info("Thaw phase 3, " + vectorsByDimension.size() + " vector-associated bioassaydimensions in " + timer.getTime() + "ms ");
    }

    timer.reset();
    timer.start();

    // Phase 4: the design elements seen above. SLOW
    long lastTime = 0;
    int thawedProbes = 0;
    for (CompositeSequence probe : probes) {
        BioSequence sequence = probe.getBiologicalCharacteristic();
        if (sequence == null) {
            continue;
        }
        session.buildLockRequest(LockOptions.NONE).lock(sequence);
        Hibernate.initialize(sequence);

        // is this really necessary?
        ArrayDesign platform = probe.getArrayDesign();
        Hibernate.initialize(platform);

        thawedProbes++;
        if (thawedProbes % 10000 != 0) {
            continue;
        }
        // Progress report every 10000 probes, but only if that batch took more than a second.
        if (timer.getTime() - lastTime > 1000) {
            AbstractDao.log.info("Thawed " + thawedProbes + " vector-associated probes " + timer.getTime() + " ms");
        }
        lastTime = timer.getTime();
    }
    timer.stop();

    if (designElementDataVectors.size() >= 2000 || timer.getTime() > 200) {
        AbstractDao.log.info("Thaw phase 4 " + probes.size() + " vector-associated probes thawed in " + timer.getTime() + "ms");
    }
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class GeoConverterImpl method convertGeoSampleList.
/**
 * Creates a BioAssayDimension for a list of GEO samples. The samples are sorted in place, then each
 * sample accession is looked up against the experiment via {@code matchSampleToBioAssay}; accessions
 * without a match are logged as a warning. The dimension's description is the experiment short name
 * followed by the comma-terminated accessions, and its name is the formatted form of the same text.
 *
 * @param datasetSamples List of GeoSamples to be matched up with BioAssays; note that this list is
 *                       sorted by this method.
 * @param expExp         ExpressionExperiment the samples belong to
 * @return BioAssayDimension representing the samples.
 */
private BioAssayDimension convertGeoSampleList(List<GeoSample> datasetSamples, ExpressionExperiment expExp) {
    BioAssayDimension dimension = BioAssayDimension.Factory.newInstance();
    StringBuilder label = new StringBuilder();

    Collections.sort(datasetSamples);
    label.append(expExp.getShortName());
    label.append(": ");

    for (GeoSample sample : datasetSamples) {
        String accession = sample.getGeoAccession();
        label.append(accession);
        label.append(",");

        if (!this.matchSampleToBioAssay(expExp, dimension, accession)) {
            // Expected from time to time: not every heading is a sample id.
            GeoConverterImpl.log.warn("No bioassay match for " + accession);
        }
    }

    GeoConverterImpl.log.debug(dimension.getBioAssays().size() + " Bioassays in biodimension");

    dimension.setName(this.formatName(label));
    dimension.setDescription(label.toString());
    return dimension;
}
Aggregations