use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class VectorMergingServiceImpl method mergeVectors.
/**
 * Merges the raw expression data vectors of an experiment that are spread over several bioassay dimensions
 * into vectors that share a single, newly created bioassay dimension.
 * <p>
 * Steps, in order: check the experiment uses at most one platform; thaw it; collect every bioassay dimension
 * used by its bioassays (skipping ones whose description starts with {@code MERGED_DIM_DESC_PREFIX}); build the
 * new dimension; then, per quantitation type, rebuild one vector per design element, persist the new vectors and
 * remove the old ones. Afterwards the experiment is updated, cleaned up, audited and sent for preprocessing.
 *
 * @param ee the experiment whose vectors should be merged
 * @return the thawed experiment; returned without further changes when only a single bioassay dimension is found
 * @throws IllegalArgumentException if the experiment uses more than one platform
 * @throws IllegalStateException    if no bioassay dimensions are found to merge, if a rebuilt vector has the wrong
 *                                  number of values, or if no quantitation type was merged
 */
@Override
public ExpressionExperiment mergeVectors(ExpressionExperiment ee) {
    Collection<ArrayDesign> arrayDesigns = expressionExperimentService.getArrayDesignsUsed(ee);
    if (arrayDesigns.size() > 1) {
        throw new IllegalArgumentException("Cannot cope with more than one platform; switch experiment to use a (merged) platform first");
    }
    ee = expressionExperimentService.thaw(ee);
    Collection<QuantitationType> qts = expressionExperimentService.getQuantitationTypes(ee);
    VectorMergingServiceImpl.log.info(qts.size() + " quantitation types for potential merge");
    /*
     * Load all the bioassay dimensions, which will be merged.
     */
    Collection<BioAssayDimension> allOldBioAssayDims = new HashSet<>();
    for (BioAssay ba : ee.getBioAssays()) {
        Collection<BioAssayDimension> oldBioAssayDims = bioAssayService.findBioAssayDimensions(ba);
        for (BioAssayDimension bioAssayDim : oldBioAssayDims) {
            // A dimension without a description cannot carry the merged prefix, so it is kept rather than
            // triggering a NullPointerException.
            String dimDescription = bioAssayDim.getDescription();
            if (dimDescription != null && dimDescription.startsWith(VectorMergingServiceImpl.MERGED_DIM_DESC_PREFIX)) {
                // not foolproof, but avoids some artifacts - e.g. if there were previous failed attempts at this.
                continue;
            }
            allOldBioAssayDims.add(bioAssayDim);
        }
    }
    if (allOldBioAssayDims.isEmpty()) {
        throw new IllegalStateException("No bioAssayDimensions found to merge (previously merged ones are filtered, data may be corrupt?");
    }
    if (allOldBioAssayDims.size() == 1) {
        VectorMergingServiceImpl.log.warn("Experiment already has only a single bioAssayDimension, nothing seems to need merging. Bailing");
        return ee;
    }
    VectorMergingServiceImpl.log.info(allOldBioAssayDims.size() + " bioAssayDimensions to merge");
    List<BioAssayDimension> sortedOldDims = this.sortedBioAssayDimensions(allOldBioAssayDims);
    BioAssayDimension newBioAd = this.getNewBioAssayDimension(sortedOldDims);
    int totalBioAssays = newBioAd.getBioAssays().size();
    assert totalBioAssays == ee.getBioAssays().size() : "experiment has " + ee.getBioAssays().size() + " but new bioAssayDimension has " + totalBioAssays;
    Map<QuantitationType, Collection<RawExpressionDataVector>> qt2Vec = this.getVectors(ee, qts, allOldBioAssayDims);
    /*
     * This will run into problems if there are excess quantitation types
     */
    int numSuccessfulMergers = 0;
    for (Map.Entry<QuantitationType, Collection<RawExpressionDataVector>> qtEntry : qt2Vec.entrySet()) {
        QuantitationType type = qtEntry.getKey();
        Collection<RawExpressionDataVector> oldVecs = qtEntry.getValue();
        if (oldVecs.isEmpty()) {
            VectorMergingServiceImpl.log.warn("No vectors for " + type);
            continue;
        }
        Map<CompositeSequence, Collection<RawExpressionDataVector>> deVMap = this.getDevMap(oldVecs);
        if (deVMap == null) {
            VectorMergingServiceImpl.log.info("Vector merging will not be done for " + type + " as there is only one vector per element already");
            continue;
        }
        VectorMergingServiceImpl.log.info("Processing " + oldVecs.size() + " vectors for " + type);
        Collection<RawExpressionDataVector> newVectors = new HashSet<>();
        int numAllMissing = 0;
        int missingValuesForQt = 0;
        for (Map.Entry<CompositeSequence, Collection<RawExpressionDataVector>> deEntry : deVMap.entrySet()) {
            CompositeSequence de = deEntry.getKey();
            RawExpressionDataVector vector = this.initializeNewVector(ee, newBioAd, type, de);
            Collection<RawExpressionDataVector> dedvs = deEntry.getValue();
            /*
             * these ugly nested loops are to ENSURE that we get the vector reconstructed properly. For each of the
             * old bioassayDimensions, find the designElementDataVector that uses it. If there isn't one, fill in
             * the values for that dimension with missing data. We go through the dimensions in the same order that
             * we joined them up.
             */
            List<Object> data = new ArrayList<>();
            int totalMissingInVector = this.makeMergedData(sortedOldDims, newBioAd, type, de, dedvs, data);
            missingValuesForQt += totalMissingInVector;
            if (totalMissingInVector == totalBioAssays) {
                numAllMissing++;
                // we don't save data that is all missing.
                continue;
            }
            if (data.size() != totalBioAssays) {
                throw new IllegalStateException("Wrong number of values for " + de + " / " + type + ", expected " + totalBioAssays + ", got " + data.size());
            }
            byte[] newDataAr = converter.toBytes(data.toArray());
            vector.setData(newDataAr);
            newVectors.add(vector);
        }
        // TRANSACTION
        vectorMergingHelperService.persist(ee, type, newVectors);
        if (numAllMissing > 0) {
            VectorMergingServiceImpl.log.info(numAllMissing + " vectors had all missing values and were junked for " + type);
        }
        if (missingValuesForQt > 0) {
            VectorMergingServiceImpl.log.info(missingValuesForQt + " total missing values: " + type);
        }
        VectorMergingServiceImpl.log.info("Removing " + oldVecs.size() + " old vectors for " + type);
        rawExpressionDataVectorService.remove(oldVecs);
        ee.getRawExpressionDataVectors().removeAll(oldVecs);
        numSuccessfulMergers++;
    }
    if (numSuccessfulMergers == 0) {
        /*
         * Try to clean up
         */
        this.bioAssayDimensionService.remove(newBioAd);
        throw new IllegalStateException("Nothing was merged. Maybe all the vectors are effectively merged already");
    }
    expressionExperimentService.update(ee);
    // Several transactions
    this.cleanUp(ee, allOldBioAssayDims, newBioAd);
    // transaction
    // Report the number of merged dimensions, not the string form of the whole collection.
    this.audit(ee, "Vector merging performed, merged " + allOldBioAssayDims.size() + " old bioassay dimensions for " + qts.size() + " quantitation types.");
    // several transactions
    try {
        preprocessorService.process(ee);
    } catch (PreprocessingException e) {
        // Merging itself succeeded at this point; a preprocessing failure is logged, not propagated.
        VectorMergingServiceImpl.log.error("Error during postprocessing", e);
    }
    return ee;
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class BatchConfound method getBioMaterialFactorMap.
/**
 * Builds a per-factor map for the experiment by handing the sample of every bioassay to
 * {@code SVDServiceHelperImpl.populateBMFMap}.
 *
 * @param ee the experiment whose bioassays are visited
 * @return the map filled in by {@code populateBMFMap}; empty if the experiment has no bioassays.
 *         NOTE(review): the inner map presumably goes from biomaterial id to a numeric factor value -
 *         confirm against populateBMFMap.
 */
private static Map<ExperimentalFactor, Map<Long, Double>> getBioMaterialFactorMap(ExpressionExperiment ee) {
    Map<ExperimentalFactor, Map<Long, Double>> valuesByFactor = new HashMap<>();
    for (BioAssay assay : ee.getBioAssays()) {
        // The helper mutates the map in place, one sample at a time.
        SVDServiceHelperImpl.populateBMFMap(valuesByFactor, assay.getSampleUsed());
    }
    return valuesByFactor;
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class ExpressionExperimentBatchCorrectionServiceImpl method checkCorrectability.
/**
 * Decides whether batch correction can be run on the experiment.
 * <p>
 * The answer is {@code false} when any quantitation type is already flagged as batch-corrected, when no batch
 * factor is found, when a batch confound is reported and {@code force} is not set, or when some batch has fewer
 * than two samples. Each negative outcome is logged.
 *
 * @param ee    the experiment to check
 * @param force if true, a reported batch confound does not block correction
 * @return true if none of the blocking conditions above applies
 */
@Override
public boolean checkCorrectability(ExpressionExperiment ee, boolean force) {
    for (QuantitationType quantType : expressionExperimentService.getQuantitationTypes(ee)) {
        if (quantType.getIsBatchCorrected()) {
            ExpressionExperimentBatchCorrectionServiceImpl.log.warn("Experiment already has a batch-corrected quantitation type: " + ee + ": " + quantType);
            return false;
        }
    }

    ExperimentalFactor batch = this.getBatchFactor(ee);
    if (batch == null) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.warn("No batch factor found: " + ee);
        return false;
    }

    String confoundSummary = expressionExperimentService.getBatchConfound(ee);
    if (!force && confoundSummary != null) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.warn("Experiment can not be batch corrected: " + confoundSummary);
        ExpressionExperimentBatchCorrectionServiceImpl.log.info("To force batch-correction of a confounded experiment, use the force option (note, that this option also allows outliers while batch correcting).");
        return false;
    }

    /*
     * Count samples per batch; every batch needs at least two. Batches defined by Gemma normally satisfy this.
     */
    Map<Long, Integer> samplesPerBatch = new HashMap<>();
    Set<BioMaterial> visited = new HashSet<>();
    for (BioAssay assay : ee.getBioAssays()) {
        BioMaterial sample = assay.getSampleUsed();
        // add() returns false for a sample that was already counted via another bioassay.
        if (!visited.add(sample)) {
            continue;
        }
        for (FactorValue value : sample.getFactorValues()) {
            if (!value.getExperimentalFactor().equals(batch)) {
                continue;
            }
            // Batches are keyed by the id of the batch factor value.
            Long batchId = value.getId();
            Integer soFar = samplesPerBatch.get(batchId);
            samplesPerBatch.put(batchId, soFar == null ? 1 : soFar + 1);
        }
    }

    /*
     * Batches are already merged when the batch factor is created, so a single-sample batch is unusual; if one
     * exists, correction is refused.
     */
    for (Map.Entry<Long, Integer> batchCount : samplesPerBatch.entrySet()) {
        if (batchCount.getValue() < 2) {
            ExpressionExperimentBatchCorrectionServiceImpl.log.info("Batch with only one sample detected, correction not possible: " + ee + ", batchId=" + batchCount.getKey());
            return false;
        }
    }
    return true;
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class OutlierFlaggingServiceImpl method unmarkAsMissing.
/**
 * Clears the outlier flag on the given bioassays and, if at least one flag was actually cleared, records an
 * audit event on the owning experiment and re-runs preprocessing for it.
 * <p>
 * The experiment is looked up from the first bioassay returned by the collection's iterator. A preprocessing
 * failure is logged rather than propagated.
 *
 * @param bioAssays the bioassays to revert; bioassays not currently flagged as outliers are left untouched, and
 *                  an empty collection is a no-op
 */
@Override
public void unmarkAsMissing(Collection<BioAssay> bioAssays) {
    if (bioAssays.isEmpty())
        return;
    boolean hasReversions = false;
    for (BioAssay bioAssay : bioAssays) {
        if (!bioAssay.getIsOutlier()) {
            continue;
        }
        // Rather long transaction.
        hasReversions = true;
        bioAssay.setIsOutlier(false);
        bioAssayService.update(bioAssay);
    }
    if (!hasReversions) {
        // Nothing changed, so there is nothing to audit or reprocess.
        return;
    }
    ExpressionExperiment expExp = expressionExperimentService.findByBioAssay(bioAssays.iterator().next());
    // Check before the first use of expExp; previously the assertion came after the audit call.
    assert expExp != null;
    // NOTE(review): the detail string joins the bioassays with an empty separator - confirm that is intended.
    auditTrailService.addUpdateEvent(expExp, SampleRemovalReversionEvent.Factory.newInstance(), "Marked " + bioAssays.size() + " bioassays as non-missing", StringUtils.join(bioAssays, ""));
    // several transactions
    try {
        preprocessorService.process(expExp);
    } catch (PreprocessingException e) {
        OutlierFlaggingServiceImpl.log.error("Error during postprocessing, make sure additional steps are completed", e);
    }
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class AuditAdviceTest method testCascadingCreateWithAssociatedAuditable.
/**
 * Checks that creating a complete test experiment also gives each of its bioassays an audit trail holding
 * exactly one persisted CREATE event.
 */
@Test
public void testCascadingCreateWithAssociatedAuditable() {
    ExpressionExperiment created = this.getTestPersistentCompleteExpressionExperiment(false);
    ExpressionExperiment ee = expressionExperimentService.thawLite(this.expressionExperimentService.load(created.getId()));

    // The experiment itself: 16 persisted bioassays and a single audit event.
    assertEquals(16, ee.getBioAssays().size());
    assertNotNull(ee.getBioAssays().iterator().next().getId());
    assertEquals(1, ee.getAuditTrail().getEvents().size());

    // Each associated bioassay must carry its own trail with one CREATE event.
    for (BioAssay bioAssay : ee.getBioAssays()) {
        assertNotNull(bioAssay.getAuditTrail());
        Collection<AuditEvent> assayEvents = this.auditTrailService.getEvents(bioAssay);
        assertEquals(1, assayEvents.size());
        for (AuditEvent event : assayEvents) {
            assertNotNull(event.getId());
            assertEquals(AuditAction.CREATE, event.getAction());
        }
    }
}
Aggregations