use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class ExpressionExperimentDaoImpl method remove.
/**
 * Removes an expression experiment along with its raw and processed data vectors, the bioassay dimensions and
 * quantitation types collected while removing those vectors, and its bioassays and biomaterials. Investigators are
 * only disassociated, not deleted.
 * <p>
 * Failures are logged and <em>not</em> propagated to the caller (best-effort removal). After deletion the vector
 * collections and the bioassay-to-biomaterial relations are put back on the (now transient) instance.
 *
 * @param ee the experiment to remove
 * @throws IllegalArgumentException if {@code ee} is null
 */
@Override
public void remove(final ExpressionExperiment ee) {
    if (ee == null)
        throw new IllegalArgumentException("ExpressionExperiment to remove must not be null");
    Session session = this.getSessionFactory().getCurrentSession();
    try {
        // Note that links and analyses are deleted separately - see the ExpressionExperimentService.
        // At this point, the ee is probably still in the session, as the service already has gotten it
        // in this transaction.
        session.flush();
        session.clear();

        // The session was just cleared, so attach ee to it again (LockOptions.NONE) before touching associations.
        session.buildLockRequest(LockOptions.NONE).lock(ee);
        Hibernate.initialize(ee.getAuditTrail());

        // Filled in by the vector-removal helpers below - presumably with the dimensions and quantitation
        // types referenced by the removed vectors (verify against removeDataVectors/removeProcessedVectors).
        Set<BioAssayDimension> dims = new HashSet<>();
        Set<QuantitationType> qts = new HashSet<>();

        Collection<RawExpressionDataVector> designElementDataVectors = ee.getRawExpressionDataVectors();
        Hibernate.initialize(designElementDataVectors);
        ee.setRawExpressionDataVectors(null);

        /*
         * We don't remove the investigators, just breaking the association.
         */
        ee.getInvestigators().clear();

        int count = 0;
        if (designElementDataVectors != null) {
            count = this.removeDataVectors(session, dims, qts, designElementDataVectors, count);
        }

        Collection<ProcessedExpressionDataVector> processedVectors = ee.getProcessedExpressionDataVectors();
        Hibernate.initialize(processedVectors);
        if (processedVectors != null && processedVectors.size() > 0) {
            ee.setProcessedExpressionDataVectors(null);
            this.removeProcessedVectors(session, dims, qts, count, processedVectors);
        }

        session.flush();
        session.clear();
        session.update(ee);

        AbstractDao.log.info("Removing BioAssay Dimensions ...");
        for (BioAssayDimension dim : dims) {
            // Break the link to the bioassays first, then delete the dimension itself.
            dim.getBioAssays().clear();
            session.update(dim);
            session.delete(dim);
        }
        dims.clear();
        session.flush();

        AbstractDao.log.info("Removing Bioassays and biomaterials ...");
        // keep to put back in the object.
        Map<BioAssay, BioMaterial> copyOfRelations = new HashMap<>();
        Collection<BioMaterial> bioMaterialsToDelete = new HashSet<>();
        Collection<BioAssay> bioAssays = ee.getBioAssays();
        this.removeBioAssays(session, copyOfRelations, bioMaterialsToDelete, bioAssays);

        AbstractDao.log.info("Last bits ...");
        // We remove them here in case they are associated to more than one bioassay-- no cascade is possible.
        for (BioMaterial bm : bioMaterialsToDelete) {
            session.delete(bm);
        }
        for (QuantitationType qt : qts) {
            session.delete(qt);
        }
        session.flush();
        session.delete(ee);

        /*
         * Put transient instances back. This is possibly useful for clearing ACLS.
         */
        ee.setProcessedExpressionDataVectors(processedVectors);
        ee.setRawExpressionDataVectors(designElementDataVectors);
        for (BioAssay ba : ee.getBioAssays()) {
            ba.setSampleUsed(copyOfRelations.get(ba));
        }

        AbstractDao.log.info("Deleted " + ee);
    } catch (Exception e) {
        // Still best-effort (the exception is not rethrown), but pass the throwable as the second argument so
        // the stack trace is logged; error(e) alone would only log the exception's toString().
        AbstractDao.log.error("Error while removing expression experiment", e);
    } finally {
        AbstractDao.log.info("Finalising remove method.");
    }
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class GeoConverterImpl method convertVectorsForPlatform.
/**
 * Creates the raw expression data vectors for samples that all come from a single platform. Every quantitation
 * type (sample column) after the first one is converted; vectors are added to the experiment, and a quantitation
 * type is added to the experiment only if at least one vector was produced for it.
 *
 * @param values         A GeoValues object holding the parsed results.
 * @param expExp         the experiment receiving the vectors and quantitation types
 * @param datasetSamples the samples to convert; asserted to be non-empty
 * @param geoPlatform    the platform the samples belong to
 * @throws IllegalStateException if the bioassay dimension built from the samples has no bioassays
 */
private void convertVectorsForPlatform(GeoValues values, ExpressionExperiment expExp, List<GeoSample> datasetSamples, GeoPlatform geoPlatform) {
    assert datasetSamples.size() > 0 : "No samples in dataset";

    if (!geoPlatform.useDataFromGeo()) {
        // see bug 4181
        GeoConverterImpl.log.warn("Platform characteristics indicate data from GEO should be ignored or will not be present anyway (" + geoPlatform + ")");
        return;
    }

    GeoConverterImpl.log.info("Converting vectors for " + geoPlatform.getGeoAccession() + ", " + datasetSamples.size() + " samples.");

    BioAssayDimension bioAssayDimension = this.convertGeoSampleList(datasetSamples, expExp);
    if (bioAssayDimension.getBioAssays().isEmpty()) {
        throw new IllegalStateException("No bioAssays in the BioAssayDimension");
    }

    this.sanityCheckQuantitationTypes(datasetSamples);

    // The first sample supplies the column names and descriptions used for all samples.
    GeoSample referenceSample = datasetSamples.iterator().next();
    List<String> quantitationTypes = referenceSample.getColumnNames();
    List<String> quantitationTypeDescriptions = referenceSample.getColumnDescriptions();

    // Start at 1: the first quantitationType is the ID or ID_REF and is skipped.
    for (int position = 1; position < quantitationTypes.size(); position++) {
        String quantitationType = quantitationTypes.get(position);

        int columnAccordingToSample = quantitationTypes.indexOf(quantitationType);
        int quantitationTypeIndex = values.getQuantitationTypeIndex(geoPlatform, quantitationType);
        GeoConverterImpl.log.debug("Processing " + quantitationType + " (column=" + quantitationTypeIndex + " - according to sample, it's " + columnAccordingToSample + ")");

        Map<String, List<Object>> dataVectors = this.makeDataVectors(values, datasetSamples, quantitationTypeIndex);
        if (dataVectors == null || dataVectors.isEmpty()) {
            GeoConverterImpl.log.debug("No data for " + quantitationType + " (column=" + quantitationTypeIndex + ")");
            continue;
        }
        GeoConverterImpl.log.info(dataVectors.size() + " data vectors for " + quantitationType);

        // One sample value is handed to the guesser together with the name and description.
        Object exampleValue = dataVectors.values().iterator().next().iterator().next();
        String description = quantitationTypeDescriptions.get(columnAccordingToSample);

        QuantitationType qt = QuantitationType.Factory.newInstance();
        qt.setName(quantitationType);
        qt.setDescription(description);
        QuantitationTypeParameterGuesser.guessQuantitationTypeParameters(qt, quantitationType, description, exampleValue);

        int count = 0;
        int skipped = 0;
        for (Map.Entry<String, List<Object>> entry : dataVectors.entrySet()) {
            String designElementName = entry.getKey();
            List<Object> dataVector = entry.getValue();
            if (dataVector == null || dataVector.isEmpty()) {
                continue;
            }

            RawExpressionDataVector vector = this.convertDesignElementDataVector(geoPlatform, expExp, bioAssayDimension, designElementName, dataVector, qt);
            if (vector == null) {
                skipped++;
                if (GeoConverterImpl.log.isDebugEnabled()) {
                    GeoConverterImpl.log.debug("Null vector for DE=" + designElementName + " QT=" + quantitationType);
                }
                continue;
            }

            if (GeoConverterImpl.log.isTraceEnabled()) {
                GeoConverterImpl.log.trace(designElementName + " " + qt.getName() + " " + qt.getRepresentation() + " " + dataVector.size() + " elements in vector");
            }

            expExp.getRawExpressionDataVectors().add(vector);
            count++;
            if (count % GeoConverterImpl.LOGGING_VECTOR_COUNT_UPDATE == 0 && GeoConverterImpl.log.isDebugEnabled()) {
                GeoConverterImpl.log.debug(count + " Data vectors added");
            }
        }

        if (count == 0) {
            GeoConverterImpl.log.info("No vectors were retained for " + quantitationType + " -- usually this is due to all values being missing.");
        } else {
            // Only quantitation types that actually yielded vectors are kept on the experiment.
            expExp.getQuantitationTypes().add(qt);
            if (GeoConverterImpl.log.isDebugEnabled() && count > 1000) {
                GeoConverterImpl.log.debug(count + " Data vectors added for '" + quantitationType + "'");
            }
        }

        if (skipped > 0) {
            GeoConverterImpl.log.info("Skipped " + skipped + " vectors");
        }
    }

    GeoConverterImpl.log.info("Total of " + expExp.getRawExpressionDataVectors().size() + " vectors on platform " + geoPlatform + ", " + expExp.getQuantitationTypes().size() + " quantitation types.");
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class GeoConverterImpl method convertDesignElementDataVector.
/**
 * Converts the values of one design element (probe) for one quantitation type into a raw expression data vector.
 *
 * @param geoPlatform       platform the probe belongs to; also supplies the GEO-to-Gemma probe name mapping
 * @param expExp            experiment the vector is attached to
 * @param bioAssayDimension dimension of the vector; its bioassay count must equal the number of values
 * @param designElementName probe name as it appears in the data
 * @param dataVector        the values, one per bioassay
 * @param qt                quantitation type of the values
 * @return the new vector, or null if there is no data or the data could not be converted to bytes (all missing
 *         etc.)
 * @throws IllegalArgumentException if the number of values does not match the bioassay dimension
 * @throws IllegalStateException    if the probe cannot be mapped to a composite sequence, or the sequence's
 *                                  database entry has an external database without a name
 */
private RawExpressionDataVector convertDesignElementDataVector(GeoPlatform geoPlatform, ExpressionExperiment expExp, BioAssayDimension bioAssayDimension, String designElementName, List<Object> dataVector, QuantitationType qt) {
    if (dataVector == null || dataVector.size() == 0)
        return null;

    int numValuesExpected = bioAssayDimension.getBioAssays().size();
    if (dataVector.size() != numValuesExpected) {
        throw new IllegalArgumentException("Expected " + numValuesExpected + " in bioassaydimension, data contains " + dataVector.size());
    }

    byte[] blob = this.convertData(dataVector, qt);
    if (blob == null) {
        // all missing etc.
        if (GeoConverterImpl.log.isDebugEnabled())
            GeoConverterImpl.log.debug("All missing values for DE=" + designElementName + " QT=" + qt);
        return null;
    }
    if (GeoConverterImpl.log.isDebugEnabled()) {
        GeoConverterImpl.log.debug(blob.length + " bytes for " + dataVector.size() + " raw elements");
    }

    ArrayDesign p = this.convertPlatform(geoPlatform);
    assert p != null;
    Map<String, CompositeSequence> designMap = platformDesignElementMap.get(p.getShortName());
    assert designMap != null;

    /*
     * Replace name with the one we're using in the array design after conversion. This information gets filled in
     * earlier in the conversion process (see GeoService)
     */
    String mappedName = geoPlatform.getProbeNamesInGemma().get(designElementName);
    if (mappedName == null) {
        // Sigh..this is unlikely to work in general, but see bug 1709.
        // Locale.ROOT keeps the fallback key independent of the JVM's default locale (e.g. Turkish dotted i).
        mappedName = geoPlatform.getProbeNamesInGemma().get(designElementName.toUpperCase(java.util.Locale.ROOT));
    }
    if (mappedName == null) {
        throw new IllegalStateException("There is no probe matching " + designElementName);
    }

    CompositeSequence compositeSequence = designMap.get(mappedName);
    if (compositeSequence == null)
        throw new IllegalStateException("No composite sequence " + designElementName);

    if (compositeSequence.getBiologicalCharacteristic() != null && compositeSequence.getBiologicalCharacteristic().getSequenceDatabaseEntry() != null && compositeSequence.getBiologicalCharacteristic().getSequenceDatabaseEntry().getExternalDatabase().getName() == null) {
        // this is obscure.
        throw new IllegalStateException(compositeSequence + " sequence accession external database lacks name");
    }

    if (GeoConverterImpl.log.isDebugEnabled())
        GeoConverterImpl.log.debug("Associating " + compositeSequence + " with dedv");

    RawExpressionDataVector vector = RawExpressionDataVector.Factory.newInstance();
    vector.setDesignElement(compositeSequence);
    vector.setExpressionExperiment(expExp);
    vector.setBioAssayDimension(bioAssayDimension);
    vector.setQuantitationType(qt);
    vector.setData(blob);
    return vector;
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class DataUpdater method makeNewVectors.
/**
 * Builds one raw expression data vector per matrix row that has a design element. Rows without a design element
 * are skipped.
 *
 * @param ee             experiment the vectors are associated with
 * @param targetPlatform platform every row's design element must belong to
 * @param data           matrix supplying the row values and the bioassay dimension
 * @param qt             quantitation type set on every vector
 * @return the new vectors
 * @throws IllegalArgumentException if a row's design element is on a platform other than {@code targetPlatform}
 */
private Collection<RawExpressionDataVector> makeNewVectors(ExpressionExperiment ee, ArrayDesign targetPlatform, ExpressionDataDoubleMatrix data, QuantitationType qt) {
    ByteArrayConverter bArrayConverter = new ByteArrayConverter();
    Collection<RawExpressionDataVector> vectors = new HashSet<>();

    BioAssayDimension bioAssayDimension = data.getBestBioAssayDimension();
    assert bioAssayDimension != null;
    assert !bioAssayDimension.getBioAssays().isEmpty();

    // Swap in the instance returned by the service; all vectors reference that one.
    bioAssayDimension = assayDimensionService.findOrCreate(bioAssayDimension);
    assert !bioAssayDimension.getBioAssays().isEmpty();

    for (int i = 0; i < data.rows(); i++) {
        // Validate the row before doing any conversion work, so skipped or invalid rows cost nothing.
        CompositeSequence cs = data.getRowElement(i).getDesignElement();
        if (cs == null) {
            continue;
        }
        if (!cs.getArrayDesign().equals(targetPlatform)) {
            throw new IllegalArgumentException("Input data must use the target platform (was: " + cs.getArrayDesign() + ", expected: " + targetPlatform + ")");
        }

        RawExpressionDataVector vector = RawExpressionDataVector.Factory.newInstance();
        vector.setData(bArrayConverter.doubleArrayToBytes(data.getRow(i)));
        vector.setDesignElement(cs);
        vector.setQuantitationType(qt);
        vector.setExpressionExperiment(ee);
        vector.setBioAssayDimension(bioAssayDimension);
        vectors.add(vector);
    }
    return vectors;
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class DataUpdater method reprocessAffyThreePrimeArrayData.
/**
 * Replaces the existing raw data of the experiment with data computed from CEL files using AffyPowerTools. The
 * CEL file(s) must be found by configuration. All platforms used by the experiment are processed with a single,
 * newly created quantitation type. Postprocessing only runs when the experiment uses exactly one platform.
 *
 * @param ee ee
 * @return the experiment as returned by the experiment service after its raw vectors were replaced
 * @throws RuntimeException      if no raw data files could be fetched for the experiment's accession
 * @throws IllegalStateException if processing produced no vectors
 */
// Possible external use
@SuppressWarnings("UnusedReturnValue")
public ExpressionExperiment reprocessAffyThreePrimeArrayData(ExpressionExperiment ee) {
    Collection<ArrayDesign> arrayDesignsUsed = this.experimentService.getArrayDesignsUsed(ee);
    ee = experimentService.thawLite(ee);

    RawDataFetcher f = new RawDataFetcher();
    Collection<LocalFile> files = f.fetch(ee.getAccession().getAccession());
    // Defensive: treat a null result from the fetcher the same as an empty one instead of failing with an NPE.
    if (files == null || files.isEmpty()) {
        throw new RuntimeException("Data was apparently not available");
    }

    Collection<RawExpressionDataVector> vectors = new HashSet<>();

    // Use the same QT for each one
    QuantitationType qt = AffyPowerToolsProbesetSummarize.makeAffyQuantitationType();
    qt = quantitationTypeService.create(qt);

    for (ArrayDesign ad : arrayDesignsUsed) {
        DataUpdater.log.info("Processing data for " + ad);
        String cdfFileName = this.findCdf(ad).getAbsolutePath();
        ad = arrayDesignService.thaw(ad);
        AffyPowerToolsProbesetSummarize apt = new AffyPowerToolsProbesetSummarize(qt);
        vectors.addAll(apt.processThreeprimeArrayData(ee, cdfFileName, ad, files));
    }

    if (vectors.isEmpty()) {
        throw new IllegalStateException("No vectors were returned for " + ee);
    }

    ee = experimentService.replaceRawVectors(ee, vectors);
    this.audit(ee, "Data vector computation from CEL files using AffyPowerTools for " + StringUtils.join(arrayDesignsUsed, "; "), true);

    if (arrayDesignsUsed.size() == 1) {
        this.postprocess(ee);
    } else {
        DataUpdater.log.warn("Skipping postprocessing for mult-platform experiment");
    }
    return ee;
}
Aggregations