use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class SimpleExpressionDataLoaderServiceImpl method convert.
/**
 * Builds a new {@link ExpressionExperiment} from the supplied metadata and data matrix.
 * <p>
 * The experiment receives its name, short name, description and source from the metadata, a fresh
 * {@link ExperimentalDesign}, an optional primary publication (fetched over HTTP when a PubMed id is
 * present), a single quantitation type, and the raw data vectors and bioassays produced for each
 * array design returned by {@code convertArrayDesigns}.
 *
 * @param metaData description of the experiment being imported; must not be null
 * @param matrix   the expression data; must not be null
 * @return the newly assembled experiment
 * @throws IllegalArgumentException if either argument is null
 * @throws IllegalStateException    if no sub-matrix could be obtained for one of the array designs
 */
@Override
public ExpressionExperiment convert(SimpleExpressionExperimentMetaData metaData, DoubleMatrix<String, String> matrix) {
    if (matrix == null || metaData == null) {
        throw new IllegalArgumentException("One or all of method arguments was null");
    }

    ExpressionExperiment experiment = ExpressionExperiment.Factory.newInstance();
    Taxon taxon = this.convertTaxon(metaData.getTaxon());

    experiment.setName(metaData.getName());
    experiment.setShortName(metaData.getShortName());
    experiment.setDescription(metaData.getDescription());
    // Separate the two sentences with a space so the stored source text reads correctly.
    experiment.setSource("Import via matrix flat file." + (StringUtils.isBlank(metaData.getSourceUrl()) ? "" : " Downloaded from " + metaData.getSourceUrl()));

    ExperimentalDesign ed = ExperimentalDesign.Factory.newInstance();
    experiment.setExperimentalDesign(ed);

    if (metaData.getPubMedId() != null) {
        PubMedXMLFetcher pubfetch = new PubMedXMLFetcher();
        BibliographicReference ref = pubfetch.retrieveByHTTP(metaData.getPubMedId());
        experiment.setPrimaryPublication(ref);
    }

    QuantitationType quantitationType = this.convertQuantitationType(metaData);

    /* set the quantitation types on the experiment */
    Collection<QuantitationType> qTypes = new HashSet<>();
    qTypes.add(quantitationType);
    experiment.setQuantitationTypes(qTypes);

    Collection<ArrayDesign> arrayDesigns = this.convertArrayDesigns(metaData, matrix);

    // Divide up multiple array designs into multiple BioAssayDimensions.
    Collection<RawExpressionDataVector> allVectors = new HashSet<>();
    Collection<BioAssay> allBioAssays = new HashSet<>();
    // Shared across iterations; passed to getSubMatrixForArrayDesign for every design and checked below.
    Collection<Object> usedDesignElements = new HashSet<>();
    for (ArrayDesign design : arrayDesigns) {
        SimpleExpressionDataLoaderServiceImpl.log.info("Processing " + design);
        DoubleMatrix<String, String> subMatrix = this.getSubMatrixForArrayDesign(matrix, usedDesignElements, design);
        if (subMatrix == null) {
            throw new IllegalStateException("Got a null matrix");
        }
        BioAssayDimension bad = this.convertBioAssayDimension(experiment, design, taxon, subMatrix);
        Collection<RawExpressionDataVector> vectors = this.convertDesignElementDataVectors(experiment, bad, design, quantitationType, subMatrix);
        allVectors.addAll(vectors);
        allBioAssays.addAll(bad.getBioAssays());
    }

    // sanity: the number of design elements used should equal the number of matrix rows
    if (usedDesignElements.size() != matrix.rows()) {
        SimpleExpressionDataLoaderServiceImpl.log.warn("Some rows of matrix were not matched to any of the given platforms (" + matrix.rows() + " rows, " + usedDesignElements.size() + " found)");
    }

    experiment.setRawExpressionDataVectors(allVectors);
    experiment.setBioAssays(allBioAssays);
    return experiment;
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class SimpleExpressionDataLoaderServiceImpl method convertDesignElementDataVectors.
/**
 * Creates one {@link RawExpressionDataVector} for each matrix row whose row name equals the name of a
 * composite sequence on the given array design. Rows without a matching composite sequence are skipped.
 *
 * @param expressionExperiment the experiment set on every vector
 * @param bioAssayDimension    the bioassay dimension set on every vector
 * @param arrayDesign          source of the composite sequences that rows are matched against by name
 * @param quantitationType     the quantitation type set on every vector
 * @param matrix               the data; each matched row is stored on its vector as a byte array
 * @return the vectors created for the matched rows (empty if no row matched)
 */
private Collection<RawExpressionDataVector> convertDesignElementDataVectors(ExpressionExperiment expressionExperiment, BioAssayDimension bioAssayDimension, ArrayDesign arrayDesign, QuantitationType quantitationType, DoubleMatrix<String, String> matrix) {
    ByteArrayConverter bArrayConverter = new ByteArrayConverter();
    Collection<RawExpressionDataVector> vectors = new HashSet<>();

    // Index the platform's composite sequences by name for constant-time row lookup.
    Map<String, CompositeSequence> csMap = new HashMap<>();
    for (CompositeSequence cs : arrayDesign.getCompositeSequences()) {
        csMap.put(cs.getName(), cs);
    }

    for (int i = 0; i < matrix.rows(); i++) {
        // Resolve the probe first so no byte conversion or vector allocation is wasted on skipped rows.
        CompositeSequence cs = csMap.get(matrix.getRowName(i));
        if (cs == null) {
            continue;
        }

        RawExpressionDataVector vector = RawExpressionDataVector.Factory.newInstance();
        vector.setData(bArrayConverter.doubleArrayToBytes(matrix.getRow(i)));
        vector.setDesignElement(cs);
        vector.setQuantitationType(quantitationType);
        vector.setExpressionExperiment(expressionExperiment);
        vector.setBioAssayDimension(bioAssayDimension);
        vectors.add(vector);
    }

    SimpleExpressionDataLoaderServiceImpl.log.info("Created " + vectors.size() + " data vectors");
    return vectors;
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class ExpressionExperimentPrePersistServiceImpl method prepare.
/**
 * Replaces the probe on each raw data vector of the experiment with the instance held in the cache.
 * <p>
 * Vectors whose probe is not in the cache are collected per array design and handed to
 * {@code addNewDesignElementToPersistentArrayDesigns}; the probes that call returns are added to the
 * cache and the affected vectors are then re-pointed at the cached instances.
 *
 * @param ee    the experiment whose raw expression data vectors are processed
 * @param cache the array design / probe cache that is consulted and updated
 * @return the same {@code cache} instance that was passed in
 * @throws IllegalStateException if, after the second pass, a vector's probe is still missing from the
 *                               cache or is reported transient by the persister helper
 */
@Override
public ArrayDesignsForExperimentCache prepare(ExpressionExperiment ee, ArrayDesignsForExperimentCache cache) {
    Map<ArrayDesign, Collection<CompositeSequence>> probesToAdd = new HashMap<>();
    Collection<DesignElementDataVector> unresolvedVectors = new HashSet<>();

    Collection<RawExpressionDataVector> rawVectors = ee.getRawExpressionDataVectors();
    if (rawVectors.isEmpty()) {
        // Not an error: some data sets arrive without any data.
        this.prepareWithoutData(ee, cache);
    }

    /*
     * First pass: use the cached probe where there is one, otherwise remember the probe and its vector.
     */
    for (DesignElementDataVector vec : rawVectors) {
        CompositeSequence originalProbe = vec.getDesignElement();
        assert originalProbe != null;

        ArrayDesign platform = originalProbe.getArrayDesign();
        assert platform != null : originalProbe + " does not have an array design";
        platform = this.loadOrPersistArrayDesignAndAddToCache(platform, cache);

        CompositeSequence fromCache = cache.getFromCache(originalProbe);
        if (fromCache != null) {
            vec.setDesignElement(fromCache);
            continue;
        }

        Collection<CompositeSequence> pending = probesToAdd.get(platform);
        if (pending == null) {
            pending = new HashSet<>();
            probesToAdd.put(platform, pending);
        }
        pending.add(originalProbe);
        unresolvedVectors.add(vec);
    }

    if (unresolvedVectors.isEmpty()) {
        return cache;
    }

    /*
     * Second pass: deal with the vectors whose probes were not cached the first time around.
     */
    ExpressionExperimentPrePersistServiceImpl.log.info(unresolvedVectors.size() + " vectors don't have probes, may add to the platform.");
    probesToAdd = this.addNewDesignElementToPersistentArrayDesigns(probesToAdd);

    if (probesToAdd.isEmpty()) {
        // Fine when there was nothing to add; a problem otherwise.
        ExpressionExperimentPrePersistServiceImpl.log.info("No probes were added");
        return cache;
    }

    // Cache every probe that came back before touching the vectors.
    for (Collection<CompositeSequence> addedProbes : probesToAdd.values()) {
        for (CompositeSequence added : addedProbes) {
            cache.addToCache(added);
        }
    }

    // Point each unresolved vector at the cached probe.
    for (DesignElementDataVector vec : unresolvedVectors) {
        CompositeSequence resolved = cache.getFromCache(vec.getDesignElement());
        if (resolved == null || persisterHelper.isTransient(resolved)) {
            throw new IllegalStateException("All probes should be persistent by now");
        }
        vec.setDesignElement(resolved);
    }

    return cache;
}
Aggregations