use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class PersistentDummyObjectHelper method getDesignElementDataVectors.
/**
 * Builds one raw expression data vector for every (quantitation type, composite sequence) pair. All of the
 * vectors share a single, newly created {@link BioAssayDimension} whose name is the experiment short name
 * followed by an underscore and a random 20-character alphanumeric suffix.
 *
 * @param ee                experiment the vectors are attached to
 * @param quantitationTypes quantitation types to generate vectors for
 * @param bioAssays         bioassays placed in the new dimension
 * @param ad                array design whose composite sequences become the design elements
 * @return the generated vectors; these are non-persistent
 */
private Collection<RawExpressionDataVector> getDesignElementDataVectors(ExpressionExperiment ee, Collection<QuantitationType> quantitationTypes, List<BioAssay> bioAssays, ArrayDesign ad) {
    String dimensionName = ee.getShortName() + "_" + RandomStringUtils.randomAlphanumeric(20);
    BioAssayDimension sharedDimension = BioAssayDimension.Factory.newInstance(dimensionName, null, bioAssays);

    Collection<RawExpressionDataVector> result = new HashSet<>();
    for (QuantitationType qt : quantitationTypes) {
        for (CompositeSequence probe : ad.getCompositeSequences()) {
            RawExpressionDataVector dedv = RawExpressionDataVector.Factory.newInstance();
            dedv.setData(this.getDoubleData());
            dedv.setDesignElement(probe);
            // Every probe is expected to already be linked back to its array design.
            assert probe.getArrayDesign() != null;
            dedv.setExpressionExperiment(ee);
            dedv.setQuantitationType(qt);
            dedv.setBioAssayDimension(sharedDimension);
            result.add(dedv);
        }
    }
    return result;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class ExpressionDataDoubleMatrixTest method testConstructExpressionDataDoubleMatrixWithGeoValues.
/**
 * This is a self-contained test; that is, it does not depend on the setup in {@code onSetUpInTransaction}. It
 * tests creating an {@link ExpressionDataDoubleMatrix} using real values from the Gene Expression Omnibus (GEO),
 * obtained from GSE994. The probe sets used are 218120_s_at and 121_at, and the samples used are GSM15697 and
 * GSM15744. Specifically, the Gemma objects that correspond to the GEO objects are:
 * <ul>
 * <li>DesignElement 1 = 218120_s_at, DesignElement 2 = 121_at</li>
 * <li>BioAssay 1 = "Current Smoker 73", BioAssay 2 = "Former Smoker 34"</li>
 * <li>BioMaterial 1 = "GSM15697", BioMaterial 2 = "GSM15744"</li>
 * <li>BioAssayDimension = "GSM15697, GSM15744" (the names of all the biomaterials)</li>
 * </ul>
 */
@Test
public void testConstructExpressionDataDoubleMatrixWithGeoValues() {
    ByteArrayConverter bac = new ByteArrayConverter();
    ee = ExpressionExperiment.Factory.newInstance();

    // A single preferred, un-normalized quantitation type holding doubles.
    QuantitationType qt = QuantitationType.Factory.newInstance();
    qt.setName("VALUE");
    qt.setIsBackgroundSubtracted(false);
    qt.setIsNormalized(false);
    qt.setIsBackground(false);
    qt.setIsRatio(false);
    qt.setIsPreferred(true);
    qt.setIsMaskedPreferred(false);
    qt.setRepresentation(PrimitiveType.DOUBLE);

    // Two bioassays (one biomaterial each) form the columns of the matrix.
    BioAssayDimension bioAssayDimension = BioAssayDimension.Factory.newInstance();
    bioAssayDimension.setName("GSM15697, GSM15744");
    List<BioAssay> assays = new ArrayList<>();

    BioAssay assay1 = BioAssay.Factory.newInstance();
    assay1.setName("Current Smoker 73");
    BioMaterial sample1 = BioMaterial.Factory.newInstance();
    sample1.setName("GSM15697");
    assay1.setSampleUsed(sample1);
    assays.add(assay1);

    BioAssay assay2 = BioAssay.Factory.newInstance();
    assay2.setName("Former Smoker 34");
    BioMaterial sample2 = BioMaterial.Factory.newInstance();
    sample2.setName("GSM15744");
    assay2.setSampleUsed(sample2);
    assays.add(assay2);

    bioAssayDimension.setBioAssays(assays);

    // One vector per probe set; each carries one value per bioassay.
    RawExpressionDataVector vector1 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata1 = { 74.9, 101.7 };
    byte[] bdata1 = bac.doubleArrayToBytes(ddata1);
    vector1.setData(bdata1);
    vector1.setQuantitationType(qt);
    vector1.setBioAssayDimension(bioAssayDimension);

    RawExpressionDataVector vector2 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata2 = { 404.6, 318.7 };
    byte[] bdata2 = bac.doubleArrayToBytes(ddata2);
    vector2.setData(bdata2);
    vector2.setQuantitationType(qt);
    vector2.setBioAssayDimension(bioAssayDimension);

    // The design elements (rows) both belong to the same array design.
    ArrayDesign ad = ArrayDesign.Factory.newInstance();
    ad.setName("test ar");

    CompositeSequence de1 = CompositeSequence.Factory.newInstance();
    de1.setName("218120_s_at");
    vector1.setDesignElement(de1);
    BioSequence bs1 = BioSequence.Factory.newInstance();
    bs1.setName("test1");
    de1.setBiologicalCharacteristic(bs1);
    de1.setArrayDesign(ad);

    CompositeSequence de2 = CompositeSequence.Factory.newInstance();
    de2.setName("121_at");
    BioSequence bs2 = BioSequence.Factory.newInstance();
    bs2.setName("test2");
    de2.setBiologicalCharacteristic(bs2);
    de2.setArrayDesign(ad);
    vector2.setDesignElement(de2);

    Collection<RawExpressionDataVector> eeVectors = new LinkedHashSet<>();
    eeVectors.add(vector1);
    eeVectors.add(vector2);
    ee.setRawExpressionDataVectors(eeVectors);

    ExpressionDataDoubleMatrix expressionDataMatrix = new ExpressionDataDoubleMatrix(eeVectors);
    assertNotNull(expressionDataMatrix);
    // Expected value goes first so that a failure reports "expected 2 but was N" the right way round.
    assertEquals(2, expressionDataMatrix.rows());
    assertEquals(2, expressionDataMatrix.columns());
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class VectorMergingServiceImpl method combineBioAssayDimensions.
/**
 * Create a new one or use an existing one. (An existing one might be found if this process was started once
 * before and aborted partway through.)
 * <p>
 * The bioassays of all the given dimensions are concatenated in list order. If one of the given dimensions already
 * holds exactly that sequence of bioassays, it is removed from {@code oldDims} and returned; otherwise a new
 * dimension is created through the bioAssayDimensionService and returned.
 *
 * @param oldDims in the sort order to be used. May be modified: a reused dimension is removed from this list.
 * @return BA dim
 * @throws IllegalStateException if the same bioassay occurs more than once across the given dimensions
 */
private BioAssayDimension combineBioAssayDimensions(List<BioAssayDimension> oldDims) {
    List<BioAssay> bioAssays = new ArrayList<>();
    for (BioAssayDimension bioAd : oldDims) {
        for (BioAssay bioAssay : bioAd.getBioAssays()) {
            if (bioAssays.contains(bioAssay)) {
                throw new IllegalStateException("Duplicate bioassay for biodimension: " + bioAssay + "; inspecting " + oldDims.size() + " BioAssayDimensions");
            }
            bioAssays.add(bioAssay);
        }
    }

    // first see if we already have an equivalent one.
    for (BioAssayDimension candidate : oldDims) {
        // size should be the same.
        List<BioAssay> assaysInExisting = candidate.getBioAssays();
        if (assaysInExisting.size() != bioAssays.size()) {
            continue;
        }

        // The flag is scoped to a single candidate. Sharing one flag across candidates meant that a mismatch on
        // an earlier candidate caused every later candidate to be skipped, even an exact match.
        boolean found = true;
        for (int i = 0; i < bioAssays.size(); i++) {
            if (!assaysInExisting.get(i).equals(bioAssays.get(i))) {
                found = false;
                break;
            }
        }
        if (!found) {
            continue;
        }

        VectorMergingServiceImpl.log.info("Already have a dimension created that fits the bill - removing it from the 'old' list.");
        // Safe despite being inside the for-each: we return immediately and never touch the iterator again.
        oldDims.remove(candidate);
        return candidate;
    }

    // Nothing reusable: build a merged dimension named after (and describing) all the old ones.
    StringBuilder oldNames = new StringBuilder();
    for (BioAssayDimension bioAd : oldDims) {
        oldNames.append(bioAd.getName()).append(" ");
    }

    BioAssayDimension newBioAd = BioAssayDimension.Factory.newInstance();
    // The name is shortened to at most 255 characters; the description keeps the full list of names.
    newBioAd.setName(StringUtils.abbreviate(oldNames.toString(), 255));
    newBioAd.setDescription(VectorMergingServiceImpl.MERGED_DIM_DESC_PREFIX + " " + oldDims.size() + " dimensions: " + oldNames);
    newBioAd.setBioAssays(bioAssays);
    newBioAd = bioAssayDimensionService.create(newBioAd);
    VectorMergingServiceImpl.log.info("Created new bioAssayDimension with " + newBioAd.getBioAssays().size() + " bioassays.");
    return newBioAd;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class VectorMergingServiceImpl method getVectors.
/**
 * Collects the raw vectors belonging to the given old bioassay dimensions and groups them by quantitation type.
 * Any quantitation type that is used by a vector but absent from {@code qts} is added to the experiment's
 * quantitation types, and the experiment is then updated.
 *
 * @param expExp             experiment whose quantitation types are repaired if needed
 * @param qts                only used to check for problems
 * @param allOldBioAssayDims old bioassay dimensions whose vectors are fetched
 * @return the fetched vectors, keyed by their quantitation type
 * @throws IllegalStateException if no vectors are found for any of the dimensions
 */
private Map<QuantitationType, Collection<RawExpressionDataVector>> getVectors(ExpressionExperiment expExp, Collection<QuantitationType> qts, Collection<BioAssayDimension> allOldBioAssayDims) {
    Collection<RawExpressionDataVector> vectorsToMerge = new HashSet<>();
    for (BioAssayDimension oldDim : allOldBioAssayDims) {
        vectorsToMerge.addAll(rawExpressionDataVectorService.find(oldDim));
    }
    if (vectorsToMerge.isEmpty()) {
        throw new IllegalStateException("No vectors");
    }
    rawExpressionDataVectorService.thaw(vectorsToMerge);

    Map<QuantitationType, Collection<RawExpressionDataVector>> vectorsByType = new HashMap<>();
    Collection<QuantitationType> missingFromExperiment = new HashSet<>();
    for (RawExpressionDataVector vector : vectorsToMerge) {
        QuantitationType vectorType = vector.getQuantitationType();

        // Guard against QTs that are broken. Sometimes the QTs for the EE don't include the ones that the
        // DEDVs have, due to corruption.
        if (!qts.contains(vectorType)) {
            missingFromExperiment.add(vectorType);
        }

        Collection<RawExpressionDataVector> bucket = vectorsByType.get(vectorType);
        if (bucket == null) {
            bucket = new HashSet<>();
            vectorsByType.put(vectorType, bucket);
        }
        bucket.add(vector);
    }

    if (missingFromExperiment.isEmpty()) {
        return vectorsByType;
    }

    expExp.getQuantitationTypes().addAll(missingFromExperiment);
    VectorMergingServiceImpl.log.info("Adding " + missingFromExperiment.size() + " missing quantitation types to experiment");
    expressionExperimentService.update(expExp);
    return vectorsByType;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class VectorMergingServiceImpl method makeMergedData.
/**
 * Fills {@code mergedData} for one design element and quantitation type by walking the old dimensions in order.
 * For each old dimension, the data of the vector that uses that dimension is converted from bytes into
 * {@code mergedData}; when no vector uses it, the gap is handed to {@code fillMissingValues} and the count it
 * returns is added to the total.
 *
 * @param sortedOldDims sorted old dims
 * @param newBioAd      new BA dim; an old dimension equal to it is skipped
 * @param type          quantitation type the vectors are expected to have
 * @param de            design element the vectors are expected to belong to
 * @param dedvs         candidate vectors to draw data from
 * @param mergedData    starts out empty, is initialized to the new data
 * @return number of values missing
 */
private int makeMergedData(List<BioAssayDimension> sortedOldDims, BioAssayDimension newBioAd, QuantitationType type, CompositeSequence de, Collection<RawExpressionDataVector> dedvs, List<Object> mergedData) {
    PrimitiveType representation = type.getRepresentation();
    int missingCount = 0;

    for (BioAssayDimension sourceDim : sortedOldDims) {
        // careful, the 'new' bioAssayDimension might be one of the old ones that we're reusing.
        if (sourceDim.equals(newBioAd)) {
            continue;
        }

        // Look for the first vector that was measured on this old dimension.
        RawExpressionDataVector match = null;
        for (RawExpressionDataVector candidate : dedvs) {
            assert candidate.getDesignElement().equals(de);
            assert candidate.getQuantitationType().equals(type);
            if (candidate.getBioAssayDimension().equals(sourceDim)) {
                match = candidate;
                break;
            }
        }

        if (match != null) {
            this.convertFromBytes(mergedData, representation, match);
        } else {
            missingCount += this.fillMissingValues(de, mergedData, sourceDim, representation);
        }
    }
    return missingCount;
}
Aggregations