use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class ExpressionDataDoubleMatrixTest method testMatrixConversion.
/**
 * Loads GSE8294 through the GEO service (reusing the existing copy if it is already in the system), builds the
 * preferred-data matrix from its raw vectors and checks its dimensions; then exercises data-file creation and
 * deletion, and finally marks one bioassay as an outlier and unmarks it again, checking the processed data each time.
 */
@Test
public void testMatrixConversion() throws Exception {
    try {
        geoService.setGeoDomainObjectGenerator(
                new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("")));
        Collection<?> loaded = geoService.fetchAndLoad("GSE8294", false, true, false);
        newee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        // The experiment is already in the system: use the instance carried by the exception.
        List<?> existing = (List<?>) alreadyLoaded.getData();
        newee = (ExpressionExperiment) existing.iterator().next();
    }
    newee = expressionExperimentService.thaw(newee);

    // Thaw the raw vectors explicitly so that the design element sequences can be reached.
    Collection<RawExpressionDataVector> rawVectors = newee.getRawExpressionDataVectors();
    rawExpressionDataVectorService.thaw(rawVectors);
    ExpressionDataDoubleMatrix preferred = new ExpressionDataMatrixBuilder(rawVectors).getPreferredData();
    assertTrue(!Double.isNaN(preferred.get(10, 0)));
    assertEquals(66, preferred.rows());
    assertEquals(9, preferred.columns());

    /*
     * Data files: the file must exist once written/located and be gone after deleteAllFiles.
     */
    processedDataVectorService.computeProcessedExpressionData(newee);
    File dataFile = expressionDataFileService
            .writeOrLocateDataFile(expressionExperimentService.load(newee.getId()), true, true);
    assertNotNull(dataFile);
    assertTrue(dataFile.exists());
    expressionDataFileService.deleteAllFiles(newee);
    assertTrue(!dataFile.exists());

    /*
     * Outlier marking: the marked sample's column is expected to read NaN in the processed data.
     */
    BioAssay outlierAssay = newee.getBioAssays().iterator().next();
    Collection<BioAssay> outliers = new HashSet<>();
    outliers.add(outlierAssay);
    sampleRemoveService.markAsMissing(outliers);
    assertTrue(outlierAssay.getIsOutlier());

    newee = expressionExperimentService.thaw(newee);
    Collection<ProcessedExpressionDataVector> maskedVectors = newee.getProcessedExpressionDataVectors();
    processedDataVectorService.thaw(maskedVectors);
    assertTrue(!maskedVectors.isEmpty());
    ExpressionDataDoubleMatrix maskedData = new ExpressionDataMatrixBuilder(maskedVectors).getProcessedData();
    assertNotNull(maskedData);
    assertTrue(Double.isNaN(maskedData.getColumn(outlierAssay)[10]));

    /*
     * Unmarking: after a reload the same column is expected to hold a real value again.
     */
    sampleRemoveService.unmarkAsMissing(outliers);
    newee = expressionExperimentService.load(newee.getId());
    newee = expressionExperimentService.thaw(newee);
    Collection<ProcessedExpressionDataVector> restoredVectors = newee.getProcessedExpressionDataVectors();
    processedDataVectorService.thaw(restoredVectors);
    assertTrue(!restoredVectors.isEmpty());
    ExpressionDataDoubleMatrix restoredData = new ExpressionDataMatrixBuilder(restoredVectors).getProcessedData();
    assertTrue(!outlierAssay.getIsOutlier());
    assertTrue(!Double.isNaN(restoredData.getColumn(outlierAssay)[10]));
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class DataUpdaterTest method testAddData.
/**
 * Loads GSE37646 (or reuses it if already present), replaces its data with a made-up matrix on a test platform,
 * checks the resulting bioassays and vectors, then adds a second non-preferred data set and verifies that adding
 * the preferred data a second time is rejected with an {@link IllegalArgumentException}.
 */
@Test
public void testAddData() throws Exception {
    /*
     * Start from a regular data set that has no data. The platform is (basically) irrelevant.
     */
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
    ExpressionExperiment ee;
    try {
        // RNA-seq data.
        Collection<?> loaded = geoService.fetchAndLoad("GSE37646", false, true, false);
        ee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        // GSE37646 was not removed before this test ran; carry on with the existing experiment.
        ee = (ExpressionExperiment) ((List<?>) alreadyLoaded.getData()).get(0);
    }
    ee = experimentService.thawLite(ee);

    List<BioAssay> assays = new ArrayList<>(ee.getBioAssays());
    assertEquals(31, assays.size());
    List<BioMaterial> samples = new ArrayList<>();
    for (BioAssay assay : assays) {
        samples.add(assay.getSampleUsed());
    }

    targetArrayDesign = this.getTestPersistentArrayDesign(100, true);
    List<CompositeSequence> probes = new ArrayList<>(targetArrayDesign.getCompositeSequences());
    DoubleMatrix<CompositeSequence, BioMaterial> rawMatrix = new DenseDoubleMatrix<>(probes.size(),
            samples.size());

    /*
     * Fill in fake values for the other platform and attach them to the samples above.
     */
    for (int row = 0; row < rawMatrix.rows(); row++) {
        for (int col = 0; col < rawMatrix.columns(); col++) {
            rawMatrix.set(row, col, (row + 1) * (col + 1) * Math.random() / 100.0);
        }
    }
    rawMatrix.setRowNames(probes);
    rawMatrix.setColumnNames(samples);

    QuantitationType qt = this.makeQt(true);
    ExpressionDataDoubleMatrix data = new ExpressionDataDoubleMatrix(ee, qt, rawMatrix);
    assertNotNull(data.getBestBioAssayDimension());
    assertEquals(rawMatrix.columns(), data.getBestBioAssayDimension().getBioAssays().size());
    assertEquals(probes.size(), data.getMatrix().rows());

    /*
     * Replace the experiment's data; every bioassay should then use the target platform, before and after thawing.
     */
    ee = dataUpdater.replaceData(ee, targetArrayDesign, data);
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }
    ee = experimentService.thaw(ee);
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }

    assertEquals(100, ee.getRawExpressionDataVectors().size());
    for (RawExpressionDataVector rawVector : ee.getRawExpressionDataVectors()) {
        assertTrue(rawVector.getQuantitationType().getIsPreferred());
    }
    assertEquals(100, ee.getProcessedExpressionDataVectors().size());

    Collection<DoubleVectorValueObject> processed = dataVectorService.getProcessedDataArrays(ee);
    for (DoubleVectorValueObject processedVector : processed) {
        assertEquals(31, processedVector.getBioAssays().size());
    }

    /*
     * Add a second, non-preferred data set.
     */
    qt = this.makeQt(false);
    ExpressionDataDoubleMatrix moreData = new ExpressionDataDoubleMatrix(ee, qt, rawMatrix);
    ee = dataUpdater.addData(ee, targetArrayDesign, moreData);
    ee = experimentService.thaw(ee);

    try {
        // Adding the preferred data a second time must be rejected.
        dataUpdater.addData(ee, targetArrayDesign, data);
        fail("Should have gotten an exception");
    } catch (IllegalArgumentException expected) {
        // Expected outcome.
    }

    dataUpdater.deleteData(ee, qt);
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class VectorMergingServiceImpl method getVectors.
/**
 * Gather the raw vectors attached to the given old bioassay dimensions and group them by quantitation type.
 * <p>
 * Quantitation types that appear on a vector but are not contained in {@code qts} are added to the experiment's
 * quantitation types, after which the experiment is updated through the experiment service.
 *
 * @param expExp             the experiment; its quantitation types may be extended as described above
 * @param qts                quantitation types used only to detect ones missing from the experiment
 * @param allOldBioAssayDims the old bioassay dimensions whose vectors are looked up
 * @return map from quantitation type to the vectors that carry it
 * @throws IllegalStateException if no vectors are found for any of the dimensions
 */
private Map<QuantitationType, Collection<RawExpressionDataVector>> getVectors(ExpressionExperiment expExp, Collection<QuantitationType> qts, Collection<BioAssayDimension> allOldBioAssayDims) {
    Collection<RawExpressionDataVector> existing = new HashSet<>();
    for (BioAssayDimension oldDim : allOldBioAssayDims) {
        existing.addAll(rawExpressionDataVectorService.find(oldDim));
    }
    if (existing.isEmpty()) {
        throw new IllegalStateException("No vectors");
    }
    rawExpressionDataVectorService.thaw(existing);

    Map<QuantitationType, Collection<RawExpressionDataVector>> byType = new HashMap<>();
    Collection<QuantitationType> missingTypes = new HashSet<>();
    for (RawExpressionDataVector vector : existing) {
        QuantitationType vectorType = vector.getQuantitationType();
        if (!qts.contains(vectorType)) {
            // Corruption guard: the experiment's quantitation types sometimes lack ones that its vectors
            // still refer to, so remember them for repair below.
            missingTypes.add(vectorType);
        }
        Collection<RawExpressionDataVector> bucket = byType.get(vectorType);
        if (bucket == null) {
            bucket = new HashSet<>();
            byType.put(vectorType, bucket);
        }
        bucket.add(vector);
    }

    if (missingTypes.isEmpty()) {
        return byType;
    }

    expExp.getQuantitationTypes().addAll(missingTypes);
    VectorMergingServiceImpl.log.info("Adding " + missingTypes.size() + " missing quantitation types to experiment");
    expressionExperimentService.update(expExp);
    return byType;
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class VectorMergingServiceImpl method initializeNewVector.
/**
 * Create a fresh raw data vector and populate its bioassay dimension, design element, quantitation type and
 * owning experiment. The vector is only constructed here; nothing in this method persists it.
 *
 * @param expExp   experiment the vector is assigned to
 * @param newBioAd bioassay dimension set on the vector
 * @param type     quantitation type set on the vector
 * @param de       design element set on the vector
 * @return the newly created vector
 */
private RawExpressionDataVector initializeNewVector(ExpressionExperiment expExp, BioAssayDimension newBioAd, QuantitationType type, CompositeSequence de) {
    RawExpressionDataVector fresh = RawExpressionDataVector.Factory.newInstance();
    fresh.setBioAssayDimension(newBioAd);
    fresh.setDesignElement(de);
    fresh.setQuantitationType(type);
    fresh.setExpressionExperiment(expExp);
    return fresh;
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class VectorMergingServiceImpl method makeMergedData.
/**
 * Walk the old bioassay dimensions in the given order and, for each one other than {@code newBioAd}, either hand
 * the matching old vector to {@code convertFromBytes} or, when no vector has that dimension, call
 * {@code fillMissingValues} and add its result to the running count.
 *
 * @param sortedOldDims old bioassay dimensions, in the order they are to be processed
 * @param newBioAd      the new bioassay dimension; an old dimension equal to it is skipped
 * @param type          quantitation type, whose representation is passed on to the helpers
 * @param de            design element the vectors belong to
 * @param dedvs         candidate old vectors for this design element and quantitation type
 * @param mergedData    starts out empty and is initialized to the new data
 * @return number of values missing
 */
private int makeMergedData(List<BioAssayDimension> sortedOldDims, BioAssayDimension newBioAd, QuantitationType type, CompositeSequence de, Collection<RawExpressionDataVector> dedvs, List<Object> mergedData) {
    PrimitiveType representation = type.getRepresentation();
    int missingCount = 0;

    for (BioAssayDimension oldDim : sortedOldDims) {
        // Careful: the 'new' dimension may be one of the old ones being reused; skip it in that case.
        if (oldDim.equals(newBioAd)) {
            continue;
        }

        // Look for the first old vector that sits on this dimension.
        RawExpressionDataVector match = null;
        for (RawExpressionDataVector candidate : dedvs) {
            assert candidate.getDesignElement().equals(de);
            assert candidate.getQuantitationType().equals(type);
            if (candidate.getBioAssayDimension().equals(oldDim)) {
                match = candidate;
                break;
            }
        }

        if (match != null) {
            this.convertFromBytes(mergedData, representation, match);
        } else {
            missingCount += this.fillMissingValues(de, mergedData, oldDim, representation);
        }
    }

    return missingCount;
}
Aggregations