use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class DataUpdaterTest method testAddData.
/*
 * Exercises replaceData, addData and deleteData on the data updater, using made-up values on a test platform
 * attached to the samples of GSE37646.
 */
@Test
public void testAddData() throws Exception {
    // Load a regular data set that has no data. Platform is (basically) irrelevant.
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
    ExpressionExperiment ee;
    try {
        // RNA-seq data.
        Collection<?> loaded = geoService.fetchAndLoad("GSE37646", false, true, false);
        ee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        // GSE37646 was not removed before this run; continue with the experiment carried by the exception.
        List<?> existing = (List<?>) alreadyLoaded.getData();
        ee = (ExpressionExperiment) existing.get(0);
    }
    ee = experimentService.thawLite(ee);

    // Collect the sample behind every assay; these become the matrix columns.
    List<BioAssay> assays = new ArrayList<>(ee.getBioAssays());
    assertEquals(31, assays.size());
    List<BioMaterial> samples = new ArrayList<>();
    for (BioAssay assay : assays) {
        samples.add(assay.getSampleUsed());
    }

    targetArrayDesign = this.getTestPersistentArrayDesign(100, true);
    int probeCount = targetArrayDesign.getCompositeSequences().size();
    DoubleMatrix<CompositeSequence, BioMaterial> fakeValues = new DenseDoubleMatrix<>(probeCount, samples.size());

    // Make up some fake data on another platform, and match it to those samples.
    for (int row = 0; row < fakeValues.rows(); row++) {
        for (int col = 0; col < fakeValues.columns(); col++) {
            double value = (row + 1) * (col + 1) * Math.random() / 100.0;
            fakeValues.set(row, col, value);
        }
    }
    List<CompositeSequence> probes = new ArrayList<>(targetArrayDesign.getCompositeSequences());
    fakeValues.setRowNames(probes);
    fakeValues.setColumnNames(samples);

    QuantitationType preferredQt = this.makeQt(true);
    ExpressionDataDoubleMatrix preferredData = new ExpressionDataDoubleMatrix(ee, preferredQt, fakeValues);
    assertNotNull(preferredData.getBestBioAssayDimension());
    assertEquals(fakeValues.columns(), preferredData.getBestBioAssayDimension().getBioAssays().size());
    assertEquals(probes.size(), preferredData.getMatrix().rows());

    // Replace the experiment's data; every assay must then point at the target platform,
    // both on the returned entity and after a full thaw.
    ee = dataUpdater.replaceData(ee, targetArrayDesign, preferredData);
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }
    ee = experimentService.thaw(ee);
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }

    assertEquals(100, ee.getRawExpressionDataVectors().size());
    for (RawExpressionDataVector rawVector : ee.getRawExpressionDataVectors()) {
        assertTrue(rawVector.getQuantitationType().getIsPreferred());
    }
    assertEquals(100, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedVectors = dataVectorService.getProcessedDataArrays(ee);
    for (DoubleVectorValueObject processedVector : processedVectors) {
        assertEquals(31, processedVector.getBioAssays().size());
    }

    // Test adding data (non-preferred).
    QuantitationType extraQt = this.makeQt(false);
    ExpressionDataDoubleMatrix extraData = new ExpressionDataDoubleMatrix(ee, extraQt, fakeValues);
    ee = dataUpdater.addData(ee, targetArrayDesign, extraData);
    ee = experimentService.thaw(ee);

    try {
        // Adding the preferred data a second time must be rejected.
        dataUpdater.addData(ee, targetArrayDesign, preferredData);
        fail("Should have gotten an exception");
    } catch (IllegalArgumentException expected) {
        // okay.
    }

    // Remove the non-preferred data that was added above.
    dataUpdater.deleteData(ee, extraQt);
}
use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class DataUpdaterTest method testLoadRNASeqDataWithMissingSamples.
/*
 * Test case where some samples cannot be used.
 *
 * Loads GSE29006, adds count and RPKM matrices read from test resource files, and checks the resulting raw and
 * processed vectors.
 */
@Test
public void testLoadRNASeqDataWithMissingSamples() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());

    // Start from a clean slate: remove any copy of the experiment left over from an earlier run.
    ExpressionExperiment ee = experimentService.findByShortName("GSE29006");
    if (ee != null) {
        experimentService.remove(ee);
    }
    assertTrue(experimentService.findByShortName("GSE29006") == null);

    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = experimentService.thaw(ee);

    // Load the data from a text file.
    String countResource = "/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt";
    String rpkmResource = "/data/loader/expression/flatfileload/GSE29006_expression_RPKM.test.txt";
    DoubleMatrixReader reader = new DoubleMatrixReader();
    try (InputStream countData = this.getClass().getResourceAsStream(countResource);
            InputStream rpkmData = this.getClass().getResourceAsStream(rpkmResource)) {
        // getResourceAsStream returns null for a missing resource; fail with a clear message in that case.
        assertNotNull("Missing test resource: " + countResource, countData);
        assertNotNull("Missing test resource: " + rpkmResource, rpkmData);

        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        DoubleMatrix<String, String> rpkmMatrix = reader.read(rpkmData);
        List<String> probeNames = countMatrix.getRowNames();

        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);

        // NOTE(review): the final flag presumably controls whether missing samples are tolerated; with false
        // the call is expected to be rejected - confirm against DataUpdater.addCountData.
        try {
            dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, false);
            fail("Should have gotten an exception");
        } catch (IllegalArgumentException expected) {
            // Expected
        }
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, true);
    }

    /*
     * Check
     */
    ee = experimentService.thaw(ee);
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, ba.getArrayDesignUsed());
    }
    ExpressionDataDoubleMatrix mat = dataMatrixService.getProcessedExpressionDataMatrix(ee);
    assertEquals(199, mat.rows());
    assertTrue(mat.getQuantitationTypes().iterator().next().getName().startsWith("log2cpm"));
    assertEquals(4, ee.getBioAssays().size());
    assertEquals(199 * 3, ee.getRawExpressionDataVectors().size());
    assertEquals(199, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedDataArrays = dataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM718709", 320383);
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(4, v.getBioAssays().size());
    }
}
use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class SVDServiceHelperImpl method getTopLoadedVectors.
/**
 * Pairs the top-loaded probes of one principal component with their processed data vectors.
 *
 * @param ee        the experiment whose stored PCA is consulted
 * @param component the component passed to the top-loaded-probes lookup
 * @param count     the number of probes requested from the top-loaded-probes lookup
 * @return a map from probe loading to its vector; empty when the experiment has no PCA, no top-loaded probes,
 *         or no vectors could be retrieved
 */
@Override
public Map<ProbeLoading, DoubleVectorValueObject> getTopLoadedVectors(ExpressionExperiment ee, int component, int count) {
    Map<ProbeLoading, DoubleVectorValueObject> vectorsByLoading = new HashMap<>();

    PrincipalComponentAnalysis pca = principalComponentAnalysisService.loadForExperiment(ee);
    if (pca == null) {
        return vectorsByLoading;
    }

    List<ProbeLoading> topLoadings = principalComponentAnalysisService.getTopLoadedProbes(ee, component, count);
    if (topLoadings == null) {
        SVDServiceHelperImpl.log.warn("No probes?");
        return vectorsByLoading;
    }

    // Index the loadings by probe id, and gather the probes themselves for the vector query.
    Map<Long, ProbeLoading> loadingsByProbeId = new LinkedHashMap<>();
    Set<CompositeSequence> probesToFetch = new HashSet<>();
    for (ProbeLoading loading : topLoadings) {
        CompositeSequence probe = loading.getProbe();
        loadingsByProbeId.put(probe.getId(), loading);
        probesToFetch.add(probe);
    }
    if (loadingsByProbeId.isEmpty()) {
        return vectorsByLoading;
    }
    assert loadingsByProbeId.size() <= count;

    Collection<ExpressionExperiment> experiments = new HashSet<>();
    experiments.add(ee);
    // note that the vectors might have come from a cache.
    Collection<DoubleVectorValueObject> vectors = processedExpressionDataVectorService.getProcessedDataArraysByProbe(experiments, probesToFetch);
    if (vectors.isEmpty()) {
        SVDServiceHelperImpl.log.warn("No vectors came back from the call; check the Gene2CS table?");
        return vectorsByLoading;
    }

    BioAssayDimension dimension = pca.getBioAssayDimension();
    assert dimension != null;
    assert !dimension.getBioAssays().isEmpty();

    for (DoubleVectorValueObject vector : vectors) {
        ProbeLoading loading = loadingsByProbeId.get(vector.getDesignElement().getId());
        /*
         * A vector without a matching loading is expected, because we go through the genes: it was another
         * probe for a gene that _was_ highly loaded. Such vectors are simply skipped.
         */
        if (loading != null) {
            assert dimension.getBioAssays().size() == vector.getData().length;
            vector.setRank(loading.getLoadingRank().doubleValue());
            vector.setExpressionExperiment(new ExpressionExperimentValueObject(ee));
            vectorsByLoading.put(loading, vector);
        }
    }

    if (vectorsByLoading.isEmpty()) {
        SVDServiceHelperImpl.log.warn("No results, something went wrong; there were " + vectors.size() + " vectors to start but they all got filtered out.");
    }
    return vectorsByLoading;
}
use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class CacheKey method get.
/**
 * Fetches the vectors cached under the (experiment id, gene id) key.
 *
 * @param ee the experiment (or other assay set) whose id forms the first part of the cache key
 * @param g  the gene id forming the second part of the cache key
 * @return the cached vectors with their p-values cleared, or null when nothing is cached for the key
 */
@Override
public Collection<DoubleVectorValueObject> get(BioAssaySet ee, Long g) {
    CacheKey key = new CacheKey(ee.getId(), g);
    Element cached = cache.get(key);
    if (cached == null) {
        return null;
    }

    @SuppressWarnings("unchecked")
    Collection<DoubleVectorValueObject> vectors = (Collection<DoubleVectorValueObject>) cached.getObjectValue();

    /*
     * See 2878 - we don't want to keep these values cached, so the vectors can be re-used.
     */
    for (DoubleVectorValueObject vector : vectors) {
        vector.setPvalue(null);
    }
    return vectors;
}
use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorServiceImpl method addExperimentGeneVectors.
/**
 * Creates an ExperimentExpressionLevelsValueObject for the given experiment and collection of double vector VOs,
 * and adds it to the given vos collection. Vectors belonging to a different experiment are ignored. Vectors with
 * no genes (null or empty gene collection) are grouped under the {@code null} key. Gene ids that cannot be loaded
 * are skipped.
 *
 * @param vos                 the collection to add the result to.
 * @param ee                  the experiment the vectors belong to.
 * @param vectors             the vectors to create the new ExperimentExpressionLevelsVO with; when null, nothing
 *                            is added to vos.
 * @param keepGeneNonSpecific passed through unchanged to the ExperimentExpressionLevelsValueObject constructor.
 * @param consolidateMode     passed through unchanged to the ExperimentExpressionLevelsValueObject constructor.
 */
private void addExperimentGeneVectors(Collection<ExperimentExpressionLevelsValueObject> vos, ExpressionExperiment ee, Collection<DoubleVectorValueObject> vectors, boolean keepGeneNonSpecific, String consolidateMode) {
    if (vectors == null) {
        return;
    }
    Map<Gene, List<DoubleVectorValueObject>> vectorsPerGene = new HashMap<>();
    for (DoubleVectorValueObject v : vectors) {
        if (!v.getExpressionExperiment().getId().equals(ee.getId())) {
            continue;
        }
        if (v.getGenes() == null || v.getGenes().isEmpty()) {
            appendVectorForGene(vectorsPerGene, null, v);
            // Nothing to iterate; also avoids dereferencing a null gene collection below.
            continue;
        }
        for (Long gId : v.getGenes()) {
            Gene g = geneService.load(gId);
            if (g != null) {
                appendVectorForGene(vectorsPerGene, g, v);
            }
        }
    }
    vos.add(new ExperimentExpressionLevelsValueObject(ee.getId(), vectorsPerGene, keepGeneNonSpecific, consolidateMode));
}

/**
 * Appends the vector to the list stored under the given gene, creating the list on first use.
 *
 * @param vectorsPerGene the grouping map to update.
 * @param gene           the key to file the vector under; null is used for vectors without genes.
 * @param vector         the vector to append.
 */
private static void appendVectorForGene(Map<Gene, List<DoubleVectorValueObject>> vectorsPerGene, Gene gene, DoubleVectorValueObject vector) {
    List<DoubleVectorValueObject> bucket = vectorsPerGene.get(gene);
    if (bucket == null) {
        bucket = new LinkedList<>();
        vectorsPerGene.put(gene, bucket);
    }
    bucket.add(vector);
}
Aggregations