Search in sources :

Example 1 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class DataUpdaterTest method testAddData.

/**
 * Loads GSE37646 from local test files, replaces its data with a made-up matrix on a different (test) platform,
 * and then checks that:
 * <ul>
 * <li>every bioassay is switched to the new platform, before and after a full thaw;</li>
 * <li>one raw (preferred) and one processed vector exists per probe;</li>
 * <li>a non-preferred data set can be added on top;</li>
 * <li>adding the preferred data a second time is rejected with an IllegalArgumentException.</li>
 * </ul>
 */
@Test
public void testAddData() throws Exception {
    // Start from a regular data set that has no data. The platform it arrives with is (basically) irrelevant.
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
    ExpressionExperiment ee;
    try {
        // GSE37646 is RNA-seq data.
        Collection<?> loaded = geoService.fetchAndLoad("GSE37646", false, true, false);
        ee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        // The data set was not removed before this test ran; carry on with the copy the exception hands back.
        List<?> existing = (List<?>) alreadyLoaded.getData();
        ee = (ExpressionExperiment) existing.get(0);
    }
    ee = experimentService.thawLite(ee);

    List<BioAssay> assays = new ArrayList<>(ee.getBioAssays());
    assertEquals(31, assays.size());
    List<BioMaterial> samples = new ArrayList<>();
    for (BioAssay assay : assays) {
        samples.add(assay.getSampleUsed());
    }

    targetArrayDesign = this.getTestPersistentArrayDesign(100, true);
    List<CompositeSequence> probes = new ArrayList<>(targetArrayDesign.getCompositeSequences());

    // Make up some fake data on another platform, one column per sample of the experiment.
    DoubleMatrix<CompositeSequence, BioMaterial> rawMatrix = new DenseDoubleMatrix<>(probes.size(), samples.size());
    for (int row = 0; row < rawMatrix.rows(); row++) {
        int rowFactor = row + 1;
        for (int col = 0; col < rawMatrix.columns(); col++) {
            rawMatrix.set(row, col, rowFactor * (col + 1) * Math.random() / 100.0);
        }
    }
    rawMatrix.setRowNames(probes);
    rawMatrix.setColumnNames(samples);

    QuantitationType preferredQt = this.makeQt(true);
    ExpressionDataDoubleMatrix data = new ExpressionDataDoubleMatrix(ee, preferredQt, rawMatrix);
    assertNotNull(data.getBestBioAssayDimension());
    assertEquals(rawMatrix.columns(), data.getBestBioAssayDimension().getBioAssays().size());
    assertEquals(probes.size(), data.getMatrix().rows());

    // Replace whatever the experiment had with the fake data.
    ee = dataUpdater.replaceData(ee, targetArrayDesign, data);
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }

    // The platform switch must still be visible after a full thaw.
    ee = experimentService.thaw(ee);
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }

    assertEquals(100, ee.getRawExpressionDataVectors().size());
    for (RawExpressionDataVector rawVector : ee.getRawExpressionDataVectors()) {
        assertTrue(rawVector.getQuantitationType().getIsPreferred());
    }

    assertEquals(100, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedVectors = dataVectorService.getProcessedDataArrays(ee);
    for (DoubleVectorValueObject processedVector : processedVectors) {
        assertEquals(31, processedVector.getBioAssays().size());
    }

    // Test adding data (non-preferred).
    QuantitationType nonPreferredQt = this.makeQt(false);
    ExpressionDataDoubleMatrix moreData = new ExpressionDataDoubleMatrix(ee, nonPreferredQt, rawMatrix);
    ee = dataUpdater.addData(ee, targetArrayDesign, moreData);
    ee = experimentService.thaw(ee);

    try {
        // Adding the preferred data a second time must be refused.
        dataUpdater.addData(ee, targetArrayDesign, data);
        fail("Should have gotten an exception");
    } catch (IllegalArgumentException expected) {
        // okay, this is the outcome the test wants.
    }

    dataUpdater.deleteData(ee, nonPreferredQt);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ArrayList(java.util.ArrayList) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 2 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class DataUpdaterTest method testLoadRNASeqDataWithMissingSamples.

/*
 * Test case where some samples cannot be used.
 *
 * Loads GSE29006 from GEO, then adds count and RPKM data read from test resource files. The first addCountData call
 * (last argument false) is expected to be rejected with an IllegalArgumentException; the second (last argument
 * true) is expected to succeed. NOTE(review): the last argument presumably permits missing samples - confirm
 * against DataUpdater.addCountData.
 */
@Test
public void testLoadRNASeqDataWithMissingSamples() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    // Make sure we start from a clean slate: drop any copy left over from a previous run.
    ExpressionExperiment ee = experimentService.findByShortName("GSE29006");
    if (ee != null) {
        experimentService.remove(ee);
    }
    assertTrue(experimentService.findByShortName("GSE29006") == null);
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Keep the original exception as the cause so the failure can be traced back to the loader.
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = experimentService.thaw(ee);
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt");
        InputStream rpkmData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_RPKM.test.txt")) {
        // getResourceAsStream returns null when the resource is missing; fail with a clear message instead of
        // passing null on to the reader.
        assertTrue("Test resource GSE29006_expression_count.test.txt was not found on the classpath", countData != null);
        assertTrue("Test resource GSE29006_expression_RPKM.test.txt was not found on the classpath", rpkmData != null);
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        DoubleMatrix<String, String> rpkmMatrix = reader.read(rpkmData);
        List<String> probeNames = countMatrix.getRowNames();
        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        try {
            dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, false);
            fail("Should have gotten an exception");
        } catch (IllegalArgumentException expected) {
            // Expected: this call must be rejected.
        }
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, true);
    }
    /*
     * Check: every bioassay is on the new platform, and the vector and sample counts are as expected.
     */
    ee = experimentService.thaw(ee);
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, ba.getArrayDesignUsed());
    }
    ExpressionDataDoubleMatrix mat = dataMatrixService.getProcessedExpressionDataMatrix(ee);
    assertEquals(199, mat.rows());
    assertTrue(mat.getQuantitationTypes().iterator().next().getName().startsWith("log2cpm"));
    assertEquals(4, ee.getBioAssays().size());
    assertEquals(199 * 3, ee.getRawExpressionDataVectors().size());
    assertEquals(199, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedDataArrays = dataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM718709", 320383);
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(4, v.getBioAssays().size());
    }
}
Also used : InputStream(java.io.InputStream) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 3 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class SVDServiceHelperImpl method getTopLoadedVectors.

/**
 * Looks up the processed data vectors for the probes with the top loadings on one component of the experiment's
 * stored principal component analysis.
 *
 * @param ee        the experiment whose stored analysis is used.
 * @param component the component, passed through to the top-loaded-probes lookup.
 * @param count     the number of probes asked for from the top-loaded-probes lookup.
 * @return map of probe loading to its vector; empty if there is no analysis, no top-loaded probes, or no vectors.
 */
@Override
public Map<ProbeLoading, DoubleVectorValueObject> getTopLoadedVectors(ExpressionExperiment ee, int component, int count) {
    Map<ProbeLoading, DoubleVectorValueObject> vectorsByLoading = new HashMap<>();

    PrincipalComponentAnalysis analysis = principalComponentAnalysisService.loadForExperiment(ee);
    if (analysis == null) {
        return vectorsByLoading;
    }

    List<ProbeLoading> topLoadings = principalComponentAnalysisService.getTopLoadedProbes(ee, component, count);
    if (topLoadings == null) {
        SVDServiceHelperImpl.log.warn("No probes?");
        return vectorsByLoading;
    }

    // Index the loadings by probe id, and collect the probes themselves for the vector query.
    Map<Long, ProbeLoading> loadingsByProbeId = new LinkedHashMap<>();
    Set<CompositeSequence> probesToFetch = new HashSet<>();
    for (ProbeLoading loading : topLoadings) {
        CompositeSequence probe = loading.getProbe();
        loadingsByProbeId.put(probe.getId(), loading);
        probesToFetch.add(probe);
    }
    if (loadingsByProbeId.isEmpty()) {
        return vectorsByLoading;
    }
    assert loadingsByProbeId.size() <= count;

    Collection<ExpressionExperiment> experiments = new HashSet<>();
    experiments.add(ee);
    // Note that the vectors returned here might have come from a cache.
    Collection<DoubleVectorValueObject> vectors = processedExpressionDataVectorService.getProcessedDataArraysByProbe(experiments, probesToFetch);
    if (vectors.isEmpty()) {
        SVDServiceHelperImpl.log.warn("No vectors came back from the call; check the Gene2CS table?");
        return vectorsByLoading;
    }

    BioAssayDimension dimension = analysis.getBioAssayDimension();
    assert dimension != null;
    assert !dimension.getBioAssays().isEmpty();

    for (DoubleVectorValueObject vector : vectors) {
        ProbeLoading loading = loadingsByProbeId.get(vector.getDesignElement().getId());
        /*
         * A vector without a matching loading is expected, because the lookup goes through the genes: it was
         * another probe for a gene that _was_ highly loaded. Such vectors are simply skipped.
         */
        if (loading != null) {
            assert dimension.getBioAssays().size() == vector.getData().length;
            vector.setRank(loading.getLoadingRank().doubleValue());
            vector.setExpressionExperiment(new ExpressionExperimentValueObject(ee));
            vectorsByLoading.put(loading, vector);
        }
    }

    if (vectorsByLoading.isEmpty()) {
        SVDServiceHelperImpl.log.warn("No results, something went wrong; there were " + vectors.size() + " vectors to start but they all got filtered out.");
    }
    return vectorsByLoading;
}
Also used : ProbeLoading(ubic.gemma.model.analysis.expression.pca.ProbeLoading) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) PrincipalComponentAnalysis(ubic.gemma.model.analysis.expression.pca.PrincipalComponentAnalysis) ExpressionExperimentValueObject(ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)

Example 4 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class CacheKey method get.

/**
 * Fetches the cached vectors stored under the (experiment id, g) key.
 *
 * @param ee the experiment (or other bioassay set) whose id forms the first part of the cache key.
 * @param g  the second part of the cache key.
 * @return the cached vectors with their p-values reset to null, or null if nothing is cached under that key.
 */
@Override
public Collection<DoubleVectorValueObject> get(BioAssaySet ee, Long g) {
    CacheKey key = new CacheKey(ee.getId(), g);
    Element cached = cache.get(key);
    if (cached == null) {
        return null;
    }

    @SuppressWarnings("unchecked")
    Collection<DoubleVectorValueObject> vectors = (Collection<DoubleVectorValueObject>) cached.getObjectValue();

    /*
     * See 2878 - p-values must not stay cached, so that the vectors can be re-used; clear them on the way out.
     */
    for (DoubleVectorValueObject vector : vectors) {
        vector.setPvalue(null);
    }
    return vectors;
}
Also used : Element(net.sf.ehcache.Element) Collection(java.util.Collection) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)

Example 5 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorServiceImpl method addExperimentGeneVectors.

/**
 * Creates an ExperimentExpressionLevelsValueObject for the given experiment and collection of double vector VOs, and
 * adds it to the given vos collection. Vectors that belong to a different experiment are ignored. Vectors that have
 * no genes (null or empty gene collection) are grouped under the null key. Does nothing if vectors is null.
 *
 * @param vos                 the collection to add the result to.
 * @param ee                  the experiment the vectors belong to.
 * @param vectors             the vectors to create the new ExperimentExpressionLevelsVO with; may be null.
 * @param keepGeneNonSpecific passed through unchanged to the ExperimentExpressionLevelsValueObject constructor.
 * @param consolidateMode     passed through unchanged to the ExperimentExpressionLevelsValueObject constructor.
 */
private void addExperimentGeneVectors(Collection<ExperimentExpressionLevelsValueObject> vos, ExpressionExperiment ee, Collection<DoubleVectorValueObject> vectors, boolean keepGeneNonSpecific, String consolidateMode) {
    if (vectors == null) {
        return;
    }
    Map<Gene, List<DoubleVectorValueObject>> vectorsPerGene = new HashMap<>();
    for (DoubleVectorValueObject v : vectors) {
        if (!v.getExpressionExperiment().getId().equals(ee.getId())) {
            continue;
        }
        if (v.getGenes() == null || v.getGenes().isEmpty()) {
            if (!vectorsPerGene.containsKey(null)) {
                vectorsPerGene.put(null, new LinkedList<DoubleVectorValueObject>());
            }
            vectorsPerGene.get(null).add(v);
            // Nothing to iterate; moving on here also avoids a NullPointerException when getGenes() is null.
            continue;
        }
        for (Long gId : v.getGenes()) {
            Gene g = geneService.load(gId);
            // Gene ids that cannot be loaded are skipped.
            if (g != null) {
                if (!vectorsPerGene.containsKey(g)) {
                    vectorsPerGene.put(g, new LinkedList<DoubleVectorValueObject>());
                }
                vectorsPerGene.get(g).add(v);
            }
        }
    }
    vos.add(new ExperimentExpressionLevelsValueObject(ee.getId(), vectorsPerGene, keepGeneNonSpecific, consolidateMode));
}
Also used : Gene(ubic.gemma.model.genome.Gene) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) ExperimentExpressionLevelsValueObject(ubic.gemma.model.expression.bioAssayData.ExperimentExpressionLevelsValueObject)

Aggregations

DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)31 StopWatch (org.apache.commons.lang3.time.StopWatch)13 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)9 Test (org.junit.Test)6 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)6 BioAssayValueObject (ubic.gemma.model.expression.bioAssay.BioAssayValueObject)6 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)5 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)5 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)5 Gene (ubic.gemma.model.genome.Gene)5 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)4 ExperimentExpressionLevelsValueObject (ubic.gemma.model.expression.bioAssayData.ExperimentExpressionLevelsValueObject)4 GeneValueObject (ubic.gemma.model.genome.gene.GeneValueObject)4 InputStream (java.io.InputStream)3 Transactional (org.springframework.transaction.annotation.Transactional)3 DoubleMatrixReader (ubic.basecode.io.reader.DoubleMatrixReader)3 GeoDomainObjectGenerator (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator)3 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)3 ProbeLoading (ubic.gemma.model.analysis.expression.pca.ProbeLoading)3 VisualizationValueObject (ubic.gemma.web.controller.visualization.VisualizationValueObject)3