Search in sources :

Example 11 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class DEDVController method getDEDVForDiffExVisualization.

/**
 * AJAX exposed method - for ProbeLevelDiffExGrid, VisualizationDifferentialWindow,
 * DifferentialExpressionAnalysesSummaryTree
 *
 * @param eeIds     FIXME accommodate ExpressionExperimentSubSets. Currently we pass in the "source experiment" so we
 *                  don't get the slice.
 * @param geneIds   (could be just one)
 * @param threshold for 'significance'
 * @param factorMap Collection of DiffExpressionSelectedFactorCommand showing which factors to use.
 * @return the result of makeDiffVisCollection for the retrieved vectors, or null if either id collection is null
 *         or empty, or if no experiments or no genes could be loaded for the given ids.
 */
public VisualizationValueObject[] getDEDVForDiffExVisualization(Collection<Long> eeIds, Collection<Long> geneIds, Double threshold, Collection<DiffExpressionSelectedFactorCommand> factorMap) {
    // This method is called remotely, so treat missing arguments the same way as empty ones.
    if (eeIds == null || geneIds == null || eeIds.isEmpty() || geneIds.isEmpty()) {
        return null;
    }

    // Step 1: load the experiments, genes and their processed data vectors (timed together).
    StopWatch watch = new StopWatch();
    watch.start();
    Collection<? extends BioAssaySet> ees = expressionExperimentService.load(eeIds);
    if (ees == null || ees.isEmpty()) {
        return null;
    }
    Collection<Gene> genes = geneService.load(geneIds);
    if (genes == null || genes.isEmpty()) {
        return null;
    }
    Collection<DoubleVectorValueObject> dedvs = processedExpressionDataVectorService.getProcessedDataArrays(ees, geneIds);
    watch.stop();
    long time = watch.getTime();
    log.info("Retrieved " + dedvs.size() + " DEDVs for " + eeIds.size() + " EEs and " + geneIds.size() + " genes in " + time + " ms.");

    // Step 2: compute the sample layouts from the experimental design.
    watch = new StopWatch();
    watch.start();
    Map<Long, LinkedHashMap<BioAssayValueObject, LinkedHashMap<ExperimentalFactor, Double>>> layouts = experimentalDesignVisualizationService.sortVectorDataByDesign(dedvs);
    watch.stop();
    time = watch.getTime();
    if (time > 100) {
        log.info("Ran sortVectorDataByDesign on " + dedvs.size() + " DEDVs for " + eeIds.size() + " EEs in " + time + " ms (times <100ms not reported).");
    }
    // layouts = experimentalDesignVisualizationService.sortLayoutSamplesByFactor( layouts ); // required? yes, see
    // GSE11859
    // The step above is currently disabled, so no timing is reported for it.

    // Step 3: look up which probes pass the differential expression threshold for the selected factors.
    watch = new StopWatch();
    watch.start();
    Map<Long, Collection<DifferentialExpressionValueObject>> validatedProbes = getProbeDiffExValidation(genes, threshold, factorMap);
    watch.stop();
    time = watch.getTime();
    log.info("Retrieved " + validatedProbes.size() + " valid probes in " + time + " ms.");

    return makeDiffVisCollection(dedvs, new ArrayList<>(geneIds), validatedProbes, layouts);
}
Also used : StopWatch(org.apache.commons.lang3.time.StopWatch) Gene(ubic.gemma.model.genome.Gene) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)

Example 12 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class DEDVController method format4File.

/**
 * Render the given vectors as tab-delimited text.
 * <p>
 * The output starts with comment lines (generation date and the ExpressionDataFileService disclaimer), followed by
 * a header built by makeHeader from the first vector, then one line per vector: the gene symbols joined by "|", the
 * gene names joined by "|", the design element name, and each data value formatted with three decimals. A vector
 * whose data is null or empty produces a line that ends after the design element name.
 *
 * @param vectors the vectors to write out
 * @return the formatted text
 */
private String format4File(Collection<DoubleVectorValueObject> vectors) {
    StringBuilder converted = new StringBuilder();
    converted.append("# Generated by Gemma\n# ").append(new Date()).append("\n");
    converted.append(ExpressionDataFileService.DISCLAIMER + "#\n");
    boolean didHeader = false;
    Map<Long, GeneValueObject> gmap = getGeneValueObjectsUsed(vectors);
    for (DoubleVectorValueObject vec : vectors) {
        // The header is derived from the first vector only.
        if (!didHeader) {
            converted.append(makeHeader(vec));
            didHeader = true;
        }
        List<String> geneSymbols = new ArrayList<>();
        List<String> geneNames = new ArrayList<>();
        for (Long g : vec.getGenes()) {
            GeneValueObject gene = gmap.get(g);
            assert gene != null;
            geneSymbols.add(gene.getOfficialSymbol());
            geneNames.add(gene.getOfficialName());
        }
        converted.append(StringUtils.join(geneSymbols, "|")).append("\t").append(StringUtils.join(geneNames, "|")).append("\t");
        converted.append(vec.getDesignElement().getName());
        // Prefix each value with its separator so that no trailing tab has to be removed afterwards, and so that
        // null data is simply skipped instead of being dereferenced.
        double[] values = vec.getData();
        if (values != null) {
            for (double value : values) {
                converted.append("\t").append(String.format("%.3f", value));
            }
        }
        converted.append("\n");
    }
    return converted.toString();
}
Also used : GeneValueObject(ubic.gemma.model.genome.gene.GeneValueObject) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)

Example 13 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class DEDVController method getProbeLinkValidation.

/**
 * Intended to identify which probes were 'responsible' for the coexpression links.
 * FIXME change this to actually compute the correlations.
 * <p>
 * The vectors are grouped by experiment id into probes of the query gene and probes of the coexpressed gene, and a
 * warning is logged for every experiment lacking probes for either gene. The probe-level lookup that would populate
 * the result is commented out, so the returned map is currently always empty.
 *
 * @param ees             experiments to check
 * @param queryGene       the query gene
 * @param coexpressedGene the gene coexpressed with the query gene
 * @param dedvs           vectors for the two genes
 * @return map of EEID -> collection ProbeIDs which underlie the stored coexpression links (currently empty).
 */
private Map<Long, Collection<Long>> getProbeLinkValidation(Collection<ExpressionExperiment> ees, Gene queryGene, Gene coexpressedGene, Collection<DoubleVectorValueObject> dedvs) {
    StopWatch timer = new StopWatch();
    timer.start();

    Map<Long, Collection<Long>> coexpressedProbesByEe = new HashMap<>();
    Map<Long, Collection<Long>> queryProbesByEe = new HashMap<>();

    // Organize the probes of the vectors by experiment; the query gene takes precedence when a vector has both.
    for (DoubleVectorValueObject vector : dedvs) {
        ExpressionExperimentValueObject vectorEe = vector.getExpressionExperiment();
        Map<Long, Collection<Long>> target;
        if (vector.getGenes().contains(queryGene.getId())) {
            target = queryProbesByEe;
        } else if (vector.getGenes().contains(coexpressedGene.getId())) {
            target = coexpressedProbesByEe;
        } else {
            log.error("Dedv doesn't belong to coexpressed or query gene. QueryGene= " + queryGene + "CoexpressedGene= " + coexpressedGene + "DEDV " + vector.getId() + " has genes: " + vector.getGenes());
            continue;
        }
        Collection<Long> probeIds = target.get(vectorEe.getId());
        if (probeIds == null) {
            probeIds = new HashSet<Long>();
            target.put(vectorEe.getId(), probeIds);
        }
        probeIds.add(vector.getDesignElement().getId());
    }

    Map<Long, Collection<Long>> validatedProbes = new HashMap<>();
    for (ExpressionExperiment ee : ees) {
        Collection<Long> queryProbeIds = queryProbesByEe.get(ee.getId());
        Collection<Long> coexpressedProbeIds = coexpressedProbesByEe.get(ee.getId());

        boolean hasQueryProbes = queryProbeIds != null && !queryProbeIds.isEmpty();
        if (!hasQueryProbes) {
            log.warn("Unexpectedly no probes for " + queryGene + " in " + ee);
            continue;
        }
        boolean hasCoexpressedProbes = coexpressedProbeIds != null && !coexpressedProbeIds.isEmpty();
        if (!hasCoexpressedProbes) {
            log.warn("Unexpectedly no probes for " + coexpressedGene + " in " + ee);
        }
        /*
         * Note: this does a probe-level query FIXME if we don't store data at probe-level we can't do this.
         */
        // Collection<Long> probesInLinks = this.geneCoexpressionService.getCoexpressedProbes( queryProbeIds,
        // coexpressedProbeIds, ee, queryGene.getTaxon().getCommonName() );
        // if ( probesInLinks.isEmpty() ) {
        // log.warn( "Unexpectedly no probes for link between " + queryGene + " -and- " + coexpressedGene + " in "
        // + ee );
        // }
        //
        // validatedProbes.put( ee.getId(), probesInLinks );
        // FIXME FIXME
    }

    timer.stop();
    long elapsedMs = timer.getTime();
    // Only slow runs are worth reporting.
    if (elapsedMs > 1000) {
        log.info("Validation of probes for " + ees.size() + " experiments in " + elapsedMs + "ms.");
    }
    return validatedProbes;
}
Also used : DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 14 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class ProcessedExpressionDataCreateServiceTest method testComputeDevRankForExpressionExperimentMultiArrayWithGaps.

/**
 * Three platforms, one sample was not run on GPL81. It's 'Norm-1a', but the name we use for the sample is random.
 * <p>
 * Loads GSE482 from the local "gse482short" test files (or reuses the copy reported by
 * AlreadyExistsInSystemException), computes the processed expression data, and then checks twice - once through an
 * ExpressionDataDoubleMatrix and once through getProcessedDataArrays - that NaN values occur exactly for the listed
 * design elements in the gap sample, and nowhere else.
 */
@SuppressWarnings("unchecked")
@Test
public void testComputeDevRankForExpressionExperimentMultiArrayWithGaps() throws Exception {
    // Load the experiment; if it is already in the system, take it from the exception's payload instead.
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse482short")));
        Collection<ExpressionExperiment> results = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE482", false, true, false);
        this.ee = results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        this.ee = ((Collection<ExpressionExperiment>) e.getData()).iterator().next();
    }
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    Collection<ProcessedExpressionDataVector> preferredVectors = this.processedExpressionDataVectorService.getProcessedDataVectors(ee);
    // Reload and re-thaw the experiment after the processed data has been computed.
    ee = eeService.load(ee.getId());
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.thaw(preferredVectors);
    // Part 1: inspect the data through a matrix built from the processed vectors.
    ExpressionDataDoubleMatrix mat = new ExpressionDataDoubleMatrix(preferredVectors);
    assertEquals(10, mat.columns());
    // Set once at least one expected NaN (gap) cell has been seen.
    boolean found = false;
    for (int i = 0; i < mat.rows(); i++) {
        Double[] row = mat.getRow(i);
        // debugging: print the bioassay name of each column once, before the first row
        if (i == 0) {
            for (int j = 0; j < row.length; j++) {
                BioAssay ba = mat.getBioAssaysForColumn(j).iterator().next();
                System.err.println(ba.getName());
            }
        }
        // debugging: print the row as element name followed by tab-separated values
        System.err.print(mat.getRowElement(i).getDesignElement().getName() + "\t");
        for (double d : row) {
            System.err.print(String.format("%4.2f\t", d));
        }
        System.err.print("\n");
        CompositeSequence el = mat.getDesignElementForRow(i);
        for (int j = 0; j < row.length; j++) {
            BioAssay ba = mat.getBioAssaysForColumn(j).iterator().next();
            // Cells for the 'Norm-1a' bioassays and the listed elements must be NaN; every other cell must have a value.
            if (ba.getName().matches("PGA-MurLungHyper-Norm-1a[ABC]v2-s2") && (el.getName().equals("100001_at") || el.getName().equals("100002_at") || el.getName().equals("100003_at") || el.getName().equals("100004_at") || el.getName().equals("100005_at") || el.getName().equals("100006_at") || el.getName().equals("100007_at") || el.getName().equals("100009_r_at") || el.getName().equals("100010_at") || el.getName().equals("100011_at"))) {
                assertEquals(Double.NaN, row[j], 0.0001);
                found = true;
            } else {
                assertTrue("Got unexpected NA value for " + ba.getName() + " for " + el.getName(), !Double.isNaN(row[j]));
            }
        }
    }
    // The gap must actually have been encountered, otherwise the checks above were vacuous.
    assertTrue(found);
    /*
         * Now do this through the processedExpressionDataVectorService
         */
    Collection<DoubleVectorValueObject> da = this.processedExpressionDataVectorService.getProcessedDataArrays(ee);
    assertEquals(30, da.size());
    found = false;
    boolean first = true;
    for (DoubleVectorValueObject v : da) {
        CompositeSequenceValueObject el = v.getDesignElement();
        double[] row = v.getData();
        // debugging: print the bioassay names once, for the first vector only
        if (first) {
            for (int j = 0; j < row.length; j++) {
                BioAssayValueObject ba = v.getBioAssays().get(j);
                System.err.println(ba.getName());
            }
            first = false;
        }
        // debugging: print the vector as element name followed by tab-separated values
        System.err.print(el.getName() + "\t");
        for (double d : row) {
            System.err.print(String.format("%4.2f\t", d));
        }
        System.err.print("\n");
        assertEquals(10, row.length);
        for (int j = 0; j < row.length; j++) {
            assertNotNull(v.getBioAssays());
            BioAssayValueObject ba = v.getBioAssays().get(j);
            // Here the gap is identified by a bioassay whose name starts with "Missing bioassay for biomaterial".
            if (ba.getName().startsWith("Missing bioassay for biomaterial") && (el.getName().equals("100001_at") || el.getName().equals("100002_at") || el.getName().equals("100003_at") || el.getName().equals("100004_at") || el.getName().equals("100005_at") || el.getName().equals("100006_at") || el.getName().equals("100007_at") || el.getName().equals("100009_r_at") || el.getName().equals("100010_at") || el.getName().equals("100011_at"))) {
                assertEquals(Double.NaN, row[j], 0.0001);
                found = true;
            } else {
                assertTrue("Got unexpected NA value for " + ba.getName() + " for " + el.getName(), !Double.isNaN(row[j]));
            }
        }
    }
    // As above: at least one gap cell must have been seen.
    assertTrue(found);
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) CompositeSequenceValueObject(ubic.gemma.model.expression.designElement.CompositeSequenceValueObject) BioAssayValueObject(ubic.gemma.model.expression.bioAssay.BioAssayValueObject) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 15 with DoubleVectorValueObject

use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.

the class VectorMergingServiceTest method test.

/**
 * End-to-end check of vector merging: loads GSE3443 from the local "gse3443merge" test files, merges its seven
 * platforms into one, switches the experiment to the merged platform, merges the vectors, and verifies the expected
 * counts after each step.
 */
@Test
public final void test() throws Exception {
    /*
         * Need a persistent experiment that uses multiple array designs. Then merge the designs, switch the vectors,
         * and merge the vectors. GSE3443
         */
    /*
         * The experiment uses the following GPLs
         *
         * GPL2868, GPL2933, GPL2934, GPL2935, GPL2936, GPL2937, GPL2938
         *
         * Example of a sequence appearing on more than one platform: N57553
         */
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse3443merge")));
    Collection<?> results = geoService.fetchAndLoad("GSE3443", false, false, false);
    ee = (ExpressionExperiment) results.iterator().next();
    ee = this.eeService.thawLite(ee);
    Collection<ArrayDesign> aas = eeService.getArrayDesignsUsed(ee);
    assertEquals(7, aas.size());
    /*
         * Check number of sequences across all platforms. This is how many elements we need on the new platform, plus
         * extras for duplicated sequences (e.g. elements that don't have a sequence...)
         */
    // taas collects the thawed platforms; oldbs collects the distinct sequences seen on them.
    Collection<ArrayDesign> taas = new HashSet<>();
    Set<BioSequence> oldbs = new HashSet<>();
    for (ArrayDesign arrayDesign : aas) {
        arrayDesign = arrayDesignService.thaw(arrayDesign);
        taas.add(arrayDesign);
        for (CompositeSequence cs : arrayDesign.getCompositeSequences()) {
            log.info(cs + " " + cs.getBiologicalCharacteristic());
            oldbs.add(cs.getBiologicalCharacteristic());
        }
    }
    assertEquals(63, oldbs.size());
    /*
         * Check total size of elements across all 7 platforms.
         */
    int totalElements = 0;
    for (ArrayDesign arrayDesign : taas) {
        totalElements += arrayDesign.getCompositeSequences().size();
    }
    assertEquals(140, totalElements);
    // Pick one platform as the first argument of the merge; it must not have been merged already.
    ArrayDesign firstaa = taas.iterator().next();
    aas.remove(firstaa);
    assertEquals(null, firstaa.getMergedInto());
    // Random suffixes are appended to the two string arguments of the merge call.
    mergedAA = arrayDesignMergeService.merge(firstaa, taas, "testMerge" + RandomStringUtils.randomAlphabetic(5), "merged" + RandomStringUtils.randomAlphabetic(5), false);
    assertEquals(72, mergedAA.getCompositeSequences().size());
    Set<BioSequence> seenBs = new HashSet<>();
    for (CompositeSequence cs : mergedAA.getCompositeSequences()) {
        seenBs.add(cs.getBiologicalCharacteristic());
    }
    assertEquals(63, seenBs.size());
    // just to make this explicit. The new array design has to contain all the old sequences.
    assertEquals(oldbs.size(), seenBs.size());
    ee = eeService.thaw(ee);
    assertEquals(1828, ee.getRawExpressionDataVectors().size());
    ee = eePlatformSwitchService.switchExperimentToArrayDesign(ee, mergedAA);
    ee = eeService.thaw(ee);
    // check we actually got switched over.
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(mergedAA, ba.getArrayDesignUsed());
    }
    for (RawExpressionDataVector v : ee.getRawExpressionDataVectors()) {
        assertEquals(mergedAA, v.getDesignElement().getArrayDesign());
    }
    // The quantitation type and raw vector counts are asserted before the vectors are merged.
    assertEquals(15, ee.getQuantitationTypes().size());
    assertEquals(1828, ee.getRawExpressionDataVectors().size());
    ee = vectorMergingService.mergeVectors(ee);
    // check we got the right processed data
    Collection<ProcessedExpressionDataVector> pvs = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    assertEquals(72, pvs.size());
    ee = eeService.thaw(ee);
    // Requesting 50 data arrays must return exactly 50.
    Collection<DoubleVectorValueObject> processedDataArrays = processedExpressionDataVectorService.getProcessedDataArrays(ee, 50);
    assertEquals(50, processedDataArrays.size());
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) HashSet(java.util.HashSet) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Aggregations

DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)31 StopWatch (org.apache.commons.lang3.time.StopWatch)13 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)9 Test (org.junit.Test)6 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)6 BioAssayValueObject (ubic.gemma.model.expression.bioAssay.BioAssayValueObject)6 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)5 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)5 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)5 Gene (ubic.gemma.model.genome.Gene)5 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)4 ExperimentExpressionLevelsValueObject (ubic.gemma.model.expression.bioAssayData.ExperimentExpressionLevelsValueObject)4 GeneValueObject (ubic.gemma.model.genome.gene.GeneValueObject)4 InputStream (java.io.InputStream)3 Transactional (org.springframework.transaction.annotation.Transactional)3 DoubleMatrixReader (ubic.basecode.io.reader.DoubleMatrixReader)3 GeoDomainObjectGenerator (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator)3 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)3 ProbeLoading (ubic.gemma.model.analysis.expression.pca.ProbeLoading)3 VisualizationValueObject (ubic.gemma.web.controller.visualization.VisualizationValueObject)3