Search in sources :

Example 1 with CoexpressionValueObject

use of ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject in project Gemma by PavlidisLab.

the class LinkAnalysisServiceTest method checkResults.

private int checkResults(Collection<BioAssaySet> ees, int expectedMinimumMaxSupport) {
    boolean foundOne = false;
    int maxSupport = 0;
    Taxon mouse = taxonService.findByCommonName("mouse");
    Collection<Gene> genesWithLinks = new ArrayList<>();
    int totalLinks = 0;
    // numdatasetstesting will not be set so we won't bother checking.
    assertTrue(!geneCoexpressionService.getCoexpression(ee, true).isEmpty());
    Collection<CoexpressionValueObject> eeResults = geneCoexpressionService.getCoexpression(ee, false);
    assertTrue(!eeResults.isEmpty());
    for (CoexpressionValueObject coex : eeResults) {
        this.checkResult(coex);
    }
    Map<Long, GeneCoexpressionNodeDegreeValueObject> nodeDegrees = geneCoexpressionService.getNodeDegrees(EntityUtils.getIds(geneService.loadAll()));
    assertTrue(!nodeDegrees.isEmpty());
    // experiment-major query
    Map<Long, List<CoexpressionValueObject>> allLinks = geneCoexpressionService.findCoexpressionRelationships(mouse, new HashSet<Long>(), EntityUtils.getIds(ees), ees.size(), 10, false);
    assertTrue(!allLinks.isEmpty());
    for (Long g : allLinks.keySet()) {
        for (CoexpressionValueObject coex : allLinks.get(g)) {
            this.checkResult(coex);
        }
    }
    for (Gene gene : geneService.loadAll(mouse)) {
        Collection<CoexpressionValueObject> links = geneCoexpressionService.findCoexpressionRelationships(gene, EntityUtils.getIds(ees), 1, 0, false);
        if (links == null || links.isEmpty()) {
            continue;
        }
        assertEquals(geneCoexpressionService.findCoexpressionRelationships(gene, Collections.singleton(ee.getId()), 0, false).size(), geneCoexpressionService.countLinks(ee, gene).intValue());
        GeneCoexpressionNodeDegreeValueObject nodeDegree = geneCoexpressionService.getNodeDegree(gene);
        if (links.size() != nodeDegree.getLinksWithMinimumSupport(1)) {
            log.info(nodeDegree);
            assertEquals("Node degree check failed for gene " + gene, links.size(), nodeDegree.getLinksWithMinimumSupport(1).intValue());
        }
        assertTrue(nodeDegree.getLinksWithMinimumSupport(1) >= nodeDegree.getLinksWithMinimumSupport(2));
        totalLinks += links.size();
        log.debug(links.size() + " hits for " + gene);
        for (CoexpressionValueObject coex : links) {
            this.checkResult(coex);
            if (coex.getNumDatasetsSupporting() > maxSupport) {
                maxSupport = coex.getNumDatasetsSupporting();
            }
        }
        foundOne = true;
        if (genesWithLinks.size() == 5) {
            // without specifying stringency
            Map<Long, List<CoexpressionValueObject>> multiGeneResults = geneCoexpressionService.findCoexpressionRelationships(mouse, EntityUtils.getIds(genesWithLinks), EntityUtils.getIds(ees), 100, false);
            if (multiGeneResults.isEmpty()) {
                // noinspection ConstantConditions // these strange structures are to help with debugger.
                assertTrue(!multiGeneResults.isEmpty());
            }
            for (Long id : multiGeneResults.keySet()) {
                for (CoexpressionValueObject coex : multiGeneResults.get(id)) {
                    this.checkResult(coex);
                }
            }
            // with stringency specified, quick.
            Map<Long, List<CoexpressionValueObject>> multiGeneResults2 = geneCoexpressionService.findCoexpressionRelationships(mouse, EntityUtils.getIds(genesWithLinks), EntityUtils.getIds(ees), ees.size(), 100, true);
            if (multiGeneResults.size() != multiGeneResults2.size()) {
                assertEquals(multiGeneResults.size(), multiGeneResults2.size());
            }
            for (Long id : multiGeneResults2.keySet()) {
                for (CoexpressionValueObject coex : multiGeneResults2.get(id)) {
                    this.checkResult(coex);
                }
            }
        }
        genesWithLinks.add(gene);
    }
    assertTrue(foundOne);
    Map<Long, List<CoexpressionValueObject>> mygeneresults = geneCoexpressionService.findInterCoexpressionRelationships(mouse, EntityUtils.getIds(genesWithLinks), EntityUtils.getIds(ees), 1, false);
    if (mygeneresults.isEmpty()) {
        // noinspection ConstantConditions // these strange structures are to help with debugger.
        assertTrue(!mygeneresults.isEmpty());
    }
    for (Long id : mygeneresults.keySet()) {
        for (CoexpressionValueObject coex : mygeneresults.get(id)) {
            this.checkResult(coex);
        }
    }
    assertTrue(maxSupport >= expectedMinimumMaxSupport);
    return totalLinks;
}
Also used : Taxon(ubic.gemma.model.genome.Taxon) Gene(ubic.gemma.model.genome.Gene) GeneCoexpressionNodeDegreeValueObject(ubic.gemma.model.association.coexpression.GeneCoexpressionNodeDegreeValueObject) CoexpressionValueObject(ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject)

Example 2 with CoexpressionValueObject

use of ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject in project Gemma by PavlidisLab.

the class ExpressionDataFileServiceImpl method writeCoexpressionData.

/**
 * Loads the probe to probe coexpression link information for a given expression experiment and writes it to disk.
 */
private void writeCoexpressionData(File file, ExpressionExperiment ee) throws IOException {
    Taxon tax = expressionExperimentService.getTaxon(ee);
    assert tax != null;
    Collection<CoexpressionValueObject> geneLinks = gene2geneCoexpressionService.getCoexpression(ee, true);
    Date timestamp = new Date(System.currentTimeMillis());
    StringBuilder buf = new StringBuilder();
    // Write header information
    buf.append("# Coexpression data for:  ").append(ee.getShortName()).append(" : ").append(ee.getName()).append(" \n");
    buf.append("# Generated On: ").append(timestamp).append(" \n");
    buf.append("# Links are listed in an arbitrary order with an indication of positive or negative correlation\n");
    buf.append(ExpressionDataFileService.DISCLAIMER);
    buf.append("GeneSymbol1\tGeneSymbol2\tDirection\tSupport\n");
    // Data
    for (CoexpressionValueObject link : geneLinks) {
        buf.append(link.getQueryGeneSymbol()).append("\t").append(link.getCoexGeneSymbol()).append("\t");
        buf.append(link.isPositiveCorrelation() ? "+" : "-" + "\n");
    }
    // Write coexpression data to file (zipped of course)
    try (Writer writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(file)))) {
        writer.write(buf.toString());
    }
}
Also used : GZIPOutputStream(java.util.zip.GZIPOutputStream) Taxon(ubic.gemma.model.genome.Taxon) CoexpressionValueObject(ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject) MatrixWriter(ubic.gemma.core.datastructure.matrix.MatrixWriter) ExperimentalDesignWriter(ubic.gemma.core.datastructure.matrix.ExperimentalDesignWriter)

Example 3 with CoexpressionValueObject

use of ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject in project Gemma by PavlidisLab.

the class GeneCoexpressionSearchServiceImpl method addExtCoexpressionValueObjects.

/**
 * Convert CoexpressionValueObjects to CoexpressionValueObjectExts
 *
 * @param queryGeneIds all those included in the query
 */
private List<CoexpressionValueObjectExt> addExtCoexpressionValueObjects(GeneValueObject queryGene, Collection<CoexpressionValueObject> coexp, boolean queryGenesOnly, Collection<Long> queryGeneIds) {
    List<CoexpressionValueObjectExt> results = new ArrayList<>();
    Collection<Long> coexpGenes = new HashSet<>();
    for (CoexpressionValueObject cvo : coexp) {
        coexpGenes.add(cvo.getCoexGeneId());
    }
    // database hit. loadValueObjects is too slow.
    Map<Long, GeneValueObject> coexpedGenes = EntityUtils.getIdMap(geneService.loadValueObjectsByIds(coexpGenes));
    for (CoexpressionValueObject cvo : coexp) {
        /*
             * sanity check, this is sort of debug code. Each link has to contain at least one query gene.
             */
        if (queryGenesOnly) {
            if (!queryGeneIds.contains(cvo.getCoexGeneId())) {
                GeneCoexpressionSearchServiceImpl.log.warn("coexpression for non-query genes obtained unexpectedly when doing queryGenesOnly " + cvo.getCoexGeneId() + " is not a query");
                continue;
            }
            if (!queryGeneIds.contains(cvo.getQueryGeneId())) {
                GeneCoexpressionSearchServiceImpl.log.warn("coexpression for non-query genes obtained unexpectedly when doing queryGenesOnly " + cvo.getQueryGeneId() + " is not a query");
                continue;
            }
        }
        CoexpressionValueObjectExt ecVo = new CoexpressionValueObjectExt();
        ecVo.setQueryGene(queryGene);
        GeneValueObject foundGene = coexpedGenes.get(cvo.getCoexGeneId());
        if (!queryGeneIds.contains(foundGene.getId())) {
            foundGene.setIsQuery(false);
        } else {
            foundGene.setIsQuery(true);
        }
        ecVo.setFoundGene(foundGene);
        if (cvo.isPositiveCorrelation()) {
            ecVo.setPosSupp(cvo.getNumDatasetsSupporting());
        } else {
            ecVo.setNegSupp(cvo.getNumDatasetsSupporting());
        }
        // when 'quick', these will not necessarily be set.
        ecVo.setNumTestedIn(cvo.getNumDatasetsTestedIn());
        ecVo.setSupportingExperiments(cvo.getSupportingDatasets());
        ecVo.setSortKey();
        results.add(ecVo);
        assert ecVo.getPosSupp() > 0 || ecVo.getNegSupp() > 0;
        assert ecVo.getFoundGene() != null;
        assert ecVo.getQueryGene() != null;
    }
    return results;
}
Also used : GeneValueObject(ubic.gemma.model.genome.gene.GeneValueObject) CoexpressionValueObject(ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject)

Example 4 with CoexpressionValueObject

use of ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject in project Gemma by PavlidisLab.

the class GeneCoexpressionSearchServiceImpl method doCoexpressionSearch.

/**
 * @param genes          1 or more.
 * @param stringency;    this may be modified to control the number of results, unless "queryGenesOnly" is true.
 * @param maxResults     per gene, not including the query genes themselves. Ignored if this is 'queryGenesOnly'
 * @param queryGenesOnly will be ignored if number of genes is 1.
 * @return CoexpressionMetaValueObject, in which the results are already populated and sorted.
 */
private CoexpressionMetaValueObject doCoexpressionSearch(Collection<Long> inputEeIds, Collection<Long> genes, int stringency, final int maxResults, final boolean queryGenesOnly, final boolean quick) {
    if (genes.isEmpty()) {
        CoexpressionMetaValueObject r = new CoexpressionMetaValueObject();
        r.setErrorState("No genes selected");
        return r;
    }
    boolean actuallyUseQueryGeneOnly = queryGenesOnly && genes.size() > 1;
    Taxon taxon = this.geneService.load(genes.iterator().next()).getTaxon();
    List<ExpressionExperimentValueObject> eevos = this.getFilteredEEVos(inputEeIds, taxon);
    CoexpressionMetaValueObject result = this.initValueObject(genes, eevos);
    if (eevos.isEmpty()) {
        result = new CoexpressionMetaValueObject();
        result.setErrorState("No experiments selected");
        return result;
    }
    Collection<Long> eeIds = EntityUtils.getIds(eevos);
    Map<Long, List<CoexpressionValueObject>> allCoexpressions;
    if (genes.size() > GeneCoexpressionSearchServiceImpl.THRESHOLD_TRIGGER_QUERY_GENES_ONLY) {
        if (!actuallyUseQueryGeneOnly) {
            GeneCoexpressionSearchServiceImpl.log.info("Switching to 'query genes only'");
        }
        actuallyUseQueryGeneOnly = true;
    }
    if (stringency < 1)
        stringency = 1;
    if (!queryGenesOnly) {
        stringency = Math.max(stringency, this.chooseStringency(actuallyUseQueryGeneOnly, eeIds.size(), genes.size()));
        GeneCoexpressionSearchServiceImpl.log.info("Stringency set to " + stringency + " based on number of experiments (" + eeIds.size() + ") and genes (" + genes.size() + ") queried");
    } else {
        GeneCoexpressionSearchServiceImpl.log.info("Query gene only: stringency maintained at requested value=" + stringency);
    }
    assert stringency >= 1 || eeIds.size() == 1;
    // HACK drop the stringency until we get some results.
    int stepSize = 3;
    while (true) {
        if (actuallyUseQueryGeneOnly) {
            // note that maxResults is ignored.
            if (genes.size() < 2) {
                // should be impossible - could assert.
                throw new IllegalArgumentException("cannot do inter-gene coexpression search with only one gene");
            }
            allCoexpressions = coexpressionService.findInterCoexpressionRelationships(taxon, genes, eeIds, stringency, quick);
        } else {
            allCoexpressions = coexpressionService.findCoexpressionRelationships(taxon, genes, eeIds, stringency, maxResults, quick);
        }
        int minimum_stringency_for_requery = 2;
        if (allCoexpressions.isEmpty() && stringency > minimum_stringency_for_requery) {
            // step size completely made up.
            stringency -= stepSize;
            // keep stringency at least 2.
            stringency = Math.max(minimum_stringency_for_requery, stringency);
            GeneCoexpressionSearchServiceImpl.log.info("No results, re-querying with stringency=" + stringency);
        } else {
            // no results.
            break;
        }
    }
    GeneCoexpressionSearchServiceImpl.log.info("Final actual stringency used was " + stringency);
    result.setQueryStringency(stringency);
    result.setQueryGenesOnly(actuallyUseQueryGeneOnly);
    Set<Long> queryGeneIds = allCoexpressions.keySet();
    assert genes.containsAll(queryGeneIds);
    Map<Long, GeneValueObject> idMap = EntityUtils.getIdMap(geneService.loadValueObjectsByIds(queryGeneIds));
    int k = 0;
    for (Long queryGene : queryGeneIds) {
        Collection<CoexpressionValueObject> coexpressions = allCoexpressions.get(queryGene);
        List<CoexpressionValueObjectExt> results = this.addExtCoexpressionValueObjects(idMap.get(queryGene), coexpressions, actuallyUseQueryGeneOnly, genes);
        result.getResults().addAll(results);
        CoexpressionSummaryValueObject summary = new CoexpressionSummaryValueObject(queryGene);
        summary.setDatasetsAvailable(eevos.size());
        summary.setLinksFound(coexpressions.size());
        result.getSummaries().put(queryGene, summary);
        if (++k % 100 == 0) {
            GeneCoexpressionSearchServiceImpl.log.info("Processed results for " + k + " query genes ...");
        }
    }
    Collections.sort(result.getResults());
    // FIXME we might want to suppress this in some situations
    if (!queryGenesOnly) {
        result.trim();
    }
    this.populateNodeDegrees(result);
    return result;
}
Also used : Taxon(ubic.gemma.model.genome.Taxon) GeneValueObject(ubic.gemma.model.genome.gene.GeneValueObject) ExpressionExperimentValueObject(ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject) CoexpressionValueObject(ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject)

Aggregations

CoexpressionValueObject (ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject)4 Taxon (ubic.gemma.model.genome.Taxon)3 GeneValueObject (ubic.gemma.model.genome.gene.GeneValueObject)2 GZIPOutputStream (java.util.zip.GZIPOutputStream)1 ExperimentalDesignWriter (ubic.gemma.core.datastructure.matrix.ExperimentalDesignWriter)1 MatrixWriter (ubic.gemma.core.datastructure.matrix.MatrixWriter)1 GeneCoexpressionNodeDegreeValueObject (ubic.gemma.model.association.coexpression.GeneCoexpressionNodeDegreeValueObject)1 ExpressionExperimentValueObject (ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject)1 Gene (ubic.gemma.model.genome.Gene)1