Search in sources :

Example 1 with NonPersistentNonOrderedCoexpLink

use of ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink in project Gemma by PavlidisLab.

the class Gene2GeneCoexpressionServiceTest method setup.

/**
 * Test fixture: builds two genes for the "mouse" taxon, wraps one coexpression link between them, and hands
 * that link plus both genes to the gene-to-gene coexpression service for a test expression experiment.
 */
@Before
public void setup() {
    Taxon mouse = taxonS.findByCommonName("mouse");

    // First gene of the pair; kept in a field of the test class.
    firstGene = Gene.Factory.newInstance();
    firstGene.setName("test_gene2geneCoexpression");
    firstGene.setTaxon(mouse);
    firstGene = geneS.create(firstGene);

    // Second gene of the pair; only needed while the fixture is built.
    Gene partnerGene = Gene.Factory.newInstance();
    partnerGene.setName("test_gene2geneCoexpression2");
    partnerGene.setTaxon(mouse);
    partnerGene = geneS.create(partnerGene);

    // A single link (value 0.9) between the two genes, identified by their ids.
    List<NonPersistentNonOrderedCoexpLink> coexpLinks = new ArrayList<>();
    coexpLinks.add(new NonPersistentNonOrderedCoexpLink(
            MouseGeneCoExpression.Factory.newInstance(0.9, partnerGene.getId(), firstGene.getId())));

    ee = this.getTestPersistentBasicExpressionExperiment();

    // Both genes are reported as tested.
    Set<Gene> testedGenes = new HashSet<>();
    testedGenes.add(firstGene);
    testedGenes.add(partnerGene);

    LinkCreator mouseLinkCreator = new LinkCreator(mouse);
    g2gCoexpressionService.createOrUpdate(ee, coexpLinks, mouseLinkCreator, testedGenes);
}
Also used : LinkCreator(ubic.gemma.persistence.service.association.coexpression.LinkCreator) Gene(ubic.gemma.model.genome.Gene) Taxon(ubic.gemma.model.genome.Taxon) NonPersistentNonOrderedCoexpLink(ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink) Before(org.junit.Before)

Example 2 with NonPersistentNonOrderedCoexpLink

use of ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink in project Gemma by PavlidisLab.

the class LinkAnalysisPersisterImpl method initializeLinksFromOldData.

/**
 * Creates stub links for the genes of a taxon from previously stored ("old") link data.
 * <p>
 * Old links are counted per gene first; genes whose count is below a fixed threshold are collected into a skip
 * set that is passed to the service for every gene processed afterwards.
 *
 * @param t taxon whose genes are loaded and processed
 * @throws IllegalStateException if the number of genes below the threshold equals the number of genes loaded
 */
@Override
public void initializeLinksFromOldData(Taxon t) {
    Collection<Gene> genes = geneService.loadAll(t);
    Map<Long, Gene> idMap = EntityUtils.getIdMap(genes);

    /*
     * The old links of every gene are counted up front, as a separate pass, because the complete set of genes
     * to skip must be known before any gene is processed: a skipped gene also has to be recognized when it
     * shows up as the second gene of a link.
     */
    LinkAnalysisPersisterImpl.log.info("Counting old links for " + genes.size() + " genes.");
    Map<Gene, Integer> counts = gene2GeneCoexpressionService.countOldLinks(genes);

    // Genes with fewer old links than this are ignored.
    final int minimumOldLinks = 100;
    Set<Long> skipGenes = new HashSet<>();
    for (Map.Entry<Gene, Integer> geneCount : counts.entrySet()) {
        if (geneCount.getValue() < minimumOldLinks) {
            skipGenes.add(geneCount.getKey().getId());
        }
    }

    if (skipGenes.size() == genes.size()) {
        throw new IllegalStateException("There weren't enough links to bother making any stubs.");
    }

    // Links seen in one direction only so far, mapped to the support details that can be reused.
    Map<NonPersistentNonOrderedCoexpLink, SupportDetails> linksSoFar = new HashMap<>();
    LinkAnalysisPersisterImpl.log.info("Creating stub links for up to " + genes.size() + " genes; " + skipGenes.size() + " genes will be ignored because they have too few links.");

    int genesProcessed = 0;
    int linksCreated = 0;
    for (Gene gene : genes) {
        Map<SupportDetails, Gene2GeneCoexpression> links = gene2GeneCoexpressionService.initializeLinksFromOldData(gene, idMap, linksSoFar, skipGenes);
        if (links == null || links.isEmpty()) {
            // Nothing was created for this gene; it is not counted as processed.
            continue;
        }
        linksCreated += links.size();

        /*
         * Record the links created so far, ignoring "direction", so that the support details can be reused.
         */
        for (SupportDetails details : links.keySet()) {
            assert details.getId() != null;
            Gene2GeneCoexpression coexpression = links.get(details);
            assert coexpression.getId() != null;
            assert coexpression.getSupportDetails() != null && coexpression.getSupportDetails().getId() != null;
            assert details.equals(coexpression.getSupportDetails());

            NonPersistentNonOrderedCoexpLink key = new NonPersistentNonOrderedCoexpLink(coexpression.getFirstGene(), coexpression.getSecondGene(), coexpression.isPositiveCorrelation());
            if (!linksSoFar.containsKey(key)) {
                linksSoFar.put(key, details);
            } else {
                // The same unordered pair was already recorded (presumably from the other direction), so the
                // entry is no longer needed; removing it helps free up memory.
                assert details.equals(linksSoFar.get(key));
                linksSoFar.remove(key);
            }
        }

        LinkAnalysisPersisterImpl.log.info(links.size() + " links created for " + gene + ", " + linksCreated + " links created so far.");

        genesProcessed++;
        if (genesProcessed % 500 == 0) {
            LinkAnalysisPersisterImpl.log.info("***** " + genesProcessed + " processed");
        }
    }
}
Also used : Gene2GeneCoexpression(ubic.gemma.model.association.coexpression.Gene2GeneCoexpression) SupportDetails(ubic.gemma.model.analysis.expression.coexpression.SupportDetails) Gene(ubic.gemma.model.genome.Gene) NonPersistentNonOrderedCoexpLink(ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink)

Example 3 with NonPersistentNonOrderedCoexpLink

use of ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink in project Gemma by PavlidisLab.

the class LinkAnalysisPersisterImpl method saveLinks.

/**
 * Turns probe-level links into distinct gene-level links, hands them to the gene-to-gene coexpression service
 * and records summary counts on the analysis object.
 *
 * @param la    link analysis supplying the probes, the probe-to-gene map, the genes tested, the expression
 *              experiment and the analysis object
 * @param links probe-level links; null entries are ignored
 * @return how many links were saved
 * @throws RuntimeException if no gene-level link remains after self-links and duplicates are dropped
 * @throws AssertionError   if a gene that ended up in a link is not among the genes tested
 */
private int saveLinks(LinkAnalysis la, ObjectArrayList links) {
    LinkCreator creator = this.getLinkCreator(la);
    int selfLinkCount = 0;
    int duplicateCount = 0;
    Set<Gene> linkedGenes = new HashSet<>();
    Set<NonPersistentNonOrderedCoexpLink> distinctLinks = new HashSet<>();

    final int total = links.size();
    for (int index = 0; index < total; index++) {
        Object entry = links.getQuick(index);
        if (entry == null) {
            continue;
        }
        Link probeLink = (Link) entry;
        Double weight = probeLink.getWeight();
        CompositeSequence firstProbe = la.getProbe(probeLink.getx());
        CompositeSequence secondProbe = la.getProbe(probeLink.gety());

        /*
         * Every combination of genes has to be handled when probes map to more than one gene, so a single
         * pair of probes can yield more than one link. This assumes that preprocessing of the data allowed
         * retention of probes that map to more than one gene.
         */
        for (Gene firstGene : la.getProbeToGeneMap().get(firstProbe)) {
            boolean firstGeneLinked = false;
            for (Gene secondGene : la.getProbeToGeneMap().get(secondProbe)) {
                if (firstGene.equals(secondGene)) {
                    selfLinkCount++;
                    continue;
                }

                NonPersistentNonOrderedCoexpLink candidate = new NonPersistentNonOrderedCoexpLink(this.initCoexp(weight, creator, firstGene, secondGene));

                /*
                 * FIXME what do we do when a pair of genes is both positively and negatively correlated in
                 * the same experiment? Currently they are both kept, but if we go to a completely gene-based
                 * analysis we wouldn't do that, so it's an inconsistency;
                 */
                if (distinctLinks.add(candidate)) {
                    if (LinkAnalysisPersisterImpl.log.isDebugEnabled()) {
                        LinkAnalysisPersisterImpl.log.debug("Adding : " + candidate);
                    }
                    firstGeneLinked = true;
                    linkedGenes.add(secondGene);
                } else {
                    /*
                     * Already present: this happens if more than one probe is retained for a gene (or both
                     * genes) and the coexpression shows up more than once (different pair of probes, same
                     * genes).
                     */
                    if (LinkAnalysisPersisterImpl.log.isDebugEnabled()) {
                        LinkAnalysisPersisterImpl.log.debug("Skipping duplicate: " + candidate);
                    }
                    duplicateCount++;
                }
            }
            if (firstGeneLinked) {
                linkedGenes.add(firstGene);
            }
        }

        if (index > 0 && index % 200000 == 0) {
            LinkAnalysisPersisterImpl.log.info(index + " links checked");
        }
    }

    if (selfLinkCount > 0) {
        LinkAnalysisPersisterImpl.log.info(selfLinkCount + " self-links skipped");
    }
    if (duplicateCount > 0) {
        LinkAnalysisPersisterImpl.log.info(duplicateCount + " duplicate links skipped (likely cause: more than one probe supporting the same link)");
    }
    if (distinctLinks.isEmpty()) {
        throw new RuntimeException("No links left!");
    }

    final int savedCount = distinctLinks.size();
    LinkAnalysisPersisterImpl.log.info(savedCount + " links ready for saving to db");

    // Sanity check: every gene that ended up in a link must be one of the genes tested.
    if (!la.getGenesTested().containsAll(linkedGenes)) {
        throw new AssertionError();
    }

    /*
     * Do the actual database writing. It's a good idea to do this part in one (big) transaction. The "genes
     * tested" information is passed along with the links.
     */
    this.gene2GeneCoexpressionService.createOrUpdate(la.getExpressionExperiment(), new ArrayList<>(distinctLinks), creator, la.getGenesTested());

    /*
     * Update the meta-data about the analysis.
     *
     * Updating node degree cannot be done here, since we need to know the support. We have to do that
     * "periodically" if we want it available in summary form.
     */
    CoexpressionAnalysis analysisObj = la.getAnalysisObj();
    assert analysisObj.getId() != null;
    analysisObj.setNumberOfElementsAnalyzed(la.getGenesTested().size());
    analysisObj.setNumberOfLinks(savedCount);
    coexpressionAnalysisService.update(analysisObj);
    return savedCount;
}
Also used : LinkCreator(ubic.gemma.persistence.service.association.coexpression.LinkCreator) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) CoexpressionAnalysis(ubic.gemma.model.analysis.expression.coexpression.CoexpressionAnalysis) Gene(ubic.gemma.model.genome.Gene) NonPersistentNonOrderedCoexpLink(ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink) NonPersistentNonOrderedCoexpLink(ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink) Link(ubic.basecode.dataStructure.Link)

Aggregations

Gene (ubic.gemma.model.genome.Gene)3 NonPersistentNonOrderedCoexpLink (ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink)3 LinkCreator (ubic.gemma.persistence.service.association.coexpression.LinkCreator)2 Before (org.junit.Before)1 Link (ubic.basecode.dataStructure.Link)1 CoexpressionAnalysis (ubic.gemma.model.analysis.expression.coexpression.CoexpressionAnalysis)1 SupportDetails (ubic.gemma.model.analysis.expression.coexpression.SupportDetails)1 Gene2GeneCoexpression (ubic.gemma.model.association.coexpression.Gene2GeneCoexpression)1 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)1 Taxon (ubic.gemma.model.genome.Taxon)1