Search in sources :

Example 11 with Taxon

use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.

the class StringProteinProteinInteractionObjectGenerator method generate.

/**
 * Main method to generate StringProteinProteinInteraction objects.
 * <p>
 * If no local STRING file is configured ({@code stringProteinInteractionFileLocal} is null), the file is first
 * obtained via {@code fetchProteinStringFileFromRemoteSiteUnArchived()}. The file is then parsed once per taxon.
 *
 * @param validTaxa Taxa to generate StringProteinProteinInteraction objects for (STRING covers many taxa).
 * @return Collection of StringProteinProteinInteraction objects specific for the taxa that were provided, held in a
 * map keyed on taxon.
 */
public Map<Taxon, Collection<StringProteinProteinInteraction>> generate(Collection<Taxon> validTaxa) {
    log.debug("Starting to get StringProteinProteinInteraction data");
    if (stringProteinInteractionFileLocal == null) {
        log.info("stringProteinInteractionFile is remote file fetching remote site");
        fetchProteinStringFileFromRemoteSiteUnArchived();
    }
    Map<Taxon, Collection<StringProteinProteinInteraction>> map = new HashMap<>();
    // The file is parsed separately for each taxon on purpose: parsing all taxa in one big go
    // resulted in java.lang.OutOfMemoryError: Java heap space.
    for (Taxon taxon : validTaxa) {
        log.info("calling taxon " + taxon);
        // The parser takes a collection of taxa, so wrap the single taxon (kept mutable, as before).
        Collection<Taxon> taxa = new ArrayList<>();
        taxa.add(taxon);
        Collection<StringProteinProteinInteraction> interactions = this.parseProteinStringFileInteraction(taxa);
        map.put(taxon, interactions);
    }
    // Previously this repeated the "Starting ..." message, which made the debug log misleading.
    log.debug("Finished getting StringProteinProteinInteraction data");
    return map;
}
Also used : HashMap(java.util.HashMap) Taxon(ubic.gemma.model.genome.Taxon) ArrayList(java.util.ArrayList) Collection(java.util.Collection) StringProteinProteinInteraction(ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction)

Example 12 with Taxon

use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.

the class LinkAnalysisServiceTest method checkResults.

/**
 * Runs a series of consistency checks on the stored coexpression links and reports how many links were seen.
 * <p>
 * Checks performed, in order: coexpression results exist for {@code ee}; node degrees exist; the
 * experiment-major query over {@code ees} returns results; for every mouse gene that has links, the link count
 * for {@code ee} matches {@code countLinks} and the node degree matches the number of links; multi-gene and
 * inter-gene queries return results. Every returned CoexpressionValueObject is passed to {@code checkResult}.
 * <p>
 * NOTE(review): {@code ee} is not declared in this method; presumably it is a field of the test class - confirm.
 *
 * @param ees                       the data sets whose ids are passed to the coexpression queries
 * @param expectedMinimumMaxSupport lower bound asserted on the largest {@code getNumDatasetsSupporting()} value
 *                                  seen among the per-gene links
 * @return the sum of {@code links.size()} over all mouse genes that had at least one link
 */
private int checkResults(Collection<BioAssaySet> ees, int expectedMinimumMaxSupport) {
    boolean foundOne = false;
    int maxSupport = 0;
    Taxon mouse = taxonService.findByCommonName("mouse");
    // Genes for which links were found; also used below as input to the multi-gene queries.
    Collection<Gene> genesWithLinks = new ArrayList<>();
    int totalLinks = 0;
    // numdatasetstesting will not be set so we won't bother checking.
    assertTrue(!geneCoexpressionService.getCoexpression(ee, true).isEmpty());
    Collection<CoexpressionValueObject> eeResults = geneCoexpressionService.getCoexpression(ee, false);
    assertTrue(!eeResults.isEmpty());
    for (CoexpressionValueObject coex : eeResults) {
        this.checkResult(coex);
    }
    // Node degrees are requested for the ids of every gene returned by geneService.loadAll().
    Map<Long, GeneCoexpressionNodeDegreeValueObject> nodeDegrees = geneCoexpressionService.getNodeDegrees(EntityUtils.getIds(geneService.loadAll()));
    assertTrue(!nodeDegrees.isEmpty());
    // experiment-major query
    Map<Long, List<CoexpressionValueObject>> allLinks = geneCoexpressionService.findCoexpressionRelationships(mouse, new HashSet<Long>(), EntityUtils.getIds(ees), ees.size(), 10, false);
    assertTrue(!allLinks.isEmpty());
    for (Long g : allLinks.keySet()) {
        for (CoexpressionValueObject coex : allLinks.get(g)) {
            this.checkResult(coex);
        }
    }
    // Per-gene checks over all mouse genes.
    for (Gene gene : geneService.loadAll(mouse)) {
        Collection<CoexpressionValueObject> links = geneCoexpressionService.findCoexpressionRelationships(gene, EntityUtils.getIds(ees), 1, 0, false);
        // Genes without links are skipped entirely (they are neither counted nor collected).
        if (links == null || links.isEmpty()) {
            continue;
        }
        // The number of links found for this gene in ee alone must agree with countLinks(ee, gene).
        assertEquals(geneCoexpressionService.findCoexpressionRelationships(gene, Collections.singleton(ee.getId()), 0, false).size(), geneCoexpressionService.countLinks(ee, gene).intValue());
        GeneCoexpressionNodeDegreeValueObject nodeDegree = geneCoexpressionService.getNodeDegree(gene);
        // The explicit comparison before assertEquals only exists so the node degree is logged on a mismatch.
        if (links.size() != nodeDegree.getLinksWithMinimumSupport(1)) {
            log.info(nodeDegree);
            assertEquals("Node degree check failed for gene " + gene, links.size(), nodeDegree.getLinksWithMinimumSupport(1).intValue());
        }
        assertTrue(nodeDegree.getLinksWithMinimumSupport(1) >= nodeDegree.getLinksWithMinimumSupport(2));
        totalLinks += links.size();
        log.debug(links.size() + " hits for " + gene);
        // Track the highest support value seen across all links of all genes.
        for (CoexpressionValueObject coex : links) {
            this.checkResult(coex);
            if (coex.getNumDatasetsSupporting() > maxSupport) {
                maxSupport = coex.getNumDatasetsSupporting();
            }
        }
        foundOne = true;
        // The size is tested before the current gene is added (see the add at the end of the loop body), so
        // the multi-gene queries run exactly once: on the iteration where five genes were already collected.
        if (genesWithLinks.size() == 5) {
            // without specifying stringency
            Map<Long, List<CoexpressionValueObject>> multiGeneResults = geneCoexpressionService.findCoexpressionRelationships(mouse, EntityUtils.getIds(genesWithLinks), EntityUtils.getIds(ees), 100, false);
            if (multiGeneResults.isEmpty()) {
                // noinspection ConstantConditions // these strange structures are to help with debugger.
                assertTrue(!multiGeneResults.isEmpty());
            }
            for (Long id : multiGeneResults.keySet()) {
                for (CoexpressionValueObject coex : multiGeneResults.get(id)) {
                    this.checkResult(coex);
                }
            }
            // with stringency specified, quick.
            Map<Long, List<CoexpressionValueObject>> multiGeneResults2 = geneCoexpressionService.findCoexpressionRelationships(mouse, EntityUtils.getIds(genesWithLinks), EntityUtils.getIds(ees), ees.size(), 100, true);
            // Both variants of the query are expected to return the same number of entries.
            if (multiGeneResults.size() != multiGeneResults2.size()) {
                assertEquals(multiGeneResults.size(), multiGeneResults2.size());
            }
            for (Long id : multiGeneResults2.keySet()) {
                for (CoexpressionValueObject coex : multiGeneResults2.get(id)) {
                    this.checkResult(coex);
                }
            }
        }
        genesWithLinks.add(gene);
    }
    // At least one mouse gene must have had links.
    assertTrue(foundOne);
    Map<Long, List<CoexpressionValueObject>> mygeneresults = geneCoexpressionService.findInterCoexpressionRelationships(mouse, EntityUtils.getIds(genesWithLinks), EntityUtils.getIds(ees), 1, false);
    if (mygeneresults.isEmpty()) {
        // noinspection ConstantConditions // these strange structures are to help with debugger.
        assertTrue(!mygeneresults.isEmpty());
    }
    for (Long id : mygeneresults.keySet()) {
        for (CoexpressionValueObject coex : mygeneresults.get(id)) {
            this.checkResult(coex);
        }
    }
    assertTrue(maxSupport >= expectedMinimumMaxSupport);
    return totalLinks;
}
Also used : Taxon(ubic.gemma.model.genome.Taxon) Gene(ubic.gemma.model.genome.Gene) GeneCoexpressionNodeDegreeValueObject(ubic.gemma.model.association.coexpression.GeneCoexpressionNodeDegreeValueObject) CoexpressionValueObject(ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject)

Example 13 with Taxon

use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.

the class DiffExMetaAnalyzerServiceTest method addGenes.

/**
 * Add gene annotations. Requires removing old sequence associations.
 * <p>
 * For each of the GPL96 and GPL97 array designs, the existing biological characteristics are removed and the
 * probes are re-processed from the bundled human annotation file; afterwards the gene-to-composite-sequence
 * entries are updated.
 *
 * @throws Exception if the annotation resource cannot be resolved to a file or processing fails
 */
private void addGenes() throws Exception {
    // Preconditions use assertNotNull (as for the array designs below) rather than the Java 'assert' keyword,
    // which is silently skipped when the JVM runs without -ea.
    ExternalDatabase genbank = edService.findByName("genbank");
    assertNotNull(genbank);
    Taxon human = taxonService.findByCommonName("human");
    assertNotNull(human);
    File annotationFile = new File(this.getClass().getResource("/data/loader/expression/geo/meta-analysis/human.probes.for.import.txt").toURI());
    ArrayDesign gpl96 = arrayDesignService.findByShortName("GPL96");
    assertNotNull(gpl96);
    ArrayDesign gpl97 = arrayDesignService.findByShortName("GPL97");
    assertNotNull(gpl97);
    // Old sequence associations have to go before the probes of each platform are processed again.
    arrayDesignService.removeBiologicalCharacteristics(gpl96);
    arrayDesignProbeMapperService.processArrayDesign(gpl96, human, annotationFile, genbank, false);
    arrayDesignService.removeBiologicalCharacteristics(gpl97);
    arrayDesignProbeMapperService.processArrayDesign(gpl97, human, annotationFile, genbank, false);
    tableMaintenanceUtil.updateGene2CsEntries();
}
Also used : ExternalDatabase(ubic.gemma.model.common.description.ExternalDatabase) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Taxon(ubic.gemma.model.genome.Taxon) File(java.io.File)

Example 14 with Taxon

use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.

the class Gene2GeneCoexpressionServiceTest method setup.

/**
 * Test fixture: persists two mouse genes, creates a test expression experiment and stores a single
 * coexpression link between the two genes for that experiment.
 */
@Before
public void setup() {
    final Taxon mouse = taxonS.findByCommonName("mouse");

    // The first gene is kept in a field of the test class.
    firstGene = Gene.Factory.newInstance();
    firstGene.setName("test_gene2geneCoexpression");
    firstGene.setTaxon(mouse);
    firstGene = geneS.create(firstGene);

    // The second gene is only needed while building the fixture.
    Gene partnerGene = Gene.Factory.newInstance();
    partnerGene.setName("test_gene2geneCoexpression2");
    partnerGene.setTaxon(mouse);
    partnerGene = geneS.create(partnerGene);

    // One link (value 0.9) between the ids of the second and the first gene.
    NonPersistentNonOrderedCoexpLink link = new NonPersistentNonOrderedCoexpLink(
            MouseGeneCoExpression.Factory.newInstance(0.9, partnerGene.getId(), firstGene.getId()));
    List<NonPersistentNonOrderedCoexpLink> coexpLinks = new ArrayList<>();
    coexpLinks.add(link);

    ee = this.getTestPersistentBasicExpressionExperiment();

    Set<Gene> testedGenes = new HashSet<>();
    testedGenes.add(firstGene);
    testedGenes.add(partnerGene);

    LinkCreator linkCreator = new LinkCreator(mouse);
    g2gCoexpressionService.createOrUpdate(ee, coexpLinks, linkCreator, testedGenes);
}
Also used : LinkCreator(ubic.gemma.persistence.service.association.coexpression.LinkCreator) Gene(ubic.gemma.model.genome.Gene) Taxon(ubic.gemma.model.genome.Taxon) NonPersistentNonOrderedCoexpLink(ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink) Before(org.junit.Before)

Example 15 with Taxon

use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.

the class ProbeMapperTest method setUp.

/**
 * Test fixture: fills {@code tester} with four sample values, parses the bundled col8a1 BLAT results for a
 * "mouse" taxon into {@code blatres}, and creates the mouse and human GoldenPath analysis objects from the
 * test database settings.
 *
 * @throws Exception if the superclass set-up, the parsing or the creation of the analysis objects fails
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    tester = new ArrayList<>();
    for (double value : new double[] { 400d, 200d, 100d, 50d }) {
        tester.add(value);
    }

    try (InputStream blatStream = this.getClass().getResourceAsStream("/data/loader/genome/col8a1.blatresults.txt")) {
        BlatResultParser parser = new BlatResultParser();
        Taxon mouse = Taxon.Factory.newInstance();
        mouse.setCommonName("mouse");
        parser.setTaxon(mouse);
        parser.parse(blatStream);
        blatres = parser.getResults();
        assert blatres != null && blatres.size() > 0;
    }

    // Connection details are shared by both GoldenPath databases.
    final String host = Settings.getString("gemma.testdb.host");
    final String user = Settings.getString("gemma.testdb.user");
    final String password = Settings.getString("gemma.testdb.password");
    final int port = 3306;

    String mouseDatabase = Settings.getString("gemma.goldenpath.db.mouse");
    mousegp = new GoldenPathSequenceAnalysis(port, mouseDatabase, host, user, password);

    String humanDatabase = Settings.getString("gemma.goldenpath.db.human");
    humangp = new GoldenPathSequenceAnalysis(port, humanDatabase, host, user, password);
}
Also used : GoldenPathSequenceAnalysis(ubic.gemma.core.externalDb.GoldenPathSequenceAnalysis) BlatResultParser(ubic.gemma.core.loader.genome.BlatResultParser) InputStream(java.io.InputStream) Taxon(ubic.gemma.model.genome.Taxon)

Aggregations

Taxon (ubic.gemma.model.genome.Taxon)161 Gene (ubic.gemma.model.genome.Gene)34 Test (org.junit.Test)31 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)29 HashSet (java.util.HashSet)23 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)23 InputStream (java.io.InputStream)17 Before (org.junit.Before)16 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)15 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)14 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)12 StopWatch (org.apache.commons.lang3.time.StopWatch)11 Transactional (org.springframework.transaction.annotation.Transactional)11 ArrayList (java.util.ArrayList)10 File (java.io.File)9 SimpleExpressionExperimentMetaData (ubic.gemma.core.loader.expression.simple.model.SimpleExpressionExperimentMetaData)9 Chromosome (ubic.gemma.model.genome.Chromosome)8 Collection (java.util.Collection)7 Element (org.w3c.dom.Element)7 PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation)7