Search in sources :

Example 11 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class GeneSetSearchImpl method goTermToGeneSets.

/**
 * Builds one transient gene set per taxon for the given GO term. The genes are looked up using the term itself
 * plus every child term reported by the gene ontology service.
 *
 * @param term           the ontology term to expand; may be null
 * @param maxGeneSetSize if non-null, taxa having more genes than this are skipped
 * @return the transient (unpersisted) gene sets, named by GO id and described by the term label; null if the
 *         term is null or has no URI
 */
private Collection<GeneSet> goTermToGeneSets(OntologyTerm term, Integer maxGeneSetSize) {
    if (term == null || term.getUri() == null) {
        return null;
    }

    // The term itself plus all of its children.
    Collection<OntologyResource> termAndChildren = new HashSet<>();
    termAndChildren.add(term);
    termAndChildren.addAll(this.geneOntologyService.getAllChildren(term));
    GeneSetSearchImpl.log.info(term);

    // Convert every matched resource to its GO id for the lookup.
    Collection<String> goIds = new HashSet<>();
    for (OntologyResource resource : termAndChildren) {
        goIds.add(this.uri2goid(resource));
    }

    Map<Taxon, Collection<Gene>> genesByTaxon = this.gene2GoService.findByGOTermsPerTaxon(goIds);
    Collection<GeneSet> geneSets = new HashSet<>();
    for (Map.Entry<Taxon, Collection<Gene>> taxonGenes : genesByTaxon.entrySet()) {
        Collection<Gene> genes = taxonGenes.getValue();
        boolean tooLarge = maxGeneSetSize != null && genes.size() > maxGeneSetSize;
        if (!genes.isEmpty() && !tooLarge) {
            GeneSet geneSet = GeneSet.Factory.newInstance();
            geneSet.setName(this.uri2goid(term));
            geneSet.setDescription(term.getLabel());
            for (Gene member : genes) {
                GeneSetMember wrapper = GeneSetMember.Factory.newInstance();
                wrapper.setGene(member);
                geneSet.getMembers().add(wrapper);
            }
            geneSets.add(geneSet);
        }
    }
    return geneSets;
}
Also used : Taxon(ubic.gemma.model.genome.Taxon) GeneSetMember(ubic.gemma.model.genome.gene.GeneSetMember) Gene(ubic.gemma.model.genome.Gene) Collection(java.util.Collection) GeneSet(ubic.gemma.model.genome.gene.GeneSet) OntologyResource(ubic.basecode.ontology.model.OntologyResource) HashSet(java.util.HashSet)

Example 12 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class SearchServiceImpl method ontologyUriSearch.

/**
 * Searches for entities annotated with an ontology term URI. The URI is taken from settings.termUri or, when
 * that is blank, from the query string. URIs containing {@code NCBI_GENE} are treated as gene URIs: expression
 * experiments tagged with the URI are returned and, if gene search is enabled, the gene itself.
 *
 * @param settings the search settings
 * @return results grouped by result class; empty if neither the term URI nor the query is an "http://" URI
 */
private Map<Class<?>, List<SearchResult>> ontologyUriSearch(SearchSettings settings) {
    Map<Class<?>, List<SearchResult>> results = new HashMap<>();

    // First check whether the query is a URI (from an ontology); fall back to the raw query if no term URI is set.
    String termUri = settings.getTermUri();
    if (StringUtils.isBlank(termUri)) {
        termUri = settings.getQuery();
    }
    // The query may be missing too; treat that as "not a URI" rather than failing with a NullPointerException.
    if (termUri == null || !termUri.startsWith("http://")) {
        return results;
    }

    // Escape with general utilities because this might not be a lucene backed search (just a hibernate one).
    String uriString = StringEscapeUtils.escapeJava(StringUtils.strip(termUri));

    if (StringUtils.containsIgnoreCase(uriString, SearchServiceImpl.NCBI_GENE)) {
        // Perhaps a valid gene URL. First get the experiments tagged with the given gene identifier.
        Collection<Class<?>> classesToFilterOn = new HashSet<>();
        classesToFilterOn.add(ExpressionExperiment.class);
        Collection<Characteristic> foundCharacteristics = characteristicService.findByUri(classesToFilterOn, uriString);
        Map<Characteristic, Object> parentMap = characteristicService.getParents(classesToFilterOn, foundCharacteristics);
        Collection<SearchResult> characteristicOwnerResults = this.filterCharacteristicOwnersByClass(classesToFilterOn, parentMap);
        if (!characteristicOwnerResults.isEmpty()) {
            results.put(ExpressionExperiment.class, new ArrayList<SearchResult>(characteristicOwnerResults));
        }

        if (settings.getSearchGenes()) {
            // The NCBI gene id is expected to be the last path segment of the URI.
            String ncbiAccessionFromUri = StringUtils.substringAfterLast(uriString, "/");
            Gene g = null;
            try {
                g = geneService.findByNCBIId(Integer.parseInt(ncbiAccessionFromUri));
            } catch (NumberFormatException ignored) {
                // Last segment is not numeric, so it is not an NCBI gene id; no gene result is added.
            }
            if (g != null) {
                List<SearchResult> geneHits = new ArrayList<>();
                geneHits.add(new SearchResult(g));
                results.put(Gene.class, geneHits);
            }
        }
        return results;
    }

    /*
     * Not searching for a gene.
     */
    Collection<Class<?>> classesToSearch = new HashSet<>();
    if (settings.getSearchExperiments()) {
        // not sure ...
        classesToSearch.add(ExpressionExperiment.class);
        classesToSearch.add(BioMaterial.class);
        classesToSearch.add(FactorValue.class);
    }
    // Phenotype search doesn't seem to be implemented yet: LiteratureEvidence and GenericEvidence aren't handled
    // in the fillValueObjects method downstream.
    /*
     * if ( settings.getSearchPhenotypes() ) { classesToSearch.add( PhenotypeAssociation.class ); }
     */

    Collection<SearchResult> matchingResults;
    OntologyTerm matchingTerm = this.ontologyService.getTerm(uriString);
    if (matchingTerm == null || matchingTerm.getUri() == null) {
        /*
         * Maybe the ontology isn't loaded. Look anyway.
         */
        Map<Characteristic, Object> parentMap = characteristicService.getParents(classesToSearch, characteristicService.findByUri(classesToSearch, uriString));
        matchingResults = this.filterCharacteristicOwnersByClass(classesToSearch, parentMap);
    } else {
        SearchServiceImpl.log.info("Found ontology term: " + matchingTerm);
        // The URI is from a loaded ontology, so search the children too. Work on a copy: the collection handed
        // back by the term is not ours to modify (it may be shared or unmodifiable).
        Collection<OntologyTerm> terms2Search4 = new HashSet<>(matchingTerm.getChildren(true));
        terms2Search4.add(matchingTerm);
        matchingResults = this.databaseCharacteristicExactUriSearchForOwners(classesToSearch, terms2Search4);
    }

    // Group the hits by their result class.
    for (SearchResult searchR : matchingResults) {
        Class<?> resultClass = searchR.getResultClass();
        List<SearchResult> bucket = results.get(resultClass);
        if (bucket == null) {
            bucket = new ArrayList<>();
            results.put(resultClass, bucket);
        }
        bucket.add(searchR);
    }
    return results;
}
Also used : Characteristic(ubic.gemma.model.common.description.Characteristic) VocabCharacteristic(ubic.gemma.model.common.description.VocabCharacteristic) OntologyTerm(ubic.basecode.ontology.model.OntologyTerm) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) Gene(ubic.gemma.model.genome.Gene) BibliographicReferenceValueObject(ubic.gemma.model.common.description.BibliographicReferenceValueObject) SearchSettingsValueObject(ubic.gemma.model.common.search.SearchSettingsValueObject) BioSequenceValueObject(ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject) GeneEvidenceValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.GeneEvidenceValueObject) CharacteristicValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject)

Example 13 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class SearchServiceImpl method compassBioSequenceSearch.

/**
 * Compass-backed biosequence search. Runs the query against the biosequence index, then appends the
 * biosequences that belong to the genes matching the query.
 *
 * @param settings                  the search settings
 * @param previousGeneSearchResults gene hits from an earlier search; if null, a compass gene search is run here
 *                                  instead. The biosequences for these genes are added to the final results.
 * @return the direct biosequence hits plus the biosequences found through the genes
 */
private Collection<SearchResult> compassBioSequenceSearch(SearchSettings settings, Collection<SearchResult> previousGeneSearchResults) {
    Collection<SearchResult> results = this.compassSearch(compassBiosequence, settings);

    // Reuse the caller's gene hits when available; otherwise search for genes now.
    Collection<SearchResult> geneHits;
    if (previousGeneSearchResults != null) {
        SearchServiceImpl.log.info("Biosequence Search:  using previous results");
        geneHits = previousGeneSearchResults;
    } else {
        SearchServiceImpl.log.info("Biosequence Search:  running gene search with " + settings.getQuery());
        geneHits = this.compassGeneSearch(settings);
    }

    // Index the gene hits by gene so each sequence can be linked back to the hit that led to it.
    Map<Gene, SearchResult> hitsByGene = new HashMap<>();
    for (SearchResult hit : geneHits) {
        Object candidate = hit.getResultObject();
        if (!(candidate instanceof Gene)) {
            // see bug 1774 -- may not be happening any more.
            SearchServiceImpl.log.warn("Expected a Gene, got a " + candidate.getClass() + " on query=" + settings.getQuery());
            continue;
        }
        hitsByGene.put((Gene) candidate, hit);
    }

    Map<Gene, Collection<BioSequence>> sequencesByGene = bioSequenceService.findByGenes(hitsByGene.keySet());
    for (Map.Entry<Gene, Collection<BioSequence>> geneSequences : sequencesByGene.entrySet()) {
        List<BioSequence> sequences = new ArrayList<>(geneSequences.getValue());
        // bioSequenceService.thawRawAndProcessed( sequences );
        results.addAll(this.dbHitsToSearchResult(sequences, hitsByGene.get(geneSequences.getKey()), null));
    }
    return results;
}
Also used : Gene(ubic.gemma.model.genome.Gene) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) BibliographicReferenceValueObject(ubic.gemma.model.common.description.BibliographicReferenceValueObject) SearchSettingsValueObject(ubic.gemma.model.common.search.SearchSettingsValueObject) BioSequenceValueObject(ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject) GeneEvidenceValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.GeneEvidenceValueObject) CharacteristicValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject)

Example 14 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class SearchServiceImpl method databaseGeneSearch.

/**
 * Searches the database for genes matching the query string. Lookup order:
 * <ol>
 * <li>NCBI gene id (if the query is numeric), then accession; a hit here is returned immediately.</li>
 * <li>Official symbol, with wildcard handling that depends on the query length (see the cases below).</li>
 * <li>Only if the symbol lookup found nothing: alias, gene product name / NCBI id, biosequence accession / name,
 * and Ensembl id.</li>
 * </ol>
 * Results are filtered by taxon before being returned.
 *
 * @param settings the search settings
 * @return the matching genes as search results; empty if database search is disabled or the query is blank
 */
private Collection<SearchResult> databaseGeneSearch(SearchSettings settings) {
    if (!settings.getUseDatabase())
        return new HashSet<>();
    StopWatch watch = this.startTiming();
    String searchString = StringEscapeUtils.unescapeJava(settings.getQuery());
    if (StringUtils.isBlank(searchString))
        return new HashSet<>();
    Collection<SearchResult> results = new HashSet<>();

    /*
     * First search by NCBI id, then accession. If we find it, stop.
     */
    Gene result = null;
    try {
        result = geneService.findByNCBIId(Integer.parseInt(searchString));
    } catch (NumberFormatException ignored) {
        // Query is not numeric, so it cannot be an NCBI gene id; fall through to the accession lookup.
    }
    if (result == null) {
        result = geneService.findByAccession(searchString, null);
    }
    if (result != null) {
        results.add(this.dbHitToSearchResult(result));
        this.filterByTaxon(settings, results, true);
        watch.stop();
        if (watch.getTime() > 1000)
            SearchServiceImpl.log.info("Gene DB search for " + searchString + " took " + watch.getTime() + " ms and found " + results.size() + " genes");
        return results;
    }

    // replace * at end with % for inexact symbol search
    String inexactString = searchString;
    Pattern pattern = Pattern.compile("\\*$");
    Matcher match = pattern.matcher(inexactString);
    inexactString = match.replaceAll("%");
    // note that at this point, the inexactString might not have a wildcard - only if the user asked for it.
    String exactString = inexactString.replaceAll("%", "");

    // Wildcards on very short queries give too many results; on longer queries an implicit trailing wildcard
    // gives better behavior in 'live search' situations.
    Collection<Gene> geneSet = new HashSet<>();
    if (searchString.length() <= 2) {
        // case 0: very short query (1-2 characters). Any wildcard is dropped; the inexact finder is called with
        // the wildcard-free string.
        geneSet.addAll(geneService.findByOfficialSymbolInexact(exactString));
    } else if (inexactString.endsWith("%")) {
        // case 1: user explicitly asked for a wildcard. Allowed on queries of length 3 or more.
        geneSet.addAll(geneService.findByOfficialSymbolInexact(inexactString));
    } else if (searchString.length() > 3) {
        // case 2: no wildcard requested but the query has 4 or more characters, so append one anyway.
        inexactString = inexactString + "%";
        geneSet.addAll(geneService.findByOfficialSymbolInexact(inexactString));
    } else {
        // case 3: exactly 3 characters and no wildcard requested: exact symbol match only.
        geneSet.addAll(geneService.findByOfficialSymbol(exactString));
    }

    /*
     * If we found a match using official symbol, don't bother with this
     */
    if (geneSet.isEmpty()) {
        geneSet.addAll(geneService.findByAlias(exactString));
        geneSet.addAll(geneProductService.getGenesByName(exactString));
        geneSet.addAll(geneProductService.getGenesByNcbiId(exactString));
        geneSet.addAll(bioSequenceService.getGenesByAccession(exactString));
        geneSet.addAll(bioSequenceService.getGenesByName(exactString));
        // Single-gene lookup: guard against a miss so that null never ends up in the gene set, consistent with
        // the null checks on the other single-gene finders above.
        Gene byEnsemblId = geneService.findByEnsemblId(exactString);
        if (byEnsemblId != null) {
            geneSet.add(byEnsemblId);
        }
    }
    watch.stop();
    if (watch.getTime() > 1000)
        SearchServiceImpl.log.info("Gene DB search for " + searchString + " took " + watch.getTime() + " ms and found " + geneSet.size() + " genes");
    results = this.dbHitsToSearchResult(geneSet, null);
    this.filterByTaxon(settings, results, true);
    return results;
}
Also used : Pattern(java.util.regex.Pattern) Gene(ubic.gemma.model.genome.Gene) Matcher(java.util.regex.Matcher) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 15 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class GeneOntologyServiceImpl method putOverlapGenes.

/**
 * Records, for each gene, the overlap between the query gene's GO terms and that gene's GO terms.
 *
 * @param overlap        map to populate, keyed by gene id; receives one entry per gene
 * @param queryGeneTerms the query gene's terms; if empty, every gene gets an empty overlap and its terms are not
 *                       looked up
 * @param genes          the genes to compare against the query terms
 */
private void putOverlapGenes(Map<Long, Collection<OntologyTerm>> overlap, Collection<OntologyTerm> queryGeneTerms, Collection<Gene> genes) {
    for (Gene gene : genes) {
        // Skip the term lookup entirely when there is nothing to compare against.
        Collection<OntologyTerm> geneTerms = queryGeneTerms.isEmpty() ? null : this.getGOTerms(gene);
        boolean nothingToCompare = geneTerms == null || geneTerms.isEmpty();
        if (nothingToCompare) {
            overlap.put(gene.getId(), new HashSet<OntologyTerm>());
        } else {
            overlap.put(gene.getId(), this.computeOverlap(queryGeneTerms, geneTerms));
        }
    }
}
Also used : Gene(ubic.gemma.model.genome.Gene) GeneOntologyTermValueObject(ubic.gemma.model.genome.GeneOntologyTermValueObject) OntologyTerm(ubic.basecode.ontology.model.OntologyTerm)

Aggregations

Gene (ubic.gemma.model.genome.Gene)186 Taxon (ubic.gemma.model.genome.Taxon)34 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)32 StopWatch (org.apache.commons.lang3.time.StopWatch)31 Test (org.junit.Test)24 HashSet (java.util.HashSet)23 GeneProduct (ubic.gemma.model.genome.gene.GeneProduct)20 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)18 Element (org.w3c.dom.Element)16 ArrayList (java.util.ArrayList)13 Transactional (org.springframework.transaction.annotation.Transactional)12 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)12 Collection (java.util.Collection)11 OntologyTerm (ubic.basecode.ontology.model.OntologyTerm)11 CharacteristicValueObject (ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject)10 HashMap (java.util.HashMap)8 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)8 BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct)7 PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation)7 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)7