use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class GeneSetSearchImpl method goTermToGeneSets.
/**
 * Builds one transient gene set per taxon for the given GO term, using the genes annotated with the term itself
 * or with any of the terms reported by the gene ontology service as its children.
 *
 * @param term           the GO term to expand; may be null
 * @param maxGeneSetSize when non-null, taxa having more genes than this are skipped
 * @return the transient (unsaved) gene sets, one per retained taxon; null when the term is null or has no URI
 */
private Collection<GeneSet> goTermToGeneSets(OntologyTerm term, Integer maxGeneSetSize) {
    if (term == null || term.getUri() == null) {
        return null;
    }

    Collection<OntologyResource> termAndChildren = new HashSet<>();
    termAndChildren.add(term);
    termAndChildren.addAll(this.geneOntologyService.getAllChildren(term));
    GeneSetSearchImpl.log.info(term);

    // Translate every matched resource into the identifier used for the gene lookup.
    Collection<String> goIds = new HashSet<>();
    for (OntologyResource resource : termAndChildren) {
        goIds.add(this.uri2goid(resource));
    }

    Map<Taxon, Collection<Gene>> genesByTaxon = this.gene2GoService.findByGOTermsPerTaxon(goIds);

    Collection<GeneSet> geneSets = new HashSet<>();
    for (Collection<Gene> taxonGenes : genesByTaxon.values()) {
        if (taxonGenes.isEmpty()) {
            continue;
        }
        if (maxGeneSetSize != null && taxonGenes.size() > maxGeneSetSize) {
            // Too many genes for this taxon; the caller asked for a cap.
            continue;
        }

        // The set is named after the queried term, not after the child terms that contributed genes.
        GeneSet geneSet = GeneSet.Factory.newInstance();
        geneSet.setName(this.uri2goid(term));
        geneSet.setDescription(term.getLabel());
        for (Gene taxonGene : taxonGenes) {
            GeneSetMember member = GeneSetMember.Factory.newInstance();
            member.setGene(taxonGene);
            geneSet.getMembers().add(member);
        }
        geneSets.add(geneSet);
    }
    return geneSets;
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class SearchServiceImpl method ontologyUriSearch.
/**
 * Searches by ontology term URI. The URI is taken from the settings' term URI or, when that is blank, from the
 * query string. URIs containing {@code NCBI_GENE} are treated as gene URIs.
 *
 * @param settings the search settings
 * @return results keyed by result class, if the settings.termUri is populated. This includes gene uris. The map
 *         is empty when no URI is available or the value does not start with "http://".
 */
private Map<Class<?>, List<SearchResult>> ontologyUriSearch(SearchSettings settings) {
    Map<Class<?>, List<SearchResult>> results = new HashMap<>();

    // 1st check to see if the query is a URI (from an ontology).
    String termUri = settings.getTermUri();
    if (StringUtils.isBlank(termUri)) {
        termUri = settings.getQuery();
    }
    // Neither a term URI nor a query may be set; guard so startsWith cannot throw an NPE.
    if (termUri == null || !termUri.startsWith("http://")) {
        return results;
    }

    // Escape with general utilities because might not be doing a lucene backed search. (just a hibernate one).
    String uriString = StringEscapeUtils.escapeJava(StringUtils.strip(termUri));

    if (StringUtils.containsIgnoreCase(uriString, SearchServiceImpl.NCBI_GENE)) {
        // Perhaps is a valid gene URL. Want to search for the gene in gemma.
        // 1st get objects tagged with the given gene identifier
        Collection<Class<?>> classesToFilterOn = new HashSet<>();
        classesToFilterOn.add(ExpressionExperiment.class);

        Collection<Characteristic> foundCharacteristics = characteristicService.findByUri(classesToFilterOn, uriString);
        Map<Characteristic, Object> parentMap = characteristicService.getParents(classesToFilterOn, foundCharacteristics);
        Collection<SearchResult> characteristicOwnerResults = this.filterCharacteristicOwnersByClass(classesToFilterOn, parentMap);
        if (!characteristicOwnerResults.isEmpty()) {
            results.put(ExpressionExperiment.class, new ArrayList<SearchResult>(characteristicOwnerResults));
        }

        if (settings.getSearchGenes()) {
            // Get the gene: the NCBI id is expected to be the last path segment of the URI.
            String ncbiAccessionFromUri = StringUtils.substringAfterLast(uriString, "/");
            Gene g = null;
            try {
                g = geneService.findByNCBIId(Integer.parseInt(ncbiAccessionFromUri));
            } catch (NumberFormatException ignored) {
                // Last segment is not a numeric id, so there is no gene to look up.
            }
            if (g != null) {
                List<SearchResult> geneHits = new ArrayList<>();
                geneHits.add(new SearchResult(g));
                results.put(Gene.class, geneHits);
            }
        }
        return results;
    }

    /*
     * Not searching for a gene.
     */
    Collection<Class<?>> classesToSearch = new HashSet<>();
    if (settings.getSearchExperiments()) {
        // not sure ...
        classesToSearch.add(ExpressionExperiment.class);
        classesToSearch.add(BioMaterial.class);
        classesToSearch.add(FactorValue.class);
    }
    // Phenotype associations are deliberately not searched here: LiteratureEvidence and GenericEvidence aren't
    // handled in the fillValueObjects method downstream.

    Collection<SearchResult> matchingResults;
    OntologyTerm matchingTerm = this.ontologyService.getTerm(uriString);
    if (matchingTerm == null || matchingTerm.getUri() == null) {
        /*
         * Maybe the ontology isn't loaded. Look anyway.
         */
        Map<Characteristic, Object> parentMap = characteristicService.getParents(classesToSearch, characteristicService.findByUri(classesToSearch, uriString));
        matchingResults = this.filterCharacteristicOwnersByClass(classesToSearch, parentMap);
    } else {
        SearchServiceImpl.log.info("Found ontology term: " + matchingTerm);
        // Was a URI from a loaded ontology so get the children. Work on a copy: the collection handed back by
        // the term may be unmodifiable or shared, so the term itself must not be added to it in place.
        Collection<OntologyTerm> terms2Search4 = new HashSet<>(matchingTerm.getChildren(true));
        terms2Search4.add(matchingTerm);
        matchingResults = this.databaseCharacteristicExactUriSearchForOwners(classesToSearch, terms2Search4);
    }

    // Group the hits by the class of the object they refer to.
    for (SearchResult searchR : matchingResults) {
        Class<?> resultClass = searchR.getResultClass();
        List<SearchResult> bucket = results.get(resultClass);
        if (bucket == null) {
            bucket = new ArrayList<>();
            results.put(resultClass, bucket);
        }
        bucket.add(searchR);
    }
    return results;
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class SearchServiceImpl method compassBioSequenceSearch.
/**
 * A compass backed search that finds biosequences that match the search string. Searches the biosequence index
 * directly, then adds the biosequences associated with matching genes.
 *
 * @param settings                  the search settings
 * @param previousGeneSearchResults Can be null, otherwise used to avoid a second search for genes. The biosequences
 *                                  for the genes are added to the final results.
 * @return the direct biosequence hits plus the biosequences found for the genes
 */
private Collection<SearchResult> compassBioSequenceSearch(SearchSettings settings, Collection<SearchResult> previousGeneSearchResults) {
    Collection<SearchResult> results = this.compassSearch(compassBiosequence, settings);

    Collection<SearchResult> geneResults;
    if (previousGeneSearchResults == null) {
        SearchServiceImpl.log.info("Biosequence Search: running gene search with " + settings.getQuery());
        geneResults = this.compassGeneSearch(settings);
    } else {
        SearchServiceImpl.log.info("Biosequence Search: using previous results");
        geneResults = previousGeneSearchResults;
    }

    // Remember which search result produced each gene so it can be passed along with the gene's sequences.
    Map<Gene, SearchResult> genes = new HashMap<>();
    for (SearchResult sr : geneResults) {
        Object resultObject = sr.getResultObject();
        if (resultObject instanceof Gene) {
            genes.put((Gene) resultObject, sr);
        } else {
            // see bug 1774 -- may not be happening any more. A missing result object is reported as well
            // instead of failing with a NullPointerException.
            SearchServiceImpl.log.warn("Expected a Gene, got a " + (resultObject == null ? "null" : resultObject.getClass()) + " on query=" + settings.getQuery());
        }
    }

    Map<Gene, Collection<BioSequence>> seqsFromDb = bioSequenceService.findByGenes(genes.keySet());
    for (Map.Entry<Gene, Collection<BioSequence>> entry : seqsFromDb.entrySet()) {
        List<BioSequence> bs = new ArrayList<>(entry.getValue());
        results.addAll(this.dbHitsToSearchResult(bs, genes.get(entry.getKey()), null));
    }
    return results;
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class SearchServiceImpl method databaseGeneSearch.
/**
 * Search the DB for genes that match the given search string. Tries, in order: NCBI id / accession, official
 * symbol (exact or with a trailing wildcard depending on the query length), and finally aliases, gene products,
 * biosequences and the Ensembl id. Results are filtered by taxon before being returned.
 *
 * @param settings the search settings; the query is unescaped before use
 * @return the matching genes as search results; empty if database search is disabled or the query is blank
 */
private Collection<SearchResult> databaseGeneSearch(SearchSettings settings) {
    if (!settings.getUseDatabase()) {
        return new HashSet<>();
    }

    StopWatch watch = this.startTiming();
    String searchString = StringEscapeUtils.unescapeJava(settings.getQuery());
    if (StringUtils.isBlank(searchString)) {
        return new HashSet<>();
    }

    Collection<SearchResult> results = new HashSet<>();

    /*
     * First search by accession. If we find it, stop.
     */
    Gene result = null;
    try {
        result = geneService.findByNCBIId(Integer.parseInt(searchString));
    } catch (NumberFormatException ignored) {
        // The query is not a number, so it cannot be an NCBI gene id; fall through to the accession lookup.
    }
    if (result == null) {
        result = geneService.findByAccession(searchString, null);
    }
    if (result != null) {
        results.add(this.dbHitToSearchResult(result));
    }

    if (!results.isEmpty()) {
        this.filterByTaxon(settings, results, true);
        watch.stop();
        if (watch.getTime() > 1000) {
            SearchServiceImpl.log.info("Gene DB search for " + searchString + " took " + watch.getTime() + " ms and found " + results.size() + " genes");
        }
        return results;
    }

    // replace * at end with % for inexact symbol search
    Pattern pattern = Pattern.compile("\\*$");
    Matcher match = pattern.matcher(searchString);
    String inexactString = match.replaceAll("%");
    // note that at this point, the inexactString might not have a wildcard - only if the user asked for it.
    String exactString = inexactString.replace("%", "");

    // Wildcards on very short queries would return too many results, so the strategy depends on the length
    // of the query as typed (including any trailing '*').
    Collection<Gene> geneSet = new HashSet<>();
    if (searchString.length() <= 2) {
        // case 0: very short query. Use the inexact finder but without any wildcard.
        geneSet.addAll(geneService.findByOfficialSymbolInexact(exactString));
    } else if (inexactString.endsWith("%")) {
        // case 1: user explicitly asked for a wildcard; allowed on queries of length 3 or more.
        geneSet.addAll(geneService.findByOfficialSymbolInexact(inexactString));
    } else if (searchString.length() > 3) {
        // case 2: user did not ask for a wildcard, but the query is 4 or more characters, so add one anyway.
        geneSet.addAll(geneService.findByOfficialSymbolInexact(inexactString + "%"));
    } else {
        // case 3: query is exactly 3 characters without a wildcard; exact symbol match only.
        geneSet.addAll(geneService.findByOfficialSymbol(exactString));
    }

    /*
     * If we found a match using official symbol or name, don't bother with this
     */
    if (geneSet.isEmpty()) {
        geneSet.addAll(geneService.findByAlias(exactString));
        geneSet.addAll(geneProductService.getGenesByName(exactString));
        geneSet.addAll(geneProductService.getGenesByNcbiId(exactString));
        geneSet.addAll(bioSequenceService.getGenesByAccession(exactString));
        geneSet.addAll(bioSequenceService.getGenesByName(exactString));
        // Single-result lookup: only add a real hit, never a null element.
        Gene geneByEnsemblId = geneService.findByEnsemblId(exactString);
        if (geneByEnsemblId != null) {
            geneSet.add(geneByEnsemblId);
        }
    }

    watch.stop();
    if (watch.getTime() > 1000) {
        SearchServiceImpl.log.info("Gene DB search for " + searchString + " took " + watch.getTime() + " ms and found " + geneSet.size() + " genes");
    }

    results = this.dbHitsToSearchResult(geneSet, null);
    this.filterByTaxon(settings, results, true);
    return results;
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class GeneOntologyServiceImpl method putOverlapGenes.
/**
 * Records, for every gene, the GO terms it shares with the query terms. Genes are keyed by id in the given map.
 * A gene gets an empty set when there are no query terms or when no GO terms are found for it.
 *
 * @param overlap        map to fill: gene id to the terms shared with the query
 * @param queryGeneTerms the GO terms to compare against
 * @param genes          the genes to compare
 */
private void putOverlapGenes(Map<Long, Collection<OntologyTerm>> overlap, Collection<OntologyTerm> queryGeneTerms, Collection<Gene> genes) {
    for (Gene gene : genes) {
        Collection<OntologyTerm> shared;
        if (queryGeneTerms.isEmpty()) {
            // Nothing to compare against, so skip the GO term lookup altogether.
            shared = new HashSet<>();
        } else {
            Collection<OntologyTerm> geneTerms = this.getGOTerms(gene);
            boolean hasTerms = geneTerms != null && !geneTerms.isEmpty();
            if (hasTerms) {
                shared = this.computeOverlap(queryGeneTerms, geneTerms);
            } else {
                shared = new HashSet<>();
            }
        }
        overlap.put(gene.getId(), shared);
    }
}
Aggregations