Search in sources:

Example 1 with CharacteristicValueObject

use of ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject in project Gemma by PavlidisLab.

The method findByPhenotypeName of the class GeneSetSearchImpl.

/**
 * Searches the ontology for phenotypes matching the query and creates one transient gene set for each matching
 * phenotype URI that has at least one candidate gene.
 *
 * @param phenotypeQuery phenotype search text; surrounding whitespace is stripped before the ontology search
 * @param taxon          taxon handed to the candidate-gene lookup
 * @return the transient gene sets that were built; empty when no phenotype matched the query
 */
@Override
public Collection<GeneSetValueObject> findByPhenotypeName(String phenotypeQuery, Taxon taxon) {
    StopWatch timer = new StopWatch();
    timer.start();
    Collection<CharacteristicValueObject> phenotypes = phenotypeAssociationManagerService.searchOntologyForPhenotypes(StringUtils.strip(phenotypeQuery), null);
    Collection<GeneSetValueObject> results = new HashSet<>();
    if (phenotypes.isEmpty()) {
        return results;
    }
    if (timer.getTime() > 200) {
        GeneSetSearchImpl.log.info("Find phenotypes: " + timer.getTime() + "ms");
    }
    // Guarded so the message is only assembled when debug logging is actually on.
    if (GeneSetSearchImpl.log.isDebugEnabled()) {
        GeneSetSearchImpl.log.debug(" Converting CharacteristicValueObjects collection(size:" + phenotypes.size() + ") into GeneSets for  phenotype query " + phenotypeQuery);
    }
    // Index the phenotypes by value URI so each gene collection can be matched back to its phenotype.
    Map<String, CharacteristicValueObject> uris = new HashMap<>();
    for (CharacteristicValueObject cvo : phenotypes) {
        uris.put(cvo.getValueUri(), cvo);
    }
    Map<String, Collection<? extends GeneValueObject>> genes = phenotypeAssociationManagerService.findCandidateGenesForEach(uris.keySet(), taxon);
    if (timer.getTime() > 500) {
        GeneSetSearchImpl.log.info("Find phenotype genes done at " + timer.getTime() + "ms");
    }
    // Iterate entries directly rather than looking every key up a second time.
    for (Map.Entry<String, Collection<? extends GeneValueObject>> entry : genes.entrySet()) {
        Collection<? extends GeneValueObject> gvos = entry.getValue();
        if (gvos == null || gvos.isEmpty()) {
            continue;
        }
        CharacteristicValueObject phenotype = uris.get(entry.getKey());
        // Taxon details are read from the first gene of the collection.
        GeneValueObject firstGene = gvos.iterator().next();
        Collection<Long> geneIds = EntityUtils.getIds(gvos);
        GeneSetValueObject transientGeneSet = new GeneSetValueObject();
        transientGeneSet.setName(this.uri2phenoID(phenotype));
        transientGeneSet.setDescription(phenotype.getValue());
        transientGeneSet.setGeneIds(geneIds);
        transientGeneSet.setTaxonId(firstGene.getTaxonId());
        transientGeneSet.setTaxonName(firstGene.getTaxonCommonName());
        results.add(transientGeneSet);
    }
    if (timer.getTime() > 1000) {
        // Report the gene sets actually returned; phenotypes without genes were skipped above.
        GeneSetSearchImpl.log.info("Loaded " + results.size() + " phenotype gene sets for query " + phenotypeQuery + " in " + timer.getTime() + "ms");
    }
    return results;
}
Also used : CharacteristicValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject) HashMap(java.util.HashMap) StopWatch(org.apache.commons.lang3.time.StopWatch) GeneValueObject(ubic.gemma.model.genome.gene.GeneValueObject) Collection(java.util.Collection) GeneSetValueObject(ubic.gemma.model.genome.gene.GeneSetValueObject) HashSet(java.util.HashSet)

Example 2 with CharacteristicValueObject

use of ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject in project Gemma by PavlidisLab.

The method countOccurrences of the class OntologyServiceImpl.

/**
 * Looks up the characteristics that use the value URIs of the given search results and tallies them into
 * {@code previouslyUsedInSystem}: an existing entry has its occurrence count incremented, otherwise a new value
 * object (category cleared, flagged as already present in the database) is added with one occurrence.
 *
 * @param searchResults          value objects whose value URIs are looked up
 * @param previouslyUsedInSystem map updated in place, keyed by the result of {@code foundValueKey}
 */
// Possible external use
@SuppressWarnings({ "unused", "WeakerAccess" })
public void countOccurrences(Collection<CharacteristicValueObject> searchResults, Map<String, CharacteristicValueObject> previouslyUsedInSystem) {
    StopWatch stopWatch = new StopWatch();
    stopWatch.start();

    Set<String> valueUris = new HashSet<>();
    for (CharacteristicValueObject searchResult : searchResults) {
        valueUris.add(searchResult.getValueUri());
    }

    Collection<Characteristic> matchingCharacteristics = characteristicService.findByUri(valueUris);
    for (Characteristic matched : matchingCharacteristics) {
        // Tally the number of usages per key; see bug 3897.
        String usageKey = this.foundValueKey(matched);
        if (!previouslyUsedInSystem.containsKey(usageKey)) {
            if (OntologyServiceImpl.log.isDebugEnabled()) {
                OntologyServiceImpl.log.debug("saw " + usageKey + " (" + usageKey + ")");
            }
            CharacteristicValueObject firstUsage = new CharacteristicValueObject(matched);
            // Category and category URI are cleared so usages are not counted separately by category.
            firstUsage.setCategory(null);
            firstUsage.setCategoryUri(null);
            firstUsage.setAlreadyPresentInDatabase(true);
            firstUsage.incrementOccurrenceCount();
            previouslyUsedInSystem.put(usageKey, firstUsage);
        } else {
            previouslyUsedInSystem.get(usageKey).incrementOccurrenceCount();
        }
    }

    boolean shouldReport = OntologyServiceImpl.log.isDebugEnabled() || (stopWatch.getTime() > 100 && !previouslyUsedInSystem.isEmpty());
    if (shouldReport) {
        OntologyServiceImpl.log.info("found " + previouslyUsedInSystem.size() + " matching characteristics used in the database" + " in " + stopWatch.getTime() + " ms " + " Filtered from initial set of " + matchingCharacteristics.size());
    }
}
Also used : CharacteristicValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject) Characteristic(ubic.gemma.model.common.description.Characteristic) VocabCharacteristic(ubic.gemma.model.common.description.VocabCharacteristic) StopWatch(org.apache.commons.lang3.time.StopWatch) ConcurrentHashSet(org.compass.core.util.concurrent.ConcurrentHashSet)

Example 3 with CharacteristicValueObject

use of ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject in project Gemma by PavlidisLab.

The method countOccurrences of the class OntologyServiceImpl.

/**
 * Finds the characteristics whose value matches the query string and tallies them into
 * {@code previouslyUsedInSystem}: an existing entry has its occurrence count incremented, otherwise a new value
 * object (category cleared, flagged as already present in the database) is added with one occurrence.
 *
 * @param queryString            value passed to the characteristic lookup
 * @param previouslyUsedInSystem map updated in place, keyed by the result of {@code foundValueKey}
 */
private void countOccurrences(String queryString, Map<String, CharacteristicValueObject> previouslyUsedInSystem) {
    StopWatch stopWatch = new StopWatch();
    stopWatch.start();

    Collection<Characteristic> matchingCharacteristics = characteristicService.findByValue(queryString);

    /*
     * These entries let the web interface flag terms that Gemma already uses. Duplicates collapse onto one key
     * (capitalization is meant to be ignored) and the category never takes part in the grouping.
     */
    for (Characteristic matched : matchingCharacteristics) {
        // Tally the number of usages per key; see bug 3897.
        String usageKey = this.foundValueKey(matched);
        if (!previouslyUsedInSystem.containsKey(usageKey)) {
            if (OntologyServiceImpl.log.isDebugEnabled()) {
                OntologyServiceImpl.log.debug("saw " + usageKey + " (" + usageKey + ") for " + matched);
            }
            CharacteristicValueObject firstUsage = new CharacteristicValueObject(matched);
            // Category and category URI are cleared so usages are not counted separately by category.
            firstUsage.setCategory(null);
            firstUsage.setCategoryUri(null);
            firstUsage.setAlreadyPresentInDatabase(true);
            firstUsage.incrementOccurrenceCount();
            previouslyUsedInSystem.put(usageKey, firstUsage);
        } else {
            previouslyUsedInSystem.get(usageKey).incrementOccurrenceCount();
        }
    }

    boolean shouldReport = OntologyServiceImpl.log.isDebugEnabled() || (stopWatch.getTime() > 100 && !previouslyUsedInSystem.isEmpty());
    if (shouldReport) {
        OntologyServiceImpl.log.info("found " + previouslyUsedInSystem.size() + " matching characteristics used in the database" + " in " + stopWatch.getTime() + " ms " + " Filtered from initial set of " + matchingCharacteristics.size());
    }
}
Also used : CharacteristicValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject) Characteristic(ubic.gemma.model.common.description.Characteristic) VocabCharacteristic(ubic.gemma.model.common.description.VocabCharacteristic) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 4 with CharacteristicValueObject

use of ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject in project Gemma by PavlidisLab.

The method sort of the class OntologyServiceImpl.

/**
 * Merges the already-used results with the other results into a single de-duplicated collection, arranged in
 * three consecutive groups so that exact matches come first.
 *
 * @param alreadyUsedResults items already in the system, keyed by their found-value key; free-text entries
 *                           (no value URI) used fewer than two times are left out of the result
 * @param otherResults       other results; each is expected to have a value URI, and any whose key was
 *                           already seen is skipped
 * @param searchTerm         the query, used to decide which group a result belongs to
 * @return the exact-match group, then the starts-with group, then the remaining results, each group sorted
 *         individually
 */
private Collection<CharacteristicValueObject> sort(Map<String, CharacteristicValueObject> alreadyUsedResults, Collection<CharacteristicValueObject> otherResults, String searchTerm) {
    /*
     * Organize the list into 3 parts. Want to get the exact match showing up on top
     */
    List<CharacteristicValueObject> sortedResultsExact = new ArrayList<>();
    List<CharacteristicValueObject> sortedResultsStartsWith = new ArrayList<>();
    List<CharacteristicValueObject> sortedResultsBottom = new ArrayList<>();
    Set<String> foundValues = new HashSet<>();
    for (Map.Entry<String, CharacteristicValueObject> entry : alreadyUsedResults.entrySet()) {
        // Map keys are unique, so no duplicate check is needed in this pass. The key is still recorded,
        // even for entries skipped below, so the second pass does not re-add the same term.
        foundValues.add(entry.getKey());
        CharacteristicValueObject c = entry.getValue();
        // don't show singletons of free-text terms.
        if (c.getValueUri() == null && c.getNumTimesUsed() < 2) {
            continue;
        }
        this.addToAppropriateList(searchTerm, sortedResultsExact, sortedResultsStartsWith, sortedResultsBottom, c);
    }
    for (CharacteristicValueObject c : otherResults) {
        assert c.getValueUri() != null;
        // Set.add returns false when the key is already present, i.e. the term was seen before.
        if (!foundValues.add(this.foundValueKey(c))) {
            continue;
        }
        this.addToAppropriateList(searchTerm, sortedResultsExact, sortedResultsStartsWith, sortedResultsBottom, c);
    }
    this.sort(sortedResultsExact);
    this.sort(sortedResultsStartsWith);
    this.sort(sortedResultsBottom);
    // Presize to what is actually returned; foundValues also counts the skipped singletons.
    List<CharacteristicValueObject> sortedTerms = new ArrayList<>(sortedResultsExact.size() + sortedResultsStartsWith.size() + sortedResultsBottom.size());
    sortedTerms.addAll(sortedResultsExact);
    sortedTerms.addAll(sortedResultsStartsWith);
    sortedTerms.addAll(sortedResultsBottom);
    return sortedTerms;
}
Also used : CharacteristicValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject) ConcurrentHashSet(org.compass.core.util.concurrent.ConcurrentHashSet)

Example 5 with CharacteristicValueObject

use of ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject in project Gemma by PavlidisLab.

The method findCharacteristicsFromOntology of the class OntologyServiceImpl.

/**
 * Searches the ontology services for terms matching the query and returns, as value objects, those whose URI
 * has no entry in the supplied database map.
 *
 * @param searchQuery                            the text passed to each ontology service's term search
 * @param useNeuroCartaOntology                  if true only the NIFSTD, FMA and OBI services are searched;
 *                                               otherwise all configured ontology services are used
 * @param characteristicFromDatabaseWithValueUri characteristics already found in the database, keyed by value
 *                                               URI; terms whose URI maps to a non-null entry are skipped
 * @return value objects (lower-cased label, term URI) for the ontology terms not already in the map
 */
private Collection<CharacteristicValueObject> findCharacteristicsFromOntology(String searchQuery, boolean useNeuroCartaOntology, Map<String, CharacteristicValueObject> characteristicFromDatabaseWithValueUri) {
    Collection<CharacteristicValueObject> characteristicsFromOntology = new HashSet<>();
    // in neurocarta we don't need to search all Ontologies
    Collection<AbstractOntologyService> ontologyServicesToUse;
    if (useNeuroCartaOntology) {
        ontologyServicesToUse = new HashSet<>();
        ontologyServicesToUse.add(this.nifstdOntologyService);
        ontologyServicesToUse.add(this.fmaOntologyService);
        ontologyServicesToUse.add(this.obiService);
    } else {
        ontologyServicesToUse = this.ontologyServices;
    }
    // search all Ontology
    for (AbstractOntologyService ontologyService : ontologyServicesToUse) {
        Collection<OntologyTerm> ontologyTerms = ontologyService.findTerm(searchQuery);
        for (OntologyTerm ontologyTerm : ontologyTerms) {
            // if the ontology term wasn't already found in the database
            if (characteristicFromDatabaseWithValueUri.get(ontologyTerm.getUri()) != null) {
                continue;
            }
            String label = ontologyTerm.getLabel();
            if (label == null) {
                // A term without a label cannot be lower-cased; skip it instead of throwing.
                continue;
            }
            // Locale.ROOT keeps the lower-casing independent of the server's default locale.
            // NOTE(review): -1L looks like a placeholder id for a non-persisted value object; confirm.
            CharacteristicValueObject phenotype = new CharacteristicValueObject(-1L, label.toLowerCase(java.util.Locale.ROOT), ontologyTerm.getUri());
            characteristicsFromOntology.add(phenotype);
        }
    }
    return characteristicsFromOntology;
}
Also used : CharacteristicValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject) OntologyTerm(ubic.basecode.ontology.model.OntologyTerm) ConcurrentHashSet(org.compass.core.util.concurrent.ConcurrentHashSet)

Aggregations

CharacteristicValueObject (ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject): 17; StopWatch (org.apache.commons.lang3.time.StopWatch): 6; ConcurrentHashSet (org.compass.core.util.concurrent.ConcurrentHashSet): 5; Gene (ubic.gemma.model.genome.Gene): 5; OntologyTerm (ubic.basecode.ontology.model.OntologyTerm): 3; SearchResult (ubic.gemma.core.search.SearchResult): 3; Characteristic (ubic.gemma.model.common.description.Characteristic): 3; Test (org.junit.Test): 2; BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest): 2; ExpressionExperimentSet (ubic.gemma.model.analysis.expression.ExpressionExperimentSet): 2; PhenotypeAssociation (ubic.gemma.model.association.phenotype.PhenotypeAssociation): 2; VocabCharacteristic (ubic.gemma.model.common.description.VocabCharacteristic): 2; SearchSettingsValueObject (ubic.gemma.model.common.search.SearchSettingsValueObject): 2; GeneValueObject (ubic.gemma.model.genome.gene.GeneValueObject): 2; GeneEvidenceValueObject (ubic.gemma.model.genome.gene.phenotype.valueObject.GeneEvidenceValueObject): 2; BioSequenceValueObject (ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject): 2; InvocationTargetException (java.lang.reflect.InvocationTargetException): 1; Method (java.lang.reflect.Method): 1; ArrayList (java.util.ArrayList): 1; Collection (java.util.Collection): 1