Search in sources :

Example 1 with TissueSample

use of gov.nih.nci.ctd2.dashboard.model.TissueSample in project nci-ctd2-dashboard by CBIIT.

the class DashboardDaoImpl method ontologySearch.

/*
 * Ontology search over all terms of a query string.
 *
 * The query string is split into terms by parseWords; each term (with double
 * quotes removed) is searched by ontologySearchOneTerm. The returned
 * SearchResults holds the merged subject results and, when more than one term
 * is given, the observations that are common to all terms.
 *
 * To get observation 'search' results, i.e. the intersection concept, the
 * implementation of ontology search will be much more complex. Ideally it would
 * be better to completely separate the observation part, but to avoid repeating
 * the actual hierarchical searching, embedding observations here is the best
 * choice. Although the previous implementation is better for searching
 * subjects, to cover the observation parts we have no choice but to
 * compromise the clarity here.
 *
 * Other points of consideration for the purpose of observation 'search': (1)
 * the ontologySearch covers the original non-ontology subject results (in
 * principle) (2) the subjects other than TissueSample and ECO term are neither
 * affected nor covered by ontology search so it is not consistent.
 */
@Override
public SearchResults ontologySearch(String queryString) {
    final String[] searchTerms = parseWords(queryString);
    Set<Integer> observationsIntersection = null;
    Set<SubjectResult> subject_result = null;
    final int termCount = searchTerms.length;
    if (termCount == 0) {
        // no terms at all: return empty results instead of failing on searchTerms[0]
        subject_result = new HashSet<SubjectResult>();
    } else if (termCount == 1) {
        // prevent wasting time finding observations
        subject_result = new HashSet<SubjectResult>(ontologySearchOneTerm(searchTerms[0].replace("\"", ""), null));
    } else {
        boolean first = true;
        Map<SubjectResult, Integer> subjectResultMap = new HashMap<SubjectResult, Integer>();
        for (String oneTerm : searchTerms) {
            oneTerm = oneTerm.replace("\"", "");
            log.debug("ontology search term:" + oneTerm);
            Set<Integer> observations = new HashSet<Integer>();
            List<SubjectResult> oneTermList = ontologySearchOneTerm(oneTerm, observations);
            for (SubjectResult s : oneTermList) {
                // HashMap.put keeps the existing key instance when an equal key is
                // already present, so the old entry is removed first. That way the
                // instance carrying the updated matchNumber is the one that ends up
                // in the key set returned below.
                Integer matchNumber = subjectResultMap.remove(s);
                if (matchNumber != null) {
                    s.matchNumber = matchNumber + 1;
                }
                subjectResultMap.put(s, s.getMatchNumber());
            }
            if (first) {
                observationsIntersection = observations;
                first = false;
            } else {
                // keep only the observations matched by every term so far
                observationsIntersection.retainAll(observations);
            }
        }
        subject_result = subjectResultMap.keySet();
    }
    SearchResults searchResults = new SearchResults();
    if (subject_result.size() > maxNumberOfSearchResults) {
        // remember the full size, then keep only the top results
        searchResults.oversized = subject_result.size();
        List<SubjectResult> limited = subject_result.stream().sorted(new SearchResultComparator()).limit(maxNumberOfSearchResults).collect(Collectors.toList());
        searchResults.subject_result = limited;
        log.debug("size after limiting: " + limited.size());
    } else {
        searchResults.subject_result = new ArrayList<SubjectResult>(subject_result);
    }
    if (observationsIntersection != null) {
        searchResults.observation_result = observationsIntersection.stream().map(id -> this.getEntityById(Observation.class, id)).collect(Collectors.toList());
        log.debug("size of observation intersection: " + observationsIntersection.size());
    }
    return searchResults;
}
Also used : HashMap(java.util.HashMap) SearchResults(gov.nih.nci.ctd2.dashboard.util.SearchResults) BigInteger(java.math.BigInteger) SubjectResult(gov.nih.nci.ctd2.dashboard.util.SubjectResult) Observation(gov.nih.nci.ctd2.dashboard.model.Observation) HashSet(java.util.HashSet)

Example 2 with TissueSample

use of gov.nih.nci.ctd2.dashboard.model.TissueSample in project nci-ctd2-dashboard by CBIIT.

the class DashboardDaoImpl method ontologySearchOneTerm.

/**
 * Runs the ontology search for a single term.
 *
 * Tissue samples are looked up by the codes returned from
 * ontologySearchDiseaseContext, then ECO terms are looked up by the codes
 * returned from ontologySearchExperimentalEvidence. Each hit is wrapped in a
 * SubjectResult.
 *
 * @param oneTerm      the search term
 * @param observations if not null, the IDs of the observations related to the
 *                     matched subjects are added to this set; pass null to skip
 *                     the observation lookups entirely
 * @return the subject results for tissue samples followed by ECO terms
 */
private List<SubjectResult> ontologySearchOneTerm(String oneTerm, final Set<Integer> observations) {
    long t1 = System.currentTimeMillis();
    List<Integer> list = ontologySearchDiseaseContext(oneTerm);
    List<SubjectResult> entities = new ArrayList<SubjectResult>();
    Session session = getSession();
    // try/finally so the session is closed even when a query or a helper throws
    try {
        @SuppressWarnings("unchecked") org.hibernate.query.Query<TissueSample> query = session.createQuery("from TissueSampleImpl where code = :code");
        List<Integer> subjectIds = new ArrayList<Integer>();
        for (Integer i : list) {
            query.setParameter("code", i);
            TissueSample result = null;
            try {
                result = query.getSingleResult();
            } catch (NoResultException e) {
                // a code without a stored tissue sample is skipped, not treated as an error
                log.info("Tissue sample not available for code " + i);
                continue;
            }
            int observationNumber = observationCountForTissueSample(i);
            int centerCount = centerCountForTissueSample(i);
            Set<String> roles = getRolesForSubjectId(result.getId());
            SubjectResult x = new SubjectResult(result, observationNumber, centerCount, 1, roles);
            entities.add(x);
            subjectIds.add(result.getId());
        }
        long t2 = System.currentTimeMillis();
        log.debug("disease context ontology search for '" + oneTerm + "' took " + (t2 - t1) + " milliseconds");
        log.debug("tissue sample results count: " + entities.size());
        if (observations != null) {
            List<Integer> observationIds = observationIdsForSubjectIds(subjectIds);
            observations.addAll(observationIds);
            log.debug("observations count: " + observations.size());
        }
        List<ECOTerm> ecoterms = findECOTerms(oneTerm);
        List<Integer> eco_list = ontologySearchExperimentalEvidence(ecoterms);
        int eco_list_size = eco_list.size();
        log.debug("eco list size:" + eco_list_size);
        if (eco_list_size > 0) {
            @SuppressWarnings("unchecked") org.hibernate.query.Query<ECOTerm> query2 = session.createQuery("FROM ECOTermImpl WHERE code in (:codes)");
            // numeric codes are turned into the zero-padded "ECO:nnnnnnn" form used in the query
            List<String> codes = eco_list.stream().map(x -> String.format("ECO:%07d", x)).collect(Collectors.toList());
            query2.setParameterList("codes", codes);
            List<ECOTerm> list2 = query2.list();
            for (ECOTerm x : list2) {
                int observationNumber = observationCountForEcoCode(x.getCode());
                int centerCount = centerCountForEcoCode(x.getCode());
                // no matchNumber, no roles
                SubjectResult subjectResult = new SubjectResult(x, observationNumber, centerCount, null, null);
                entities.add(subjectResult);
                if (observations != null) {
                    List<Integer> observationIdsForOneECOTerm = observationIdsForEcoCode(x.getCode());
                    observations.addAll(observationIdsForOneECOTerm);
                }
            }
        }
        log.debug("tissue sample results count after getting ECO term: " + entities.size());
        if (observations != null) {
            log.debug("observations count after getting ECO term: " + observations.size());
            searchSubjectsToUpdateObservations(oneTerm, observations);
            log.debug("observationIds count after getting other subjects: " + observations.size());
        }
        return entities;
    } finally {
        session.close();
    }
}
Also used : Query(org.apache.lucene.search.Query) ObservedEvidenceRole(gov.nih.nci.ctd2.dashboard.model.ObservedEvidenceRole) Transcript(gov.nih.nci.ctd2.dashboard.model.Transcript) Arrays(java.util.Arrays) DashboardDao(gov.nih.nci.ctd2.dashboard.dao.DashboardDao) ObservedSubjectRole(gov.nih.nci.ctd2.dashboard.model.ObservedSubjectRole) Cacheable(org.springframework.cache.annotation.Cacheable) NoResultException(javax.persistence.NoResultException) XRefItem(gov.nih.nci.ctd2.dashboard.api.XRefItem) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) SubmissionCenter(gov.nih.nci.ctd2.dashboard.model.SubmissionCenter) Matcher(java.util.regex.Matcher) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) Map(java.util.Map) DashboardEntity(gov.nih.nci.ctd2.dashboard.model.DashboardEntity) CriteriaBuilder(javax.persistence.criteria.CriteriaBuilder) BigInteger(java.math.BigInteger) TissueSample(gov.nih.nci.ctd2.dashboard.model.TissueSample) Organism(gov.nih.nci.ctd2.dashboard.model.Organism) CriteriaQuery(javax.persistence.criteria.CriteriaQuery) ScrollableResults(org.hibernate.ScrollableResults) SearchResults(gov.nih.nci.ctd2.dashboard.util.SearchResults) SubjectResult(gov.nih.nci.ctd2.dashboard.util.SubjectResult) Collection(java.util.Collection) SessionFactory(org.hibernate.SessionFactory) Set(java.util.Set) FullTextQuery(org.hibernate.search.FullTextQuery) CellSample(gov.nih.nci.ctd2.dashboard.model.CellSample) Compound(gov.nih.nci.ctd2.dashboard.model.Compound) ECOTerm(gov.nih.nci.ctd2.dashboard.model.ECOTerm) SubjectWithOrganism(gov.nih.nci.ctd2.dashboard.model.SubjectWithOrganism) Collectors(java.util.stream.Collectors) TissueSampleImpl(gov.nih.nci.ctd2.dashboard.impl.TissueSampleImpl) ObservationItem(gov.nih.nci.ctd2.dashboard.api.ObservationItem) List(java.util.List) Xref(gov.nih.nci.ctd2.dashboard.model.Xref) EcoBrowse(gov.nih.nci.ctd2.dashboard.util.EcoBrowse) CompoundImpl(gov.nih.nci.ctd2.dashboard.impl.CompoundImpl) 
ScrollMode(org.hibernate.ScrollMode) ObservedSubject(gov.nih.nci.ctd2.dashboard.model.ObservedSubject) DashboardEntityImpl(gov.nih.nci.ctd2.dashboard.impl.DashboardEntityImpl) Gene(gov.nih.nci.ctd2.dashboard.model.Gene) Pattern(java.util.regex.Pattern) LogFactory(org.apache.commons.logging.LogFactory) ShRna(gov.nih.nci.ctd2.dashboard.model.ShRna) Observation(gov.nih.nci.ctd2.dashboard.model.Observation) ParseException(org.apache.lucene.queryparser.classic.ParseException) FullTextSession(org.hibernate.search.FullTextSession) SubmissionImpl(gov.nih.nci.ctd2.dashboard.impl.SubmissionImpl) Subject(gov.nih.nci.ctd2.dashboard.model.Subject) Submission(gov.nih.nci.ctd2.dashboard.model.Submission) Session(org.hibernate.Session) HashMap(java.util.HashMap) EvidenceItem(gov.nih.nci.ctd2.dashboard.api.EvidenceItem) Evidence(gov.nih.nci.ctd2.dashboard.model.Evidence) TypedQuery(javax.persistence.TypedQuery) ObservationTemplate(gov.nih.nci.ctd2.dashboard.model.ObservationTemplate) SubjectWithSummaries(gov.nih.nci.ctd2.dashboard.util.SubjectWithSummaries) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Search(org.hibernate.search.Search) Summary(gov.nih.nci.ctd2.dashboard.util.Summary) SubjectImpl(gov.nih.nci.ctd2.dashboard.impl.SubjectImpl) AnimalModel(gov.nih.nci.ctd2.dashboard.model.AnimalModel) ObservedEvidence(gov.nih.nci.ctd2.dashboard.model.ObservedEvidence) WordCloudEntry(gov.nih.nci.ctd2.dashboard.util.WordCloudEntry) SubjectWithOrganismImpl(gov.nih.nci.ctd2.dashboard.impl.SubjectWithOrganismImpl) ObservationURIsAndTiers(gov.nih.nci.ctd2.dashboard.util.ObservationURIsAndTiers) FlushMode(org.hibernate.FlushMode) SubjectItem(gov.nih.nci.ctd2.dashboard.api.SubjectItem) Hierarchy(gov.nih.nci.ctd2.dashboard.util.Hierarchy) Annotation(gov.nih.nci.ctd2.dashboard.model.Annotation) ObservationTemplateImpl(gov.nih.nci.ctd2.dashboard.impl.ObservationTemplateImpl) Synonym(gov.nih.nci.ctd2.dashboard.model.Synonym) Protein(gov.nih.nci.ctd2.dashboard.model.Protein) 
Log(org.apache.commons.logging.Log) DashboardFactory(gov.nih.nci.ctd2.dashboard.model.DashboardFactory) ArrayList(java.util.ArrayList) NoResultException(javax.persistence.NoResultException) BigInteger(java.math.BigInteger) TissueSample(gov.nih.nci.ctd2.dashboard.model.TissueSample) SubjectResult(gov.nih.nci.ctd2.dashboard.util.SubjectResult) ECOTerm(gov.nih.nci.ctd2.dashboard.model.ECOTerm) FullTextSession(org.hibernate.search.FullTextSession) Session(org.hibernate.Session)

Example 3 with TissueSample

use of gov.nih.nci.ctd2.dashboard.model.TissueSample in project nci-ctd2-dashboard by CBIIT.

the class TissueSampleSynonymsDataFieldSetMapper method mapFieldSet.

/**
 * Attaches the synonym found in the field set to the tissue sample that was
 * registered in tissueSampleMap under the same NCI thesaurus code.
 *
 * @param fieldSet the field set providing the thesaurus code and the synonym
 * @return the tissue sample the synonym was added to, or null if no tissue
 *         sample is registered under that code
 */
public TissueSample mapFieldSet(FieldSet fieldSet) throws BindException {
    final TissueSample knownSample = tissueSampleMap.get(fieldSet.readString(NCI_THESAURUS_CODE));
    if (knownSample == null) {
        // unknown code: nothing to attach the synonym to
        return null;
    }
    final Synonym newSynonym = dashboardFactory.create(Synonym.class);
    final String synonymName = fieldSet.readString(SYNONYM);
    newSynonym.setDisplayName(synonymName);
    knownSample.getSynonyms().add(newSynonym);
    return knownSample;
}
Also used : TissueSample(gov.nih.nci.ctd2.dashboard.model.TissueSample) Synonym(gov.nih.nci.ctd2.dashboard.model.Synonym)

Example 4 with TissueSample

use of gov.nih.nci.ctd2.dashboard.model.TissueSample in project nci-ctd2-dashboard by CBIIT.

the class TissueSampleDataWriter method execute.

/**
 * Assigns a stable URL (prefix "tissue" plus the NCI thesaurus ID) to every
 * tissue sample in tissueSampleMap and saves all of them in batches.
 *
 * @return always RepeatStatus.FINISHED
 */
public RepeatStatus execute(StepContribution arg0, ChunkContext arg1) throws Exception {
    final ArrayList<DashboardEntity> toSave = new ArrayList<DashboardEntity>();
    for (final TissueSample sample : tissueSampleMap.values()) {
        final String thesaurusId = getNCIThesaurusID(sample);
        sample.setStableURL(new StableURL().createURLWithPrefix("tissue", thesaurusId));
        toSave.add(sample);
    }
    dashboardDao.batchSave(toSave, batchSize);
    return RepeatStatus.FINISHED;
}
Also used : StableURL(gov.nih.nci.ctd2.dashboard.util.StableURL)

Example 5 with TissueSample

use of gov.nih.nci.ctd2.dashboard.model.TissueSample in project nci-ctd2-dashboard by CBIIT.

the class TissueSampleTermsDataFieldSetMapper method mapFieldSet.

/**
 * Creates a tissue sample from one field set: display name, numeric code,
 * xref to the NCI thesaurus and xrefs to the parent thesaurus entries. The
 * sample is also registered in tissueSampleMap under its thesaurus code.
 *
 * @param fieldSet the field set providing name, thesaurus code and parents
 * @return the newly created tissue sample
 */
public TissueSample mapFieldSet(FieldSet fieldSet) throws BindException {
    TissueSample tissueSample = dashboardFactory.create(TissueSample.class);
    tissueSample.setDisplayName(fieldSet.readString(TISSUE_SAMPLE_NAME));
    // create xref to NCI thesaurus
    String nciThesaurusCode = fieldSet.readString(NCI_THESAURUS_CODE);
    if (!nciThesaurusCode.isEmpty()) {
        // The numeric code is the thesaurus code without its first character.
        // This must happen inside the emptiness check: substring(1) on an empty
        // string throws, which used to make the check unreachable for empty codes.
        tissueSample.setCode(Integer.parseInt(nciThesaurusCode.substring(1)));
        addXrefToSample(tissueSample, nciThesaurusCode, NCI_THESAURUS_DATABASE);
    }
    // create xref to NCI thesaurus (parent)
    String parents = fieldSet.readString(PARENTS);
    if (!parents.isEmpty()) {
        // parents are given as a ';'-separated list of thesaurus codes
        for (String parentThesaurusCode : parents.split(";")) {
            addXrefToSample(tissueSample, parentThesaurusCode, NCI_PARENT_THESAURUS_DATABASE);
        }
    }
    tissueSampleMap.put(nciThesaurusCode, tissueSample);
    return tissueSample;
}
Also used : TissueSample(gov.nih.nci.ctd2.dashboard.model.TissueSample)

Aggregations

TissueSample (gov.nih.nci.ctd2.dashboard.model.TissueSample)4 AnimalModel (gov.nih.nci.ctd2.dashboard.model.AnimalModel)2 Annotation (gov.nih.nci.ctd2.dashboard.model.Annotation)2 CellSample (gov.nih.nci.ctd2.dashboard.model.CellSample)2 Compound (gov.nih.nci.ctd2.dashboard.model.Compound)2 Gene (gov.nih.nci.ctd2.dashboard.model.Gene)2 Observation (gov.nih.nci.ctd2.dashboard.model.Observation)2 ObservationTemplate (gov.nih.nci.ctd2.dashboard.model.ObservationTemplate)2 Organism (gov.nih.nci.ctd2.dashboard.model.Organism)2 Protein (gov.nih.nci.ctd2.dashboard.model.Protein)2 ShRna (gov.nih.nci.ctd2.dashboard.model.ShRna)2 Subject (gov.nih.nci.ctd2.dashboard.model.Subject)2 Synonym (gov.nih.nci.ctd2.dashboard.model.Synonym)2 Xref (gov.nih.nci.ctd2.dashboard.model.Xref)2 SearchResults (gov.nih.nci.ctd2.dashboard.util.SearchResults)2 SubjectResult (gov.nih.nci.ctd2.dashboard.util.SubjectResult)2 BigInteger (java.math.BigInteger)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 EvidenceItem (gov.nih.nci.ctd2.dashboard.api.EvidenceItem)1