Search in sources :

Example 1 with SubmissionCenter

use of gov.nih.nci.ctd2.dashboard.model.SubmissionCenter in project nci-ctd2-dashboard by CBIIT.

the class DashboardDaoImpl method search.

/**
 * Searches subjects, submissions and ECO terms for the given query string.
 *
 * <p>The trimmed query is split into terms by {@code parseWords}; every term is
 * passed to {@code searchSingleTerm}, which fills the subject and submission
 * maps. Subject results and ECO-term results are then combined into one list,
 * which is cut down to {@code maxNumberOfSearchResults} entries (ordered by
 * {@code SearchResultComparator}) when it is larger than that limit. When more
 * than one term was given, the observations common to all terms are reported
 * as well.
 *
 * @param queryString the raw query text; it is trimmed before use
 * @return the populated {@link SearchResults}
 */
@Override
@Cacheable(value = "searchCache")
public SearchResults search(String queryString) {
    queryString = queryString.trim();
    final String[] searchTerms = parseWords(queryString);
    log.debug("search terms: " + String.join(",", searchTerms));

    Map<Subject, Integer> subjects = new HashMap<>();
    Map<Submission, Integer> submissions = new HashMap<>();
    for (String term : searchTerms) {
        searchSingleTerm(term, subjects, submissions);
    }

    SearchResults searchResults = new SearchResults();

    // one result row per matched submission
    List<SearchResults.SubmissionResult> submissionRows = new ArrayList<>();
    for (Submission submission : submissions.keySet()) {
        ObservationTemplate template = submission.getObservationTemplate();
        submissionRows.add(new SearchResults.SubmissionResult(submission.getStableURL(), submission.getSubmissionDate(), template.getDescription(), template.getTier(), template.getSubmissionCenter().getDisplayName(), submission.getId(), findObservationsBySubmission(submission).size(), template.getIsSubmissionStory()));
    }
    searchResults.submission_result = submissionRows;

    // search term -> observations reachable through anything that term matched
    Map<String, Set<Observation>> observationsByTerm = new HashMap<>();
    List<SubjectResult> subjectResults = new ArrayList<>();

    for (Map.Entry<Subject, Integer> matched : subjects.entrySet()) {
        final Subject subject = matched.getKey();
        Set<Observation> observations = new HashSet<>();
        Set<SubmissionCenter> centers = new HashSet<>();
        Set<String> roles = new HashSet<>();
        for (ObservedSubject observedSubject : findObservedSubjectBySubject(subject)) {
            Observation observation = observedSubject.getObservation();
            observations.add(observation);
            centers.add(observation.getSubmission().getObservationTemplate().getSubmissionCenter());
            roles.add(observedSubject.getObservedSubjectRole().getSubjectRole().getDisplayName());
        }
        SubjectResult row = new SubjectResult(subject, observations.size(), centers.size(), matched.getValue(), roles);
        for (String term : searchTerms) {
            if (matchSubject(term, subject)) {
                observationsByTerm.computeIfAbsent(term, key -> new HashSet<Observation>()).addAll(observations);
            }
        }
        subjectResults.add(row);
    }

    /* search ECO terms */
    for (ECOTerm ecoterm : findECOTerms(queryString)) {
        List<Integer> observationIds = observationIdsForEcoCode(ecoterm.getCode());
        int observationNumber = observationIds.size();
        if (observationNumber == 0) {
            continue;
        }
        // no matchNumber, no roles
        subjectResults.add(new SubjectResult(ecoterm, observationNumber, centerCount(ecoterm.getCode()), null, null));
        Set<Observation> observations = new HashSet<>();
        for (Integer obid : observationIds) {
            observations.add(getEntityById(Observation.class, obid));
        }
        for (String term : searchTerms) {
            if (ecoterm.containsTerm(term)) {
                observationsByTerm.computeIfAbsent(term, key -> new HashSet<Observation>()).addAll(observations);
            }
        }
    }

    /*
     * Limit the size. This should be done more efficiently while the list is
     * being built up. Because the limit needs to be based on 'match number'
     * ranking, which depends on all terms, an efficient algorithm is not
     * obvious. Unfortunately, we also have to do this after processing all
     * results because we need (in fact more often) observation numbers as
     * well in ranking. TODO
     */
    if (subjectResults.size() > maxNumberOfSearchResults) {
        searchResults.oversized = subjectResults.size();
        subjectResults = subjectResults.stream().sorted(new SearchResultComparator()).limit(maxNumberOfSearchResults).collect(Collectors.toList());
        log.debug("size after limiting: " + subjectResults.size());
    }
    searchResults.subject_result = subjectResults;

    // the intersection only makes sense for multi-term queries
    if (searchTerms.length <= 1) {
        return searchResults;
    }

    // add intersection of observations
    Set<Observation> common = observationsByTerm.get(searchTerms[0]);
    if (common == null) {
        log.debug("no observation for " + searchTerms[0]);
        return searchResults;
    }
    log.debug("set0 size=" + common.size());
    for (int i = 1; i < searchTerms.length; i++) {
        Set<Observation> forTerm = observationsByTerm.get(searchTerms[i]);
        if (forTerm == null) {
            log.debug("... no observation for " + searchTerms[i]);
            return searchResults;
        }
        log.debug("set " + i + " size=" + forTerm.size());
        common.retainAll(forTerm);
    }

    // 'common' is now the intersection
    if (common.isEmpty()) {
        log.debug("no intersection of observations");
    }
    if (common.size() > maxNumberOfSearchResults) {
        searchResults.oversized_observations = common.size();
        // no particular ranking is enforced when limiting
        common = common.stream().limit(maxNumberOfSearchResults).collect(Collectors.toSet());
        log.debug("observation results count after limiting: " + common.size());
    }
    searchResults.observation_result = new ArrayList<Observation>(common);
    return searchResults;
}
Also used : Query(org.apache.lucene.search.Query) ObservedEvidenceRole(gov.nih.nci.ctd2.dashboard.model.ObservedEvidenceRole) Transcript(gov.nih.nci.ctd2.dashboard.model.Transcript) Arrays(java.util.Arrays) DashboardDao(gov.nih.nci.ctd2.dashboard.dao.DashboardDao) ObservedSubjectRole(gov.nih.nci.ctd2.dashboard.model.ObservedSubjectRole) Cacheable(org.springframework.cache.annotation.Cacheable) NoResultException(javax.persistence.NoResultException) XRefItem(gov.nih.nci.ctd2.dashboard.api.XRefItem) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) SubmissionCenter(gov.nih.nci.ctd2.dashboard.model.SubmissionCenter) Matcher(java.util.regex.Matcher) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) Map(java.util.Map) DashboardEntity(gov.nih.nci.ctd2.dashboard.model.DashboardEntity) CriteriaBuilder(javax.persistence.criteria.CriteriaBuilder) BigInteger(java.math.BigInteger) TissueSample(gov.nih.nci.ctd2.dashboard.model.TissueSample) Organism(gov.nih.nci.ctd2.dashboard.model.Organism) CriteriaQuery(javax.persistence.criteria.CriteriaQuery) ScrollableResults(org.hibernate.ScrollableResults) SearchResults(gov.nih.nci.ctd2.dashboard.util.SearchResults) SubjectResult(gov.nih.nci.ctd2.dashboard.util.SubjectResult) Collection(java.util.Collection) SessionFactory(org.hibernate.SessionFactory) Set(java.util.Set) FullTextQuery(org.hibernate.search.FullTextQuery) CellSample(gov.nih.nci.ctd2.dashboard.model.CellSample) Compound(gov.nih.nci.ctd2.dashboard.model.Compound) ECOTerm(gov.nih.nci.ctd2.dashboard.model.ECOTerm) SubjectWithOrganism(gov.nih.nci.ctd2.dashboard.model.SubjectWithOrganism) Collectors(java.util.stream.Collectors) TissueSampleImpl(gov.nih.nci.ctd2.dashboard.impl.TissueSampleImpl) ObservationItem(gov.nih.nci.ctd2.dashboard.api.ObservationItem) List(java.util.List) Xref(gov.nih.nci.ctd2.dashboard.model.Xref) EcoBrowse(gov.nih.nci.ctd2.dashboard.util.EcoBrowse) CompoundImpl(gov.nih.nci.ctd2.dashboard.impl.CompoundImpl) 
ScrollMode(org.hibernate.ScrollMode) ObservedSubject(gov.nih.nci.ctd2.dashboard.model.ObservedSubject) DashboardEntityImpl(gov.nih.nci.ctd2.dashboard.impl.DashboardEntityImpl) Gene(gov.nih.nci.ctd2.dashboard.model.Gene) Pattern(java.util.regex.Pattern) LogFactory(org.apache.commons.logging.LogFactory) ShRna(gov.nih.nci.ctd2.dashboard.model.ShRna) Observation(gov.nih.nci.ctd2.dashboard.model.Observation) ParseException(org.apache.lucene.queryparser.classic.ParseException) FullTextSession(org.hibernate.search.FullTextSession) SubmissionImpl(gov.nih.nci.ctd2.dashboard.impl.SubmissionImpl) Subject(gov.nih.nci.ctd2.dashboard.model.Subject) Submission(gov.nih.nci.ctd2.dashboard.model.Submission) Session(org.hibernate.Session) HashMap(java.util.HashMap) EvidenceItem(gov.nih.nci.ctd2.dashboard.api.EvidenceItem) Evidence(gov.nih.nci.ctd2.dashboard.model.Evidence) TypedQuery(javax.persistence.TypedQuery) ObservationTemplate(gov.nih.nci.ctd2.dashboard.model.ObservationTemplate) SubjectWithSummaries(gov.nih.nci.ctd2.dashboard.util.SubjectWithSummaries) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Search(org.hibernate.search.Search) Summary(gov.nih.nci.ctd2.dashboard.util.Summary) SubjectImpl(gov.nih.nci.ctd2.dashboard.impl.SubjectImpl) AnimalModel(gov.nih.nci.ctd2.dashboard.model.AnimalModel) ObservedEvidence(gov.nih.nci.ctd2.dashboard.model.ObservedEvidence) WordCloudEntry(gov.nih.nci.ctd2.dashboard.util.WordCloudEntry) SubjectWithOrganismImpl(gov.nih.nci.ctd2.dashboard.impl.SubjectWithOrganismImpl) ObservationURIsAndTiers(gov.nih.nci.ctd2.dashboard.util.ObservationURIsAndTiers) FlushMode(org.hibernate.FlushMode) SubjectItem(gov.nih.nci.ctd2.dashboard.api.SubjectItem) Hierarchy(gov.nih.nci.ctd2.dashboard.util.Hierarchy) Annotation(gov.nih.nci.ctd2.dashboard.model.Annotation) ObservationTemplateImpl(gov.nih.nci.ctd2.dashboard.impl.ObservationTemplateImpl) Synonym(gov.nih.nci.ctd2.dashboard.model.Synonym) Protein(gov.nih.nci.ctd2.dashboard.model.Protein) 
Log(org.apache.commons.logging.Log) DashboardFactory(gov.nih.nci.ctd2.dashboard.model.DashboardFactory) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SearchResults(gov.nih.nci.ctd2.dashboard.util.SearchResults) ObservationTemplate(gov.nih.nci.ctd2.dashboard.model.ObservationTemplate) ECOTerm(gov.nih.nci.ctd2.dashboard.model.ECOTerm) HashSet(java.util.HashSet) Submission(gov.nih.nci.ctd2.dashboard.model.Submission) ObservedSubject(gov.nih.nci.ctd2.dashboard.model.ObservedSubject) Subject(gov.nih.nci.ctd2.dashboard.model.Subject) BigInteger(java.math.BigInteger) SubmissionCenter(gov.nih.nci.ctd2.dashboard.model.SubmissionCenter) SubjectResult(gov.nih.nci.ctd2.dashboard.util.SubjectResult) Observation(gov.nih.nci.ctd2.dashboard.model.Observation) ObservedSubject(gov.nih.nci.ctd2.dashboard.model.ObservedSubject) Cacheable(org.springframework.cache.annotation.Cacheable)

Example 2 with SubmissionCenter

use of gov.nih.nci.ctd2.dashboard.model.SubmissionCenter in project nci-ctd2-dashboard by CBIIT.

the class SubjectScorer method scoreAllRoles.

/**
 * Rebuilds all role-based subject scores.
 *
 * <p>Every existing {@code SubjectWithSummaries} is deleted first. Then, for
 * each {@code SubjectRole}, the observed subjects carrying that role are
 * folded into one summary per subject: maximum tier, number of observations,
 * number of distinct submission centers, and a score that is the sum over
 * centers of the highest tier seen for that center. All summaries are saved
 * in a single {@code batchSave} call at the end.
 */
@Transactional
public void scoreAllRoles() {
    log.info("Removing all role-based scores...");
    List<SubjectWithSummaries> staleScores = dashboardDao.findEntities(SubjectWithSummaries.class);
    for (SubjectWithSummaries stale : staleScores) {
        dashboardDao.delete(stale);
    }
    log.info("Removed " + staleScores.size() + " old scores.");

    log.info("Re-scoring all roles...");
    List<SubjectWithSummaries> allSummaries = new ArrayList<>();
    for (SubjectRole subjectRole : dashboardDao.findEntities(SubjectRole.class)) {
        String keyword = subjectRole.getDisplayName();
        log.info("Scoring subject with role: " + keyword);

        // per-role bookkeeping, all keyed by subject
        Map<Subject, SubjectWithSummaries> summaryBySubject = new HashMap<>();
        Map<Subject, Set<SubmissionCenter>> centersBySubject = new HashMap<>();
        Map<Subject, Map<SubmissionCenter, Integer>> topTierByCenter = new HashMap<>();

        for (ObservedSubject observedSubject : dashboardDao.findObservedSubjectByRole(keyword)) {
            Subject subject = observedSubject.getSubject();
            ObservationTemplate template = observedSubject.getObservation().getSubmission().getObservationTemplate();
            SubmissionCenter center = template.getSubmissionCenter();
            Integer tier = template.getTier();

            SubjectWithSummaries summary = summaryBySubject.get(subject);
            if (summary != null) {
                // subject already seen for this role: update the running figures
                summary.setMaxTier(Math.max(summary.getMaxTier(), tier));
                summary.setNumberOfObservations(summary.getNumberOfObservations() + 1);
                Set<SubmissionCenter> seenCenters = centersBySubject.get(subject);
                seenCenters.add(center);
                summary.setNumberOfSubmissionCenters(seenCenters.size());
                summary.addSubmission(tier, center.getId());
                // keep only the highest tier per center
                topTierByCenter.get(subject).merge(center, tier, Integer::max);
            } else {
                // first observation of this subject under this role
                summary = new SubjectWithSummaries();
                summary.setRole(keyword);
                summary.setSubject(subject);
                summary.setMaxTier(tier);
                summary.setNumberOfObservations(1);
                summary.setNumberOfSubmissionCenters(1);
                summary.addSubmission(tier, center.getId());

                Set<SubmissionCenter> seenCenters = new HashSet<>();
                seenCenters.add(center);
                centersBySubject.put(subject, seenCenters);
                summaryBySubject.put(subject, summary);

                Map<SubmissionCenter, Integer> tiers = new HashMap<>();
                tiers.put(center, tier);
                topTierByCenter.put(subject, tiers);
            }
        }

        Collection<SubjectWithSummaries> perRole = summaryBySubject.values();
        for (SubjectWithSummaries summary : perRole) {
            // score = sum of the best tier reached at each center
            int totalScore = topTierByCenter.get(summary.getSubject()).values().stream().mapToInt(Integer::intValue).sum();
            summary.setScore(totalScore);
        }
        allSummaries.addAll(perRole);
        log.info("Done scoring role: " + keyword);
    }
    dashboardDao.batchSave(allSummaries, 0);
    log.info("Done scoring all roles...");
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Subject(gov.nih.nci.ctd2.dashboard.model.Subject) ObservedSubject(gov.nih.nci.ctd2.dashboard.model.ObservedSubject) SubmissionCenter(gov.nih.nci.ctd2.dashboard.model.SubmissionCenter) SubjectRole(gov.nih.nci.ctd2.dashboard.model.SubjectRole) ObservationTemplate(gov.nih.nci.ctd2.dashboard.model.ObservationTemplate) ObservedSubject(gov.nih.nci.ctd2.dashboard.model.ObservedSubject) HashSet(java.util.HashSet) Transactional(org.springframework.transaction.annotation.Transactional)

Example 3 with SubmissionCenter

use of gov.nih.nci.ctd2.dashboard.model.SubmissionCenter in project nci-ctd2-dashboard by CBIIT.

the class CentersAPI method getCenters.

/**
 * Returns all submission centers as a JSON array.
 *
 * <p>For every {@code SubmissionCenter} the stable URLs of its submissions
 * are collected, and the principal investigator is read from the observation
 * template of the center's first submission.
 *
 * @return the serialized centers with status {@code OK}, or an empty-bodied
 *         response with status {@code NOT_FOUND} when serialization fails
 */
@Transactional
@RequestMapping(method = { RequestMethod.GET }, headers = "Accept=application/json")
public ResponseEntity<String> getCenters() {
    HttpHeaders headers = new HttpHeaders();
    headers.add("Content-Type", "application/json; charset=utf-8");
    List<SubmissionCenter> centers = dashboardDao.findEntities(SubmissionCenter.class);
    APICenter[] apiCenters = new APICenter[centers.size()];
    int centerIndex = 0;
    for (SubmissionCenter center : centers) {
        List<Submission> submissions = dashboardDao.findSubmissionBySubmissionCenter(center);
        String[] ss = submissions.stream().map(x -> x.getStableURL()).toArray(String[]::new);
        // design flaw: the PI is stored on the observation template, so it is
        // taken from the first submission. A center without any submission has
        // no template to read it from; leave the PI unset instead of failing
        // the whole request with an IndexOutOfBoundsException.
        // NOTE(review): assumes APICenter accepts a null PI - confirm.
        String pi = submissions.isEmpty() ? null : submissions.get(0).getObservationTemplate().getPrincipalInvestigator();
        apiCenters[centerIndex++] = new APICenter(center, pi, ss);
    }
    log.debug("ready to serialize");
    JSONSerializer jsonSerializer = new JSONSerializer().transform(new ImplTransformer(), Class.class).transform(new SimpleDateTransformer(), Date.class).transform(new ExcludeTransformer(), void.class);
    String json = "{}";
    try {
        json = jsonSerializer.exclude("class").exclude("submissions.class").deepSerialize(apiCenters);
    } catch (Exception e) {
        // report through the class logger rather than stderr; the status code
        // is kept as NOT_FOUND so existing clients see the same response
        log.error("failed to serialize submission centers", e);
        return new ResponseEntity<String>(headers, HttpStatus.NOT_FOUND);
    }
    return new ResponseEntity<String>(json, headers, HttpStatus.OK);
}
Also used : ExcludeTransformer(gov.nih.nci.ctd2.dashboard.api.ExcludeTransformer) DashboardDao(gov.nih.nci.ctd2.dashboard.dao.DashboardDao) Date(java.util.Date) HttpHeaders(org.springframework.http.HttpHeaders) Submission(gov.nih.nci.ctd2.dashboard.model.Submission) Autowired(org.springframework.beans.factory.annotation.Autowired) RequestMapping(org.springframework.web.bind.annotation.RequestMapping) RequestMethod(org.springframework.web.bind.annotation.RequestMethod) Controller(org.springframework.stereotype.Controller) SubmissionCenter(gov.nih.nci.ctd2.dashboard.model.SubmissionCenter) HttpStatus(org.springframework.http.HttpStatus) List(java.util.List) JSONSerializer(flexjson.JSONSerializer) Log(org.apache.commons.logging.Log) ResponseEntity(org.springframework.http.ResponseEntity) SimpleDateTransformer(gov.nih.nci.ctd2.dashboard.api.SimpleDateTransformer) LogFactory(org.apache.commons.logging.LogFactory) ImplTransformer(gov.nih.nci.ctd2.dashboard.util.ImplTransformer) Transactional(org.springframework.transaction.annotation.Transactional) HttpHeaders(org.springframework.http.HttpHeaders) Submission(gov.nih.nci.ctd2.dashboard.model.Submission) ImplTransformer(gov.nih.nci.ctd2.dashboard.util.ImplTransformer) Date(java.util.Date) SubmissionCenter(gov.nih.nci.ctd2.dashboard.model.SubmissionCenter) ResponseEntity(org.springframework.http.ResponseEntity) SimpleDateTransformer(gov.nih.nci.ctd2.dashboard.api.SimpleDateTransformer) ExcludeTransformer(gov.nih.nci.ctd2.dashboard.api.ExcludeTransformer) JSONSerializer(flexjson.JSONSerializer) Transactional(org.springframework.transaction.annotation.Transactional) RequestMapping(org.springframework.web.bind.annotation.RequestMapping)

Example 4 with SubmissionCenter

use of gov.nih.nci.ctd2.dashboard.model.SubmissionCenter in project nci-ctd2-dashboard by CBIIT.

the class ControlledVocabularyPerColumnWriter method write.

/**
 * Queues the entities referenced by each controlled-vocabulary item and
 * passes the queued list to {@code dashboardDao.batchSave}.
 *
 * <p>{@code entityCache} records what has already been queued, so each role,
 * observation template and observed role is queued at most once. A submission
 * center is queued only when no center with the same display name is found
 * through {@code dashboardDao}.
 *
 * <p>NOTE(review): {@code entities} is created lazily and is not cleared in
 * this method, so it keeps growing across calls - confirm that this is
 * intended or handled elsewhere.
 *
 * @param items the controlled-vocabulary items to store
 * @throws Exception declared by the signature; nothing is thrown explicitly here
 */
public void write(List<? extends ControlledVocabulary> items) throws Exception {
    if (entityCache == null) {
        entityCache = new HashSet<DashboardEntity>();
    }
    if (entities == null) {
        entities = new ArrayList<DashboardEntity>();
    }

    for (ControlledVocabulary vocabulary : items) {
        // column name is only used for the log line below
        final String observedRoleName;
        if (vocabulary.observedRole instanceof ObservedSubjectRole) {
            observedRoleName = ((ObservedSubjectRole) vocabulary.observedRole).getColumnName();
        } else if (vocabulary.observedRole instanceof ObservedEvidenceRole) {
            observedRoleName = ((ObservedEvidenceRole) vocabulary.observedRole).getColumnName();
        } else {
            observedRoleName = "";
        }
        log.info("Storing Observed Role: " + observedRoleName);

        // Set.add reports whether the element was new, i.e. not queued before
        if (entityCache.add(vocabulary.role)) {
            entities.add(vocabulary.role);
        }

        ObservationTemplate ot = (ObservationTemplate) vocabulary.observationTemplate;
        SubmissionCenter center = ot.getSubmissionCenter();
        if (!entityCache.contains(center)) {
            // queue the center only if the DAO does not find one with this name
            boolean knownToDao = dashboardDao.findSubmissionCenterByName(center.getDisplayName()) != null;
            if (!knownToDao) {
                entities.add(center);
            }
            entityCache.add(center);
        }

        if (entityCache.add(vocabulary.observationTemplate)) {
            entities.add(vocabulary.observationTemplate);
        }
        if (entityCache.add(vocabulary.observedRole)) {
            entities.add(vocabulary.observedRole);
        }
    }
    dashboardDao.batchSave(entities, batchSize);
}
Also used : SubmissionCenter(gov.nih.nci.ctd2.dashboard.model.SubmissionCenter) ArrayList(java.util.ArrayList) ObservedSubjectRole(gov.nih.nci.ctd2.dashboard.model.ObservedSubjectRole) ObservationTemplate(gov.nih.nci.ctd2.dashboard.model.ObservationTemplate) ObservedEvidenceRole(gov.nih.nci.ctd2.dashboard.model.ObservedEvidenceRole) HashSet(java.util.HashSet)

Aggregations

SubmissionCenter (gov.nih.nci.ctd2.dashboard.model.SubmissionCenter)4 ObservationTemplate (gov.nih.nci.ctd2.dashboard.model.ObservationTemplate)3 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 DashboardDao (gov.nih.nci.ctd2.dashboard.dao.DashboardDao)2 ObservedEvidenceRole (gov.nih.nci.ctd2.dashboard.model.ObservedEvidenceRole)2 ObservedSubject (gov.nih.nci.ctd2.dashboard.model.ObservedSubject)2 ObservedSubjectRole (gov.nih.nci.ctd2.dashboard.model.ObservedSubjectRole)2 Subject (gov.nih.nci.ctd2.dashboard.model.Subject)2 Submission (gov.nih.nci.ctd2.dashboard.model.Submission)2 HashMap (java.util.HashMap)2 JSONSerializer (flexjson.JSONSerializer)1 EvidenceItem (gov.nih.nci.ctd2.dashboard.api.EvidenceItem)1 ExcludeTransformer (gov.nih.nci.ctd2.dashboard.api.ExcludeTransformer)1 ObservationItem (gov.nih.nci.ctd2.dashboard.api.ObservationItem)1 SimpleDateTransformer (gov.nih.nci.ctd2.dashboard.api.SimpleDateTransformer)1 SubjectItem (gov.nih.nci.ctd2.dashboard.api.SubjectItem)1 XRefItem (gov.nih.nci.ctd2.dashboard.api.XRefItem)1 CompoundImpl (gov.nih.nci.ctd2.dashboard.impl.CompoundImpl)1 DashboardEntityImpl (gov.nih.nci.ctd2.dashboard.impl.DashboardEntityImpl)1