Search in sources :

Example 1 with SearchHitsIterator

use of org.springframework.data.elasticsearch.core.SearchHitsIterator in project snowstorm by IHTSDO.

the class AdminOperationsService method reduceVersionsReplaced.

/**
 * Prunes the versions-replaced sets stored on a branch.
 * <p>
 * For every domain entity type, each id recorded in the branch's versions-replaced map is checked against
 * documents whose {@code path} starts with {@code branch + "/"} (i.e. documents from a child branch). Ids that
 * match such a document are removed from the set for that type. The map is then set back on the branch
 * and the branch is saved.
 *
 * @param branch path of the branch whose versions-replaced map should be reduced
 * @return the number of ids removed, keyed by entity type; a type with nothing removed has no entry
 */
public Map<Class, AtomicLong> reduceVersionsReplaced(String branch) {
    final Branch latestBranch = branchService.findBranchOrThrow(branch);
    final Map<Class, AtomicLong> removedCountByType = new HashMap<>();
    final Map<String, Set<String>> versionsReplaced = latestBranch.getVersionsReplaced();
    final Map<Class<? extends DomainEntity>, ElasticsearchRepository> typeRepositories = domainEntityConfiguration.getAllTypeRepositoryMap();
    // Only documents stored underneath this branch path are of interest
    final String childPathPrefix = branch + "/";

    for (Class<? extends DomainEntity> entityType : typeRepositories.keySet()) {
        final Set<String> replacedIds = versionsReplaced.getOrDefault(entityType.getSimpleName(), Collections.emptySet());
        final Set<String> childBranchIds = new HashSet<>();

        // Look the ids up in batches of 1,000
        for (List<String> idBatch : Iterables.partition(replacedIds, 1_000)) {
            final NativeSearchQueryBuilder searchBuilder = new NativeSearchQueryBuilder()
                    .withQuery(boolQuery()
                            .must(prefixQuery("path", childPathPrefix))
                            .must(termsQuery("_id", idBatch)))
                    .withPageable(ConceptService.LARGE_PAGE);
            try (final SearchHitsIterator<? extends DomainEntity> hits = elasticsearchTemplate.searchForStream(searchBuilder.build(), entityType)) {
                while (hits.hasNext()) {
                    // The id of the search hit is collected
                    childBranchIds.add(hits.next().getId());
                }
            }
        }

        if (childBranchIds.isEmpty()) {
            continue;
        }
        replacedIds.removeAll(childBranchIds);
        removedCountByType.computeIfAbsent(entityType, key -> new AtomicLong(0)).addAndGet(childBranchIds.size());
    }

    latestBranch.setVersionsReplaced(versionsReplaced);
    branchRepository.save(latestBranch);
    return removedCountByType;
}
Also used : Iterables.partition(com.google.common.collect.Iterables.partition) Iterables(com.google.common.collect.Iterables) SortBuilders(org.elasticsearch.search.sort.SortBuilders) java.util(java.util) LoggerFactory(org.slf4j.LoggerFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) Query(org.springframework.data.elasticsearch.core.query.Query) QueryBuilders(org.elasticsearch.index.query.QueryBuilders) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) DescriptionHelper(org.snomed.snowstorm.core.util.DescriptionHelper) Lists(com.google.common.collect.Lists) IdentifierService(org.snomed.snowstorm.core.data.services.identifier.IdentifierService) Service(org.springframework.stereotype.Service) Long2ObjectOpenHashMap(it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap) Pair(org.springframework.data.util.Pair) ElasticsearchRepository(org.springframework.data.elasticsearch.repository.ElasticsearchRepository) Commit(io.kaicode.elasticvc.domain.Commit) BranchRepository(io.kaicode.elasticvc.repositories.BranchRepository) VersionControlHelper(io.kaicode.elasticvc.api.VersionControlHelper) DomainEntity(io.kaicode.elasticvc.domain.DomainEntity) Logger(org.slf4j.Logger) UpdateQuery(org.springframework.data.elasticsearch.core.query.UpdateQuery) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) LARGE_PAGE(io.kaicode.elasticvc.api.ComponentService.LARGE_PAGE) String.format(java.lang.String.format) AtomicLong(java.util.concurrent.atomic.AtomicLong) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet) BranchCriteria(io.kaicode.elasticvc.api.BranchCriteria) ElasticsearchOperations(org.springframework.data.elasticsearch.core.ElasticsearchOperations) SearchHitsIterator(org.springframework.data.elasticsearch.core.SearchHitsIterator) RF2Constants(org.snomed.snowstorm.core.rf2.RF2Constants) Branch(io.kaicode.elasticvc.domain.Branch) 
NativeSearchQueryBuilder(org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder) BranchService(io.kaicode.elasticvc.api.BranchService) org.snomed.snowstorm.core.data.domain(org.snomed.snowstorm.core.data.domain) BufferedReader(java.io.BufferedReader) NativeSearchQuery(org.springframework.data.elasticsearch.core.query.NativeSearchQuery) Long.parseLong(java.lang.Long.parseLong) PathUtil(io.kaicode.elasticvc.api.PathUtil) SearchLanguagesConfiguration(org.snomed.snowstorm.config.SearchLanguagesConfiguration) Document(org.springframework.data.elasticsearch.core.document.Document) InputStream(java.io.InputStream) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet) Long2ObjectOpenHashMap(it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap) AtomicLong(java.util.concurrent.atomic.AtomicLong) Branch(io.kaicode.elasticvc.domain.Branch) NativeSearchQueryBuilder(org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder) ElasticsearchRepository(org.springframework.data.elasticsearch.repository.ElasticsearchRepository) DomainEntity(io.kaicode.elasticvc.domain.DomainEntity) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet)

Example 2 with SearchHitsIterator

use of org.springframework.data.elasticsearch.core.SearchHitsIterator in project snowstorm by IHTSDO.

the class DescriptionService method findDescriptionsWithAggregations.

/**
 * Searches descriptions on a branch and returns one page of results together with bucket aggregations.
 * <p>
 * Steps, in order:
 * <ol>
 *   <li>Resolve the matching description and concept ids via {@code findDescriptionAndConceptIds}.</li>
 *   <li>Build the {@code semanticTags} aggregation over active FSN descriptions of the matched concepts. When
 *       semantic tag criteria are given, the matched concepts are also narrowed to those having a matching FSN.</li>
 *   <li>Build the {@code membership} aggregation over active refset members referencing the remaining concepts.</li>
 *   <li>Run the final paged description search, which adds the {@code module} and {@code language} aggregations.</li>
 * </ol>
 *
 * @param path        branch path to search on
 * @param criteria    description search criteria; its semantic tag / semantic tags are read here
 * @param pageRequest page of descriptions to return
 * @return the requested page of descriptions with all collected aggregations merged
 * @throws TooCostlyException declared by this method; presumably raised by {@code findDescriptionAndConceptIds} — confirm
 */
public PageWithBucketAggregations<Description> findDescriptionsWithAggregations(String path, DescriptionCriteria criteria, PageRequest pageRequest) throws TooCostlyException {
    TimerUtil timer = new TimerUtil("Search", Level.INFO, 5, new TimerUtil("Search DEBUG", Level.DEBUG));
    final BranchCriteria branchCriteria = versionControlHelper.getBranchCriteria(path);
    timer.checkpoint("Build branch criteria");

    // Fetch all matching description and concept ids
    // ids of concepts where all descriptions and concept criteria are met
    // Collections.emptySet() is the type-safe form of the raw Collections.EMPTY_SET constant
    DescriptionMatches descriptionMatches = findDescriptionAndConceptIds(criteria, Collections.emptySet(), branchCriteria, timer);
    BoolQueryBuilder descriptionQuery = descriptionMatches.getDescriptionQuery();

    // Apply concept and acceptability filtering for final search
    BoolQueryBuilder descriptionFilter = boolQuery();
    descriptionFilter.must(termsQuery(Description.Fields.DESCRIPTION_ID, descriptionMatches.getMatchedDescriptionIds()));

    // Start fetching aggregations..
    List<Aggregation> allAggregations = new ArrayList<>();
    Set<Long> conceptIds = descriptionMatches.getMatchedConceptIds();

    // Fetch FSN semantic tag aggregation
    BoolQueryBuilder fsnClauses = boolQuery();
    String semanticTag = criteria.getSemanticTag();
    Set<String> semanticTags = criteria.getSemanticTags();
    boolean semanticTagFiltering = !Strings.isNullOrEmpty(semanticTag) || !CollectionUtils.isEmpty(semanticTags);
    // Union of the single semantic tag and the semantic tag set from the criteria
    Set<String> allSemanticTags = new HashSet<>();
    if (semanticTagFiltering) {
        if (!Strings.isNullOrEmpty(semanticTag)) {
            allSemanticTags.add(semanticTag);
        }
        if (!CollectionUtils.isEmpty(semanticTags)) {
            allSemanticTags.addAll(semanticTags);
        }
        fsnClauses.must(termsQuery(Description.Fields.TAG, allSemanticTags));
    }
    NativeSearchQueryBuilder fsnQueryBuilder = new NativeSearchQueryBuilder()
            .withQuery(fsnClauses
                    .must(branchCriteria.getEntityBranchCriteria(Description.class))
                    .must(termsQuery(Description.Fields.ACTIVE, true))
                    .must(termsQuery(Description.Fields.TYPE_ID, Concepts.FSN))
                    .must(termsQuery(Description.Fields.CONCEPT_ID, conceptIds)))
            .addAggregation(AggregationBuilders.terms("semanticTags").field(Description.Fields.TAG).size(AGGREGATION_SEARCH_SIZE));
    if (!semanticTagFiltering) {
        // Only the aggregation is needed, so request the smallest page
        fsnQueryBuilder.withPageable(PAGE_OF_ONE);
        SearchHits<Description> semanticTagResults = elasticsearchTemplate.search(fsnQueryBuilder.build(), Description.class);
        allAggregations.add(semanticTagResults.getAggregations().get("semanticTags"));
        timer.checkpoint("Semantic tag aggregation");
    } else {
        // Apply semantic tag filter
        fsnQueryBuilder.withPageable(LARGE_PAGE).withFields(Description.Fields.CONCEPT_ID);
        Set<Long> conceptSemanticTagMatches = new LongOpenHashSet();
        if (allSemanticTags.size() == 1) {
            // Single tag: stream every hit and report the number of distinct matching concepts as the only bucket
            try (SearchHitsIterator<Description> descriptionStream = elasticsearchTemplate.searchForStream(fsnQueryBuilder.build(), Description.class)) {
                descriptionStream.forEachRemaining(hit -> conceptSemanticTagMatches.add(parseLong(hit.getContent().getConceptId())));
            }
            allAggregations.add(new SimpleAggregation("semanticTags", allSemanticTags.iterator().next(), conceptSemanticTagMatches.size()));
        } else {
            // Several tags: the aggregation from the search response is used.
            // NOTE(review): search(...) is a paged call, so concept ids beyond LARGE_PAGE would presumably not be collected here — confirm this is acceptable
            SearchHits<Description> semanticTagResults = elasticsearchTemplate.search(fsnQueryBuilder.build(), Description.class);
            semanticTagResults.stream().forEach(hit -> conceptSemanticTagMatches.add(parseLong(hit.getContent().getConceptId())));
            allAggregations.add(semanticTagResults.getAggregations().get("semanticTags"));
        }
        // From here on only concepts with a matching FSN semantic tag are considered
        conceptIds = conceptSemanticTagMatches;
    }

    // Fetch concept refset membership aggregation
    SearchHits<ReferenceSetMember> membershipResults = elasticsearchTemplate.search(new NativeSearchQueryBuilder()
            .withQuery(boolQuery()
                    .must(branchCriteria.getEntityBranchCriteria(ReferenceSetMember.class))
                    .must(termsQuery(ReferenceSetMember.Fields.ACTIVE, true))
                    .filter(termsQuery(ReferenceSetMember.Fields.REFERENCED_COMPONENT_ID, conceptIds)))
            .withPageable(PAGE_OF_ONE)
            .addAggregation(AggregationBuilders.terms("membership").field(ReferenceSetMember.Fields.REFSET_ID))
            .build(), ReferenceSetMember.class);
    allAggregations.add(membershipResults.getAggregations().get("membership"));
    timer.checkpoint("Concept refset membership aggregation");

    // Perform final paged description search with description property aggregations
    descriptionFilter.must(termsQuery(Description.Fields.CONCEPT_ID, conceptIds));
    final NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder()
            .withQuery(descriptionQuery.filter(descriptionFilter))
            .addAggregation(AggregationBuilders.terms("module").field(Description.Fields.MODULE_ID))
            .addAggregation(AggregationBuilders.terms("language").field(Description.Fields.LANGUAGE_CODE))
            .withPageable(pageRequest);
    NativeSearchQuery aggregateQuery = addTermSort(queryBuilder.build());
    aggregateQuery.setTrackTotalHits(true);
    SearchHits<Description> descriptions = elasticsearchTemplate.search(aggregateQuery, Description.class);
    allAggregations.addAll(descriptions.getAggregations().asList());
    timer.checkpoint("Fetch descriptions including module and language aggregations");
    timer.finish();

    // Merge aggregations
    return PageWithBucketAggregationsFactory.createPage(descriptions, new Aggregations(allAggregations), pageRequest);
}
Also used : SortBuilders(org.elasticsearch.search.sort.SortBuilders) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) LoggerFactory(org.slf4j.LoggerFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) QueryBuilders(org.elasticsearch.index.query.QueryBuilders) StringUtils(org.apache.commons.lang3.StringUtils) DescriptionHelper(org.snomed.snowstorm.core.util.DescriptionHelper) ParsedStringTerms(org.elasticsearch.search.aggregations.bucket.terms.ParsedStringTerms) Sort(org.springframework.data.domain.Sort) Commit(io.kaicode.elasticvc.domain.Commit) VersionControlHelper(io.kaicode.elasticvc.api.VersionControlHelper) Config(org.snomed.snowstorm.config.Config) Aggregations(org.elasticsearch.search.aggregations.Aggregations) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) PageRequest(org.springframework.data.domain.PageRequest) Page(org.springframework.data.domain.Page) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) BranchCriteria(io.kaicode.elasticvc.api.BranchCriteria) CollectionUtils(org.springframework.util.CollectionUtils) DescriptionCriteria(org.snomed.snowstorm.core.data.services.pojo.DescriptionCriteria) BranchService(io.kaicode.elasticvc.api.BranchService) org.snomed.snowstorm.core.data.domain(org.snomed.snowstorm.core.data.domain) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) LongLinkedOpenHashSet(it.unimi.dsi.fastutil.longs.LongLinkedOpenHashSet) TimerUtil(org.snomed.snowstorm.core.util.TimerUtil) PageImpl(org.springframework.data.domain.PageImpl) SearchLanguagesConfiguration(org.snomed.snowstorm.config.SearchLanguagesConfiguration) Iterables(com.google.common.collect.Iterables) java.util(java.util) CharArraySet(org.apache.lucene.analysis.CharArraySet) 
PageWithBucketAggregations(org.snomed.snowstorm.core.data.services.pojo.PageWithBucketAggregations) SimpleAggregation(org.snomed.snowstorm.core.data.services.pojo.SimpleAggregation) ComponentService(io.kaicode.elasticvc.api.ComponentService) Function(java.util.function.Function) Strings(org.elasticsearch.common.Strings) Value(org.springframework.beans.factory.annotation.Value) Operator(org.elasticsearch.index.query.Operator) IdentifierService(org.snomed.snowstorm.core.data.services.identifier.IdentifierService) Service(org.springframework.stereotype.Service) Aggregation(org.elasticsearch.search.aggregations.Aggregation) SearchHit(org.springframework.data.elasticsearch.core.SearchHit) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) PageWithBucketAggregationsFactory(org.snomed.snowstorm.core.data.services.pojo.PageWithBucketAggregationsFactory) Logger(org.slf4j.Logger) TokenStream(org.apache.lucene.analysis.TokenStream) SearchHits(org.springframework.data.elasticsearch.core.SearchHits) AggregationBuilders(org.elasticsearch.search.aggregations.AggregationBuilders) IOException(java.io.IOException) Long2ObjectLinkedOpenHashMap(it.unimi.dsi.fastutil.longs.Long2ObjectLinkedOpenHashMap) Level(ch.qos.logback.classic.Level) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet) ElasticsearchOperations(org.springframework.data.elasticsearch.core.ElasticsearchOperations) SearchHitsIterator(org.springframework.data.elasticsearch.core.SearchHitsIterator) Branch(io.kaicode.elasticvc.domain.Branch) NativeSearchQueryBuilder(org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder) NativeSearchQuery(org.springframework.data.elasticsearch.core.query.NativeSearchQuery) Long.parseLong(java.lang.Long.parseLong) BranchCriteria(io.kaicode.elasticvc.api.BranchCriteria) Aggregations(org.elasticsearch.search.aggregations.Aggregations) PageWithBucketAggregations(org.snomed.snowstorm.core.data.services.pojo.PageWithBucketAggregations) 
LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) NativeSearchQuery(org.springframework.data.elasticsearch.core.query.NativeSearchQuery) SimpleAggregation(org.snomed.snowstorm.core.data.services.pojo.SimpleAggregation) Aggregation(org.elasticsearch.search.aggregations.Aggregation) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) TimerUtil(org.snomed.snowstorm.core.util.TimerUtil) Long.parseLong(java.lang.Long.parseLong) SimpleAggregation(org.snomed.snowstorm.core.data.services.pojo.SimpleAggregation) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet) NativeSearchQueryBuilder(org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder) LongLinkedOpenHashSet(it.unimi.dsi.fastutil.longs.LongLinkedOpenHashSet) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet)

Example 3 with SearchHitsIterator

use of org.springframework.data.elasticsearch.core.SearchHitsIterator in project snowstorm by IHTSDO.

the class TraceabilityLogService method filterRefsetMembersAndLookupComponentConceptIds.

/**
 * Groups refset members by the id of the concept they belong to.
 * <p>
 * A member is assigned to a concept using, in order of preference: its own concept id, its referenced component
 * id when that is a concept id, or the concept id mapped to its referenced component in
 * {@code componentToConceptIdMap}. Referenced description and relationship ids that are missing from that map
 * are looked up on the commit's branch, and the results are added to the map (the map passed in is modified).
 * Members that still cannot be mapped are reported with an error log entry and left out of the result.
 *
 * @param persistedReferenceSetMembers members to group
 * @param commit                       commit whose branch is used for the lookups and in the error message
 * @param componentToConceptIdMap      known component id to concept id mappings; extended with looked up entries
 * @return refset members keyed by concept id
 */
private Map<Long, List<ReferenceSetMember>> filterRefsetMembersAndLookupComponentConceptIds(Iterable<ReferenceSetMember> persistedReferenceSetMembers, Commit commit, Map<Long, Long> componentToConceptIdMap) {
    final Map<Long, List<ReferenceSetMember>> membersByConcept = new Long2ObjectArrayMap<>();
    final List<ReferenceSetMember> pendingMembers = new ArrayList<>();
    final Set<Long> descriptionIds = new LongOpenHashSet();
    final Set<Long> relationshipIds = new LongOpenHashSet();

    // First pass: place members whose concept is directly known, remember the rest
    for (ReferenceSetMember member : persistedReferenceSetMembers) {
        final String memberConceptId = member.getConceptId();
        if (memberConceptId != null) {
            membersByConcept.computeIfAbsent(parseLong(memberConceptId), key -> new ArrayList<>()).add(member);
            continue;
        }
        final String componentId = member.getReferencedComponentId();
        if (IdentifierService.isConceptId(componentId)) {
            membersByConcept.computeIfAbsent(parseLong(componentId), key -> new ArrayList<>()).add(member);
            continue;
        }
        pendingMembers.add(member);
        if (IdentifierService.isDescriptionId(componentId)) {
            descriptionIds.add(parseLong(componentId));
        } else if (IdentifierService.isRelationshipId(componentId)) {
            relationshipIds.add(parseLong(componentId));
        }
    }

    // Only ids without a known concept need to be fetched
    final Set<Long> missingDescriptionIds = descriptionIds.stream()
            .filter(Predicate.not(componentToConceptIdMap::containsKey))
            .collect(Collectors.toSet());
    final Set<Long> missingRelationshipIds = relationshipIds.stream()
            .filter(Predicate.not(componentToConceptIdMap::containsKey))
            .collect(Collectors.toSet());

    // Branch criteria are built once, and only when at least one lookup will run
    final boolean lookupRequired = !missingDescriptionIds.isEmpty() || !missingRelationshipIds.isEmpty();
    final BranchCriteria branchCriteria = lookupRequired ? versionControlHelper.getBranchCriteria(commit.getBranch()) : null;

    if (!missingDescriptionIds.isEmpty()) {
        for (List<Long> idBatch : Iterables.partition(missingDescriptionIds, CLAUSE_LIMIT)) {
            final NativeSearchQueryBuilder searchBuilder = new NativeSearchQueryBuilder()
                    .withQuery(branchCriteria.getEntityBranchCriteria(Description.class)
                            .must(termsQuery(Description.Fields.DESCRIPTION_ID, idBatch)))
                    .withFields(Description.Fields.DESCRIPTION_ID, Description.Fields.CONCEPT_ID)
                    .withPageable(LARGE_PAGE);
            try (final SearchHitsIterator<Description> hits = elasticsearchTemplate.searchForStream(searchBuilder.build(), Description.class)) {
                while (hits.hasNext()) {
                    final Description description = hits.next().getContent();
                    componentToConceptIdMap.put(parseLong(description.getDescriptionId()), parseLong(description.getConceptId()));
                }
            }
        }
    }

    if (!missingRelationshipIds.isEmpty()) {
        for (List<Long> idBatch : Iterables.partition(missingRelationshipIds, CLAUSE_LIMIT)) {
            final NativeSearchQueryBuilder searchBuilder = new NativeSearchQueryBuilder()
                    .withQuery(branchCriteria.getEntityBranchCriteria(Relationship.class)
                            .must(termsQuery(Relationship.Fields.RELATIONSHIP_ID, idBatch)))
                    .withSourceFilter(new FetchSourceFilter(new String[] { Relationship.Fields.RELATIONSHIP_ID, Relationship.Fields.SOURCE_ID }, new String[] {}))
                    .withPageable(LARGE_PAGE);
            try (final SearchHitsIterator<Relationship> hits = elasticsearchTemplate.searchForStream(searchBuilder.build(), Relationship.class)) {
                while (hits.hasNext()) {
                    final Relationship relationship = hits.next().getContent();
                    // A relationship is attributed to its source concept
                    componentToConceptIdMap.put(parseLong(relationship.getRelationshipId()), parseLong(relationship.getSourceId()));
                }
            }
        }
    }

    // Second pass: place the remaining members using the (now extended) component to concept map
    for (ReferenceSetMember member : pendingMembers) {
        final String componentId = member.getReferencedComponentId();
        final Long resolvedConceptId = componentToConceptIdMap.get(parseLong(componentId));
        if (resolvedConceptId == null) {
            logger.error("Refset member {} with referenced component {} can not be mapped to a concept id for traceability on branch {}", member.getId(), member.getReferencedComponentId(), commit.getBranch().getPath());
            continue;
        }
        membersByConcept.computeIfAbsent(resolvedConceptId, key -> new ArrayList<>()).add(member);
    }
    return membersByConcept;
}
Also used : FetchSourceFilter(org.springframework.data.elasticsearch.core.query.FetchSourceFilter) Iterables(com.google.common.collect.Iterables) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) LoggerFactory(org.slf4j.LoggerFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) BranchMetadataHelper(org.snomed.snowstorm.core.data.services.BranchMetadataHelper) ArrayList(java.util.ArrayList) Value(org.springframework.beans.factory.annotation.Value) Long2ObjectArrayMap(it.unimi.dsi.fastutil.longs.Long2ObjectArrayMap) IdentifierService(org.snomed.snowstorm.core.data.services.identifier.IdentifierService) Service(org.springframework.stereotype.Service) Map(java.util.Map) CREATE_CODE_SYSTEM_VERSION(org.snomed.snowstorm.core.data.services.traceability.Activity.ActivityType.CREATE_CODE_SYSTEM_VERSION) QueryBuilders.termsQuery(org.elasticsearch.index.query.QueryBuilders.termsQuery) Commit(io.kaicode.elasticvc.domain.Commit) VersionControlHelper(io.kaicode.elasticvc.api.VersionControlHelper) Logger(org.slf4j.Logger) Config(org.snomed.snowstorm.config.Config) PersistedComponents(org.snomed.snowstorm.core.data.services.pojo.PersistedComponents) Predicate(java.util.function.Predicate) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Set(java.util.Set) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) SecurityUtil(org.ihtsdo.sso.integration.SecurityUtil) Collectors(java.util.stream.Collectors) CLAUSE_LIMIT(io.kaicode.elasticvc.api.ComponentService.CLAUSE_LIMIT) List(java.util.List) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet) Jackson2ObjectMapperBuilder(org.springframework.http.converter.json.Jackson2ObjectMapperBuilder) CommitListener(io.kaicode.elasticvc.api.CommitListener) ServiceUtil(org.snomed.snowstorm.core.data.services.ServiceUtil) BranchCriteria(io.kaicode.elasticvc.api.BranchCriteria) ElasticsearchOperations(org.springframework.data.elasticsearch.core.ElasticsearchOperations) 
SearchHitsIterator(org.springframework.data.elasticsearch.core.SearchHitsIterator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) Lazy(org.springframework.context.annotation.Lazy) NativeSearchQueryBuilder(org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder) org.snomed.snowstorm.core.data.domain(org.snomed.snowstorm.core.data.domain) CONTENT(io.kaicode.elasticvc.domain.Commit.CommitType.CONTENT) Long.parseLong(java.lang.Long.parseLong) LARGE_PAGE(io.kaicode.elasticvc.api.VersionControlHelper.LARGE_PAGE) BranchCriteria(io.kaicode.elasticvc.api.BranchCriteria) FetchSourceFilter(org.springframework.data.elasticsearch.core.query.FetchSourceFilter) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) ArrayList(java.util.ArrayList) Long2ObjectArrayMap(it.unimi.dsi.fastutil.longs.Long2ObjectArrayMap) Long.parseLong(java.lang.Long.parseLong) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) ArrayList(java.util.ArrayList) List(java.util.List) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet) NativeSearchQueryBuilder(org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder)

Example 4 with SearchHitsIterator

use of org.springframework.data.elasticsearch.core.SearchHitsIterator in project snowstorm by IHTSDO.

the class ImportComponentFactoryImpl method processEntities.

/**
 * Prepares a batch of imported components before they are saved.
 * <ul>
 *   <li>Mark as changed for version control.</li>
 *   <li>Remove if earlier or equal effectiveTime to existing.</li>
 *   <li>Copy release fields from existing.</li>
 * </ul>
 *
 * @param components            components to process; components that should be skipped are removed from this collection
 * @param patchReleaseVersion   effective time for which an existing version may be replaced; {@code -1} allows
 *                              replacing any effective time; {@code null} allows no replacement
 * @param elasticsearchTemplate operations used to search for existing versions
 * @param componentClass        type of the components
 * @param copyReleaseFields     when true, release details are copied from existing released versions onto
 *                              components that have no effective time
 * @param clearEffectiveTimes   when true, effective time and release details are cleared on every component
 */
private <T extends SnomedComponent> void processEntities(Collection<T> components, Integer patchReleaseVersion, ElasticsearchOperations elasticsearchTemplate, Class<T> componentClass, boolean copyReleaseFields, boolean clearEffectiveTimes) {
    final Map<Integer, List<T>> componentsByEffectiveTime = new HashMap<>();
    components.forEach(component -> {
        component.setChanged(true);
        if (clearEffectiveTimes) {
            component.setEffectiveTimeI(null);
            component.setReleased(false);
            component.setReleaseHash(null);
            component.setReleasedEffectiveTime(null);
        }
        final Integer effectiveTimeI = component.getEffectiveTimeI();
        if (effectiveTimeI == null) {
            return;
        }
        componentsByEffectiveTime.computeIfAbsent(effectiveTimeI, key -> new ArrayList<>()).add(component);
        maxEffectiveTimeCollector.add(effectiveTimeI);
    });

    // patchReleaseVersion=-1 is a special case which allows replacing any effectiveTime
    final boolean anyEffectiveTimeReplaceable = patchReleaseVersion != null && patchReleaseVersion.equals(-1);
    if (!anyEffectiveTimeReplaceable) {
        // Effective times are visited in ascending order
        for (Integer effectiveTime : new TreeSet<>(componentsByEffectiveTime.keySet())) {
            // Find component states with an equal or greater effective time
            final boolean replacementOfThisEffectiveTimeAllowed = patchReleaseVersion != null && patchReleaseVersion.equals(effectiveTime);
            final List<T> componentsAtDate = componentsByEffectiveTime.get(effectiveTime);
            final String idFieldAtDate = componentsAtDate.get(0).getIdField();
            final List<String> idsAtDate = componentsAtDate.stream().map(T::getId).collect(Collectors.toList());
            final NativeSearchQueryBuilder existingVersionsSearch = new NativeSearchQueryBuilder()
                    .withQuery(boolQuery()
                            .must(branchCriteriaBeforeOpenCommit.getEntityBranchCriteria(componentClass))
                            .must(termsQuery(idFieldAtDate, idsAtDate))
                            .must(replacementOfThisEffectiveTimeAllowed
                                    ? rangeQuery(SnomedComponent.Fields.EFFECTIVE_TIME).gt(effectiveTime)
                                    : rangeQuery(SnomedComponent.Fields.EFFECTIVE_TIME).gte(effectiveTime)))
                    // Only fetch the id
                    .withFields(idFieldAtDate)
                    .withPageable(LARGE_PAGE);

            int alreadyExistingComponentCount = 0;
            try (SearchHitsIterator<T> existingVersions = elasticsearchTemplate.searchForStream(existingVersionsSearch.build(), componentClass)) {
                while (existingVersions.hasNext()) {
                    // Skip component import
                    // Compared by id only
                    components.remove(existingVersions.next().getContent());
                    alreadyExistingComponentCount++;
                }
            }
            componentTypeSkippedMap.computeIfAbsent(componentClass.getSimpleName(), key -> new AtomicLong()).addAndGet(alreadyExistingComponentCount);
        }
    }

    if (!copyReleaseFields) {
        return;
    }
    final Map<String, T> unreleasedById = components.stream()
            .filter(component -> component.getEffectiveTime() == null)
            .collect(Collectors.toMap(T::getId, Function.identity()));
    if (unreleasedById.isEmpty()) {
        return;
    }

    final String unreleasedIdField = unreleasedById.values().iterator().next().getIdField();
    final NativeSearchQueryBuilder releasedVersionsSearch = new NativeSearchQueryBuilder()
            .withQuery(boolQuery()
                    .must(branchCriteriaBeforeOpenCommit.getEntityBranchCriteria(componentClass))
                    .must(termQuery(SnomedComponent.Fields.RELEASED, true))
                    .filter(termsQuery(unreleasedIdField, unreleasedById.keySet())))
            .withPageable(LARGE_PAGE);
    try (SearchHitsIterator<T> releasedVersions = elasticsearchTemplate.searchForStream(releasedVersionsSearch.build(), componentClass)) {
        while (releasedVersions.hasNext()) {
            final T released = releasedVersions.next().getContent();
            final T unreleased = unreleasedById.get(released.getId());
            // noinspection unchecked
            unreleased.copyReleaseDetails(released);
            unreleased.updateEffectiveTime();
        }
    }
}
Also used : ConceptUpdateHelper(org.snomed.snowstorm.core.data.services.ConceptUpdateHelper) java.util(java.util) LoggerFactory(org.slf4j.LoggerFactory) QueryBuilders(org.elasticsearch.index.query.QueryBuilders) Function(java.util.function.Function) BranchMetadataHelper(org.snomed.snowstorm.core.data.services.BranchMetadataHelper) Entity(io.kaicode.elasticvc.domain.Entity) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Commit(io.kaicode.elasticvc.domain.Commit) VersionControlHelper(io.kaicode.elasticvc.api.VersionControlHelper) Logger(org.slf4j.Logger) Collectors(java.util.stream.Collectors) LARGE_PAGE(io.kaicode.elasticvc.api.ComponentService.LARGE_PAGE) Sets(com.google.common.collect.Sets) ImpotentComponentFactory(org.ihtsdo.otf.snomedboot.factory.ImpotentComponentFactory) ReferenceSetMemberService(org.snomed.snowstorm.core.data.services.ReferenceSetMemberService) AtomicLong(java.util.concurrent.atomic.AtomicLong) BranchCriteria(io.kaicode.elasticvc.api.BranchCriteria) ElasticsearchOperations(org.springframework.data.elasticsearch.core.ElasticsearchOperations) SearchHitsIterator(org.springframework.data.elasticsearch.core.SearchHitsIterator) RF2Constants(org.snomed.snowstorm.core.rf2.RF2Constants) NativeSearchQueryBuilder(org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder) BranchService(io.kaicode.elasticvc.api.BranchService) org.snomed.snowstorm.core.data.domain(org.snomed.snowstorm.core.data.domain) Long.parseLong(java.lang.Long.parseLong) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AtomicLong(java.util.concurrent.atomic.AtomicLong) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NativeSearchQueryBuilder(org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder)

Example 5 with SearchHitsIterator

use of org.springframework.data.elasticsearch.core.SearchHitsIterator in project snowstorm by IHTSDO.

the class BranchReviewService method createConceptChangeReportOnBranchForTimeRange.

/**
 * Collects the ids of concepts that were changed on a branch between two points in time.
 * <p>
 * Two techniques are combined:
 * <ol>
 *   <li>Replaced versions: the "versions replaced" entries present in the end time slice but not in the start
 *       time slice are looked up per component type.</li>
 *   <li>New or ended versions: documents matching the update criteria that {@code versionControlHelper} builds
 *       for the time range are looked up per component type.</li>
 * </ol>
 * Concept hits, FSN description hits and relationship hits contribute their concept id / source id directly.
 * Reference set members that carry the internal {@code conceptId} field are gathered separately and filtered
 * afterwards: a member whose referenced component is an active, non-FSN description only counts when that
 * description also has a PREFERRED language refset member matching the update criteria.
 *
 * @param path           path of the branch to report on
 * @param start          start of the time range
 * @param end            end of the time range
 * @param sourceIsParent {@code true} when the source branch is the parent, in which case time slices and update
 *                       criteria include ancestor branches; {@code false} when the source branch is the child,
 *                       in which case only content on {@code path} is considered
 * @return the ids of the changed concepts; an empty set when the start and end time slices are equal
 */
Set<Long> createConceptChangeReportOnBranchForTimeRange(String path, Date start, Date end, boolean sourceIsParent) {
    logger.info("Creating change report: branch {} time range {} ({}) to {} ({})", path, start.getTime(), start, end.getTime(), end);
    List<Branch> startTimeSlice;
    List<Branch> endTimeSlice;
    if (sourceIsParent) {
        // The source branch is the parent, so we are counting content which could be rebased down.
        // This content can come from any ancestor branch.
        startTimeSlice = versionControlHelper.getTimeSlice(path, start);
        endTimeSlice = versionControlHelper.getTimeSlice(path, end);
    } else {
        // The source branch is the child, so we are counting content which could be promoted up.
        // This content will exist on this path only.
        startTimeSlice = Lists.newArrayList(branchService.findAtTimepointOrThrow(path, start));
        endTimeSlice = Lists.newArrayList(branchService.findAtTimepointOrThrow(path, end));
    }
    if (startTimeSlice.equals(endTimeSlice)) {
        // Identical time slices: nothing can have changed in between.
        return Collections.emptySet();
    }
    // Find components of each type that are on the target branch and have been ended on the source branch
    final Set<Long> changedConcepts = new LongOpenHashSet();
    final Map<Long, Long> referenceComponentIdToConceptMap = new Long2ObjectOpenHashMap<>();
    final Set<Long> preferredDescriptionIds = new LongOpenHashSet();
    Branch branch = branchService.findBranchOrThrow(path);
    logger.debug("Collecting versions replaced for change report: branch {} time range {} to {}", path, start, end);
    Map<String, Set<String>> changedVersionsReplaced = new HashMap<>();
    // Technique: Search for replaced versions
    // Work out changes in versions replaced between time slices
    Map<String, Set<String>> startVersionsReplaced = versionControlHelper.getAllVersionsReplaced(startTimeSlice);
    Map<String, Set<String>> endVersionsReplaced = versionControlHelper.getAllVersionsReplaced(endTimeSlice);
    for (String type : Sets.union(startVersionsReplaced.keySet(), endVersionsReplaced.keySet())) {
        changedVersionsReplaced.put(type, Sets.difference(endVersionsReplaced.getOrDefault(type, Collections.emptySet()), startVersionsReplaced.getOrDefault(type, Collections.emptySet())));
    }
    if (!changedVersionsReplaced.getOrDefault(Concept.class.getSimpleName(), Collections.emptySet()).isEmpty()) {
        try (final SearchHitsIterator<Concept> stream = elasticsearchTemplate.searchForStream(componentsReplacedCriteria(changedVersionsReplaced.get(Concept.class.getSimpleName()), Concept.Fields.CONCEPT_ID).build(), Concept.class)) {
            stream.forEachRemaining(hit -> changedConcepts.add(parseLong(hit.getContent().getConceptId())));
        }
    }
    if (!changedVersionsReplaced.getOrDefault(Description.class.getSimpleName(), Collections.emptySet()).isEmpty()) {
        NativeSearchQueryBuilder fsnQuery = componentsReplacedCriteria(changedVersionsReplaced.get(Description.class.getSimpleName()), Description.Fields.CONCEPT_ID).withFilter(termQuery(Description.Fields.TYPE_ID, Concepts.FSN));
        try (final SearchHitsIterator<Description> stream = elasticsearchTemplate.searchForStream(fsnQuery.build(), Description.class)) {
            stream.forEachRemaining(hit -> changedConcepts.add(parseLong(hit.getContent().getConceptId())));
        }
    }
    if (!changedVersionsReplaced.getOrDefault(Relationship.class.getSimpleName(), Collections.emptySet()).isEmpty()) {
        try (final SearchHitsIterator<Relationship> stream = elasticsearchTemplate.searchForStream(componentsReplacedCriteria(changedVersionsReplaced.get(Relationship.class.getSimpleName()), Relationship.Fields.SOURCE_ID).build(), Relationship.class)) {
            stream.forEachRemaining(hit -> changedConcepts.add(parseLong(hit.getContent().getSourceId())));
        }
    }
    if (!changedVersionsReplaced.getOrDefault(ReferenceSetMember.class.getSimpleName(), Collections.emptySet()).isEmpty()) {
        // Refsets with the internal "conceptId" field are related to a concept in terms of authoring
        NativeSearchQueryBuilder refsetQuery = componentsReplacedCriteria(changedVersionsReplaced.get(ReferenceSetMember.class.getSimpleName()), ReferenceSetMember.Fields.REFERENCED_COMPONENT_ID, ReferenceSetMember.Fields.CONCEPT_ID).withFilter(boolQuery().must(existsQuery(ReferenceSetMember.Fields.CONCEPT_ID)));
        try (final SearchHitsIterator<ReferenceSetMember> stream = elasticsearchTemplate.searchForStream(refsetQuery.build(), ReferenceSetMember.class)) {
            stream.forEachRemaining(hit -> referenceComponentIdToConceptMap.put(parseLong(hit.getContent().getReferencedComponentId()), parseLong(hit.getContent().getConceptId())));
        }
    }
    // Technique: Search for ended versions
    BoolQueryBuilder updatesDuringRange;
    if (sourceIsParent) {
        updatesDuringRange = versionControlHelper.getUpdatesOnBranchOrAncestorsDuringRangeQuery(path, start, end);
    } else {
        updatesDuringRange = versionControlHelper.getUpdatesOnBranchDuringRangeCriteria(path, start, end);
    }
    // Find new or ended versions of each component type and collect the conceptId they relate to
    logger.debug("Collecting concept changes for change report: branch {} time range {} to {}", path, start, end);
    TimerUtil timerUtil = new TimerUtil("Collecting changes");
    NativeSearchQuery conceptsWithNewVersionsQuery = new NativeSearchQueryBuilder().withQuery(updatesDuringRange).withPageable(LARGE_PAGE).withSort(SortBuilders.fieldSort("start")).withFields(Concept.Fields.CONCEPT_ID).build();
    try (final SearchHitsIterator<Concept> stream = elasticsearchTemplate.searchForStream(conceptsWithNewVersionsQuery, Concept.class)) {
        stream.forEachRemaining(hit -> changedConcepts.add(parseLong(hit.getContent().getConceptId())));
    }
    logger.debug("Collecting description changes for change report: branch {} time range {} to {}", path, start, end);
    AtomicLong descriptions = new AtomicLong();
    NativeSearchQuery descQuery = newSearchQuery(updatesDuringRange).withFilter(termQuery(Description.Fields.TYPE_ID, Concepts.FSN)).withFields(Description.Fields.CONCEPT_ID).build();
    try (final SearchHitsIterator<Description> stream = elasticsearchTemplate.searchForStream(descQuery, Description.class)) {
        stream.forEachRemaining(hit -> {
            changedConcepts.add(parseLong(hit.getContent().getConceptId()));
            descriptions.incrementAndGet();
        });
    }
    timerUtil.checkpoint("descriptions " + descriptions.get());
    logger.debug("Collecting relationship changes for change report: branch {} time range {} to {}", path, start, end);
    AtomicLong relationships = new AtomicLong();
    NativeSearchQuery relQuery = newSearchQuery(updatesDuringRange).withFields(Relationship.Fields.SOURCE_ID).build();
    try (final SearchHitsIterator<Relationship> stream = elasticsearchTemplate.searchForStream(relQuery, Relationship.class)) {
        stream.forEachRemaining(hit -> {
            changedConcepts.add(parseLong(hit.getContent().getSourceId()));
            relationships.incrementAndGet();
        });
    }
    timerUtil.checkpoint("relationships " + relationships.get());
    logger.debug("Collecting refset member changes for change report: branch {} time range {} to {}", path, start, end);
    NativeSearchQuery memberQuery = newSearchQuery(updatesDuringRange).withFilter(boolQuery().must(existsQuery(ReferenceSetMember.Fields.CONCEPT_ID))).withFields(ReferenceSetMember.Fields.REFERENCED_COMPONENT_ID, ReferenceSetMember.Fields.CONCEPT_ID).build();
    try (final SearchHitsIterator<ReferenceSetMember> stream = elasticsearchTemplate.searchForStream(memberQuery, ReferenceSetMember.class)) {
        stream.forEachRemaining(hit -> referenceComponentIdToConceptMap.put(parseLong(hit.getContent().getReferencedComponentId()), parseLong(hit.getContent().getConceptId())));
    }
    // Filter out changes for active Synonyms
    // Inactive synonym changes should be included to avoid inactivation indicator / association clashes
    // A hash set is used because this collection is probed once per referenced component below;
    // a list would make that final filtering step quadratic.
    final Set<Long> synonymAndTextDefIds = new LongOpenHashSet();
    if (!referenceComponentIdToConceptMap.isEmpty()) {
        // A terms query over an empty id collection cannot match anything, so the search is skipped in that case.
        NativeSearchQueryBuilder synonymQuery = new NativeSearchQueryBuilder().withQuery(versionControlHelper.getBranchCriteria(branch).getEntityBranchCriteria(Description.class)).withFilter(boolQuery().mustNot(termQuery(Description.Fields.TYPE_ID, Concepts.FSN)).must(termsQuery(Description.Fields.DESCRIPTION_ID, referenceComponentIdToConceptMap.keySet())).must(termQuery(Description.Fields.ACTIVE, true)));
        try (final SearchHitsIterator<Description> stream = elasticsearchTemplate.searchForStream(synonymQuery.build(), Description.class)) {
            stream.forEachRemaining(hit -> synonymAndTextDefIds.add(parseLong(hit.getContent().getDescriptionId())));
        }
    }
    // Keep preferred terms if any
    if (!synonymAndTextDefIds.isEmpty()) {
        // Same reasoning as above: no active synonyms / text definitions means no preferred members to look for.
        NativeSearchQuery languageMemberQuery = newSearchQuery(updatesDuringRange).withFilter(boolQuery().must(existsQuery(ReferenceSetMember.Fields.CONCEPT_ID)).must(termsQuery(ReferenceSetMember.Fields.REFERENCED_COMPONENT_ID, synonymAndTextDefIds)).must(termsQuery(ReferenceSetMember.LanguageFields.ACCEPTABILITY_ID_FIELD_PATH, Concepts.PREFERRED))).withFields(ReferenceSetMember.Fields.REFERENCED_COMPONENT_ID).build();
        try (final SearchHitsIterator<ReferenceSetMember> stream = elasticsearchTemplate.searchForStream(languageMemberQuery, ReferenceSetMember.class)) {
            stream.forEachRemaining(hit -> preferredDescriptionIds.add(parseLong(hit.getContent().getReferencedComponentId())));
        }
    }
    // A referenced component counts as a change unless it is an active synonym / text definition
    // without a preferred language refset member in the range.
    for (Map.Entry<Long, Long> componentToConcept : referenceComponentIdToConceptMap.entrySet()) {
        Long componentId = componentToConcept.getKey();
        if (preferredDescriptionIds.contains(componentId) || !synonymAndTextDefIds.contains(componentId)) {
            changedConcepts.add(componentToConcept.getValue());
        }
    }
    logger.info("Change report complete for branch {} time range {} to {}", path, start, end);
    return changedConcepts;
}
Also used : SortBuilders(org.elasticsearch.search.sort.SortBuilders) java.util(java.util) PreAuthorize(org.springframework.security.access.prepost.PreAuthorize) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) LoggerFactory(org.slf4j.LoggerFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) QueryBuilders(org.elasticsearch.index.query.QueryBuilders) Function(java.util.function.Function) Supplier(java.util.function.Supplier) ObjectReader(com.fasterxml.jackson.databind.ObjectReader) Lists(com.google.common.collect.Lists) org.snomed.snowstorm.core.data.domain.review(org.snomed.snowstorm.core.data.domain.review) Service(org.springframework.stereotype.Service) Long2ObjectOpenHashMap(it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap) SecurityContextHolder(org.springframework.security.core.context.SecurityContextHolder) LanguageDialect(org.snomed.snowstorm.core.pojo.LanguageDialect) ExecutorService(java.util.concurrent.ExecutorService) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) VersionControlHelper(io.kaicode.elasticvc.api.VersionControlHelper) Logger(org.slf4j.Logger) MergeReviewRepository(org.snomed.snowstorm.core.data.repositories.MergeReviewRepository) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) LARGE_PAGE(io.kaicode.elasticvc.api.ComponentService.LARGE_PAGE) Sets(com.google.common.collect.Sets) BranchReviewRepository(org.snomed.snowstorm.core.data.repositories.BranchReviewRepository) AtomicLong(java.util.concurrent.atomic.AtomicLong) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet) SecurityContext(org.springframework.security.core.context.SecurityContext) ManuallyMergedConceptRepository(org.snomed.snowstorm.core.data.repositories.ManuallyMergedConceptRepository) ElasticsearchOperations(org.springframework.data.elasticsearch.core.ElasticsearchOperations) 
SearchHitsIterator(org.springframework.data.elasticsearch.core.SearchHitsIterator) PostConstruct(javax.annotation.PostConstruct) Branch(io.kaicode.elasticvc.domain.Branch) NativeSearchQueryBuilder(org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder) BranchService(io.kaicode.elasticvc.api.BranchService) org.snomed.snowstorm.core.data.domain(org.snomed.snowstorm.core.data.domain) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) NativeSearchQuery(org.springframework.data.elasticsearch.core.query.NativeSearchQuery) Long.parseLong(java.lang.Long.parseLong) TimerUtil(org.snomed.snowstorm.core.util.TimerUtil) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet) Long2ObjectOpenHashMap(it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) Branch(io.kaicode.elasticvc.domain.Branch) TimerUtil(org.snomed.snowstorm.core.util.TimerUtil) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet) NativeSearchQueryBuilder(org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) Long2ObjectOpenHashMap(it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap) NativeSearchQuery(org.springframework.data.elasticsearch.core.query.NativeSearchQuery) AtomicLong(java.util.concurrent.atomic.AtomicLong) AtomicLong(java.util.concurrent.atomic.AtomicLong) Long.parseLong(java.lang.Long.parseLong)

Aggregations

Logger (org.slf4j.Logger)13 LoggerFactory (org.slf4j.LoggerFactory)13 SearchHitsIterator (org.springframework.data.elasticsearch.core.SearchHitsIterator)13 NativeSearchQueryBuilder (org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder)13 Long.parseLong (java.lang.Long.parseLong)12 QueryBuilders (org.elasticsearch.index.query.QueryBuilders)12 org.snomed.snowstorm.core.data.domain (org.snomed.snowstorm.core.data.domain)12 Autowired (org.springframework.beans.factory.annotation.Autowired)12 ElasticsearchOperations (org.springframework.data.elasticsearch.core.ElasticsearchOperations)12 Service (org.springframework.stereotype.Service)12 Commit (io.kaicode.elasticvc.domain.Commit)11 java.util (java.util)11 Collectors (java.util.stream.Collectors)11 LongOpenHashSet (it.unimi.dsi.fastutil.longs.LongOpenHashSet)10 Iterables (com.google.common.collect.Iterables)9 VersionControlHelper (io.kaicode.elasticvc.api.VersionControlHelper)9 NativeSearchQuery (org.springframework.data.elasticsearch.core.query.NativeSearchQuery)9 BranchCriteria (io.kaicode.elasticvc.api.BranchCriteria)8 Branch (io.kaicode.elasticvc.domain.Branch)8 Long2ObjectOpenHashMap (it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap)8