Search in sources :

Example 1 with EsQueryBuilder

use of com.b2international.index.es.query.EsQueryBuilder in project snow-owl by b2ihealthcare.

the class EsDocumentSearcher method aggregate.

@Override
public <T> Aggregation<T> aggregate(AggregationBuilder<T> aggregation) throws IOException {
    final String aggregationName = aggregation.getName();
    final EsClient client = admin.client();
    final DocumentMapping mapping = admin.mappings().getMapping(aggregation.getFrom());
    final EsQueryBuilder esQueryBuilder = new EsQueryBuilder(mapping, admin.settings(), admin.log());
    final QueryBuilder esQuery = esQueryBuilder.build(aggregation.getQuery());
    final SearchRequest req = new SearchRequest(admin.getTypeIndex(mapping));
    final SearchSourceBuilder reqSource = req.source().query(esQuery).size(0).trackScores(false).trackTotalHitsUpTo(Integer.MAX_VALUE);
    // field selection
    final boolean fetchSource = applySourceFiltering(aggregation.getFields(), mapping, reqSource);
    reqSource.aggregation(toEsAggregation(mapping, aggregation, fetchSource));
    SearchResponse response = null;
    try {
        response = client.search(req);
    } catch (Exception e) {
        admin.log().error("Couldn't execute aggregation", e);
        throw new IndexException("Couldn't execute aggregation: " + e.getMessage(), null);
    }
    ImmutableMap.Builder<Object, Bucket<T>> buckets = ImmutableMap.builder();
    Aggregations topLevelAggregations = response.getAggregations();
    Nested nested = topLevelAggregations.get(nestedAggName(aggregation));
    Terms aggregationResult;
    if (nested != null) {
        aggregationResult = nested.getAggregations().get(aggregationName);
    } else {
        aggregationResult = topLevelAggregations.get(aggregationName);
    }
    for (org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket bucket : aggregationResult.getBuckets()) {
        final TopHits topHits;
        if (nested != null) {
            final ReverseNested reverseNested = bucket.getAggregations().get(reverseNestedAggName(aggregation));
            topHits = reverseNested.getAggregations().get(topHitsAggName(aggregation));
        } else {
            topHits = bucket.getAggregations().get(topHitsAggName(aggregation));
        }
        Hits<T> hits;
        if (topHits != null) {
            hits = toHits(aggregation.getSelect(), List.of(aggregation.getFrom()), aggregation.getFields(), fetchSource, aggregation.getBucketHitsLimit(), (int) bucket.getDocCount(), null, topHits.getHits());
        } else {
            hits = new Hits<>(Collections.emptyList(), null, aggregation.getBucketHitsLimit(), (int) bucket.getDocCount());
        }
        buckets.put(bucket.getKey(), new Bucket<>(bucket.getKey(), hits));
    }
    return new Aggregation<>(aggregationName, buckets.build());
}
Also used : SearchRequest(org.elasticsearch.action.search.SearchRequest) Aggregations(org.elasticsearch.search.aggregations.Aggregations) ReverseNested(org.elasticsearch.search.aggregations.bucket.nested.ReverseNested) Nested(org.elasticsearch.search.aggregations.bucket.nested.Nested) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) Aggregation(com.b2international.index.aggregations.Aggregation) TopHits(org.elasticsearch.search.aggregations.metrics.TopHits) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) ReverseNested(org.elasticsearch.search.aggregations.bucket.nested.ReverseNested) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) EsClient(com.b2international.index.es.client.EsClient) DocumentMapping(com.b2international.index.mapping.DocumentMapping) FormattedRuntimeException(com.b2international.commons.exceptions.FormattedRuntimeException) ElasticsearchStatusException(org.elasticsearch.ElasticsearchStatusException) BadRequestException(com.b2international.commons.exceptions.BadRequestException) IOException(java.io.IOException) SearchResponse(org.elasticsearch.action.search.SearchResponse) Bucket(com.b2international.index.aggregations.Bucket)

Example 2 with EsQueryBuilder

use of com.b2international.index.es.query.EsQueryBuilder in project snow-owl by b2ihealthcare.

the class EsDocumentSearcher method search.

@Override
public <T> Hits<T> search(Query<T> query) throws IOException {
    Stopwatch w = Stopwatch.createStarted();
    admin.log().trace("Executing query '{}'", query);
    final EsClient client = admin.client();
    final List<DocumentMapping> mappings = admin.mappings().getDocumentMapping(query);
    final DocumentMapping primaryMapping = Iterables.getFirst(mappings, null);
    // Restrict variables to the theoretical maximum
    final int limit = query.getLimit();
    final int toRead = Ints.min(limit, resultWindow);
    // TODO support multiple document mappings during query building
    final EsQueryBuilder esQueryBuilder = new EsQueryBuilder(primaryMapping, admin.settings(), admin.log());
    final QueryBuilder esQuery = esQueryBuilder.build(query.getWhere());
    final SearchRequest req = new SearchRequest(admin.getTypeIndexes(mappings).toArray(length -> new String[length]));
    // configure caching
    req.requestCache(query.isCached());
    final SearchSourceBuilder reqSource = req.source().size(toRead).query(esQuery).trackScores(esQueryBuilder.needsScoring()).trackTotalHitsUpTo(Integer.MAX_VALUE);
    // field selection
    final boolean fetchSource = applySourceFiltering(query.getFields(), primaryMapping, reqSource);
    // ES internals require loading the _id field when we require the _source
    if (fetchSource) {
        reqSource.storedFields(STORED_FIELDS_ID_ONLY);
    } else {
        reqSource.storedFields(STORED_FIELDS_NONE);
    }
    // paging config
    final boolean isLocalStreaming = limit > resultWindow;
    final boolean isLiveStreaming = !Strings.isNullOrEmpty(query.getSearchAfter());
    if (isLocalStreaming) {
        checkArgument(!isLiveStreaming, "Cannot use searchAfter when requesting more items (%s) than the configured result window (%s).", limit, resultWindow);
    } else if (isLiveStreaming) {
        reqSource.searchAfter(fromSearchAfterToken(query.getSearchAfter()));
    }
    // sorting config with a default sort field based on scroll config
    addSort(primaryMapping, reqSource, query.getSortBy());
    // disable explain explicitly, just in case
    reqSource.explain(false);
    // disable version field explicitly, just in case
    reqSource.version(false);
    // perform search
    SearchResponse response = null;
    try {
        response = client.search(req);
    } catch (Exception e) {
        if (e instanceof ElasticsearchStatusException && ((ElasticsearchStatusException) e).status() == RestStatus.BAD_REQUEST) {
            throw new IllegalArgumentException(e.getMessage(), e);
        }
        admin.log().error("Couldn't execute query", e);
        throw new IndexException("Couldn't execute query: " + e.getMessage(), null);
    }
    SearchHits responseHits = response.getHits();
    final TotalHits total = responseHits.getTotalHits();
    checkState(total.relation == Relation.EQUAL_TO, "Searches should always track total hits accurately");
    final int totalHitCount = (int) total.value;
    final SearchHit[] firstHits = responseHits.getHits();
    final int firstCount = firstHits.length;
    final int remainingCount = Math.min(limit, totalHitCount) - firstCount;
    // Add the first set of results
    final ImmutableList.Builder<SearchHit> allHits = ImmutableList.builder();
    allHits.addAll(responseHits);
    // If the client requested all data at once and there are more hits to retrieve, collect them all as part of the request
    if (isLocalStreaming && remainingCount > 0) {
        admin.log().warn("Returning all matches (totalHits: '{}') larger than the currently configured result_window ('{}') might not be the most " + "efficient way of getting the data. Consider using the index pagination API (searchAfter) instead.", totalHitCount, resultWindow);
        while (true) {
            // Extract searchAfter values for the next set of results
            final SearchHit lastHit = Iterables.getLast(responseHits, null);
            if (lastHit == null) {
                break;
            }
            reqSource.searchAfter(lastHit.getSortValues());
            // Request more search results, adding them to the list builder
            response = client.search(req);
            responseHits = response.getHits();
            allHits.addAll(responseHits);
        }
    }
    final Class<T> select = query.getSelection().getSelect();
    final List<Class<?>> from = query.getSelection().getFrom();
    final Hits<T> hits = toHits(select, from, query.getFields(), fetchSource, limit, totalHitCount, query.getSortBy(), allHits.build());
    admin.log().trace("Executed query '{}' in '{}'", query, w);
    return hits;
}
Also used : GetResponse(org.elasticsearch.action.get.GetResponse) SortBuilders(org.elasticsearch.search.sort.SortBuilders) Query(com.b2international.index.query.Query) SearchHits(org.elasticsearch.search.SearchHits) TopHitsAggregationBuilder(org.elasticsearch.search.aggregations.metrics.TopHitsAggregationBuilder) FormattedRuntimeException(com.b2international.commons.exceptions.FormattedRuntimeException) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ByteArrayInputStream(java.io.ByteArrayInputStream) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchStatusException(org.elasticsearch.ElasticsearchStatusException) CompareUtils(com.b2international.commons.CompareUtils) com.google.common.collect(com.google.common.collect) SearchHit(org.elasticsearch.search.SearchHit) GetRequest(org.elasticsearch.action.get.GetRequest) Aggregations(org.elasticsearch.search.aggregations.Aggregations) SortBy(com.b2international.index.query.SortBy) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) EsClient(com.b2international.index.es.client.EsClient) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) Expressions(com.b2international.index.query.Expressions) RestStatus(org.elasticsearch.rest.RestStatus) AggregationBuilder(com.b2international.index.aggregations.AggregationBuilder) SortOrder(org.elasticsearch.search.sort.SortOrder) ReverseNested(org.elasticsearch.search.aggregations.bucket.nested.ReverseNested) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) JavaBinCodec(org.apache.solr.common.util.JavaBinCodec) TopHits(org.elasticsearch.search.aggregations.metrics.TopHits) FetchSourceContext(org.elasticsearch.search.fetch.subphase.FetchSourceContext) SortByScript(com.b2international.index.query.SortBy.SortByScript) DataInputStream(java.io.DataInputStream) SortByField(com.b2international.index.query.SortBy.SortByField) java.util(java.util) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Stopwatch(com.google.common.base.Stopwatch) SearchRequest(org.elasticsearch.action.search.SearchRequest) Aggregation(com.b2international.index.aggregations.Aggregation) EsIndexAdmin(com.b2international.index.es.admin.EsIndexAdmin) Strings(com.google.common.base.Strings) com.b2international.index(com.b2international.index) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) MultiSortBy(com.b2international.index.query.SortBy.MultiSortBy) Nested(org.elasticsearch.search.aggregations.bucket.nested.Nested) TermsAggregationBuilder(org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder) BadRequestException(com.b2international.commons.exceptions.BadRequestException) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IOException(java.io.IOException) AggregationBuilders(org.elasticsearch.search.aggregations.AggregationBuilders) Relation(org.apache.lucene.search.TotalHits.Relation) Bucket(com.b2international.index.aggregations.Bucket) Ints(com.google.common.primitives.Ints) DocumentMapping(com.b2international.index.mapping.DocumentMapping) TotalHits(org.apache.lucene.search.TotalHits) TotalHits(org.apache.lucene.search.TotalHits) SearchRequest(org.elasticsearch.action.search.SearchRequest) SearchHit(org.elasticsearch.search.SearchHit) Stopwatch(com.google.common.base.Stopwatch) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) ElasticsearchStatusException(org.elasticsearch.ElasticsearchStatusException) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) SearchHits(org.elasticsearch.search.SearchHits) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) EsClient(com.b2international.index.es.client.EsClient) DocumentMapping(com.b2international.index.mapping.DocumentMapping) FormattedRuntimeException(com.b2international.commons.exceptions.FormattedRuntimeException) ElasticsearchStatusException(org.elasticsearch.ElasticsearchStatusException) BadRequestException(com.b2international.commons.exceptions.BadRequestException) IOException(java.io.IOException) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Example 3 with EsQueryBuilder

use of com.b2international.index.es.query.EsQueryBuilder in project snow-owl by b2ihealthcare.

the class EsIndexAdmin method bulkIndexByScroll.

private boolean bulkIndexByScroll(final EsClient client, final DocumentMapping mapping, final Expression filter, final String command, final org.elasticsearch.script.Script script, final String operationDescription) {
    final QueryBuilder query = new EsQueryBuilder(mapping, settings, log).build(filter);
    boolean needsRefresh = false;
    long versionConflicts = 0;
    int attempts = DEFAULT_MAX_NUMBER_OF_VERSION_CONFLICT_RETRIES;
    do {
        try {
            final BulkByScrollResponse response;
            final int batchSize = Integer.parseInt((String) settings.get(IndexClientFactory.RESULT_WINDOW_KEY));
            if ("update".equals(command)) {
                response = client.updateByQuery(getTypeIndex(mapping), batchSize, script, query);
            } else if ("delete".equals(command)) {
                response = client.deleteByQuery(getTypeIndex(mapping), batchSize, query);
            } else {
                throw new UnsupportedOperationException("Not implemented command: " + command);
            }
            final long updateCount = response.getUpdated();
            final long deleteCount = response.getDeleted();
            final long noops = response.getNoops();
            final List<Failure> failures = response.getBulkFailures();
            versionConflicts = response.getVersionConflicts();
            boolean updated = updateCount > 0;
            if (updated) {
                log().info("Updated {} {} documents with bulk {}", updateCount, mapping.typeAsString(), operationDescription);
                needsRefresh = true;
            }
            boolean deleted = deleteCount > 0;
            if (deleted) {
                log().info("Deleted {} {} documents with bulk {}", deleteCount, mapping.typeAsString(), operationDescription);
                needsRefresh = true;
            }
            if (!updated && !deleted) {
                log().warn("Bulk {} could not be applied to {} documents, no-ops ({}), conflicts ({})", operationDescription, mapping.typeAsString(), noops, versionConflicts);
            }
            if (failures.size() > 0) {
                boolean versionConflictsOnly = true;
                for (Failure failure : failures) {
                    final String failureMessage = failure.getCause().getMessage();
                    final int failureStatus = failure.getStatus().getStatus();
                    if (failureStatus != RestStatus.CONFLICT.getStatus()) {
                        versionConflictsOnly = false;
                        log().error("Index failure during bulk update: {}", failureMessage);
                    } else {
                        log().warn("Version conflict reason: {}", failureMessage);
                    }
                }
                if (!versionConflictsOnly) {
                    throw new IllegalStateException("There were indexing failures during bulk updates. See logs for all failures.");
                }
            }
            if (attempts <= 0) {
                throw new IndexException("There were indexing failures during bulk updates. See logs for all failures.", null);
            }
            if (versionConflicts > 0) {
                --attempts;
                try {
                    Thread.sleep(100 + random.nextInt(900));
                    refresh(Collections.singleton(mapping));
                } catch (InterruptedException e) {
                    throw new IndexException("Interrupted", e);
                }
            }
        } catch (IOException e) {
            throw new IndexException("Could not execute bulk update.", e);
        }
    } while (versionConflicts > 0);
    return needsRefresh;
}
Also used : EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) IOException(java.io.IOException) BulkByScrollResponse(org.elasticsearch.index.reindex.BulkByScrollResponse) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) Failure(org.elasticsearch.action.bulk.BulkItemResponse.Failure)

Aggregations

EsQueryBuilder (com.b2international.index.es.query.EsQueryBuilder)3 IOException (java.io.IOException)3 QueryBuilder (org.elasticsearch.index.query.QueryBuilder)3 BadRequestException (com.b2international.commons.exceptions.BadRequestException)2 FormattedRuntimeException (com.b2international.commons.exceptions.FormattedRuntimeException)2 Aggregation (com.b2international.index.aggregations.Aggregation)2 Bucket (com.b2international.index.aggregations.Bucket)2 EsClient (com.b2international.index.es.client.EsClient)2 DocumentMapping (com.b2international.index.mapping.DocumentMapping)2 ElasticsearchStatusException (org.elasticsearch.ElasticsearchStatusException)2 SearchRequest (org.elasticsearch.action.search.SearchRequest)2 SearchResponse (org.elasticsearch.action.search.SearchResponse)2 Aggregations (org.elasticsearch.search.aggregations.Aggregations)2 Nested (org.elasticsearch.search.aggregations.bucket.nested.Nested)2 ReverseNested (org.elasticsearch.search.aggregations.bucket.nested.ReverseNested)2 Terms (org.elasticsearch.search.aggregations.bucket.terms.Terms)2 TopHits (org.elasticsearch.search.aggregations.metrics.TopHits)2 SearchSourceBuilder (org.elasticsearch.search.builder.SearchSourceBuilder)2 CompareUtils (com.b2international.commons.CompareUtils)1 com.b2international.index (com.b2international.index)1