Search in sources :

Example 1 with EsClient

use of com.b2international.index.es.client.EsClient in project snow-owl by b2ihealthcare.

the class EsDocumentSearcher method aggregate.

@Override
public <T> Aggregation<T> aggregate(AggregationBuilder<T> aggregation) throws IOException {
    final String aggregationName = aggregation.getName();
    final EsClient client = admin.client();
    final DocumentMapping mapping = admin.mappings().getMapping(aggregation.getFrom());
    final EsQueryBuilder esQueryBuilder = new EsQueryBuilder(mapping, admin.settings(), admin.log());
    final QueryBuilder esQuery = esQueryBuilder.build(aggregation.getQuery());
    final SearchRequest req = new SearchRequest(admin.getTypeIndex(mapping));
    final SearchSourceBuilder reqSource = req.source().query(esQuery).size(0).trackScores(false).trackTotalHitsUpTo(Integer.MAX_VALUE);
    // field selection
    final boolean fetchSource = applySourceFiltering(aggregation.getFields(), mapping, reqSource);
    reqSource.aggregation(toEsAggregation(mapping, aggregation, fetchSource));
    SearchResponse response = null;
    try {
        response = client.search(req);
    } catch (Exception e) {
        admin.log().error("Couldn't execute aggregation", e);
        throw new IndexException("Couldn't execute aggregation: " + e.getMessage(), null);
    }
    ImmutableMap.Builder<Object, Bucket<T>> buckets = ImmutableMap.builder();
    Aggregations topLevelAggregations = response.getAggregations();
    Nested nested = topLevelAggregations.get(nestedAggName(aggregation));
    Terms aggregationResult;
    if (nested != null) {
        aggregationResult = nested.getAggregations().get(aggregationName);
    } else {
        aggregationResult = topLevelAggregations.get(aggregationName);
    }
    for (org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket bucket : aggregationResult.getBuckets()) {
        final TopHits topHits;
        if (nested != null) {
            final ReverseNested reverseNested = bucket.getAggregations().get(reverseNestedAggName(aggregation));
            topHits = reverseNested.getAggregations().get(topHitsAggName(aggregation));
        } else {
            topHits = bucket.getAggregations().get(topHitsAggName(aggregation));
        }
        Hits<T> hits;
        if (topHits != null) {
            hits = toHits(aggregation.getSelect(), List.of(aggregation.getFrom()), aggregation.getFields(), fetchSource, aggregation.getBucketHitsLimit(), (int) bucket.getDocCount(), null, topHits.getHits());
        } else {
            hits = new Hits<>(Collections.emptyList(), null, aggregation.getBucketHitsLimit(), (int) bucket.getDocCount());
        }
        buckets.put(bucket.getKey(), new Bucket<>(bucket.getKey(), hits));
    }
    return new Aggregation<>(aggregationName, buckets.build());
}
Also used : SearchRequest(org.elasticsearch.action.search.SearchRequest) Aggregations(org.elasticsearch.search.aggregations.Aggregations) ReverseNested(org.elasticsearch.search.aggregations.bucket.nested.ReverseNested) Nested(org.elasticsearch.search.aggregations.bucket.nested.Nested) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) Aggregation(com.b2international.index.aggregations.Aggregation) TopHits(org.elasticsearch.search.aggregations.metrics.TopHits) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) ReverseNested(org.elasticsearch.search.aggregations.bucket.nested.ReverseNested) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) EsClient(com.b2international.index.es.client.EsClient) DocumentMapping(com.b2international.index.mapping.DocumentMapping) FormattedRuntimeException(com.b2international.commons.exceptions.FormattedRuntimeException) ElasticsearchStatusException(org.elasticsearch.ElasticsearchStatusException) BadRequestException(com.b2international.commons.exceptions.BadRequestException) IOException(java.io.IOException) SearchResponse(org.elasticsearch.action.search.SearchResponse) Bucket(com.b2international.index.aggregations.Bucket)

Example 2 with EsClient

use of com.b2international.index.es.client.EsClient in project snow-owl by b2ihealthcare.

the class EsDocumentSearcher method search.

@Override
public <T> Hits<T> search(Query<T> query) throws IOException {
    Stopwatch w = Stopwatch.createStarted();
    admin.log().trace("Executing query '{}'", query);
    final EsClient client = admin.client();
    final List<DocumentMapping> mappings = admin.mappings().getDocumentMapping(query);
    final DocumentMapping primaryMapping = Iterables.getFirst(mappings, null);
    // Restrict variables to the theoretical maximum
    final int limit = query.getLimit();
    final int toRead = Ints.min(limit, resultWindow);
    // TODO support multiple document mappings during query building
    final EsQueryBuilder esQueryBuilder = new EsQueryBuilder(primaryMapping, admin.settings(), admin.log());
    final QueryBuilder esQuery = esQueryBuilder.build(query.getWhere());
    final SearchRequest req = new SearchRequest(admin.getTypeIndexes(mappings).toArray(length -> new String[length]));
    // configure caching
    req.requestCache(query.isCached());
    final SearchSourceBuilder reqSource = req.source().size(toRead).query(esQuery).trackScores(esQueryBuilder.needsScoring()).trackTotalHitsUpTo(Integer.MAX_VALUE);
    // field selection
    final boolean fetchSource = applySourceFiltering(query.getFields(), primaryMapping, reqSource);
    // ES internals require loading the _id field when we require the _source
    if (fetchSource) {
        reqSource.storedFields(STORED_FIELDS_ID_ONLY);
    } else {
        reqSource.storedFields(STORED_FIELDS_NONE);
    }
    // paging config
    final boolean isLocalStreaming = limit > resultWindow;
    final boolean isLiveStreaming = !Strings.isNullOrEmpty(query.getSearchAfter());
    if (isLocalStreaming) {
        checkArgument(!isLiveStreaming, "Cannot use searchAfter when requesting more items (%s) than the configured result window (%s).", limit, resultWindow);
    } else if (isLiveStreaming) {
        reqSource.searchAfter(fromSearchAfterToken(query.getSearchAfter()));
    }
    // sorting config with a default sort field based on scroll config
    addSort(primaryMapping, reqSource, query.getSortBy());
    // disable explain explicitly, just in case
    reqSource.explain(false);
    // disable version field explicitly, just in case
    reqSource.version(false);
    // perform search
    SearchResponse response = null;
    try {
        response = client.search(req);
    } catch (Exception e) {
        if (e instanceof ElasticsearchStatusException && ((ElasticsearchStatusException) e).status() == RestStatus.BAD_REQUEST) {
            throw new IllegalArgumentException(e.getMessage(), e);
        }
        admin.log().error("Couldn't execute query", e);
        throw new IndexException("Couldn't execute query: " + e.getMessage(), null);
    }
    SearchHits responseHits = response.getHits();
    final TotalHits total = responseHits.getTotalHits();
    checkState(total.relation == Relation.EQUAL_TO, "Searches should always track total hits accurately");
    final int totalHitCount = (int) total.value;
    final SearchHit[] firstHits = responseHits.getHits();
    final int firstCount = firstHits.length;
    final int remainingCount = Math.min(limit, totalHitCount) - firstCount;
    // Add the first set of results
    final ImmutableList.Builder<SearchHit> allHits = ImmutableList.builder();
    allHits.addAll(responseHits);
    // If the client requested all data at once and there are more hits to retrieve, collect them all as part of the request
    if (isLocalStreaming && remainingCount > 0) {
        admin.log().warn("Returning all matches (totalHits: '{}') larger than the currently configured result_window ('{}') might not be the most " + "efficient way of getting the data. Consider using the index pagination API (searchAfter) instead.", totalHitCount, resultWindow);
        while (true) {
            // Extract searchAfter values for the next set of results
            final SearchHit lastHit = Iterables.getLast(responseHits, null);
            if (lastHit == null) {
                break;
            }
            reqSource.searchAfter(lastHit.getSortValues());
            // Request more search results, adding them to the list builder
            response = client.search(req);
            responseHits = response.getHits();
            allHits.addAll(responseHits);
        }
    }
    final Class<T> select = query.getSelection().getSelect();
    final List<Class<?>> from = query.getSelection().getFrom();
    final Hits<T> hits = toHits(select, from, query.getFields(), fetchSource, limit, totalHitCount, query.getSortBy(), allHits.build());
    admin.log().trace("Executed query '{}' in '{}'", query, w);
    return hits;
}
Also used : GetResponse(org.elasticsearch.action.get.GetResponse) SortBuilders(org.elasticsearch.search.sort.SortBuilders) Query(com.b2international.index.query.Query) SearchHits(org.elasticsearch.search.SearchHits) TopHitsAggregationBuilder(org.elasticsearch.search.aggregations.metrics.TopHitsAggregationBuilder) FormattedRuntimeException(com.b2international.commons.exceptions.FormattedRuntimeException) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ByteArrayInputStream(java.io.ByteArrayInputStream) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchStatusException(org.elasticsearch.ElasticsearchStatusException) CompareUtils(com.b2international.commons.CompareUtils) com.google.common.collect(com.google.common.collect) SearchHit(org.elasticsearch.search.SearchHit) GetRequest(org.elasticsearch.action.get.GetRequest) Aggregations(org.elasticsearch.search.aggregations.Aggregations) SortBy(com.b2international.index.query.SortBy) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) EsClient(com.b2international.index.es.client.EsClient) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) Expressions(com.b2international.index.query.Expressions) RestStatus(org.elasticsearch.rest.RestStatus) AggregationBuilder(com.b2international.index.aggregations.AggregationBuilder) SortOrder(org.elasticsearch.search.sort.SortOrder) ReverseNested(org.elasticsearch.search.aggregations.bucket.nested.ReverseNested) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) JavaBinCodec(org.apache.solr.common.util.JavaBinCodec) TopHits(org.elasticsearch.search.aggregations.metrics.TopHits) FetchSourceContext(org.elasticsearch.search.fetch.subphase.FetchSourceContext) SortByScript(com.b2international.index.query.SortBy.SortByScript) DataInputStream(java.io.DataInputStream) SortByField(com.b2international.index.query.SortBy.SortByField) java.util(java.util) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Stopwatch(com.google.common.base.Stopwatch) SearchRequest(org.elasticsearch.action.search.SearchRequest) Aggregation(com.b2international.index.aggregations.Aggregation) EsIndexAdmin(com.b2international.index.es.admin.EsIndexAdmin) Strings(com.google.common.base.Strings) com.b2international.index(com.b2international.index) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) MultiSortBy(com.b2international.index.query.SortBy.MultiSortBy) Nested(org.elasticsearch.search.aggregations.bucket.nested.Nested) TermsAggregationBuilder(org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder) BadRequestException(com.b2international.commons.exceptions.BadRequestException) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IOException(java.io.IOException) AggregationBuilders(org.elasticsearch.search.aggregations.AggregationBuilders) Relation(org.apache.lucene.search.TotalHits.Relation) Bucket(com.b2international.index.aggregations.Bucket) Ints(com.google.common.primitives.Ints) DocumentMapping(com.b2international.index.mapping.DocumentMapping) TotalHits(org.apache.lucene.search.TotalHits) TotalHits(org.apache.lucene.search.TotalHits) SearchRequest(org.elasticsearch.action.search.SearchRequest) SearchHit(org.elasticsearch.search.SearchHit) Stopwatch(com.google.common.base.Stopwatch) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) ElasticsearchStatusException(org.elasticsearch.ElasticsearchStatusException) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) SearchHits(org.elasticsearch.search.SearchHits) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) EsClient(com.b2international.index.es.client.EsClient) DocumentMapping(com.b2international.index.mapping.DocumentMapping) FormattedRuntimeException(com.b2international.commons.exceptions.FormattedRuntimeException) ElasticsearchStatusException(org.elasticsearch.ElasticsearchStatusException) BadRequestException(com.b2international.commons.exceptions.BadRequestException) IOException(java.io.IOException) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Example 3 with EsClient

use of com.b2international.index.es.client.EsClient in project snow-owl by b2ihealthcare.

the class EsDocumentWriter method commit.

@Override
public void commit() throws IOException {
    if (isEmpty()) {
        return;
    }
    final Set<DocumentMapping> mappingsToRefresh = Collections.synchronizedSet(newHashSet());
    final EsClient client = admin.client();
    // apply bulk updates first
    final ListeningExecutorService executor;
    if (bulkUpdateOperations.size() > 1 || bulkDeleteOperations.size() > 1) {
        final int threads = Math.min(4, Math.max(bulkUpdateOperations.size(), bulkDeleteOperations.size()));
        executor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(threads));
    } else {
        executor = MoreExecutors.newDirectExecutorService();
    }
    final List<ListenableFuture<?>> updateFutures = newArrayList();
    for (BulkUpdate<?> update : bulkUpdateOperations) {
        updateFutures.add(executor.submit(() -> {
            if (admin.bulkUpdate(update)) {
                mappingsToRefresh.add(admin.mappings().getMapping(update.getType()));
            }
        }));
    }
    for (BulkDelete<?> delete : bulkDeleteOperations) {
        updateFutures.add(executor.submit(() -> {
            if (admin.bulkDelete(delete)) {
                mappingsToRefresh.add(admin.mappings().getMapping(delete.getType()));
            }
        }));
    }
    try {
        executor.shutdown();
        Futures.allAsList(updateFutures).get();
        executor.awaitTermination(10, TimeUnit.SECONDS);
    } catch (InterruptedException | ExecutionException e) {
        admin.log().error("Couldn't execute bulk updates", e);
        throw new IndexException("Couldn't execute bulk updates", e);
    }
    // then bulk indexes/deletes
    if (!indexOperations.isEmpty() || !deleteOperations.isEmpty()) {
        final BulkProcessor processor = client.bulk(new BulkProcessor.Listener() {

            @Override
            public void beforeBulk(long executionId, BulkRequest request) {
                admin.log().debug("Sending bulk request {}", request.numberOfActions());
            }

            @Override
            public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
                admin.log().error("Failed bulk request", failure);
            }

            @Override
            public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
                admin.log().debug("Successfully processed bulk request ({}) in {}.", request.numberOfActions(), response.getTook());
                if (response.hasFailures()) {
                    for (BulkItemResponse itemResponse : response.getItems()) {
                        checkState(!itemResponse.isFailed(), "Failed to commit bulk request in index '%s', %s", admin.name(), itemResponse.getFailureMessage());
                    }
                }
            }
        }).setConcurrentRequests(getConcurrencyLevel()).setBulkActions((int) admin.settings().get(IndexClientFactory.BULK_ACTIONS_SIZE)).setBulkSize(new ByteSizeValue((int) admin.settings().get(IndexClientFactory.BULK_ACTIONS_SIZE_IN_MB), ByteSizeUnit.MB)).build();
        for (Class<?> type : ImmutableSet.copyOf(indexOperations.rowKeySet())) {
            final Map<String, Object> indexOperationsForType = indexOperations.row(type);
            final DocumentMapping mapping = admin.mappings().getMapping(type);
            final String typeIndex = admin.getTypeIndex(mapping);
            mappingsToRefresh.add(mapping);
            for (Entry<String, Object> entry : Iterables.consumingIterable(indexOperationsForType.entrySet())) {
                final String id = entry.getKey();
                if (!deleteOperations.containsValue(id)) {
                    final Object obj = entry.getValue();
                    final byte[] _source = mapper.writeValueAsBytes(obj);
                    IndexRequest indexRequest = new IndexRequest().index(typeIndex).opType(OpType.INDEX).source(_source, XContentType.JSON);
                    // XXX revisions has their special local ID, but that's not needed when sending them to ES, ES will autogenerate a non-conflicting ID for them
                    if (!mapping.isAutoGeneratedId()) {
                        indexRequest.id(id);
                    }
                    processor.add(indexRequest);
                }
            }
            for (String id : deleteOperations.removeAll(type)) {
                processor.add(new DeleteRequest(typeIndex, id));
            }
            // Flush processor between index boundaries
            processor.flush();
        }
        // Remaining delete operations can be executed on their own
        for (Class<?> type : ImmutableSet.copyOf(deleteOperations.keySet())) {
            final DocumentMapping mapping = admin.mappings().getMapping(type);
            final String typeIndex = admin.getTypeIndex(mapping);
            mappingsToRefresh.add(mapping);
            for (String id : deleteOperations.removeAll(type)) {
                processor.add(new DeleteRequest(typeIndex, id));
            }
            // Flush processor between index boundaries
            processor.flush();
        }
        try {
            processor.awaitClose(5, TimeUnit.MINUTES);
        } catch (InterruptedException e) {
            throw new IndexException("Interrupted bulk processing part of the commit", e);
        }
    }
    // refresh the index if there were only updates
    admin.refresh(mappingsToRefresh);
}
Also used : ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) IndexRequest(org.elasticsearch.action.index.IndexRequest) BulkProcessor(org.elasticsearch.action.bulk.BulkProcessor) ExecutionException(java.util.concurrent.ExecutionException) BulkItemResponse(org.elasticsearch.action.bulk.BulkItemResponse) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) EsClient(com.b2international.index.es.client.EsClient) DocumentMapping(com.b2international.index.mapping.DocumentMapping) BulkRequest(org.elasticsearch.action.bulk.BulkRequest) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) DeleteRequest(org.elasticsearch.action.delete.DeleteRequest)

Example 4 with EsClient

use of com.b2international.index.es.client.EsClient in project snow-owl by b2ihealthcare.

the class EsIndexClientFactory method createClient.

@Override
public IndexClient createClient(String name, ObjectMapper mapper, Mappings mappings, Map<String, Object> settings) {
    final boolean persistent = settings.containsKey(DATA_DIRECTORY);
    final Object dataSetting = settings.getOrDefault(DATA_DIRECTORY, DEFAULT_PATH);
    final Object configSetting = settings.getOrDefault(CONFIG_DIRECTORY, DEFAULT_PATH);
    final Path dataDirectory = dataSetting instanceof Path ? (Path) dataSetting : Paths.get((String) dataSetting);
    final Path configDirectory = configSetting instanceof Path ? (Path) configSetting : Paths.get((String) configSetting);
    // generic ES cluster settings
    final String clusterName = (String) settings.getOrDefault(CLUSTER_NAME, DEFAULT_CLUSTER_NAME);
    final Object connectTimeoutSetting = settings.getOrDefault(CONNECT_TIMEOUT, DEFAULT_CONNECT_TIMEOUT);
    final Object socketTimeoutSetting = settings.getOrDefault(SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT);
    final int connectTimeout = connectTimeoutSetting instanceof Integer ? (int) connectTimeoutSetting : Integer.parseInt((String) connectTimeoutSetting);
    final int socketTimeout = socketTimeoutSetting instanceof Integer ? (int) socketTimeoutSetting : Integer.parseInt((String) socketTimeoutSetting);
    final String username = (String) settings.getOrDefault(CLUSTER_USERNAME, "");
    final String password = (String) settings.getOrDefault(CLUSTER_PASSWORD, "");
    final EsClient client;
    if (settings.containsKey(CLUSTER_URL)) {
        final String clusterUrl = (String) settings.get(CLUSTER_URL);
        client = EsClient.create(new EsClientConfiguration(clusterName, clusterUrl, username, password, connectTimeout, socketTimeout));
    } else {
        // Start an embedded ES node only if a cluster URL is not set
        Node node = EsNode.getInstance(clusterName, configDirectory, dataDirectory, persistent);
        // check sysprop to force HTTP client when still using embedded mode
        if (System.getProperty("so.index.es.useHttp") != null) {
            client = EsClient.create(new EsClientConfiguration(clusterName, "http://127.0.0.1:9200", username, password, connectTimeout, socketTimeout));
        } else {
            // and use the local NodeClient to communicate via the embedded node
            client = new EsTcpClient(node.client());
        }
    }
    return new EsIndexClient(new EsIndexAdmin(client, mapper, name, mappings, settings), mapper);
}
Also used : Path(java.nio.file.Path) EsIndexAdmin(com.b2international.index.es.admin.EsIndexAdmin) EsTcpClient(com.b2international.index.es.client.tcp.EsTcpClient) Node(org.elasticsearch.node.Node) EsClient(com.b2international.index.es.client.EsClient)

Aggregations

EsClient (com.b2international.index.es.client.EsClient)4 DocumentMapping (com.b2international.index.mapping.DocumentMapping)3 BadRequestException (com.b2international.commons.exceptions.BadRequestException)2 FormattedRuntimeException (com.b2international.commons.exceptions.FormattedRuntimeException)2 Aggregation (com.b2international.index.aggregations.Aggregation)2 Bucket (com.b2international.index.aggregations.Bucket)2 EsIndexAdmin (com.b2international.index.es.admin.EsIndexAdmin)2 EsQueryBuilder (com.b2international.index.es.query.EsQueryBuilder)2 IOException (java.io.IOException)2 ElasticsearchStatusException (org.elasticsearch.ElasticsearchStatusException)2 SearchRequest (org.elasticsearch.action.search.SearchRequest)2 SearchResponse (org.elasticsearch.action.search.SearchResponse)2 QueryBuilder (org.elasticsearch.index.query.QueryBuilder)2 Aggregations (org.elasticsearch.search.aggregations.Aggregations)2 Nested (org.elasticsearch.search.aggregations.bucket.nested.Nested)2 ReverseNested (org.elasticsearch.search.aggregations.bucket.nested.ReverseNested)2 Terms (org.elasticsearch.search.aggregations.bucket.terms.Terms)2 TopHits (org.elasticsearch.search.aggregations.metrics.TopHits)2 SearchSourceBuilder (org.elasticsearch.search.builder.SearchSourceBuilder)2 CompareUtils (com.b2international.commons.CompareUtils)1