use of com.b2international.index.es.client.EsClient in project snow-owl by b2ihealthcare.
the class EsDocumentSearcher method aggregate.
/**
 * Executes the given aggregation as an Elasticsearch search request with {@code size(0)} and converts the
 * returned terms buckets into an {@link Aggregation}.
 * <p>
 * When the response contains a nested aggregation under {@code nestedAggName(aggregation)}, the terms
 * aggregation is read from inside it and each bucket's top hits are read from the bucket's reverse nested
 * sub-aggregation; otherwise both are read directly from the top level / the bucket.
 *
 * @param aggregation the aggregation to execute (name, source type, query, fields and bucket hit limit are read from it)
 * @return the aggregation result keyed by bucket key
 * @throws IndexException if the search request fails; the original exception is attached as the cause
 * @throws IOException as declared by the overridden method
 */
@Override
public <T> Aggregation<T> aggregate(AggregationBuilder<T> aggregation) throws IOException {
    final String aggregationName = aggregation.getName();
    final EsClient client = admin.client();
    final DocumentMapping mapping = admin.mappings().getMapping(aggregation.getFrom());
    final EsQueryBuilder esQueryBuilder = new EsQueryBuilder(mapping, admin.settings(), admin.log());
    final QueryBuilder esQuery = esQueryBuilder.build(aggregation.getQuery());
    final SearchRequest req = new SearchRequest(admin.getTypeIndex(mapping));
    // size(0): only the aggregation result is needed, not the top-level hits
    final SearchSourceBuilder reqSource = req.source().query(esQuery).size(0).trackScores(false).trackTotalHitsUpTo(Integer.MAX_VALUE);
    // field selection
    final boolean fetchSource = applySourceFiltering(aggregation.getFields(), mapping, reqSource);
    reqSource.aggregation(toEsAggregation(mapping, aggregation, fetchSource));

    final SearchResponse response;
    try {
        response = client.search(req);
    } catch (Exception e) {
        admin.log().error("Couldn't execute aggregation", e);
        // keep the original exception as the cause so callers can inspect the real failure
        throw new IndexException("Couldn't execute aggregation: " + e.getMessage(), e);
    }

    final ImmutableMap.Builder<Object, Bucket<T>> buckets = ImmutableMap.builder();
    final Aggregations topLevelAggregations = response.getAggregations();
    final Nested nested = topLevelAggregations.get(nestedAggName(aggregation));
    final Terms aggregationResult;
    if (nested != null) {
        aggregationResult = nested.getAggregations().get(aggregationName);
    } else {
        aggregationResult = topLevelAggregations.get(aggregationName);
    }

    for (org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket bucket : aggregationResult.getBuckets()) {
        final TopHits topHits;
        if (nested != null) {
            final ReverseNested reverseNested = bucket.getAggregations().get(reverseNestedAggName(aggregation));
            topHits = reverseNested.getAggregations().get(topHitsAggName(aggregation));
        } else {
            topHits = bucket.getAggregations().get(topHitsAggName(aggregation));
        }
        // the doc count is a long; saturate instead of silently wrapping around when narrowing to int
        final int bucketDocCount = (int) Math.min(bucket.getDocCount(), (long) Integer.MAX_VALUE);
        final Hits<T> hits;
        if (topHits != null) {
            hits = toHits(aggregation.getSelect(), List.of(aggregation.getFrom()), aggregation.getFields(), fetchSource, aggregation.getBucketHitsLimit(), bucketDocCount, null, topHits.getHits());
        } else {
            // no top hits sub-aggregation in the bucket: report the count with an empty hit list
            hits = new Hits<>(Collections.emptyList(), null, aggregation.getBucketHitsLimit(), bucketDocCount);
        }
        buckets.put(bucket.getKey(), new Bucket<>(bucket.getKey(), hits));
    }
    return new Aggregation<>(aggregationName, buckets.build());
}
use of com.b2international.index.es.client.EsClient in project snow-owl by b2ihealthcare.
the class EsDocumentSearcher method search.
/**
 * Executes the given query against the index (or indexes) resolved from its document mappings and converts
 * the matching search hits into a {@link Hits} instance.
 * <p>
 * At most {@code resultWindow} hits are requested per round trip. When the query limit exceeds the result
 * window ("local streaming"), follow-up requests are issued with {@code searchAfter} set to the sort values
 * of the last received hit until {@code min(limit, totalHits)} hits have been collected or a page comes
 * back empty. A caller-supplied {@code searchAfter} token cannot be combined with such a limit.
 *
 * @param query the query to execute
 * @return the collected hits along with the limit and the total hit count
 * @throws IllegalArgumentException if Elasticsearch rejects the first request with a BAD_REQUEST status, or if
 *         {@code searchAfter} is used together with a limit larger than the result window
 * @throws IndexException if the first search request fails for any other reason; the original exception is attached as the cause
 * @throws IOException as declared by the overridden method
 */
@Override
public <T> Hits<T> search(Query<T> query) throws IOException {
    Stopwatch w = Stopwatch.createStarted();
    admin.log().trace("Executing query '{}'", query);
    final EsClient client = admin.client();
    final List<DocumentMapping> mappings = admin.mappings().getDocumentMapping(query);
    final DocumentMapping primaryMapping = Iterables.getFirst(mappings, null);
    // Restrict variables to the theoretical maximum
    final int limit = query.getLimit();
    final int toRead = Ints.min(limit, resultWindow);
    // TODO support multiple document mappings during query building
    final EsQueryBuilder esQueryBuilder = new EsQueryBuilder(primaryMapping, admin.settings(), admin.log());
    final QueryBuilder esQuery = esQueryBuilder.build(query.getWhere());
    final SearchRequest req = new SearchRequest(admin.getTypeIndexes(mappings).toArray(length -> new String[length]));
    // configure caching
    req.requestCache(query.isCached());
    final SearchSourceBuilder reqSource = req.source().size(toRead).query(esQuery).trackScores(esQueryBuilder.needsScoring()).trackTotalHitsUpTo(Integer.MAX_VALUE);
    // field selection
    final boolean fetchSource = applySourceFiltering(query.getFields(), primaryMapping, reqSource);
    // ES internals require loading the _id field when we require the _source
    if (fetchSource) {
        reqSource.storedFields(STORED_FIELDS_ID_ONLY);
    } else {
        reqSource.storedFields(STORED_FIELDS_NONE);
    }
    // paging config
    final boolean isLocalStreaming = limit > resultWindow;
    final boolean isLiveStreaming = !Strings.isNullOrEmpty(query.getSearchAfter());
    if (isLocalStreaming) {
        checkArgument(!isLiveStreaming, "Cannot use searchAfter when requesting more items (%s) than the configured result window (%s).", limit, resultWindow);
    } else if (isLiveStreaming) {
        reqSource.searchAfter(fromSearchAfterToken(query.getSearchAfter()));
    }
    // sorting config with a default sort field based on scroll config
    addSort(primaryMapping, reqSource, query.getSortBy());
    // disable explain explicitly, just in case
    reqSource.explain(false);
    // disable version field explicitly, just in case
    reqSource.version(false);
    // perform search
    SearchResponse response = null;
    try {
        response = client.search(req);
    } catch (Exception e) {
        if (e instanceof ElasticsearchStatusException && ((ElasticsearchStatusException) e).status() == RestStatus.BAD_REQUEST) {
            throw new IllegalArgumentException(e.getMessage(), e);
        }
        admin.log().error("Couldn't execute query", e);
        // keep the original exception as the cause, like the BAD_REQUEST branch above does
        throw new IndexException("Couldn't execute query: " + e.getMessage(), e);
    }
    SearchHits responseHits = response.getHits();
    final TotalHits total = responseHits.getTotalHits();
    checkState(total.relation == Relation.EQUAL_TO, "Searches should always track total hits accurately");
    final int totalHitCount = (int) total.value;
    final SearchHit[] firstHits = responseHits.getHits();
    final int firstCount = firstHits.length;
    final int remainingCount = Math.min(limit, totalHitCount) - firstCount;
    // Add the first set of results
    final ImmutableList.Builder<SearchHit> allHits = ImmutableList.builder();
    allHits.addAll(responseHits);
    // If the client requested all data at once and there are more hits to retrieve, collect them all as part of the request
    if (isLocalStreaming && remainingCount > 0) {
        admin.log().warn("Returning all matches (totalHits: '{}') larger than the currently configured result_window ('{}') might not be the most " + "efficient way of getting the data. Consider using the index pagination API (searchAfter) instead.", totalHitCount, resultWindow);
        // Number of hits still to be fetched; bounds the loop so that no more than 'limit' hits are ever collected
        int remaining = remainingCount;
        while (remaining > 0) {
            // Extract searchAfter values for the next set of results
            final SearchHit lastHit = Iterables.getLast(responseHits, null);
            if (lastHit == null) {
                // the previous page was empty, there is nothing left to read
                break;
            }
            reqSource.searchAfter(lastHit.getSortValues());
            // Never request more than what is still needed, the last page is usually smaller than the result window
            reqSource.size(Math.min(remaining, resultWindow));
            // Request more search results, adding them to the list builder
            response = client.search(req);
            responseHits = response.getHits();
            remaining -= responseHits.getHits().length;
            allHits.addAll(responseHits);
        }
    }
    final Class<T> select = query.getSelection().getSelect();
    final List<Class<?>> from = query.getSelection().getFrom();
    final Hits<T> hits = toHits(select, from, query.getFields(), fetchSource, limit, totalHitCount, query.getSortBy(), allHits.build());
    admin.log().trace("Executed query '{}' in '{}'", query, w);
    return hits;
}
use of com.b2international.index.es.client.EsClient in project snow-owl by b2ihealthcare.
the class EsDocumentWriter method commit.
/**
 * Sends all pending changes of this writer to Elasticsearch.
 * <p>
 * Steps, in order:
 * <ol>
 * <li>bulk update and bulk delete operations are submitted to an executor (a fixed pool of at most 4 threads
 * when there is more than one operation of either kind, a direct executor otherwise) and awaited;</li>
 * <li>pending index and delete operations are sent through a {@link BulkProcessor}, flushed after each type;</li>
 * <li>every mapping touched by the steps above is refreshed.</li>
 * </ol>
 * Does nothing when {@code isEmpty()} returns {@code true}.
 *
 * @throws IndexException if the bulk updates fail or are interrupted, or if the bulk processor is interrupted or
 *         does not finish within 5 minutes
 * @throws IOException as declared by the overridden method (e.g. document serialization failures)
 */
@Override
public void commit() throws IOException {
    if (isEmpty()) {
        return;
    }
    // written from executor threads below, hence the synchronized wrapper
    final Set<DocumentMapping> mappingsToRefresh = Collections.synchronizedSet(newHashSet());
    final EsClient client = admin.client();
    // apply bulk updates first
    final ListeningExecutorService executor;
    if (bulkUpdateOperations.size() > 1 || bulkDeleteOperations.size() > 1) {
        final int threads = Math.min(4, Math.max(bulkUpdateOperations.size(), bulkDeleteOperations.size()));
        executor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(threads));
    } else {
        executor = MoreExecutors.newDirectExecutorService();
    }
    final List<ListenableFuture<?>> updateFutures = newArrayList();
    for (BulkUpdate<?> update : bulkUpdateOperations) {
        updateFutures.add(executor.submit(() -> {
            if (admin.bulkUpdate(update)) {
                mappingsToRefresh.add(admin.mappings().getMapping(update.getType()));
            }
        }));
    }
    for (BulkDelete<?> delete : bulkDeleteOperations) {
        updateFutures.add(executor.submit(() -> {
            if (admin.bulkDelete(delete)) {
                mappingsToRefresh.add(admin.mappings().getMapping(delete.getType()));
            }
        }));
    }
    try {
        // no more tasks will be submitted; already submitted ones still run to completion
        executor.shutdown();
        Futures.allAsList(updateFutures).get();
        executor.awaitTermination(10, TimeUnit.SECONDS);
    } catch (InterruptedException | ExecutionException e) {
        if (e instanceof InterruptedException) {
            // restore the interrupt flag so code further up the stack can observe the interruption
            Thread.currentThread().interrupt();
        }
        admin.log().error("Couldn't execute bulk updates", e);
        throw new IndexException("Couldn't execute bulk updates", e);
    }
    // then bulk indexes/deletes
    if (!indexOperations.isEmpty() || !deleteOperations.isEmpty()) {
        final BulkProcessor processor = client.bulk(new BulkProcessor.Listener() {
            @Override
            public void beforeBulk(long executionId, BulkRequest request) {
                admin.log().debug("Sending bulk request {}", request.numberOfActions());
            }

            // NOTE(review): failures reported here are only logged, they do not fail the commit - confirm this is intended
            @Override
            public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
                admin.log().error("Failed bulk request", failure);
            }

            @Override
            public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
                admin.log().debug("Successfully processed bulk request ({}) in {}.", request.numberOfActions(), response.getTook());
                if (response.hasFailures()) {
                    for (BulkItemResponse itemResponse : response.getItems()) {
                        checkState(!itemResponse.isFailed(), "Failed to commit bulk request in index '%s', %s", admin.name(), itemResponse.getFailureMessage());
                    }
                }
            }
        }).setConcurrentRequests(getConcurrencyLevel()).setBulkActions((int) admin.settings().get(IndexClientFactory.BULK_ACTIONS_SIZE)).setBulkSize(new ByteSizeValue((int) admin.settings().get(IndexClientFactory.BULK_ACTIONS_SIZE_IN_MB), ByteSizeUnit.MB)).build();
        // iterate over a copy of the row keys, the rows are consumed while iterating
        for (Class<?> type : ImmutableSet.copyOf(indexOperations.rowKeySet())) {
            final Map<String, Object> indexOperationsForType = indexOperations.row(type);
            final DocumentMapping mapping = admin.mappings().getMapping(type);
            final String typeIndex = admin.getTypeIndex(mapping);
            mappingsToRefresh.add(mapping);
            for (Entry<String, Object> entry : Iterables.consumingIterable(indexOperationsForType.entrySet())) {
                final String id = entry.getKey();
                // skip indexing documents that are also scheduled for deletion
                if (!deleteOperations.containsValue(id)) {
                    final Object obj = entry.getValue();
                    final byte[] _source = mapper.writeValueAsBytes(obj);
                    IndexRequest indexRequest = new IndexRequest().index(typeIndex).opType(OpType.INDEX).source(_source, XContentType.JSON);
                    // XXX revisions has their special local ID, but that's not needed when sending them to ES, ES will autogenerate a non-conflicting ID for them
                    if (!mapping.isAutoGeneratedId()) {
                        indexRequest.id(id);
                    }
                    processor.add(indexRequest);
                }
            }
            for (String id : deleteOperations.removeAll(type)) {
                processor.add(new DeleteRequest(typeIndex, id));
            }
            // Flush processor between index boundaries
            processor.flush();
        }
        // Remaining delete operations can be executed on their own
        for (Class<?> type : ImmutableSet.copyOf(deleteOperations.keySet())) {
            final DocumentMapping mapping = admin.mappings().getMapping(type);
            final String typeIndex = admin.getTypeIndex(mapping);
            mappingsToRefresh.add(mapping);
            for (String id : deleteOperations.removeAll(type)) {
                processor.add(new DeleteRequest(typeIndex, id));
            }
            // Flush processor between index boundaries
            processor.flush();
        }
        final boolean bulkCompleted;
        try {
            bulkCompleted = processor.awaitClose(5, TimeUnit.MINUTES);
        } catch (InterruptedException e) {
            // restore the interrupt flag so code further up the stack can observe the interruption
            Thread.currentThread().interrupt();
            throw new IndexException("Interrupted bulk processing part of the commit", e);
        }
        if (!bulkCompleted) {
            // awaitClose returned false: the wait elapsed before the processor finished, so the commit cannot be reported as done
            admin.log().error("Bulk processing part of the commit did not complete within 5 minutes");
            throw new IndexException("Bulk processing part of the commit did not complete within 5 minutes", null);
        }
    }
    // refresh the index if there were only updates
    admin.refresh(mappingsToRefresh);
}
use of com.b2international.index.es.client.EsClient in project snow-owl by b2ihealthcare.
the class EsIndexClientFactory method createClient.
/**
 * Creates an {@link IndexClient} for the index with the given name, backed either by a remote Elasticsearch
 * cluster or by an embedded node, depending on the supplied settings.
 * <p>
 * When {@code CLUSTER_URL} is present in the settings, an HTTP client is created for that URL. Otherwise an
 * embedded node is obtained via {@code EsNode.getInstance(...)} and the node's own client is used, unless the
 * {@code so.index.es.useHttp} system property is set, in which case an HTTP client pointing to
 * {@code http://127.0.0.1:9200} is created instead.
 *
 * @param name the name of the index
 * @param mapper the object mapper handed over to the index admin and the index client
 * @param mappings the document mappings handed over to the index admin
 * @param settings the index settings; directory and timeout values may be given either in their typed form
 *        ({@link Path}, {@link Integer}) or as {@link String}s
 * @return the index client
 */
@Override
public IndexClient createClient(String name, ObjectMapper mapper, Mappings mappings, Map<String, Object> settings) {
    // the flag passed to the embedded node is derived from the presence of an explicit data directory setting
    final boolean persistent = settings.containsKey(DATA_DIRECTORY);

    final Object rawDataDir = settings.getOrDefault(DATA_DIRECTORY, DEFAULT_PATH);
    final Object rawConfigDir = settings.getOrDefault(CONFIG_DIRECTORY, DEFAULT_PATH);
    final Path dataDirectory;
    if (rawDataDir instanceof Path) {
        dataDirectory = (Path) rawDataDir;
    } else {
        dataDirectory = Paths.get((String) rawDataDir);
    }
    final Path configDirectory;
    if (rawConfigDir instanceof Path) {
        configDirectory = (Path) rawConfigDir;
    } else {
        configDirectory = Paths.get((String) rawConfigDir);
    }

    // generic ES cluster settings
    final String clusterName = (String) settings.getOrDefault(CLUSTER_NAME, DEFAULT_CLUSTER_NAME);

    final Object rawConnectTimeout = settings.getOrDefault(CONNECT_TIMEOUT, DEFAULT_CONNECT_TIMEOUT);
    final Object rawSocketTimeout = settings.getOrDefault(SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT);
    final int connectTimeout;
    if (rawConnectTimeout instanceof Integer) {
        connectTimeout = (Integer) rawConnectTimeout;
    } else {
        connectTimeout = Integer.parseInt((String) rawConnectTimeout);
    }
    final int socketTimeout;
    if (rawSocketTimeout instanceof Integer) {
        socketTimeout = (Integer) rawSocketTimeout;
    } else {
        socketTimeout = Integer.parseInt((String) rawSocketTimeout);
    }

    final String username = (String) settings.getOrDefault(CLUSTER_USERNAME, "");
    final String password = (String) settings.getOrDefault(CLUSTER_PASSWORD, "");

    final EsClient client;
    if (!settings.containsKey(CLUSTER_URL)) {
        // no cluster URL configured: obtain the embedded ES node first
        final Node embeddedNode = EsNode.getInstance(clusterName, configDirectory, dataDirectory, persistent);
        if (System.getProperty("so.index.es.useHttp") == null) {
            // default embedded mode: talk to the node through its own client
            client = new EsTcpClient(embeddedNode.client());
        } else {
            // the system property forces the HTTP client even though the node is embedded
            final EsClientConfiguration localHttpConfig = new EsClientConfiguration(clusterName, "http://127.0.0.1:9200", username, password, connectTimeout, socketTimeout);
            client = EsClient.create(localHttpConfig);
        }
    } else {
        final String clusterUrl = (String) settings.get(CLUSTER_URL);
        final EsClientConfiguration remoteConfig = new EsClientConfiguration(clusterName, clusterUrl, username, password, connectTimeout, socketTimeout);
        client = EsClient.create(remoteConfig);
    }

    final EsIndexAdmin indexAdmin = new EsIndexAdmin(client, mapper, name, mappings, settings);
    return new EsIndexClient(indexAdmin, mapper);
}
Aggregations