Use of com.b2international.index.es.query.EsQueryBuilder in project snow-owl by b2ihealthcare.
In class EsDocumentSearcher, the aggregate method:
@Override
public <T> Aggregation<T> aggregate(AggregationBuilder<T> aggregation) throws IOException {
    final String aggregationName = aggregation.getName();
    final EsClient client = admin.client();
    final DocumentMapping mapping = admin.mappings().getMapping(aggregation.getFrom());
    final EsQueryBuilder esQueryBuilder = new EsQueryBuilder(mapping, admin.settings(), admin.log());
    final QueryBuilder esQuery = esQueryBuilder.build(aggregation.getQuery());
    final SearchRequest req = new SearchRequest(admin.getTypeIndex(mapping));
    final SearchSourceBuilder reqSource = req.source()
            .query(esQuery)
            .size(0)
            .trackScores(false)
            .trackTotalHitsUpTo(Integer.MAX_VALUE);

    // field selection
    final boolean fetchSource = applySourceFiltering(aggregation.getFields(), mapping, reqSource);
    reqSource.aggregation(toEsAggregation(mapping, aggregation, fetchSource));

    SearchResponse response = null;
    try {
        response = client.search(req);
    } catch (Exception e) {
        admin.log().error("Couldn't execute aggregation", e);
        throw new IndexException("Couldn't execute aggregation: " + e.getMessage(), null);
    }

    ImmutableMap.Builder<Object, Bucket<T>> buckets = ImmutableMap.builder();
    Aggregations topLevelAggregations = response.getAggregations();
    Nested nested = topLevelAggregations.get(nestedAggName(aggregation));
    Terms aggregationResult;
    if (nested != null) {
        aggregationResult = nested.getAggregations().get(aggregationName);
    } else {
        aggregationResult = topLevelAggregations.get(aggregationName);
    }

    for (org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket bucket : aggregationResult.getBuckets()) {
        final TopHits topHits;
        if (nested != null) {
            final ReverseNested reverseNested = bucket.getAggregations().get(reverseNestedAggName(aggregation));
            topHits = reverseNested.getAggregations().get(topHitsAggName(aggregation));
        } else {
            topHits = bucket.getAggregations().get(topHitsAggName(aggregation));
        }
        Hits<T> hits;
        if (topHits != null) {
            hits = toHits(aggregation.getSelect(), List.of(aggregation.getFrom()), aggregation.getFields(), fetchSource,
                    aggregation.getBucketHitsLimit(), (int) bucket.getDocCount(), null, topHits.getHits());
        } else {
            hits = new Hits<>(Collections.emptyList(), null, aggregation.getBucketHitsLimit(), (int) bucket.getDocCount());
        }
        buckets.put(bucket.getKey(), new Bucket<>(bucket.getKey(), hits));
    }

    return new Aggregation<>(aggregationName, buckets.build());
}
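For context, the response shape that this method unwraps (an optional nested wrapper, a terms aggregation, and per-bucket reverse_nested/top_hits sub-aggregations) can be sketched with the stock Elasticsearch AggregationBuilders API. This is an illustrative sketch only: the aggregation names, the nested path and the field names are placeholders, not the values produced by Snow Owl's nestedAggName/reverseNestedAggName/topHitsAggName helpers or by toEsAggregation.

import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.nested.NestedAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;

public class AggregationShapeSketch {

    // Nested variant: the whole terms aggregation sits under a nested aggregation,
    // and each bucket jumps back to the root documents before collecting top hits.
    // All names and paths below are illustrative placeholders.
    public static NestedAggregationBuilder nestedShape() {
        return AggregationBuilders
                .nested("myAggregation-nested", "items")                       // enter the nested objects
                .subAggregation(AggregationBuilders
                        .terms("myAggregation")                                // one bucket per distinct value
                        .field("items.groupField")
                        .size(100)
                        .subAggregation(AggregationBuilders
                                .reverseNested("myAggregation-reverse-nested") // back to the root documents
                                .subAggregation(AggregationBuilders
                                        .topHits("myAggregation-top-hits")     // sample hits per bucket
                                        .size(10)
                                        .fetchSource(true))));
    }

    // Flat variant: when the grouped field is not nested, top_hits hangs
    // directly off the terms bucket, matching the else branch above.
    public static TermsAggregationBuilder flatShape() {
        return AggregationBuilders
                .terms("myAggregation")
                .field("groupField")
                .size(100)
                .subAggregation(AggregationBuilders
                        .topHits("myAggregation-top-hits")
                        .size(10)
                        .fetchSource(true));
    }
}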
Use of com.b2international.index.es.query.EsQueryBuilder in project snow-owl by b2ihealthcare.
In class EsDocumentSearcher, the search method:
@Override
public <T> Hits<T> search(Query<T> query) throws IOException {
    Stopwatch w = Stopwatch.createStarted();
    admin.log().trace("Executing query '{}'", query);
    final EsClient client = admin.client();
    final List<DocumentMapping> mappings = admin.mappings().getDocumentMapping(query);
    final DocumentMapping primaryMapping = Iterables.getFirst(mappings, null);

    // Restrict variables to the theoretical maximum
    final int limit = query.getLimit();
    final int toRead = Ints.min(limit, resultWindow);

    // TODO support multiple document mappings during query building
    final EsQueryBuilder esQueryBuilder = new EsQueryBuilder(primaryMapping, admin.settings(), admin.log());
    final QueryBuilder esQuery = esQueryBuilder.build(query.getWhere());

    final SearchRequest req = new SearchRequest(admin.getTypeIndexes(mappings).toArray(length -> new String[length]));
    // configure caching
    req.requestCache(query.isCached());

    final SearchSourceBuilder reqSource = req.source()
            .size(toRead)
            .query(esQuery)
            .trackScores(esQueryBuilder.needsScoring())
            .trackTotalHitsUpTo(Integer.MAX_VALUE);

    // field selection
    final boolean fetchSource = applySourceFiltering(query.getFields(), primaryMapping, reqSource);

    // ES internals require loading the _id field when we require the _source
    if (fetchSource) {
        reqSource.storedFields(STORED_FIELDS_ID_ONLY);
    } else {
        reqSource.storedFields(STORED_FIELDS_NONE);
    }

    // paging config
    final boolean isLocalStreaming = limit > resultWindow;
    final boolean isLiveStreaming = !Strings.isNullOrEmpty(query.getSearchAfter());
    if (isLocalStreaming) {
        checkArgument(!isLiveStreaming, "Cannot use searchAfter when requesting more items (%s) than the configured result window (%s).", limit, resultWindow);
    } else if (isLiveStreaming) {
        reqSource.searchAfter(fromSearchAfterToken(query.getSearchAfter()));
    }

    // sorting config with a default sort field based on scroll config
    addSort(primaryMapping, reqSource, query.getSortBy());

    // disable explain explicitly, just in case
    reqSource.explain(false);
    // disable version field explicitly, just in case
    reqSource.version(false);

    // perform search
    SearchResponse response = null;
    try {
        response = client.search(req);
    } catch (Exception e) {
        if (e instanceof ElasticsearchStatusException && ((ElasticsearchStatusException) e).status() == RestStatus.BAD_REQUEST) {
            throw new IllegalArgumentException(e.getMessage(), e);
        }
        admin.log().error("Couldn't execute query", e);
        throw new IndexException("Couldn't execute query: " + e.getMessage(), null);
    }

    SearchHits responseHits = response.getHits();

    final TotalHits total = responseHits.getTotalHits();
    checkState(total.relation == Relation.EQUAL_TO, "Searches should always track total hits accurately");
    final int totalHitCount = (int) total.value;

    final SearchHit[] firstHits = responseHits.getHits();
    final int firstCount = firstHits.length;
    final int remainingCount = Math.min(limit, totalHitCount) - firstCount;

    // Add the first set of results
    final ImmutableList.Builder<SearchHit> allHits = ImmutableList.builder();
    allHits.addAll(responseHits);

    // If the client requested all data at once and there are more hits to retrieve, collect them all as part of the request
    if (isLocalStreaming && remainingCount > 0) {
        admin.log().warn("Returning all matches (totalHits: '{}') larger than the currently configured result_window ('{}') might not be the most "
                + "efficient way of getting the data. Consider using the index pagination API (searchAfter) instead.", totalHitCount, resultWindow);
        while (true) {
            // Extract searchAfter values for the next set of results
            final SearchHit lastHit = Iterables.getLast(responseHits, null);
            if (lastHit == null) {
                break;
            }
            reqSource.searchAfter(lastHit.getSortValues());
            // Request more search results, adding them to the list builder
            response = client.search(req);
            responseHits = response.getHits();
            allHits.addAll(responseHits);
        }
    }

    final Class<T> select = query.getSelection().getSelect();
    final List<Class<?>> from = query.getSelection().getFrom();

    final Hits<T> hits = toHits(select, from, query.getFields(), fetchSource, limit, totalHitCount, query.getSortBy(), allHits.build());
    admin.log().trace("Executed query '{}' in '{}'", query, w);
    return hits;
}
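The local-streaming branch above is the standard Elasticsearch search_after deep-paging pattern: repeat the same request, feeding the sort values of the last hit of the previous page into the next one. A minimal standalone sketch of that pattern with the plain high-level REST client follows; the index name, sort field and page size are placeholder assumptions, and Snow Owl itself goes through its own EsClient wrapper and encodes the sort values into an opaque token via fromSearchAfterToken.

import java.io.IOException;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortOrder;

public class SearchAfterSketch {

    // Illustrative only: page through every match of a hypothetical "documents"
    // index in pages of 1000, sorted by a keyword field "id" so that the sort
    // order is total and deterministic across pages.
    static void pageThroughAll(RestHighLevelClient client) throws IOException {
        SearchSourceBuilder source = new SearchSourceBuilder()
                .size(1000)
                .sort("id", SortOrder.ASC)   // placeholder sort field; must be deterministic
                .trackTotalHits(true);
        SearchRequest request = new SearchRequest("documents").source(source);

        while (true) {
            SearchResponse response = client.search(request, RequestOptions.DEFAULT);
            SearchHit[] hits = response.getHits().getHits();
            if (hits.length == 0) {
                break;                       // no more pages
            }
            // process(hits) ...
            // reuse the last hit's sort values as the cursor for the next page
            source.searchAfter(hits[hits.length - 1].getSortValues());
        }
    }
}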
Use of com.b2international.index.es.query.EsQueryBuilder in project snow-owl by b2ihealthcare.
In class EsIndexAdmin, the bulkIndexByScroll method:
private boolean bulkIndexByScroll(final EsClient client, final DocumentMapping mapping, final Expression filter,
        final String command, final org.elasticsearch.script.Script script, final String operationDescription) {

    final QueryBuilder query = new EsQueryBuilder(mapping, settings, log).build(filter);

    boolean needsRefresh = false;
    long versionConflicts = 0;
    int attempts = DEFAULT_MAX_NUMBER_OF_VERSION_CONFLICT_RETRIES;

    do {
        try {
            final BulkByScrollResponse response;
            final int batchSize = Integer.parseInt((String) settings.get(IndexClientFactory.RESULT_WINDOW_KEY));
            if ("update".equals(command)) {
                response = client.updateByQuery(getTypeIndex(mapping), batchSize, script, query);
            } else if ("delete".equals(command)) {
                response = client.deleteByQuery(getTypeIndex(mapping), batchSize, query);
            } else {
                throw new UnsupportedOperationException("Not implemented command: " + command);
            }

            final long updateCount = response.getUpdated();
            final long deleteCount = response.getDeleted();
            final long noops = response.getNoops();
            final List<Failure> failures = response.getBulkFailures();

            versionConflicts = response.getVersionConflicts();

            boolean updated = updateCount > 0;
            if (updated) {
                log().info("Updated {} {} documents with bulk {}", updateCount, mapping.typeAsString(), operationDescription);
                needsRefresh = true;
            }

            boolean deleted = deleteCount > 0;
            if (deleted) {
                log().info("Deleted {} {} documents with bulk {}", deleteCount, mapping.typeAsString(), operationDescription);
                needsRefresh = true;
            }

            if (!updated && !deleted) {
                log().warn("Bulk {} could not be applied to {} documents, no-ops ({}), conflicts ({})", operationDescription, mapping.typeAsString(), noops, versionConflicts);
            }

            if (failures.size() > 0) {
                boolean versionConflictsOnly = true;
                for (Failure failure : failures) {
                    final String failureMessage = failure.getCause().getMessage();
                    final int failureStatus = failure.getStatus().getStatus();
                    if (failureStatus != RestStatus.CONFLICT.getStatus()) {
                        versionConflictsOnly = false;
                        log().error("Index failure during bulk update: {}", failureMessage);
                    } else {
                        log().warn("Version conflict reason: {}", failureMessage);
                    }
                }
                if (!versionConflictsOnly) {
                    throw new IllegalStateException("There were indexing failures during bulk updates. See logs for all failures.");
                }
            }

            if (attempts <= 0) {
                throw new IndexException("There were indexing failures during bulk updates. See logs for all failures.", null);
            }

            if (versionConflicts > 0) {
                --attempts;
                try {
                    Thread.sleep(100 + random.nextInt(900));
                    refresh(Collections.singleton(mapping));
                } catch (InterruptedException e) {
                    throw new IndexException("Interrupted", e);
                }
            }
        } catch (IOException e) {
            throw new IndexException("Could not execute bulk update.", e);
        }
    } while (versionConflicts > 0);

    return needsRefresh;
}
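The EsClient.updateByQuery and deleteByQuery calls above return a BulkByScrollResponse, the same response type produced by Elasticsearch's update-by-query and delete-by-query APIs, and the surrounding do-while loop retries while getVersionConflicts() is non-zero. A minimal sketch of the same conflict-tolerant update with the stock high-level REST client is shown below; the index name, query, and script are placeholder assumptions, not Snow Owl's actual wrapper or scripts.

import java.io.IOException;
import java.util.Collections;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.reindex.BulkByScrollResponse;
import org.elasticsearch.index.reindex.UpdateByQueryRequest;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;

public class UpdateByQuerySketch {

    // Illustrative only: bulk-update matching documents and report version
    // conflicts instead of aborting, so the caller can retry while the
    // returned conflict count is greater than zero.
    static long deactivateDrafts(RestHighLevelClient client) throws IOException {
        UpdateByQueryRequest request = new UpdateByQueryRequest("documents");        // placeholder index
        request.setQuery(QueryBuilders.termQuery("status", "draft"));                // placeholder filter
        request.setBatchSize(1000);                                                  // scroll batch size
        request.setConflicts("proceed");                                             // don't abort on version conflicts
        request.setScript(new Script(ScriptType.INLINE, "painless",
                "ctx._source.active = false", Collections.emptyMap()));              // placeholder script

        BulkByScrollResponse response = client.updateByQuery(request, RequestOptions.DEFAULT);
        // The caller decides whether to retry based on the conflict count.
        return response.getVersionConflicts();
    }
}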