Example usage of com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.CompositeFieldValue in the nrtsearch project by Yelp, taken from the fetchFields method of the SearchHandler class.
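For orientation before the method itself: CompositeFieldValue is the protobuf container that maps each retrieved field on a hit to one or more FieldValue entries. A minimal sketch of reading these values back out of a SearchResponse, assuming the standard nrtsearch proto shape ("title" is a placeholder field name):

import com.yelp.nrtsearch.server.grpc.SearchResponse;
import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit;
import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.CompositeFieldValue;

// Print all text values of one retrieved field for each hit. Fields are
// keyed by the names requested in retrieveFields.
static void printFieldValues(SearchResponse response) {
  for (Hit hit : response.getHitsList()) {
    CompositeFieldValue composite =
        hit.getFieldsOrDefault("title", CompositeFieldValue.getDefaultInstance());
    composite.getFieldValueList().forEach(v -> System.out.println(v.getTextValue()));
  }
}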
/**
 * Fetch/compute field values for the top hits. This operation may be done in parallel, based on
 * the setting for the fetch thread pool. In addition to filling hit fields, any query {@link
 * com.yelp.nrtsearch.server.luceneserver.search.FetchTasks.FetchTask}s are executed.
 *
 * @param searchContext search parameters
 * @throws IOException on error reading index data
 * @throws ExecutionException on error when performing parallel fetch
 * @throws InterruptedException if parallel fetch is interrupted
 */
private void fetchFields(SearchContext searchContext)
    throws IOException, ExecutionException, InterruptedException {
  if (searchContext.getResponseBuilder().getHitsBuilderList().isEmpty()) {
    return;
  }
  // Sort hits by lucene doc id, so segments (leaves) are visited in order
  List<Hit.Builder> hitBuilders =
      new ArrayList<>(searchContext.getResponseBuilder().getHitsBuilderList());
  hitBuilders.sort(Comparator.comparing(Hit.Builder::getLuceneDocId));
  IndexState indexState = searchContext.getIndexState();
  int fetchThreadPoolSize = indexState.getThreadPoolConfiguration().getMaxFetchThreads();
  int minParallelFetchNumFields =
      indexState.getThreadPoolConfiguration().getMinParallelFetchNumFields();
  int minParallelFetchNumHits =
      indexState.getThreadPoolConfiguration().getMinParallelFetchNumHits();
  boolean parallelFetchByField = indexState.getThreadPoolConfiguration().getParallelFetchByField();
  if (parallelFetchByField
      && fetchThreadPoolSize > 1
      && searchContext.getRetrieveFields().keySet().size() > minParallelFetchNumFields
      && hitBuilders.size() > minParallelFetchNumHits) {
    // Fetch fields in parallel, chunked by field
    List<LeafReaderContext> leaves =
        searchContext.getSearcherAndTaxonomy().searcher.getIndexReader().leaves();
    List<LeafReaderContext> hitIdToLeaves = new ArrayList<>();
    for (int hitIndex = 0; hitIndex < hitBuilders.size(); ++hitIndex) {
      var hitResponse = hitBuilders.get(hitIndex);
      // ReaderUtil.subIndex finds the segment whose doc id range contains
      // this hit's global lucene doc id
      LeafReaderContext leaf =
          leaves.get(ReaderUtil.subIndex(hitResponse.getLuceneDocId(), leaves));
      hitIdToLeaves.add(hitIndex, leaf);
    }
    List<String> fields = new ArrayList<>(searchContext.getRetrieveFields().keySet());
    // parallelism is min of fetchThreadPoolSize and
    // fields.size() / minParallelFetchNumFields, rounded up
    int parallelism =
        Math.min(
            fetchThreadPoolSize,
            (fields.size() + minParallelFetchNumFields - 1) / minParallelFetchNumFields);
    List<List<String>> fieldsChunks =
        Lists.partition(fields, (fields.size() + parallelism - 1) / parallelism);
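    // Worked example with hypothetical numbers: given fetchThreadPoolSize = 8,
    // minParallelFetchNumFields = 5 and 12 retrieve fields, parallelism =
    // min(8, ceil(12 / 5)) = 3, so the fields are partitioned into chunks of
    // ceil(12 / 3) = 4 and three FillFieldsTasks are submitted below.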
    List<Future<List<Map<String, CompositeFieldValue>>>> futures = new ArrayList<>();
    // Stored fields are not widely used for nrtsearch (they are not
    // recommended due to memory usage)
    for (List<String> fieldsChunk : fieldsChunks) {
      futures.add(
          indexState
              .getFetchThreadPoolExecutor()
              .submit(
                  new FillFieldsTask(
                      indexState,
                      searchContext.getSearcherAndTaxonomy().searcher,
                      hitIdToLeaves,
                      hitBuilders,
                      fieldsChunk,
                      searchContext)));
    }
    // Merge the per-chunk results; each future yields one field map per hit,
    // in the same hit order
    for (Future<List<Map<String, CompositeFieldValue>>> future : futures) {
      List<Map<String, CompositeFieldValue>> values = future.get();
      for (int hitIndex = 0; hitIndex < hitBuilders.size(); ++hitIndex) {
        var hitResponse = hitBuilders.get(hitIndex);
        hitResponse.putAllFields(values.get(hitIndex));
      }
    }
    // Execute per hit fetch tasks
    for (int hitIndex = 0; hitIndex < hitBuilders.size(); ++hitIndex) {
      var hitResponse = hitBuilders.get(hitIndex);
      LeafReaderContext leaf = hitIdToLeaves.get(hitIndex);
      searchContext.getFetchTasks().processHit(searchContext, leaf, hitResponse);
    }
  } else if (!parallelFetchByField
      && fetchThreadPoolSize > 1
      && hitBuilders.size() > minParallelFetchNumHits) {
    // Fetch docs in parallel
    // parallelism is min of fetchThreadPoolSize and
    // hitBuilders.size() / minParallelFetchNumHits, rounded up
    int parallelism =
        Math.min(
            fetchThreadPoolSize,
            (hitBuilders.size() + minParallelFetchNumHits - 1) / minParallelFetchNumHits);
    List<List<Hit.Builder>> docChunks =
        Lists.partition(hitBuilders, (hitBuilders.size() + parallelism - 1) / parallelism);
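    // Worked example with hypothetical numbers: given fetchThreadPoolSize = 8,
    // minParallelFetchNumHits = 10 and 25 hits, parallelism =
    // min(8, ceil(25 / 10)) = 3, so the hits are split into chunks of
    // ceil(25 / 3) = 9 and handled by three FillDocsTasks below.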
    // Process each document chunk in parallel
    List<Future<?>> futures = new ArrayList<>();
    for (List<Hit.Builder> docChunk : docChunks) {
      futures.add(
          indexState.getFetchThreadPoolExecutor().submit(new FillDocsTask(searchContext, docChunk)));
    }
    for (Future<?> future : futures) {
      future.get();
    }
    // No need to run the per hit fetch tasks here, since they were done in the FillDocsTask
  } else {
    // Single threaded fetch
    FillDocsTask fillDocsTask = new FillDocsTask(searchContext, hitBuilders);
    fillDocsTask.run();
  }
  // Execute all-hits fetch tasks
  searchContext
      .getFetchTasks()
      .processAllHits(searchContext, searchContext.getResponseBuilder().getHitsBuilderList());
}
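The shape of the task submitted in the parallel-by-field branch can be inferred from how its result is consumed above: each FillFieldsTask is a Callable that returns one Map<String, CompositeFieldValue> per hit, in hit order, covering only its chunk of field names. A minimal sketch of that contract, with the actual field-value lookup stubbed out (FillFieldsTaskSketch and retrieveFieldValue are hypothetical names, not nrtsearch API):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import org.apache.lucene.index.LeafReaderContext;
import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit;
import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.CompositeFieldValue;

// Hypothetical, simplified version of the per-chunk fetch task: produce, for
// every hit (in the same order as hitBuilders), a map from field name to
// CompositeFieldValue for just the fields in this chunk.
class FillFieldsTaskSketch implements Callable<List<Map<String, CompositeFieldValue>>> {
  private final List<LeafReaderContext> hitIdToLeaves;
  private final List<Hit.Builder> hitBuilders;
  private final List<String> fieldsChunk;

  FillFieldsTaskSketch(
      List<LeafReaderContext> hitIdToLeaves,
      List<Hit.Builder> hitBuilders,
      List<String> fieldsChunk) {
    this.hitIdToLeaves = hitIdToLeaves;
    this.hitBuilders = hitBuilders;
    this.fieldsChunk = fieldsChunk;
  }

  @Override
  public List<Map<String, CompositeFieldValue>> call() {
    List<Map<String, CompositeFieldValue>> result = new ArrayList<>(hitBuilders.size());
    for (int hitIndex = 0; hitIndex < hitBuilders.size(); ++hitIndex) {
      LeafReaderContext leaf = hitIdToLeaves.get(hitIndex);
      // Convert the global lucene doc id to a segment-local doc id
      int segDocId = hitBuilders.get(hitIndex).getLuceneDocId() - leaf.docBase;
      Map<String, CompositeFieldValue> values = new HashMap<>();
      for (String field : fieldsChunk) {
        values.put(field, retrieveFieldValue(leaf, segDocId, field));
      }
      result.add(values);
    }
    return result;
  }

  // Placeholder for the real lookup, which resolves the field's definition
  // and reads doc values (or stored fields) from the segment reader.
  private CompositeFieldValue retrieveFieldValue(
      LeafReaderContext leaf, int segDocId, String field) {
    return CompositeFieldValue.getDefaultInstance();
  }
}

FillDocsTask, by contrast, is a Runnable that fills every retrieved field for its chunk of hits and also runs the per-hit fetch tasks, which is why the parallel-by-doc branch above skips the explicit processHit loop.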