Search in sources:

Example 1 with CompositeFieldValue

Use of com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.CompositeFieldValue in the project nrtsearch by Yelp.

From the class SearchHandler, method fetchFields:

/**
 * Fetch/compute field values for the top hits. This operation may be done in parallel, based on
 * the setting for the fetch thread pool. In addition to filling hit fields, any query {@link
 * com.yelp.nrtsearch.server.luceneserver.search.FetchTasks.FetchTask}s are executed.
 *
 * @param searchContext search parameters
 * @throws IOException on error reading index data
 * @throws ExecutionException on error when performing parallel fetch
 * @throws InterruptedException if parallel fetch is interrupted
 */
private void fetchFields(SearchContext searchContext)
        throws IOException, ExecutionException, InterruptedException {
    if (searchContext.getResponseBuilder().getHitsBuilderList().isEmpty()) {
        return;
    }
    // Work on a copy sorted by lucene doc id, so index segments are visited in order.
    List<Hit.Builder> hitBuilders =
            new ArrayList<>(searchContext.getResponseBuilder().getHitsBuilderList());
    hitBuilders.sort(Comparator.comparingInt(Hit.Builder::getLuceneDocId));

    IndexState indexState = searchContext.getIndexState();
    int fetchThreadPoolSize = indexState.getThreadPoolConfiguration().getMaxFetchThreads();
    int minParallelFetchNumFields =
            indexState.getThreadPoolConfiguration().getMinParallelFetchNumFields();
    int minParallelFetchNumHits =
            indexState.getThreadPoolConfiguration().getMinParallelFetchNumHits();
    boolean parallelFetchByField =
            indexState.getThreadPoolConfiguration().getParallelFetchByField();

    if (parallelFetchByField
            && fetchThreadPoolSize > 1
            && searchContext.getRetrieveFields().keySet().size() > minParallelFetchNumFields
            && hitBuilders.size() > minParallelFetchNumHits) {
        // Fetch fields in parallel: each task fills one chunk of fields for ALL hits.

        // Resolve the leaf (segment) context for each hit once, up front; hits are sorted by
        // doc id, so this appends in hit-index order.
        List<LeafReaderContext> leaves =
                searchContext.getSearcherAndTaxonomy().searcher.getIndexReader().leaves();
        List<LeafReaderContext> hitIdToLeaves = new ArrayList<>(hitBuilders.size());
        for (Hit.Builder hitResponse : hitBuilders) {
            hitIdToLeaves.add(leaves.get(ReaderUtil.subIndex(hitResponse.getLuceneDocId(), leaves)));
        }

        List<String> fields = new ArrayList<>(searchContext.getRetrieveFields().keySet());
        // parallelism is min of fetchThreadPoolSize and fields.size() / minParallelFetchNumFields,
        // rounded up
        int parallelism =
                Math.min(
                        fetchThreadPoolSize,
                        (fields.size() + minParallelFetchNumFields - 1) / minParallelFetchNumFields);
        // chunk size rounded up so all fields are covered by exactly `parallelism` chunks
        List<List<String>> fieldsChunks =
                Lists.partition(fields, (fields.size() + parallelism - 1) / parallelism);

        // Stored fields are not widely used for NRTSearch (not recommended for memory usage)
        List<Future<List<Map<String, CompositeFieldValue>>>> futures = new ArrayList<>();
        for (List<String> fieldsChunk : fieldsChunks) {
            futures.add(
                    indexState
                            .getFetchThreadPoolExecutor()
                            .submit(
                                    new FillFieldsTask(
                                            indexState,
                                            searchContext.getSearcherAndTaxonomy().searcher,
                                            hitIdToLeaves,
                                            hitBuilders,
                                            fieldsChunk,
                                            searchContext)));
        }
        // Each future yields one Map of field values per hit, indexed by hit position.
        for (Future<List<Map<String, CompositeFieldValue>>> future : futures) {
            List<Map<String, CompositeFieldValue>> values = future.get();
            for (int hitIndex = 0; hitIndex < hitBuilders.size(); ++hitIndex) {
                hitBuilders.get(hitIndex).putAllFields(values.get(hitIndex));
            }
        }
        // execute per hit fetch tasks (FillFieldsTask only fills fields, unlike FillDocsTask)
        for (int hitIndex = 0; hitIndex < hitBuilders.size(); ++hitIndex) {
            searchContext
                    .getFetchTasks()
                    .processHit(searchContext, hitIdToLeaves.get(hitIndex), hitBuilders.get(hitIndex));
        }
    } else if (!parallelFetchByField
            && fetchThreadPoolSize > 1
            && hitBuilders.size() > minParallelFetchNumHits) {
        // Fetch docs in parallel: each task fills ALL fields for one chunk of hits.
        // parallelism is min of fetchThreadPoolSize and hitBuilders.size() / minParallelFetchNumHits,
        // rounded up
        int parallelism =
                Math.min(
                        fetchThreadPoolSize,
                        (hitBuilders.size() + minParallelFetchNumHits - 1) / minParallelFetchNumHits);
        List<List<Hit.Builder>> docChunks =
                Lists.partition(hitBuilders, (hitBuilders.size() + parallelism - 1) / parallelism);
        // process each document chunk in parallel
        List<Future<?>> futures = new ArrayList<>();
        for (List<Hit.Builder> docChunk : docChunks) {
            futures.add(
                    indexState.getFetchThreadPoolExecutor().submit(new FillDocsTask(searchContext, docChunk)));
        }
        for (Future<?> future : futures) {
            future.get();
        }
        // no need to run the per hit fetch tasks here, since they were done in the FillDocsTask
    } else {
        // single threaded fetch; FillDocsTask also runs the per hit fetch tasks
        FillDocsTask fillDocsTask = new FillDocsTask(searchContext, hitBuilders);
        fillDocsTask.run();
    }
    // execute all hits fetch tasks (operates on the response builder's own hit list)
    searchContext
            .getFetchTasks()
            .processAllHits(searchContext, searchContext.getResponseBuilder().getHitsBuilderList());
}
Also used: ArrayList(java.util.ArrayList) Hit(com.yelp.nrtsearch.server.grpc.SearchResponse.Hit) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Future(java.util.concurrent.Future) List(java.util.List) ArrayList(java.util.ArrayList) CompositeFieldValue(com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.CompositeFieldValue) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

Hit (com.yelp.nrtsearch.server.grpc.SearchResponse.Hit)1 CompositeFieldValue (com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.CompositeFieldValue)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Future (java.util.concurrent.Future)1 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)1