Use of com.yelp.nrtsearch.server.grpc.SearchResponse.Hit in project nrtsearch by Yelp.
From the class AtomFieldTest, method queryAndVerifyIds.
private void queryAndVerifyIds(TermQuery termQuery, String... expectedIds) {
  Query query = Query.newBuilder().setTermQuery(termQuery).build();
  SearchResponse response =
      getGrpcServer()
          .getBlockingStub()
          .search(
              SearchRequest.newBuilder()
                  .setIndexName(DEFAULT_TEST_INDEX)
                  .setStartHit(0)
                  .setTopHits(10)
                  .setQuery(query)
                  .addRetrieveFields("doc_id")
                  .build());
  List<String> idList = Arrays.asList(expectedIds);
  assertEquals(idList.size(), response.getHitsCount());
  for (Hit hit : response.getHitsList()) {
    assertTrue(idList.contains(hit.getFieldsOrThrow("doc_id").getFieldValue(0).getTextValue()));
  }
}
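A hypothetical call site for this helper might look like the following; the field name "atom_1", the query value, and the expected ids are illustrative assumptions, not values taken from AtomFieldTest:

TermQuery termQuery =
    TermQuery.newBuilder().setField("atom_1").setTextValue("some value").build();
// expect exactly the documents with doc_id "1" and "3" to match
queryAndVerifyIds(termQuery, "1", "3");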
Use of com.yelp.nrtsearch.server.grpc.SearchResponse.Hit in project nrtsearch by Yelp.
From the class BucketedTieredMergePolicyTest, method verifyData.
private void verifyData(int docs) {
  SearchResponse response =
      getGrpcServer()
          .getBlockingStub()
          .search(
              SearchRequest.newBuilder()
                  .setIndexName(DEFAULT_TEST_INDEX)
                  .setTopHits(docs + 100)
                  .setQuery(Query.newBuilder().build())
                  .addRetrieveFields("doc_id")
                  .addRetrieveFields("int_score")
                  .addRetrieveFields("int_field")
                  .build());
  assertEquals(docs, response.getHitsCount());
  Set<String> seenIds = new HashSet<>();
  for (Hit hit : response.getHitsList()) {
    String id = hit.getFieldsOrThrow("doc_id").getFieldValue(0).getTextValue();
    seenIds.add(id);
    assertEquals(
        Integer.parseInt(id) + 1,
        hit.getFieldsOrThrow("int_score").getFieldValue(0).getIntValue());
    assertEquals(
        Integer.parseInt(id) + 2,
        hit.getFieldsOrThrow("int_field").getFieldValue(0).getIntValue());
  }
  assertEquals(docs, seenIds.size());
  for (int i = 0; i < docs; ++i) {
    assertTrue(seenIds.contains(String.valueOf(i)));
  }
}
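The assertions imply that each document i was indexed with doc_id = i, int_score = i + 1, and int_field = i + 2. A minimal sketch of that setup, assuming the nrtsearch AddDocumentRequest API (the helper name buildDoc is hypothetical, and the actual indexing loop is not shown in the test excerpt):

private AddDocumentRequest buildDoc(int i) {
  // nrtsearch sends every field value as a string inside a MultiValuedField
  return AddDocumentRequest.newBuilder()
      .setIndexName(DEFAULT_TEST_INDEX)
      .putFields(
          "doc_id",
          AddDocumentRequest.MultiValuedField.newBuilder().addValue(String.valueOf(i)).build())
      .putFields(
          "int_score",
          AddDocumentRequest.MultiValuedField.newBuilder().addValue(String.valueOf(i + 1)).build())
      .putFields(
          "int_field",
          AddDocumentRequest.MultiValuedField.newBuilder().addValue(String.valueOf(i + 2)).build())
      .build();
}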
Use of com.yelp.nrtsearch.server.grpc.SearchResponse.Hit in project nrtsearch by Yelp.
From the class SearchHandler, method fetchFields.
/**
* Fetch/compute field values for the top hits. This operation may be done in parallel, based on
* the setting for the fetch thread pool. In addition to filling hit fields, any query {@link
* com.yelp.nrtsearch.server.luceneserver.search.FetchTasks.FetchTask}s are executed.
*
* @param searchContext search parameters
* @throws IOException on error reading index data
* @throws ExecutionException on error when performing parallel fetch
* @throws InterruptedException if parallel fetch is interrupted
*/
private void fetchFields(SearchContext searchContext)
    throws IOException, ExecutionException, InterruptedException {
  if (searchContext.getResponseBuilder().getHitsBuilderList().isEmpty()) {
    return;
  }

  // sort hits by lucene doc id
  List<Hit.Builder> hitBuilders =
      new ArrayList<>(searchContext.getResponseBuilder().getHitsBuilderList());
  hitBuilders.sort(Comparator.comparing(Hit.Builder::getLuceneDocId));

  IndexState indexState = searchContext.getIndexState();
  int fetchThreadPoolSize = indexState.getThreadPoolConfiguration().getMaxFetchThreads();
  int minParallelFetchNumFields =
      indexState.getThreadPoolConfiguration().getMinParallelFetchNumFields();
  int minParallelFetchNumHits =
      indexState.getThreadPoolConfiguration().getMinParallelFetchNumHits();
  boolean parallelFetchByField =
      indexState.getThreadPoolConfiguration().getParallelFetchByField();

  if (parallelFetchByField
      && fetchThreadPoolSize > 1
      && searchContext.getRetrieveFields().keySet().size() > minParallelFetchNumFields
      && hitBuilders.size() > minParallelFetchNumHits) {
    // Fetch fields in parallel
    List<LeafReaderContext> leaves =
        searchContext.getSearcherAndTaxonomy().searcher.getIndexReader().leaves();
    // map each hit index to the leaf segment containing its lucene doc id
    List<LeafReaderContext> hitIdToLeaves = new ArrayList<>();
    for (int hitIndex = 0; hitIndex < hitBuilders.size(); ++hitIndex) {
      var hitResponse = hitBuilders.get(hitIndex);
      LeafReaderContext leaf =
          leaves.get(ReaderUtil.subIndex(hitResponse.getLuceneDocId(), leaves));
      hitIdToLeaves.add(hitIndex, leaf);
    }
    List<String> fields = new ArrayList<>(searchContext.getRetrieveFields().keySet());

    // parallelism is min of fetchThreadPoolSize and fields.size() / minParallelFetchNumFields,
    // rounded up
    int parallelism =
        Math.min(
            fetchThreadPoolSize,
            (fields.size() + minParallelFetchNumFields - 1) / minParallelFetchNumFields);
    List<List<String>> fieldsChunks =
        Lists.partition(fields, (fields.size() + parallelism - 1) / parallelism);
    List<Future<List<Map<String, CompositeFieldValue>>>> futures = new ArrayList<>();

    // Stored fields are not widely used for NRTSearch (not recommended for memory usage)
    for (List<String> fieldsChunk : fieldsChunks) {
      futures.add(
          indexState
              .getFetchThreadPoolExecutor()
              .submit(
                  new FillFieldsTask(
                      indexState,
                      searchContext.getSearcherAndTaxonomy().searcher,
                      hitIdToLeaves,
                      hitBuilders,
                      fieldsChunk,
                      searchContext)));
    }
    for (Future<List<Map<String, CompositeFieldValue>>> future : futures) {
      List<Map<String, CompositeFieldValue>> values = future.get();
      for (int hitIndex = 0; hitIndex < hitBuilders.size(); ++hitIndex) {
        var hitResponse = hitBuilders.get(hitIndex);
        hitResponse.putAllFields(values.get(hitIndex));
      }
    }
    // execute per hit fetch tasks
    for (int hitIndex = 0; hitIndex < hitBuilders.size(); ++hitIndex) {
      var hitResponse = hitBuilders.get(hitIndex);
      LeafReaderContext leaf = hitIdToLeaves.get(hitIndex);
      searchContext.getFetchTasks().processHit(searchContext, leaf, hitResponse);
    }
  } else if (!parallelFetchByField
      && fetchThreadPoolSize > 1
      && hitBuilders.size() > minParallelFetchNumHits) {
    // Fetch docs in parallel
    // parallelism is min of fetchThreadPoolSize and hitBuilders.size() / minParallelFetchNumHits,
    // rounded up
    int parallelism =
        Math.min(
            fetchThreadPoolSize,
            (hitBuilders.size() + minParallelFetchNumHits - 1) / minParallelFetchNumHits);
    List<List<Hit.Builder>> docChunks =
        Lists.partition(hitBuilders, (hitBuilders.size() + parallelism - 1) / parallelism);

    // process each document chunk in parallel
    List<Future<?>> futures = new ArrayList<>();
    for (List<Hit.Builder> docChunk : docChunks) {
      futures.add(
          indexState
              .getFetchThreadPoolExecutor()
              .submit(new FillDocsTask(searchContext, docChunk)));
    }
    for (Future<?> future : futures) {
      future.get();
    }
    // no need to run the per hit fetch tasks here, since they were done in the FillDocsTask
  } else {
    // single threaded fetch
    FillDocsTask fillDocsTask = new FillDocsTask(searchContext, hitBuilders);
    fillDocsTask.run();
  }

  // execute all hits fetch tasks
  searchContext
      .getFetchTasks()
      .processAllHits(searchContext, searchContext.getResponseBuilder().getHitsBuilderList());
}
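Both parallel branches size their work chunks with the same ceiling-division pattern. A small self-contained check with illustrative numbers (the real limits come from ThreadPoolConfiguration; these values are assumptions for the arithmetic only):

// illustrative values, not real configuration
int fetchThreadPoolSize = 4;
int minParallelFetchNumFields = 3;
int numFields = 10;
// parallelism = min(4, ceil(10 / 3)) = min(4, 4) = 4
int parallelism =
    Math.min(
        fetchThreadPoolSize,
        (numFields + minParallelFetchNumFields - 1) / minParallelFetchNumFields);
// chunk size = ceil(10 / 4) = 3, so Lists.partition yields chunks of sizes 3, 3, 3, and 1
int chunkSize = (numFields + parallelism - 1) / parallelism;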