Search in sources :

Example 6 with Hit

use of com.yelp.nrtsearch.server.grpc.SearchResponse.Hit in project nrtsearch by Yelp.

the class AtomFieldTest method queryAndVerifyIds.

/**
 * Runs the given term query against the index named by {@code DEFAULT_TEST_INDEX} and checks the
 * ids of the returned documents.
 *
 * <p>At most 10 hits are requested, starting from the first hit, and only the {@code doc_id} field
 * is retrieved. The check passes when the number of hits equals the number of expected ids and the
 * first {@code doc_id} value of every hit is one of the expected ids.
 *
 * @param termQuery term query to execute
 * @param expectedIds document ids the query is expected to match
 */
private void queryAndVerifyIds(TermQuery termQuery, String... expectedIds) {
    SearchRequest request = SearchRequest.newBuilder()
            .setIndexName(DEFAULT_TEST_INDEX)
            .setStartHit(0)
            .setTopHits(10)
            .setQuery(Query.newBuilder().setTermQuery(termQuery).build())
            .addRetrieveFields("doc_id")
            .build();
    SearchResponse searchResponse = getGrpcServer().getBlockingStub().search(request);

    List<String> expected = Arrays.asList(expectedIds);
    assertEquals(expected.size(), searchResponse.getHitsCount());
    for (Hit resultHit : searchResponse.getHitsList()) {
        String docId = resultHit.getFieldsOrThrow("doc_id").getFieldValue(0).getTextValue();
        assertTrue(expected.contains(docId));
    }
}
Also used : Hit(com.yelp.nrtsearch.server.grpc.SearchResponse.Hit) Query(com.yelp.nrtsearch.server.grpc.Query) TermQuery(com.yelp.nrtsearch.server.grpc.TermQuery) TermInSetQuery(com.yelp.nrtsearch.server.grpc.TermInSetQuery) SearchResponse(com.yelp.nrtsearch.server.grpc.SearchResponse)

Example 7 with Hit

use of com.yelp.nrtsearch.server.grpc.SearchResponse.Hit in project nrtsearch by Yelp.

the class BucketedTieredMergePolicyTest method verifyData.

/**
 * Checks that a search with an empty query over the index named by {@code DEFAULT_TEST_INDEX}
 * returns exactly the expected documents.
 *
 * <p>Up to {@code docs + 100} hits are requested, so surplus documents show up as a hit count
 * mismatch. For every hit, {@code int_score} must equal the numeric {@code doc_id} plus one and
 * {@code int_field} must equal it plus two. Finally, the distinct ids seen must be exactly
 * {@code 0} through {@code docs - 1}.
 *
 * @param docs number of documents expected in the index
 */
private void verifyData(int docs) {
    SearchRequest request = SearchRequest.newBuilder()
            .setIndexName(DEFAULT_TEST_INDEX)
            .setTopHits(docs + 100)
            .setQuery(Query.newBuilder().build())
            .addRetrieveFields("doc_id")
            .addRetrieveFields("int_score")
            .addRetrieveFields("int_field")
            .build();
    SearchResponse searchResponse = getGrpcServer().getBlockingStub().search(request);
    assertEquals(docs, searchResponse.getHitsCount());

    Set<String> observedIds = new HashSet<>();
    for (Hit resultHit : searchResponse.getHitsList()) {
        String docId = resultHit.getFieldsOrThrow("doc_id").getFieldValue(0).getTextValue();
        observedIds.add(docId);
        // the other two fields are derived from the numeric value of the id
        int numericId = Integer.parseInt(docId);
        assertEquals(numericId + 1, resultHit.getFieldsOrThrow("int_score").getFieldValue(0).getIntValue());
        assertEquals(numericId + 2, resultHit.getFieldsOrThrow("int_field").getFieldValue(0).getIntValue());
    }

    // same count plus every id in [0, docs) present means no duplicates and no gaps
    assertEquals(docs, observedIds.size());
    for (int expectedId = 0; expectedId < docs; expectedId++) {
        assertTrue(observedIds.contains(String.valueOf(expectedId)));
    }
}
Also used : Hit(com.yelp.nrtsearch.server.grpc.SearchResponse.Hit) SearchResponse(com.yelp.nrtsearch.server.grpc.SearchResponse) HashSet(java.util.HashSet)

Example 8 with Hit

use of com.yelp.nrtsearch.server.grpc.SearchResponse.Hit in project nrtsearch by Yelp.

the class SearchHandler method fetchFields.

/**
 * Fill in the field values of the top hits held by the response builder, then run the query's
 * {@link com.yelp.nrtsearch.server.luceneserver.search.FetchTasks.FetchTask}s.
 *
 * <p>Hits are processed in lucene doc id order. Depending on the thread pool configuration, the
 * number of requested fields and the number of hits, one of three strategies is used:
 *
 * <ul>
 *   <li>parallel by field: the retrieve fields are split into chunks and each chunk is loaded for
 *       all hits on the fetch thread pool
 *   <li>parallel by document: the hits are split into chunks and each chunk is filled on the
 *       fetch thread pool
 *   <li>single threaded: all hits are filled on the calling thread
 * </ul>
 *
 * <p>Nothing is done when the response contains no hits.
 *
 * @param searchContext search parameters
 * @throws IOException on error reading index data
 * @throws ExecutionException on error when performing parallel fetch
 * @throws InterruptedException if parallel fetch is interrupted
 */
private void fetchFields(SearchContext searchContext) throws IOException, ExecutionException, InterruptedException {
    if (searchContext.getResponseBuilder().getHitsBuilderList().isEmpty()) {
        return;
    }

    // work on a copy of the hit builders, ordered by lucene doc id
    List<Hit.Builder> hitBuilders = new ArrayList<>(searchContext.getResponseBuilder().getHitsBuilderList());
    hitBuilders.sort(Comparator.comparing(Hit.Builder::getLuceneDocId));

    IndexState indexState = searchContext.getIndexState();
    var threadPoolConfig = indexState.getThreadPoolConfiguration();
    int fetchThreadPoolSize = threadPoolConfig.getMaxFetchThreads();
    int minParallelFetchNumFields = threadPoolConfig.getMinParallelFetchNumFields();
    int minParallelFetchNumHits = threadPoolConfig.getMinParallelFetchNumHits();
    boolean parallelFetchByField = threadPoolConfig.getParallelFetchByField();

    // both parallel strategies need more than one fetch thread and more hits than the minimum
    boolean parallelFetchPossible = fetchThreadPoolSize > 1 && hitBuilders.size() > minParallelFetchNumHits;

    if (parallelFetchByField && parallelFetchPossible && searchContext.getRetrieveFields().keySet().size() > minParallelFetchNumFields) {
        // Fetch fields in parallel
        // resolve the index segment of each hit once, position i matches hitBuilders.get(i)
        List<LeafReaderContext> leaves = searchContext.getSearcherAndTaxonomy().searcher.getIndexReader().leaves();
        List<LeafReaderContext> hitIdToLeaves = new ArrayList<>();
        for (Hit.Builder hitBuilder : hitBuilders) {
            int leafIndex = ReaderUtil.subIndex(hitBuilder.getLuceneDocId(), leaves);
            hitIdToLeaves.add(leaves.get(leafIndex));
        }

        List<String> fields = new ArrayList<>(searchContext.getRetrieveFields().keySet());
        // parallelism is min of fetchThreadPoolSize and fields.size() / minParallelFetchNumFields
        // rounded up
        int fieldChunkCount = (fields.size() + minParallelFetchNumFields - 1) / minParallelFetchNumFields;
        int parallelism = Math.min(fetchThreadPoolSize, fieldChunkCount);
        int fieldsPerChunk = (fields.size() + parallelism - 1) / parallelism;

        // Stored fields are not widely used for NRTSearch (not recommended for memory usage)
        List<Future<List<Map<String, CompositeFieldValue>>>> pendingChunks = new ArrayList<>();
        for (List<String> fieldsChunk : Lists.partition(fields, fieldsPerChunk)) {
            FillFieldsTask task = new FillFieldsTask(indexState, searchContext.getSearcherAndTaxonomy().searcher, hitIdToLeaves, hitBuilders, fieldsChunk, searchContext);
            pendingChunks.add(indexState.getFetchThreadPoolExecutor().submit(task));
        }

        // merge the values of each field chunk into the hits, in submission order
        for (Future<List<Map<String, CompositeFieldValue>>> pendingChunk : pendingChunks) {
            List<Map<String, CompositeFieldValue>> chunkValues = pendingChunk.get();
            for (int i = 0; i < hitBuilders.size(); i++) {
                hitBuilders.get(i).putAllFields(chunkValues.get(i));
            }
        }

        // execute per hit fetch tasks
        for (int i = 0; i < hitBuilders.size(); i++) {
            searchContext.getFetchTasks().processHit(searchContext, hitIdToLeaves.get(i), hitBuilders.get(i));
        }
    } else if (!parallelFetchByField && parallelFetchPossible) {
        // Fetch docs in parallel
        // parallelism is min of fetchThreadPoolSize and hitBuilders.size() / minParallelFetchNumHits
        // rounded up
        int docChunkCount = (hitBuilders.size() + minParallelFetchNumHits - 1) / minParallelFetchNumHits;
        int parallelism = Math.min(fetchThreadPoolSize, docChunkCount);
        int docsPerChunk = (hitBuilders.size() + parallelism - 1) / parallelism;

        // process each document chunk in parallel
        List<Future<?>> pendingChunks = new ArrayList<>();
        for (List<Hit.Builder> docChunk : Lists.partition(hitBuilders, docsPerChunk)) {
            pendingChunks.add(indexState.getFetchThreadPoolExecutor().submit(new FillDocsTask(searchContext, docChunk)));
        }
        // wait for every chunk to finish
        for (Future<?> pendingChunk : pendingChunks) {
            pendingChunk.get();
        }
        // no need to run the per hit fetch tasks here, since they were done in the FillDocsTask
    } else {
        // single threaded fetch
        new FillDocsTask(searchContext, hitBuilders).run();
    }

    // execute all hits fetch tasks
    searchContext.getFetchTasks().processAllHits(searchContext, searchContext.getResponseBuilder().getHitsBuilderList());
}
Also used : ArrayList(java.util.ArrayList) Hit(com.yelp.nrtsearch.server.grpc.SearchResponse.Hit) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Future(java.util.concurrent.Future) List(java.util.List) CompositeFieldValue(com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.CompositeFieldValue) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

Hit (com.yelp.nrtsearch.server.grpc.SearchResponse.Hit)8 SearchResponse (com.yelp.nrtsearch.server.grpc.SearchResponse)5 Query (com.yelp.nrtsearch.server.grpc.Query)3 TermInSetQuery (com.yelp.nrtsearch.server.grpc.TermInSetQuery)3 TermQuery (com.yelp.nrtsearch.server.grpc.TermQuery)3 HashSet (java.util.HashSet)2 SearchRequest (com.yelp.nrtsearch.server.grpc.SearchRequest)1 CompositeFieldValue (com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.CompositeFieldValue)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Future (java.util.concurrent.Future)1 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)1 Test (org.junit.Test)1