Search in sources :

Example 1 with TopGroupsResultTransformer

use of org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer in project lucene-solr by apache.

the class QueryComponent method process.

/**
   * Actually run the query
   */
@Override
public void process(ResponseBuilder rb) throws IOException {
    LOG.debug("process: {}", rb.req.getParams());
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    if (!params.getBool(COMPONENT_NAME, true)) {
        return;
    }
    SolrIndexSearcher searcher = req.getSearcher();
    StatsCache statsCache = req.getCore().getStatsCache();
    int purpose = params.getInt(ShardParams.SHARDS_PURPOSE, ShardRequest.PURPOSE_GET_TOP_IDS);
    if ((purpose & ShardRequest.PURPOSE_GET_TERM_STATS) != 0) {
        statsCache.returnLocalStats(rb, searcher);
        return;
    }
    // check if we need to update the local copy of global dfs
    if ((purpose & ShardRequest.PURPOSE_SET_TERM_STATS) != 0) {
        // retrieve from request and update local cache
        statsCache.receiveGlobalStats(req);
    }
    SolrQueryResponse rsp = rb.rsp;
    IndexSchema schema = searcher.getSchema();
    // Optional: This could also be implemented by the top-level searcher sending
    // a filter that lists the ids... that would be transparent to
    // the request handler, but would be more expensive (and would preserve score
    // too if desired).
    String ids = params.get(ShardParams.IDS);
    if (ids != null) {
        SchemaField idField = schema.getUniqueKeyField();
        List<String> idArr = StrUtils.splitSmart(ids, ",", true);
        int[] luceneIds = new int[idArr.size()];
        int docs = 0;
        if (idField.getType().isPointField()) {
            for (int i = 0; i < idArr.size(); i++) {
                int id = searcher.search(idField.getType().getFieldQuery(null, idField, idArr.get(i)), 1).scoreDocs[0].doc;
                if (id >= 0) {
                    luceneIds[docs++] = id;
                }
            }
        } else {
            for (int i = 0; i < idArr.size(); i++) {
                int id = searcher.getFirstMatch(new Term(idField.getName(), idField.getType().toInternal(idArr.get(i))));
                if (id >= 0)
                    luceneIds[docs++] = id;
            }
        }
        DocListAndSet res = new DocListAndSet();
        res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
        if (rb.isNeedDocSet()) {
            // TODO: create a cache for this!
            List<Query> queries = new ArrayList<>();
            queries.add(rb.getQuery());
            List<Query> filters = rb.getFilters();
            if (filters != null)
                queries.addAll(filters);
            res.docSet = searcher.getDocSet(queries);
        }
        rb.setResults(res);
        ResultContext ctx = new BasicResultContext(rb);
        rsp.addResponse(ctx);
        return;
    }
    // -1 as flag if not set.
    long timeAllowed = params.getLong(CommonParams.TIME_ALLOWED, -1L);
    if (null != rb.getCursorMark() && 0 < timeAllowed) {
        // fundamentally incompatible
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not search using both " + CursorMarkParams.CURSOR_MARK_PARAM + " and " + CommonParams.TIME_ALLOWED);
    }
    QueryCommand cmd = rb.getQueryCommand();
    cmd.setTimeAllowed(timeAllowed);
    req.getContext().put(SolrIndexSearcher.STATS_SOURCE, statsCache.get(req));
    QueryResult result = new QueryResult();
    cmd.setSegmentTerminateEarly(params.getBool(CommonParams.SEGMENT_TERMINATE_EARLY, CommonParams.SEGMENT_TERMINATE_EARLY_DEFAULT));
    if (cmd.getSegmentTerminateEarly()) {
        result.setSegmentTerminatedEarly(Boolean.FALSE);
    }
    //
    // grouping / field collapsing
    //
    GroupingSpecification groupingSpec = rb.getGroupingSpec();
    if (groupingSpec != null) {
        // not supported, silently ignore any segmentTerminateEarly flag
        cmd.setSegmentTerminateEarly(false);
        try {
            boolean needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
            if (params.getBool(GroupParams.GROUP_DISTRIBUTED_FIRST, false)) {
                CommandHandler.Builder topsGroupsActionBuilder = new CommandHandler.Builder().setQueryCommand(cmd).setNeedDocSet(// Order matters here
                false).setIncludeHitCount(true).setSearcher(searcher);
                for (String field : groupingSpec.getFields()) {
                    topsGroupsActionBuilder.addCommandField(new SearchGroupsFieldCommand.Builder().setField(schema.getField(field)).setGroupSort(groupingSpec.getGroupSort()).setTopNGroups(cmd.getOffset() + cmd.getLen()).setIncludeGroupCount(groupingSpec.isIncludeGroupCount()).build());
                }
                CommandHandler commandHandler = topsGroupsActionBuilder.build();
                commandHandler.execute();
                SearchGroupsResultTransformer serializer = new SearchGroupsResultTransformer(searcher);
                rsp.add("firstPhase", commandHandler.processResult(result, serializer));
                rsp.add("totalHitCount", commandHandler.getTotalHitCount());
                rb.setResult(result);
                return;
            } else if (params.getBool(GroupParams.GROUP_DISTRIBUTED_SECOND, false)) {
                CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder().setQueryCommand(cmd).setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0).setSearcher(searcher);
                int docsToCollect = Grouping.getMax(groupingSpec.getWithinGroupOffset(), groupingSpec.getWithinGroupLimit(), searcher.maxDoc());
                docsToCollect = Math.max(docsToCollect, 1);
                for (String field : groupingSpec.getFields()) {
                    SchemaField schemaField = schema.getField(field);
                    String[] topGroupsParam = params.getParams(GroupParams.GROUP_DISTRIBUTED_TOPGROUPS_PREFIX + field);
                    if (topGroupsParam == null) {
                        topGroupsParam = new String[0];
                    }
                    List<SearchGroup<BytesRef>> topGroups = new ArrayList<>(topGroupsParam.length);
                    for (String topGroup : topGroupsParam) {
                        SearchGroup<BytesRef> searchGroup = new SearchGroup<>();
                        if (!topGroup.equals(TopGroupsShardRequestFactory.GROUP_NULL_VALUE)) {
                            BytesRefBuilder builder = new BytesRefBuilder();
                            schemaField.getType().readableToIndexed(topGroup, builder);
                            searchGroup.groupValue = builder.get();
                        }
                        topGroups.add(searchGroup);
                    }
                    secondPhaseBuilder.addCommandField(new TopGroupsFieldCommand.Builder().setField(schemaField).setGroupSort(groupingSpec.getGroupSort()).setSortWithinGroup(groupingSpec.getSortWithinGroup()).setFirstPhaseGroups(topGroups).setMaxDocPerGroup(docsToCollect).setNeedScores(needScores).setNeedMaxScore(needScores).build());
                }
                for (String query : groupingSpec.getQueries()) {
                    secondPhaseBuilder.addCommandField(new Builder().setDocsToCollect(docsToCollect).setSort(groupingSpec.getGroupSort()).setQuery(query, rb.req).setDocSet(searcher).build());
                }
                CommandHandler commandHandler = secondPhaseBuilder.build();
                commandHandler.execute();
                TopGroupsResultTransformer serializer = new TopGroupsResultTransformer(rb);
                rsp.add("secondPhase", commandHandler.processResult(result, serializer));
                rb.setResult(result);
                return;
            }
            int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
            boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
            Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ? Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
            // this is normally from "rows"
            int limitDefault = cmd.getLen();
            Grouping grouping = new Grouping(searcher, result, cmd, cacheSecondPassSearch, maxDocsPercentageToCache, groupingSpec.isMain());
            grouping.setGroupSort(groupingSpec.getGroupSort()).setWithinGroupSort(groupingSpec.getSortWithinGroup()).setDefaultFormat(groupingSpec.getResponseFormat()).setLimitDefault(limitDefault).setDefaultTotalCount(defaultTotalCount).setDocsPerGroupDefault(groupingSpec.getWithinGroupLimit()).setGroupOffsetDefault(groupingSpec.getWithinGroupOffset()).setGetGroupedDocSet(groupingSpec.isTruncateGroups());
            if (groupingSpec.getFields() != null) {
                for (String field : groupingSpec.getFields()) {
                    grouping.addFieldCommand(field, rb.req);
                }
            }
            if (groupingSpec.getFunctions() != null) {
                for (String groupByStr : groupingSpec.getFunctions()) {
                    grouping.addFunctionCommand(groupByStr, rb.req);
                }
            }
            if (groupingSpec.getQueries() != null) {
                for (String groupByStr : groupingSpec.getQueries()) {
                    grouping.addQueryCommand(groupByStr, rb.req);
                }
            }
            if (rb.isNeedDocList() || rb.isDebug()) {
                // we need a single list of the returned docs
                cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
            }
            grouping.execute();
            if (grouping.isSignalCacheWarning()) {
                rsp.add("cacheWarning", String.format(Locale.ROOT, "Cache limit of %d percent relative to maxdoc has exceeded. Please increase cache size or disable caching.", maxDocsPercentageToCache));
            }
            rb.setResult(result);
            if (grouping.mainResult != null) {
                ResultContext ctx = new BasicResultContext(rb, grouping.mainResult);
                rsp.addResponse(ctx);
                rsp.getToLog().add("hits", grouping.mainResult.matches());
            } else if (!grouping.getCommands().isEmpty()) {
                // Can never be empty since grouping.execute() checks for this.
                rsp.add("grouped", result.groupedResults);
                rsp.getToLog().add("hits", grouping.getCommands().get(0).getMatches());
            }
            return;
        } catch (SyntaxError e) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        }
    }
    // normal search result
    searcher.search(result, cmd);
    rb.setResult(result);
    ResultContext ctx = new BasicResultContext(rb);
    rsp.addResponse(ctx);
    rsp.getToLog().add("hits", rb.getResults().docList.matches());
    if (!rb.req.getParams().getBool(ShardParams.IS_SHARD, false)) {
        if (null != rb.getNextCursorMark()) {
            rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, rb.getNextCursorMark().getSerializedTotem());
        }
    }
    if (rb.mergeFieldHandler != null) {
        rb.mergeFieldHandler.handleMergeFields(rb, searcher);
    } else {
        doFieldSortValues(rb, searcher);
    }
    doPrefetch(rb);
}
Also used : BasicResultContext(org.apache.solr.response.BasicResultContext) ResultContext(org.apache.solr.response.ResultContext) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) RankQuery(org.apache.solr.search.RankQuery) DocListAndSet(org.apache.solr.search.DocListAndSet) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) Builder(org.apache.solr.search.grouping.distributed.command.QueryCommand.Builder) ArrayList(java.util.ArrayList) CommandHandler(org.apache.solr.search.grouping.CommandHandler) DocSlice(org.apache.solr.search.DocSlice) BasicResultContext(org.apache.solr.response.BasicResultContext) StatsCache(org.apache.solr.search.stats.StatsCache) QueryResult(org.apache.solr.search.QueryResult) SyntaxError(org.apache.solr.search.SyntaxError) SolrDocumentList(org.apache.solr.common.SolrDocumentList) DocList(org.apache.solr.search.DocList) List(java.util.List) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) GroupingSpecification(org.apache.solr.search.grouping.GroupingSpecification) SearchGroupsResultTransformer(org.apache.solr.search.grouping.distributed.shardresultserializer.SearchGroupsResultTransformer) SolrException(org.apache.solr.common.SolrException) BytesRef(org.apache.lucene.util.BytesRef) SolrQueryResponse(org.apache.solr.response.SolrQueryResponse) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) SearchGroup(org.apache.lucene.search.grouping.SearchGroup) Grouping(org.apache.solr.search.Grouping) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) Term(org.apache.lucene.index.Term) TopGroupsResultTransformer(org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer) SchemaField(org.apache.solr.schema.SchemaField) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) SolrParams(org.apache.solr.common.params.SolrParams) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) QueryCommand(org.apache.solr.search.QueryCommand) IndexSchema(org.apache.solr.schema.IndexSchema)

Example 2 with TopGroupsResultTransformer

use of org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer in project lucene-solr by apache.

the class TopGroupsShardResponseProcessor method process.

/**
   * {@inheritDoc}
   */
@Override
@SuppressWarnings("unchecked")
public void process(ResponseBuilder rb, ShardRequest shardRequest) {
    Sort groupSort = rb.getGroupingSpec().getGroupSort();
    String[] fields = rb.getGroupingSpec().getFields();
    String[] queries = rb.getGroupingSpec().getQueries();
    Sort withinGroupSort = rb.getGroupingSpec().getSortWithinGroup();
    assert withinGroupSort != null;
    // If group.format=simple group.offset doesn't make sense
    int groupOffsetDefault;
    if (rb.getGroupingSpec().getResponseFormat() == Grouping.Format.simple || rb.getGroupingSpec().isMain()) {
        groupOffsetDefault = 0;
    } else {
        groupOffsetDefault = rb.getGroupingSpec().getWithinGroupOffset();
    }
    int docsPerGroupDefault = rb.getGroupingSpec().getWithinGroupLimit();
    Map<String, List<TopGroups<BytesRef>>> commandTopGroups = new HashMap<>();
    for (String field : fields) {
        commandTopGroups.put(field, new ArrayList<TopGroups<BytesRef>>());
    }
    Map<String, List<QueryCommandResult>> commandTopDocs = new HashMap<>();
    for (String query : queries) {
        commandTopDocs.put(query, new ArrayList<QueryCommandResult>());
    }
    TopGroupsResultTransformer serializer = new TopGroupsResultTransformer(rb);
    NamedList<Object> shardInfo = null;
    if (rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
        shardInfo = new SimpleOrderedMap<>();
        rb.rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo);
    }
    for (ShardResponse srsp : shardRequest.responses) {
        SimpleOrderedMap<Object> individualShardInfo = null;
        if (shardInfo != null) {
            individualShardInfo = new SimpleOrderedMap<>();
            if (srsp.getException() != null) {
                Throwable t = srsp.getException();
                if (t instanceof SolrServerException && ((SolrServerException) t).getCause() != null) {
                    t = ((SolrServerException) t).getCause();
                }
                individualShardInfo.add("error", t.toString());
                StringWriter trace = new StringWriter();
                t.printStackTrace(new PrintWriter(trace));
                individualShardInfo.add("trace", trace.toString());
            } else {
            // summary for successful shard response is added down below
            }
            if (srsp.getSolrResponse() != null) {
                individualShardInfo.add("time", srsp.getSolrResponse().getElapsedTime());
            }
            if (srsp.getShardAddress() != null) {
                individualShardInfo.add("shardAddress", srsp.getShardAddress());
            }
            shardInfo.add(srsp.getShard(), individualShardInfo);
        }
        if (rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false) && srsp.getException() != null) {
            if (rb.rsp.getResponseHeader().get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY) == null) {
                rb.rsp.getResponseHeader().add(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY, Boolean.TRUE);
            }
            // continue if there was an error and we're tolerant.  
            continue;
        }
        NamedList<NamedList> secondPhaseResult = (NamedList<NamedList>) srsp.getSolrResponse().getResponse().get("secondPhase");
        if (secondPhaseResult == null)
            continue;
        Map<String, ?> result = serializer.transformToNative(secondPhaseResult, groupSort, withinGroupSort, srsp.getShard());
        int numFound = 0;
        float maxScore = Float.NaN;
        for (String field : commandTopGroups.keySet()) {
            TopGroups<BytesRef> topGroups = (TopGroups<BytesRef>) result.get(field);
            if (topGroups == null) {
                continue;
            }
            if (individualShardInfo != null) {
                // keep track of this when shards.info=true
                numFound += topGroups.totalHitCount;
                if (Float.isNaN(maxScore) || topGroups.maxScore > maxScore)
                    maxScore = topGroups.maxScore;
            }
            commandTopGroups.get(field).add(topGroups);
        }
        for (String query : queries) {
            QueryCommandResult queryCommandResult = (QueryCommandResult) result.get(query);
            if (individualShardInfo != null) {
                // keep track of this when shards.info=true
                numFound += queryCommandResult.getMatches();
                float thisMax = queryCommandResult.getTopDocs().getMaxScore();
                if (Float.isNaN(maxScore) || thisMax > maxScore)
                    maxScore = thisMax;
            }
            commandTopDocs.get(query).add(queryCommandResult);
        }
        if (individualShardInfo != null) {
            // when shards.info=true
            individualShardInfo.add("numFound", numFound);
            individualShardInfo.add("maxScore", maxScore);
        }
    }
    for (String groupField : commandTopGroups.keySet()) {
        List<TopGroups<BytesRef>> topGroups = commandTopGroups.get(groupField);
        if (topGroups.isEmpty()) {
            continue;
        }
        TopGroups<BytesRef>[] topGroupsArr = new TopGroups[topGroups.size()];
        int docsPerGroup = docsPerGroupDefault;
        if (docsPerGroup < 0) {
            docsPerGroup = 0;
            for (TopGroups subTopGroups : topGroups) {
                docsPerGroup += subTopGroups.totalGroupedHitCount;
            }
        }
        rb.mergedTopGroups.put(groupField, TopGroups.merge(topGroups.toArray(topGroupsArr), groupSort, withinGroupSort, groupOffsetDefault, docsPerGroup, TopGroups.ScoreMergeMode.None));
    }
    for (String query : commandTopDocs.keySet()) {
        List<QueryCommandResult> queryCommandResults = commandTopDocs.get(query);
        List<TopDocs> topDocs = new ArrayList<>(queryCommandResults.size());
        int mergedMatches = 0;
        for (QueryCommandResult queryCommandResult : queryCommandResults) {
            topDocs.add(queryCommandResult.getTopDocs());
            mergedMatches += queryCommandResult.getMatches();
        }
        int topN = rb.getGroupingSpec().getOffset() + rb.getGroupingSpec().getLimit();
        final TopDocs mergedTopDocs;
        if (withinGroupSort.equals(Sort.RELEVANCE)) {
            mergedTopDocs = TopDocs.merge(topN, topDocs.toArray(new TopDocs[topDocs.size()]));
        } else {
            mergedTopDocs = TopDocs.merge(withinGroupSort, topN, topDocs.toArray(new TopFieldDocs[topDocs.size()]));
        }
        rb.mergedQueryCommandResults.put(query, new QueryCommandResult(mergedTopDocs, mergedMatches));
    }
    Map<Object, ShardDoc> resultIds = new HashMap<>();
    int i = 0;
    for (TopGroups<BytesRef> topGroups : rb.mergedTopGroups.values()) {
        for (GroupDocs<BytesRef> group : topGroups.groups) {
            for (ScoreDoc scoreDoc : group.scoreDocs) {
                ShardDoc solrDoc = (ShardDoc) scoreDoc;
                // Include the first if there are duplicate IDs
                if (!resultIds.containsKey(solrDoc.id)) {
                    solrDoc.positionInResponse = i++;
                    resultIds.put(solrDoc.id, solrDoc);
                }
            }
        }
    }
    for (QueryCommandResult queryCommandResult : rb.mergedQueryCommandResults.values()) {
        for (ScoreDoc scoreDoc : queryCommandResult.getTopDocs().scoreDocs) {
            ShardDoc solrDoc = (ShardDoc) scoreDoc;
            solrDoc.positionInResponse = i++;
            resultIds.put(solrDoc.id, solrDoc);
        }
    }
    rb.resultIds = resultIds;
}
Also used : HashMap(java.util.HashMap) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) ScoreDoc(org.apache.lucene.search.ScoreDoc) ShardResponse(org.apache.solr.handler.component.ShardResponse) TopDocs(org.apache.lucene.search.TopDocs) StringWriter(java.io.StringWriter) Sort(org.apache.lucene.search.Sort) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) List(java.util.List) TopGroups(org.apache.lucene.search.grouping.TopGroups) BytesRef(org.apache.lucene.util.BytesRef) ShardDoc(org.apache.solr.handler.component.ShardDoc) PrintWriter(java.io.PrintWriter) NamedList(org.apache.solr.common.util.NamedList) QueryCommandResult(org.apache.solr.search.grouping.distributed.command.QueryCommandResult) TopGroupsResultTransformer(org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer)

Aggregations

ArrayList (java.util.ArrayList)2 List (java.util.List)2 BytesRef (org.apache.lucene.util.BytesRef)2 NamedList (org.apache.solr.common.util.NamedList)2 TopGroupsResultTransformer (org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer)2 PrintWriter (java.io.PrintWriter)1 StringWriter (java.io.StringWriter)1 HashMap (java.util.HashMap)1 Term (org.apache.lucene.index.Term)1 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)1 Query (org.apache.lucene.search.Query)1 ScoreDoc (org.apache.lucene.search.ScoreDoc)1 Sort (org.apache.lucene.search.Sort)1 TopDocs (org.apache.lucene.search.TopDocs)1 SearchGroup (org.apache.lucene.search.grouping.SearchGroup)1 TopGroups (org.apache.lucene.search.grouping.TopGroups)1 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)1 SolrServerException (org.apache.solr.client.solrj.SolrServerException)1 SolrDocumentList (org.apache.solr.common.SolrDocumentList)1 SolrException (org.apache.solr.common.SolrException)1