use of org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer in project lucene-solr by apache.
the class QueryComponent method process.
/**
* Actually run the query
*/
@Override
public void process(ResponseBuilder rb) throws IOException {
LOG.debug("process: {}", rb.req.getParams());
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
if (!params.getBool(COMPONENT_NAME, true)) {
return;
}
SolrIndexSearcher searcher = req.getSearcher();
StatsCache statsCache = req.getCore().getStatsCache();
int purpose = params.getInt(ShardParams.SHARDS_PURPOSE, ShardRequest.PURPOSE_GET_TOP_IDS);
if ((purpose & ShardRequest.PURPOSE_GET_TERM_STATS) != 0) {
statsCache.returnLocalStats(rb, searcher);
return;
}
// check if we need to update the local copy of global dfs
if ((purpose & ShardRequest.PURPOSE_SET_TERM_STATS) != 0) {
// retrieve from request and update local cache
statsCache.receiveGlobalStats(req);
}
SolrQueryResponse rsp = rb.rsp;
IndexSchema schema = searcher.getSchema();
// Optional: This could also be implemented by the top-level searcher sending
// a filter that lists the ids... that would be transparent to
// the request handler, but would be more expensive (and would preserve score
// too if desired).
String ids = params.get(ShardParams.IDS);
if (ids != null) {
SchemaField idField = schema.getUniqueKeyField();
List<String> idArr = StrUtils.splitSmart(ids, ",", true);
int[] luceneIds = new int[idArr.size()];
int docs = 0;
if (idField.getType().isPointField()) {
for (int i = 0; i < idArr.size(); i++) {
int id = searcher.search(idField.getType().getFieldQuery(null, idField, idArr.get(i)), 1).scoreDocs[0].doc;
if (id >= 0) {
luceneIds[docs++] = id;
}
}
} else {
for (int i = 0; i < idArr.size(); i++) {
int id = searcher.getFirstMatch(new Term(idField.getName(), idField.getType().toInternal(idArr.get(i))));
if (id >= 0)
luceneIds[docs++] = id;
}
}
DocListAndSet res = new DocListAndSet();
res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
if (rb.isNeedDocSet()) {
// TODO: create a cache for this!
List<Query> queries = new ArrayList<>();
queries.add(rb.getQuery());
List<Query> filters = rb.getFilters();
if (filters != null)
queries.addAll(filters);
res.docSet = searcher.getDocSet(queries);
}
rb.setResults(res);
ResultContext ctx = new BasicResultContext(rb);
rsp.addResponse(ctx);
return;
}
// -1 as flag if not set.
long timeAllowed = params.getLong(CommonParams.TIME_ALLOWED, -1L);
if (null != rb.getCursorMark() && 0 < timeAllowed) {
// fundamentally incompatible
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not search using both " + CursorMarkParams.CURSOR_MARK_PARAM + " and " + CommonParams.TIME_ALLOWED);
}
QueryCommand cmd = rb.getQueryCommand();
cmd.setTimeAllowed(timeAllowed);
req.getContext().put(SolrIndexSearcher.STATS_SOURCE, statsCache.get(req));
QueryResult result = new QueryResult();
cmd.setSegmentTerminateEarly(params.getBool(CommonParams.SEGMENT_TERMINATE_EARLY, CommonParams.SEGMENT_TERMINATE_EARLY_DEFAULT));
if (cmd.getSegmentTerminateEarly()) {
result.setSegmentTerminatedEarly(Boolean.FALSE);
}
//
// grouping / field collapsing
//
GroupingSpecification groupingSpec = rb.getGroupingSpec();
if (groupingSpec != null) {
// not supported, silently ignore any segmentTerminateEarly flag
cmd.setSegmentTerminateEarly(false);
try {
boolean needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
if (params.getBool(GroupParams.GROUP_DISTRIBUTED_FIRST, false)) {
CommandHandler.Builder topsGroupsActionBuilder = new CommandHandler.Builder().setQueryCommand(cmd).setNeedDocSet(// Order matters here
false).setIncludeHitCount(true).setSearcher(searcher);
for (String field : groupingSpec.getFields()) {
topsGroupsActionBuilder.addCommandField(new SearchGroupsFieldCommand.Builder().setField(schema.getField(field)).setGroupSort(groupingSpec.getGroupSort()).setTopNGroups(cmd.getOffset() + cmd.getLen()).setIncludeGroupCount(groupingSpec.isIncludeGroupCount()).build());
}
CommandHandler commandHandler = topsGroupsActionBuilder.build();
commandHandler.execute();
SearchGroupsResultTransformer serializer = new SearchGroupsResultTransformer(searcher);
rsp.add("firstPhase", commandHandler.processResult(result, serializer));
rsp.add("totalHitCount", commandHandler.getTotalHitCount());
rb.setResult(result);
return;
} else if (params.getBool(GroupParams.GROUP_DISTRIBUTED_SECOND, false)) {
CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder().setQueryCommand(cmd).setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0).setSearcher(searcher);
int docsToCollect = Grouping.getMax(groupingSpec.getWithinGroupOffset(), groupingSpec.getWithinGroupLimit(), searcher.maxDoc());
docsToCollect = Math.max(docsToCollect, 1);
for (String field : groupingSpec.getFields()) {
SchemaField schemaField = schema.getField(field);
String[] topGroupsParam = params.getParams(GroupParams.GROUP_DISTRIBUTED_TOPGROUPS_PREFIX + field);
if (topGroupsParam == null) {
topGroupsParam = new String[0];
}
List<SearchGroup<BytesRef>> topGroups = new ArrayList<>(topGroupsParam.length);
for (String topGroup : topGroupsParam) {
SearchGroup<BytesRef> searchGroup = new SearchGroup<>();
if (!topGroup.equals(TopGroupsShardRequestFactory.GROUP_NULL_VALUE)) {
BytesRefBuilder builder = new BytesRefBuilder();
schemaField.getType().readableToIndexed(topGroup, builder);
searchGroup.groupValue = builder.get();
}
topGroups.add(searchGroup);
}
secondPhaseBuilder.addCommandField(new TopGroupsFieldCommand.Builder().setField(schemaField).setGroupSort(groupingSpec.getGroupSort()).setSortWithinGroup(groupingSpec.getSortWithinGroup()).setFirstPhaseGroups(topGroups).setMaxDocPerGroup(docsToCollect).setNeedScores(needScores).setNeedMaxScore(needScores).build());
}
for (String query : groupingSpec.getQueries()) {
secondPhaseBuilder.addCommandField(new Builder().setDocsToCollect(docsToCollect).setSort(groupingSpec.getGroupSort()).setQuery(query, rb.req).setDocSet(searcher).build());
}
CommandHandler commandHandler = secondPhaseBuilder.build();
commandHandler.execute();
TopGroupsResultTransformer serializer = new TopGroupsResultTransformer(rb);
rsp.add("secondPhase", commandHandler.processResult(result, serializer));
rb.setResult(result);
return;
}
int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ? Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
// this is normally from "rows"
int limitDefault = cmd.getLen();
Grouping grouping = new Grouping(searcher, result, cmd, cacheSecondPassSearch, maxDocsPercentageToCache, groupingSpec.isMain());
grouping.setGroupSort(groupingSpec.getGroupSort()).setWithinGroupSort(groupingSpec.getSortWithinGroup()).setDefaultFormat(groupingSpec.getResponseFormat()).setLimitDefault(limitDefault).setDefaultTotalCount(defaultTotalCount).setDocsPerGroupDefault(groupingSpec.getWithinGroupLimit()).setGroupOffsetDefault(groupingSpec.getWithinGroupOffset()).setGetGroupedDocSet(groupingSpec.isTruncateGroups());
if (groupingSpec.getFields() != null) {
for (String field : groupingSpec.getFields()) {
grouping.addFieldCommand(field, rb.req);
}
}
if (groupingSpec.getFunctions() != null) {
for (String groupByStr : groupingSpec.getFunctions()) {
grouping.addFunctionCommand(groupByStr, rb.req);
}
}
if (groupingSpec.getQueries() != null) {
for (String groupByStr : groupingSpec.getQueries()) {
grouping.addQueryCommand(groupByStr, rb.req);
}
}
if (rb.isNeedDocList() || rb.isDebug()) {
// we need a single list of the returned docs
cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
}
grouping.execute();
if (grouping.isSignalCacheWarning()) {
rsp.add("cacheWarning", String.format(Locale.ROOT, "Cache limit of %d percent relative to maxdoc has exceeded. Please increase cache size or disable caching.", maxDocsPercentageToCache));
}
rb.setResult(result);
if (grouping.mainResult != null) {
ResultContext ctx = new BasicResultContext(rb, grouping.mainResult);
rsp.addResponse(ctx);
rsp.getToLog().add("hits", grouping.mainResult.matches());
} else if (!grouping.getCommands().isEmpty()) {
// Can never be empty since grouping.execute() checks for this.
rsp.add("grouped", result.groupedResults);
rsp.getToLog().add("hits", grouping.getCommands().get(0).getMatches());
}
return;
} catch (SyntaxError e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
}
// normal search result
searcher.search(result, cmd);
rb.setResult(result);
ResultContext ctx = new BasicResultContext(rb);
rsp.addResponse(ctx);
rsp.getToLog().add("hits", rb.getResults().docList.matches());
if (!rb.req.getParams().getBool(ShardParams.IS_SHARD, false)) {
if (null != rb.getNextCursorMark()) {
rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, rb.getNextCursorMark().getSerializedTotem());
}
}
if (rb.mergeFieldHandler != null) {
rb.mergeFieldHandler.handleMergeFields(rb, searcher);
} else {
doFieldSortValues(rb, searcher);
}
doPrefetch(rb);
}
use of org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer in project lucene-solr by apache.
the class TopGroupsShardResponseProcessor method process.
/**
* {@inheritDoc}
*/
@Override
@SuppressWarnings("unchecked")
public void process(ResponseBuilder rb, ShardRequest shardRequest) {
Sort groupSort = rb.getGroupingSpec().getGroupSort();
String[] fields = rb.getGroupingSpec().getFields();
String[] queries = rb.getGroupingSpec().getQueries();
Sort withinGroupSort = rb.getGroupingSpec().getSortWithinGroup();
assert withinGroupSort != null;
// If group.format=simple group.offset doesn't make sense
int groupOffsetDefault;
if (rb.getGroupingSpec().getResponseFormat() == Grouping.Format.simple || rb.getGroupingSpec().isMain()) {
groupOffsetDefault = 0;
} else {
groupOffsetDefault = rb.getGroupingSpec().getWithinGroupOffset();
}
int docsPerGroupDefault = rb.getGroupingSpec().getWithinGroupLimit();
Map<String, List<TopGroups<BytesRef>>> commandTopGroups = new HashMap<>();
for (String field : fields) {
commandTopGroups.put(field, new ArrayList<TopGroups<BytesRef>>());
}
Map<String, List<QueryCommandResult>> commandTopDocs = new HashMap<>();
for (String query : queries) {
commandTopDocs.put(query, new ArrayList<QueryCommandResult>());
}
TopGroupsResultTransformer serializer = new TopGroupsResultTransformer(rb);
NamedList<Object> shardInfo = null;
if (rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
shardInfo = new SimpleOrderedMap<>();
rb.rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo);
}
for (ShardResponse srsp : shardRequest.responses) {
SimpleOrderedMap<Object> individualShardInfo = null;
if (shardInfo != null) {
individualShardInfo = new SimpleOrderedMap<>();
if (srsp.getException() != null) {
Throwable t = srsp.getException();
if (t instanceof SolrServerException && ((SolrServerException) t).getCause() != null) {
t = ((SolrServerException) t).getCause();
}
individualShardInfo.add("error", t.toString());
StringWriter trace = new StringWriter();
t.printStackTrace(new PrintWriter(trace));
individualShardInfo.add("trace", trace.toString());
} else {
// summary for successful shard response is added down below
}
if (srsp.getSolrResponse() != null) {
individualShardInfo.add("time", srsp.getSolrResponse().getElapsedTime());
}
if (srsp.getShardAddress() != null) {
individualShardInfo.add("shardAddress", srsp.getShardAddress());
}
shardInfo.add(srsp.getShard(), individualShardInfo);
}
if (rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false) && srsp.getException() != null) {
if (rb.rsp.getResponseHeader().get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY) == null) {
rb.rsp.getResponseHeader().add(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY, Boolean.TRUE);
}
// continue if there was an error and we're tolerant.
continue;
}
NamedList<NamedList> secondPhaseResult = (NamedList<NamedList>) srsp.getSolrResponse().getResponse().get("secondPhase");
if (secondPhaseResult == null)
continue;
Map<String, ?> result = serializer.transformToNative(secondPhaseResult, groupSort, withinGroupSort, srsp.getShard());
int numFound = 0;
float maxScore = Float.NaN;
for (String field : commandTopGroups.keySet()) {
TopGroups<BytesRef> topGroups = (TopGroups<BytesRef>) result.get(field);
if (topGroups == null) {
continue;
}
if (individualShardInfo != null) {
// keep track of this when shards.info=true
numFound += topGroups.totalHitCount;
if (Float.isNaN(maxScore) || topGroups.maxScore > maxScore)
maxScore = topGroups.maxScore;
}
commandTopGroups.get(field).add(topGroups);
}
for (String query : queries) {
QueryCommandResult queryCommandResult = (QueryCommandResult) result.get(query);
if (individualShardInfo != null) {
// keep track of this when shards.info=true
numFound += queryCommandResult.getMatches();
float thisMax = queryCommandResult.getTopDocs().getMaxScore();
if (Float.isNaN(maxScore) || thisMax > maxScore)
maxScore = thisMax;
}
commandTopDocs.get(query).add(queryCommandResult);
}
if (individualShardInfo != null) {
// when shards.info=true
individualShardInfo.add("numFound", numFound);
individualShardInfo.add("maxScore", maxScore);
}
}
for (String groupField : commandTopGroups.keySet()) {
List<TopGroups<BytesRef>> topGroups = commandTopGroups.get(groupField);
if (topGroups.isEmpty()) {
continue;
}
TopGroups<BytesRef>[] topGroupsArr = new TopGroups[topGroups.size()];
int docsPerGroup = docsPerGroupDefault;
if (docsPerGroup < 0) {
docsPerGroup = 0;
for (TopGroups subTopGroups : topGroups) {
docsPerGroup += subTopGroups.totalGroupedHitCount;
}
}
rb.mergedTopGroups.put(groupField, TopGroups.merge(topGroups.toArray(topGroupsArr), groupSort, withinGroupSort, groupOffsetDefault, docsPerGroup, TopGroups.ScoreMergeMode.None));
}
for (String query : commandTopDocs.keySet()) {
List<QueryCommandResult> queryCommandResults = commandTopDocs.get(query);
List<TopDocs> topDocs = new ArrayList<>(queryCommandResults.size());
int mergedMatches = 0;
for (QueryCommandResult queryCommandResult : queryCommandResults) {
topDocs.add(queryCommandResult.getTopDocs());
mergedMatches += queryCommandResult.getMatches();
}
int topN = rb.getGroupingSpec().getOffset() + rb.getGroupingSpec().getLimit();
final TopDocs mergedTopDocs;
if (withinGroupSort.equals(Sort.RELEVANCE)) {
mergedTopDocs = TopDocs.merge(topN, topDocs.toArray(new TopDocs[topDocs.size()]));
} else {
mergedTopDocs = TopDocs.merge(withinGroupSort, topN, topDocs.toArray(new TopFieldDocs[topDocs.size()]));
}
rb.mergedQueryCommandResults.put(query, new QueryCommandResult(mergedTopDocs, mergedMatches));
}
Map<Object, ShardDoc> resultIds = new HashMap<>();
int i = 0;
for (TopGroups<BytesRef> topGroups : rb.mergedTopGroups.values()) {
for (GroupDocs<BytesRef> group : topGroups.groups) {
for (ScoreDoc scoreDoc : group.scoreDocs) {
ShardDoc solrDoc = (ShardDoc) scoreDoc;
// Include the first if there are duplicate IDs
if (!resultIds.containsKey(solrDoc.id)) {
solrDoc.positionInResponse = i++;
resultIds.put(solrDoc.id, solrDoc);
}
}
}
}
for (QueryCommandResult queryCommandResult : rb.mergedQueryCommandResults.values()) {
for (ScoreDoc scoreDoc : queryCommandResult.getTopDocs().scoreDocs) {
ShardDoc solrDoc = (ShardDoc) scoreDoc;
solrDoc.positionInResponse = i++;
resultIds.put(solrDoc.id, solrDoc);
}
}
rb.resultIds = resultIds;
}
Aggregations