use of org.apache.solr.search.grouping.GroupingSpecification in project lucene-solr by apache.
the class QueryComponent method prepareGrouping.
/**
 * Builds a {@link GroupingSpecification} from the request parameters and attaches it to the
 * given response builder.
 *
 * <p>The group sort is the request's sort spec weighted against the searcher. The within-group
 * sort is parsed from {@code GroupParams.GROUP_SORT} when that parameter is present, otherwise it
 * is a copy of the group sort. Its offset and count always come from
 * {@code GroupParams.GROUP_OFFSET} (default 0) and {@code GroupParams.GROUP_LIMIT} (default 1).
 *
 * @param rb the response builder that receives the grouping specification
 * @throws SolrException with {@code BAD_REQUEST} if a cursor mark is set on {@code rb}, or if
 *         the {@code GroupParams.GROUP_FORMAT} value is not a {@link Grouping.Format} constant
 * @throws IOException declared by the searcher / sort parsing calls made here
 */
protected void prepareGrouping(ResponseBuilder rb) throws IOException {
  final SolrQueryRequest request = rb.req;
  final SolrParams requestParams = request.getParams();

  // grouping with a cursor - so for now we just don't allow the combination at all
  if (rb.getCursorMark() != null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not use Grouping with " + CursorMarkParams.CURSOR_MARK_PARAM);
  }

  final SolrIndexSearcher indexSearcher = request.getSearcher();

  // The specification is attached to the builder before it is populated.
  final GroupingSpecification spec = new GroupingSpecification();
  rb.setGroupingSpec(spec);

  //TODO: move weighting of sort
  final SortSpec acrossGroups = indexSearcher.weightSortSpec(rb.getSortSpec(), Sort.RELEVANCE);

  //TODO: move weighting of sort
  final String withinGroupSortParam = requestParams.get(GroupParams.GROUP_SORT);
  final SortSpec insideGroups = (withinGroupSortParam == null)
      // No explicit within-group sort: start from a copy of the group sort.
      ? new SortSpec(acrossGroups.getSort(), acrossGroups.getSchemaFields(), acrossGroups.getCount(), acrossGroups.getOffset())
      : indexSearcher.weightSortSpec(SortSpecParsing.parseSortSpec(withinGroupSortParam, request), Sort.RELEVANCE);
  insideGroups.setOffset(requestParams.getInt(GroupParams.GROUP_OFFSET, 0));
  insideGroups.setCount(requestParams.getInt(GroupParams.GROUP_LIMIT, 1));
  spec.setWithinGroupSortSpec(insideGroups);
  spec.setGroupSortSpec(acrossGroups);

  // Resolve the response format, rejecting names that are not Grouping.Format constants.
  final String formatName = requestParams.get(GroupParams.GROUP_FORMAT, Grouping.Format.grouped.name());
  final Grouping.Format format;
  try {
    format = Grouping.Format.valueOf(formatName);
  } catch (IllegalArgumentException e) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, String.format(Locale.ROOT, "Illegal %s parameter", GroupParams.GROUP_FORMAT));
  }
  spec.setResponseFormat(format);

  // What to group by.
  spec.setFields(requestParams.getParams(GroupParams.GROUP_FIELD));
  spec.setQueries(requestParams.getParams(GroupParams.GROUP_QUERY));
  spec.setFunctions(requestParams.getParams(GroupParams.GROUP_FUNC));

  // Remaining boolean switches.
  spec.setIncludeGroupCount(requestParams.getBool(GroupParams.GROUP_TOTAL_COUNT, false));
  spec.setMain(requestParams.getBool(GroupParams.GROUP_MAIN, false));
  final boolean scoresRequested = (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0;
  spec.setNeedScore(scoresRequested);
  spec.setTruncateGroups(requestParams.getBool(GroupParams.GROUP_TRUNCATE, false));
}
use of org.apache.solr.search.grouping.GroupingSpecification in project lucene-solr by apache.
the class QueryComponent method groupedFinishStage.
/**
 * Merges the grouped results held by the response builder and hands them to the end-result
 * transformer that matches the requested output shape.
 *
 * <p>The transformer is chosen from the grouping specification: the "main" transformer when
 * {@code isMain()} is set, otherwise one matching the {@code grouped} or {@code simple} response
 * format. If none applies the method returns without transforming anything.
 *
 * @param rb the response builder carrying the merged group data
 */
@SuppressWarnings("unchecked")
protected void groupedFinishStage(final ResponseBuilder rb) {
  final GroupingSpecification spec = rb.getGroupingSpec();

  // To have same response as non-distributed request.
  if (rb.mergedTopGroups.isEmpty()) {
    for (String groupField : spec.getFields()) {
      rb.mergedTopGroups.put(groupField, new TopGroups(null, null, 0, 0, new GroupDocs[] {}, Float.NaN));
    }
    rb.resultIds = new HashMap<>();
  }

  // Pick the transformer; "main" takes precedence over the response format.
  final EndResultTransformer transformer;
  if (spec.isMain()) {
    transformer = MAIN_END_RESULT_TRANSFORMER;
  } else {
    final Grouping.Format format = spec.getResponseFormat();
    if (format == Grouping.Format.grouped) {
      transformer = new GroupedEndResultTransformer(rb.req.getSearcher());
    } else if (format == Grouping.Format.simple) {
      transformer = SIMPLE_END_RESULT_TRANSFORMER;
    } else {
      return;
    }
  }

  // Looks up the retrieved document for a shard doc by its id.
  final EndResultTransformer.SolrDocumentSource documentLookup =
      doc -> rb.retrievedDocuments.get(((ShardDoc) doc).id);

  // Field-group results first, then query-command results, in insertion order.
  final Map<String, Object> merged = new LinkedHashMap<>();
  merged.putAll(rb.mergedTopGroups);
  merged.putAll(rb.mergedQueryCommandResults);
  transformer.transform(merged, rb, documentLookup);
}
use of org.apache.solr.search.grouping.GroupingSpecification in project lucene-solr by apache.
the class QueryComponent method process.
/**
 * Actually run the query.
 *
 * <p>Depending on the request this method takes one of several paths, each of which returns as
 * soon as it has produced its response:
 * <ol>
 *   <li>does nothing when the {@code COMPONENT_NAME} parameter is {@code false};</li>
 *   <li>returns local term statistics when the shard purpose includes
 *       {@code PURPOSE_GET_TERM_STATS};</li>
 *   <li>resolves an explicit {@code ShardParams.IDS} list to internal document ids and returns
 *       those documents; ids that match no document are skipped;</li>
 *   <li>runs one of the grouping flows (distributed first phase, distributed second phase, or
 *       plain grouping) when a grouping specification is present;</li>
 *   <li>otherwise runs a normal search and then the merge-field / sort-value / prefetch steps.</li>
 * </ol>
 *
 * @param rb the response builder holding the request, the response and intermediate state
 * @throws SolrException with {@code BAD_REQUEST} when a cursor mark is combined with a positive
 *         {@code CommonParams.TIME_ALLOWED}, or when a grouping command raises a
 *         {@code SyntaxError}
 * @throws IOException propagated from the searcher and grouping command calls
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
  LOG.debug("process: {}", rb.req.getParams());
  SolrQueryRequest req = rb.req;
  SolrParams params = req.getParams();
  if (!params.getBool(COMPONENT_NAME, true)) {
    return;
  }
  SolrIndexSearcher searcher = req.getSearcher();
  StatsCache statsCache = req.getCore().getStatsCache();
  int purpose = params.getInt(ShardParams.SHARDS_PURPOSE, ShardRequest.PURPOSE_GET_TOP_IDS);
  if ((purpose & ShardRequest.PURPOSE_GET_TERM_STATS) != 0) {
    statsCache.returnLocalStats(rb, searcher);
    return;
  }
  // check if we need to update the local copy of global dfs
  if ((purpose & ShardRequest.PURPOSE_SET_TERM_STATS) != 0) {
    // retrieve from request and update local cache
    statsCache.receiveGlobalStats(req);
  }
  SolrQueryResponse rsp = rb.rsp;
  IndexSchema schema = searcher.getSchema();
  // Optional: This could also be implemented by the top-level searcher sending
  // a filter that lists the ids... that would be transparent to
  // the request handler, but would be more expensive (and would preserve score
  // too if desired).
  String ids = params.get(ShardParams.IDS);
  if (ids != null) {
    SchemaField idField = schema.getUniqueKeyField();
    List<String> idArr = StrUtils.splitSmart(ids, ",", true);
    int[] luceneIds = new int[idArr.size()];
    int docs = 0;
    if (idField.getType().isPointField()) {
      for (int i = 0; i < idArr.size(); i++) {
        // A query that matches nothing yields an empty scoreDocs array, so indexing [0]
        // directly would throw ArrayIndexOutOfBoundsException. Map "no hit" to -1 so that a
        // missing id is skipped, exactly like the non-point branch below.
        int id = java.util.Arrays.stream(
                searcher.search(idField.getType().getFieldQuery(null, idField, idArr.get(i)), 1).scoreDocs)
            .mapToInt(hit -> hit.doc)
            .findFirst()
            .orElse(-1);
        if (id >= 0) {
          luceneIds[docs++] = id;
        }
      }
    } else {
      for (int i = 0; i < idArr.size(); i++) {
        int id = searcher.getFirstMatch(new Term(idField.getName(), idField.getType().toInternal(idArr.get(i))));
        if (id >= 0) {
          luceneIds[docs++] = id;
        }
      }
    }
    DocListAndSet res = new DocListAndSet();
    // Only the first `docs` entries of luceneIds are populated.
    res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
    if (rb.isNeedDocSet()) {
      // TODO: create a cache for this!
      List<Query> queries = new ArrayList<>();
      queries.add(rb.getQuery());
      List<Query> filters = rb.getFilters();
      if (filters != null) {
        queries.addAll(filters);
      }
      res.docSet = searcher.getDocSet(queries);
    }
    rb.setResults(res);
    ResultContext ctx = new BasicResultContext(rb);
    rsp.addResponse(ctx);
    return;
  }
  // -1 as flag if not set.
  long timeAllowed = params.getLong(CommonParams.TIME_ALLOWED, -1L);
  if (null != rb.getCursorMark() && 0 < timeAllowed) {
    // fundamentally incompatible
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not search using both " + CursorMarkParams.CURSOR_MARK_PARAM + " and " + CommonParams.TIME_ALLOWED);
  }
  QueryCommand cmd = rb.getQueryCommand();
  cmd.setTimeAllowed(timeAllowed);
  req.getContext().put(SolrIndexSearcher.STATS_SOURCE, statsCache.get(req));
  QueryResult result = new QueryResult();
  cmd.setSegmentTerminateEarly(params.getBool(CommonParams.SEGMENT_TERMINATE_EARLY, CommonParams.SEGMENT_TERMINATE_EARLY_DEFAULT));
  if (cmd.getSegmentTerminateEarly()) {
    result.setSegmentTerminatedEarly(Boolean.FALSE);
  }
  //
  // grouping / field collapsing
  //
  GroupingSpecification groupingSpec = rb.getGroupingSpec();
  if (groupingSpec != null) {
    // not supported, silently ignore any segmentTerminateEarly flag
    cmd.setSegmentTerminateEarly(false);
    try {
      boolean needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
      if (params.getBool(GroupParams.GROUP_DISTRIBUTED_FIRST, false)) {
        // Distributed first phase: collect the top groups per field.
        CommandHandler.Builder topsGroupsActionBuilder = new CommandHandler.Builder()
            .setQueryCommand(cmd)
            .setNeedDocSet(false) // Order matters here
            .setIncludeHitCount(true)
            .setSearcher(searcher);
        for (String field : groupingSpec.getFields()) {
          topsGroupsActionBuilder.addCommandField(new SearchGroupsFieldCommand.Builder()
              .setField(schema.getField(field))
              .setGroupSort(groupingSpec.getGroupSort())
              .setTopNGroups(cmd.getOffset() + cmd.getLen())
              .setIncludeGroupCount(groupingSpec.isIncludeGroupCount())
              .build());
        }
        CommandHandler commandHandler = topsGroupsActionBuilder.build();
        commandHandler.execute();
        SearchGroupsResultTransformer serializer = new SearchGroupsResultTransformer(searcher);
        rsp.add("firstPhase", commandHandler.processResult(result, serializer));
        rsp.add("totalHitCount", commandHandler.getTotalHitCount());
        rb.setResult(result);
        return;
      } else if (params.getBool(GroupParams.GROUP_DISTRIBUTED_SECOND, false)) {
        // Distributed second phase: collect documents for the top groups passed in as parameters.
        CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder()
            .setQueryCommand(cmd)
            .setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
            .setSearcher(searcher);
        int docsToCollect = Grouping.getMax(groupingSpec.getWithinGroupOffset(), groupingSpec.getWithinGroupLimit(), searcher.maxDoc());
        docsToCollect = Math.max(docsToCollect, 1);
        for (String field : groupingSpec.getFields()) {
          SchemaField schemaField = schema.getField(field);
          String[] topGroupsParam = params.getParams(GroupParams.GROUP_DISTRIBUTED_TOPGROUPS_PREFIX + field);
          if (topGroupsParam == null) {
            topGroupsParam = new String[0];
          }
          List<SearchGroup<BytesRef>> topGroups = new ArrayList<>(topGroupsParam.length);
          for (String topGroup : topGroupsParam) {
            SearchGroup<BytesRef> searchGroup = new SearchGroup<>();
            // The null-group marker leaves groupValue unset.
            if (!topGroup.equals(TopGroupsShardRequestFactory.GROUP_NULL_VALUE)) {
              BytesRefBuilder builder = new BytesRefBuilder();
              schemaField.getType().readableToIndexed(topGroup, builder);
              searchGroup.groupValue = builder.get();
            }
            topGroups.add(searchGroup);
          }
          secondPhaseBuilder.addCommandField(new TopGroupsFieldCommand.Builder()
              .setField(schemaField)
              .setGroupSort(groupingSpec.getGroupSort())
              .setSortWithinGroup(groupingSpec.getSortWithinGroup())
              .setFirstPhaseGroups(topGroups)
              .setMaxDocPerGroup(docsToCollect)
              .setNeedScores(needScores)
              .setNeedMaxScore(needScores)
              .build());
        }
        for (String query : groupingSpec.getQueries()) {
          secondPhaseBuilder.addCommandField(new Builder()
              .setDocsToCollect(docsToCollect)
              .setSort(groupingSpec.getGroupSort())
              .setQuery(query, rb.req)
              .setDocSet(searcher)
              .build());
        }
        CommandHandler commandHandler = secondPhaseBuilder.build();
        commandHandler.execute();
        TopGroupsResultTransformer serializer = new TopGroupsResultTransformer(rb);
        rsp.add("secondPhase", commandHandler.processResult(result, serializer));
        rb.setResult(result);
        return;
      }
      // Plain grouping (neither distributed phase flag is set).
      int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
      boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
      Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ? Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
      // this is normally from "rows"
      int limitDefault = cmd.getLen();
      Grouping grouping = new Grouping(searcher, result, cmd, cacheSecondPassSearch, maxDocsPercentageToCache, groupingSpec.isMain());
      grouping.setGroupSort(groupingSpec.getGroupSort())
          .setWithinGroupSort(groupingSpec.getSortWithinGroup())
          .setDefaultFormat(groupingSpec.getResponseFormat())
          .setLimitDefault(limitDefault)
          .setDefaultTotalCount(defaultTotalCount)
          .setDocsPerGroupDefault(groupingSpec.getWithinGroupLimit())
          .setGroupOffsetDefault(groupingSpec.getWithinGroupOffset())
          .setGetGroupedDocSet(groupingSpec.isTruncateGroups());
      if (groupingSpec.getFields() != null) {
        for (String field : groupingSpec.getFields()) {
          grouping.addFieldCommand(field, rb.req);
        }
      }
      if (groupingSpec.getFunctions() != null) {
        for (String groupByStr : groupingSpec.getFunctions()) {
          grouping.addFunctionCommand(groupByStr, rb.req);
        }
      }
      if (groupingSpec.getQueries() != null) {
        for (String groupByStr : groupingSpec.getQueries()) {
          grouping.addQueryCommand(groupByStr, rb.req);
        }
      }
      if (rb.isNeedDocList() || rb.isDebug()) {
        // we need a single list of the returned docs
        cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
      }
      grouping.execute();
      if (grouping.isSignalCacheWarning()) {
        rsp.add("cacheWarning", String.format(Locale.ROOT, "Cache limit of %d percent relative to maxdoc has exceeded. Please increase cache size or disable caching.", maxDocsPercentageToCache));
      }
      rb.setResult(result);
      if (grouping.mainResult != null) {
        ResultContext ctx = new BasicResultContext(rb, grouping.mainResult);
        rsp.addResponse(ctx);
        rsp.getToLog().add("hits", grouping.mainResult.matches());
      } else if (!grouping.getCommands().isEmpty()) {
        // Can never be empty since grouping.execute() checks for this.
        rsp.add("grouped", result.groupedResults);
        rsp.getToLog().add("hits", grouping.getCommands().get(0).getMatches());
      }
      return;
    } catch (SyntaxError e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }
  }
  // normal search result
  searcher.search(result, cmd);
  rb.setResult(result);
  ResultContext ctx = new BasicResultContext(rb);
  rsp.addResponse(ctx);
  rsp.getToLog().add("hits", rb.getResults().docList.matches());
  // The next cursor mark is only added to the response when this is not a shard request.
  if (!rb.req.getParams().getBool(ShardParams.IS_SHARD, false)) {
    if (null != rb.getNextCursorMark()) {
      rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, rb.getNextCursorMark().getSerializedTotem());
    }
  }
  if (rb.mergeFieldHandler != null) {
    rb.mergeFieldHandler.handleMergeFields(rb, searcher);
  } else {
    doFieldSortValues(rb, searcher);
  }
  doPrefetch(rb);
}
use of org.apache.solr.search.grouping.GroupingSpecification in project lucene-solr by apache.
the class QueryElevationComponent method prepare.
//---------------------------------------------------------------------------------
// SearchComponent
//---------------------------------------------------------------------------------
/**
 * Applies query elevation to the request held by the response builder.
 *
 * <p>When an elevation entry is found for the query (either built from the explicit
 * {@code QueryElevationParams.IDS} / {@code QueryElevationParams.EXCLUDE} parameters or looked
 * up in the elevation map by the analyzed query string), this method:
 * <ul>
 *   <li>stores the boosted ids and priorities in the request context;</li>
 *   <li>replaces the query, either with only the elevated documents (exclusive mode) or with
 *       the original query plus the elevated documents and, unless excludes are only being
 *       marked, the excluded terms as {@code MUST_NOT} clauses;</li>
 *   <li>adjusts the sort spec, and the grouping sort specs if grouping is active.</li>
 * </ul>
 * Debug information is added when debugging is enabled on the builder.
 *
 * @param rb the response builder for the current request
 * @throws SolrException with {@code SERVER_ERROR} if resolving the elevation entry fails
 * @throws IOException declared by the overridden method
 */
@Override
public void prepare(ResponseBuilder rb) throws IOException {
  final SolrQueryRequest request = rb.req;
  final SolrParams requestParams = request.getParams();

  // A runtime param can skip
  if (!requestParams.getBool(QueryElevationParams.ENABLE, true)) {
    return;
  }

  final boolean exclusiveOnly = requestParams.getBool(QueryElevationParams.EXCLUSIVE, false);
  // A runtime parameter can alter the config value for forceElevation
  final boolean forceElevate = requestParams.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation);
  final boolean onlyMarkExcluded = requestParams.getBool(QueryElevationParams.MARK_EXCLUDES, false);
  final String elevateIdsParam = requestParams.get(QueryElevationParams.IDS);
  final String excludeIdsParam = requestParams.get(QueryElevationParams.EXCLUDE);

  final Query originalQuery = rb.getQuery();
  final SolrParams localParams = rb.getQparser().getLocalParams();
  String queryText = (localParams != null) ? localParams.get(QueryParsing.V) : rb.getQueryString();
  if (originalQuery == null || queryText == null) {
    return;
  }

  // Resolve the elevation entry: explicit request params win over the configured map.
  ElevationObj elevation = null;
  try {
    if (elevateIdsParam == null && excludeIdsParam == null) {
      final IndexReader reader = request.getSearcher().getIndexReader();
      queryText = getAnalyzedQuery(queryText);
      elevation = getElevationMap(reader, request.getCore()).get(queryText);
    } else {
      final List<String> elevatedIds = (elevateIdsParam == null)
          ? new ArrayList<String>(0)
          : StrUtils.splitSmart(elevateIdsParam, ",", true);
      final List<String> excludedIds = (excludeIdsParam == null)
          ? new ArrayList<String>(0)
          : StrUtils.splitSmart(excludeIdsParam, ",", true);
      elevation = new ElevationObj(queryText, elevatedIds, excludedIds);
    }
  } catch (Exception ex) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error loading elevation", ex);
  }

  if (elevation != null) {
    rb.req.getContext().put(BOOSTED, elevation.ids);
    rb.req.getContext().put(BOOSTED_PRIORITY, elevation.priority);

    // Change the query to insert forced documents
    if (exclusiveOnly) {
      //we only want these results
      rb.setQuery(new BoostQuery(elevation.include, 0f));
    } else {
      final BooleanQuery.Builder rewritten = new BooleanQuery.Builder();
      rewritten.add(originalQuery, BooleanClause.Occur.SHOULD);
      rewritten.add(new BoostQuery(elevation.include, 0f), BooleanClause.Occur.SHOULD);
      if (elevation.exclude != null) {
        if (onlyMarkExcluded) {
          //we are only going to mark items as excluded, not actually exclude them. This works
          //with the EditorialMarkerFactory
          rb.req.getContext().put(EXCLUDED, elevation.excludeIds);
        } else {
          for (TermQuery excludedTerm : elevation.exclude) {
            rewritten.add(new BooleanClause(excludedTerm, BooleanClause.Occur.MUST_NOT));
          }
        }
      }
      rb.setQuery(rewritten.build());
    }

    final ElevationComparatorSource elevationComparator = new ElevationComparatorSource(elevation);

    // if the sort is 'score desc' use a custom sorting method to
    // insert documents in their proper place
    final SortSpec currentSort = rb.getSortSpec();
    if (currentSort.getSort() == null) {
      final SortField[] elevateThenScore = {
        new SortField("_elevate_", elevationComparator, true),
        new SortField(null, SortField.Type.SCORE, false)
      };
      currentSort.setSortAndFields(new Sort(elevateThenScore), Arrays.asList(new SchemaField[2]));
    } else {
      // Check if the sort is based on score
      final SortSpec adjustedSort = this.modifySortSpec(currentSort, forceElevate, elevationComparator);
      if (adjustedSort != null) {
        rb.setSortSpec(adjustedSort);
      }
    }

    // alter the sorting in the grouping specification if there is one
    final GroupingSpecification grouping = rb.getGroupingSpec();
    if (grouping != null) {
      final SortSpec adjustedGroupSort =
          this.modifySortSpec(grouping.getGroupSortSpec(), forceElevate, elevationComparator);
      if (adjustedGroupSort != null) {
        grouping.setGroupSortSpec(adjustedGroupSort);
      }
      final SortSpec adjustedWithinGroupSort =
          this.modifySortSpec(grouping.getWithinGroupSortSpec(), forceElevate, elevationComparator);
      if (adjustedWithinGroupSort != null) {
        grouping.setWithinGroupSortSpec(adjustedWithinGroupSort);
      }
    }
  }

  // Add debugging information
  if (rb.isDebug()) {
    List<String> elevatedTerms = null;
    if (elevation != null) {
      // Extract the elevated terms into a list
      elevatedTerms = new ArrayList<>(elevation.priority.size());
      for (Object clause : elevation.include.clauses()) {
        final TermQuery termQuery = (TermQuery) ((BooleanClause) clause).getQuery();
        elevatedTerms.add(termQuery.getTerm().text());
      }
    }
    final SimpleOrderedMap<Object> debugInfo = new SimpleOrderedMap<>();
    debugInfo.add("q", queryText);
    debugInfo.add("match", elevatedTerms);
    if (rb.isDebugQuery()) {
      rb.addDebugInfo("queryBoosting", debugInfo);
    }
  }
}
use of org.apache.solr.search.grouping.GroupingSpecification in project lucene-solr by apache.
the class SimpleFacets method getGroupedCounts.
/**
 * Computes facet counts for {@code field} over {@code base}, using the first group field of the
 * current grouping specification as the grouping key.
 *
 * @param searcher   the searcher used to run the collection and to look up the facet field type
 * @param base       the document set to facet over
 * @param field      the field to facet on
 * @param multiToken passed through to the facet collector factory
 * @param offset     number of leading facet entries to skip
 * @param limit      maximum number of facet entries to return; a negative value means no limit
 * @param mincount   minimum count passed to the segment-result merge
 * @param missing    when {@code true}, an entry with a {@code null} name holding the total
 *                   missing count is appended
 * @param sort       facet sort; the count-based values of {@code FacetParams} select ordering
 *                   by count
 * @param prefix     optional term prefix, may be {@code null}
 * @param termFilter optional predicate; entries whose value fails the test are dropped, may be
 *                   {@code null}
 * @return the facet counts keyed by the readable form of each facet value
 * @throws SolrException with {@code BAD_REQUEST} if there is no grouping specification or it
 *         carries no group field
 * @throws IOException propagated from the search
 */
public NamedList<Integer> getGroupedCounts(SolrIndexSearcher searcher, DocSet base, String field, boolean multiToken, int offset, int limit, int mincount, boolean missing, String sort, String prefix, Predicate<BytesRef> termFilter) throws IOException {
  GroupingSpecification groupingSpecification = rb.getGroupingSpec();
  // Guard against a null or empty field array so that a request without any group field gets
  // the BAD_REQUEST below instead of a NullPointerException / ArrayIndexOutOfBoundsException.
  final String[] groupFields = groupingSpecification != null ? groupingSpecification.getFields() : null;
  final String groupField = (groupFields != null && groupFields.length > 0) ? groupFields[0] : null;
  if (groupField == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specify the group.field as parameter or local parameter");
  }
  BytesRef prefixBytesRef = prefix != null ? new BytesRef(prefix) : null;
  final TermGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBytesRef, 128);
  Collector groupWrapper = getInsanityWrapper(groupField, collector);
  Collector fieldWrapper = getInsanityWrapper(field, groupWrapper);
  // When GroupedFacetCollector can handle numerics we can remove the wrapped collectors
  searcher.search(base.getTopFilter(), fieldWrapper);
  boolean orderByCount = sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY);
  // offset + limit is computed in long and clamped so that large values cannot wrap around to
  // a negative entry count.
  final int maxEntries = limit < 0 ? Integer.MAX_VALUE : (int) Math.min((long) offset + limit, Integer.MAX_VALUE);
  TermGroupFacetCollector.GroupedFacetResult result = collector.mergeSegmentResults(maxEntries, mincount, orderByCount);
  CharsRefBuilder charsRef = new CharsRefBuilder();
  FieldType facetFieldType = searcher.getSchema().getFieldType(field);
  NamedList<Integer> facetCounts = new NamedList<>();
  List<TermGroupFacetCollector.FacetEntry> scopedEntries = result.getFacetEntries(offset, limit < 0 ? Integer.MAX_VALUE : limit);
  for (TermGroupFacetCollector.FacetEntry facetEntry : scopedEntries) {
    //:TODO:can we filter earlier than this to make it more efficient?
    if (termFilter != null && !termFilter.test(facetEntry.getValue())) {
      continue;
    }
    facetFieldType.indexedToReadable(facetEntry.getValue(), charsRef);
    facetCounts.add(charsRef.toString(), facetEntry.getCount());
  }
  if (missing) {
    facetCounts.add(null, result.getTotalMissingCount());
  }
  return facetCounts;
}
Aggregations