use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.
the class UnInvertedField method visitTerm.
/**
 * Called for each term while un-inverting the field.
 *
 * <p>Grows {@code maxTermCounts} so that it can be indexed by {@code termNum}. When the
 * term's document frequency exceeds {@code maxTermDocFreq}, the term is registered in
 * {@code bigTerms} and the size of its doc set (obtained from the searcher) is stored in
 * {@code maxTermCounts[termNum]}.
 *
 * @param te      terms enum positioned on the term being visited
 * @param termNum ordinal of the term being visited
 * @throws IOException if reading the term or computing its doc set fails
 */
@Override
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
  if (termNum >= maxTermCounts.length) {
    // resize by doubling - for very large number of unique terms, expanding
    // by 4K and resultant GC will dominate uninvert times. Resize at end if material
    //
    // The new size is computed with long arithmetic: "length * 2" as an int overflows to
    // a negative value for very large arrays, and doubling a zero-length array never
    // grows it. The array is always made large enough to hold index termNum.
    final long doubled = maxTermCounts.length * 2L;
    final long required = Math.max(doubled, termNum + 1L);
    final int newLength = (int) Math.min(Integer.MAX_VALUE - 16, required);
    int[] newMaxTermCounts = new int[newLength];
    // Only the existing entries can be copied; anything beyond stays at zero.
    System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, maxTermCounts.length);
    maxTermCounts = newMaxTermCounts;
  }
  final BytesRef term = te.term();
  if (te.docFreq() > maxTermDocFreq) {
    // this makes a deep copy of the term bytes
    Term t = new Term(field, term);
    TopTerm topTerm = new TopTerm();
    topTerm.term = t.bytes();
    topTerm.termNum = termNum;
    topTerm.termQuery = new TermQuery(t);
    bigTerms.put(topTerm.termNum, topTerm);
    if (deState == null) {
      // Lazily create the shared enum state the first time a big term is seen.
      deState = new SolrIndexSearcher.DocsEnumState();
      deState.fieldName = field;
      deState.liveDocs = searcher.getSlowAtomicReader().getLiveDocs();
      // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail?
      deState.termsEnum = te;
      deState.postingsEnum = postingsEnum;
      deState.minSetSizeCached = maxTermDocFreq;
    }
    postingsEnum = deState.postingsEnum;
    DocSet set = searcher.getDocSet(deState);
    maxTermCounts[termNum] = set.size();
  }
}
use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.
the class SortSlotAcc method collect.
/**
 * Collects every document of {@code docs} into {@code slot}, switching the per-segment
 * state via {@code setNextReader} whenever a document falls past the current segment.
 *
 * <p>NOTE(review): the segment-advance logic only moves forward through the leaves, so it
 * presumably relies on {@code docs} iterating in ascending doc id order - confirm.
 *
 * @param docs top-level doc ids to collect
 * @param slot slot the documents are collected into
 * @return the number of documents that were collected
 * @throws IOException if a per-segment collect or reader switch fails
 */
public int collect(DocSet docs, int slot) throws IOException {
  int count = 0;
  SolrIndexSearcher searcher = fcontext.searcher;
  final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
  final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
  LeafReaderContext ctx = null;
  int segBase = 0;
  // Exclusive upper bound (top-level doc id) of the current segment; 0 forces the first
  // document to select a segment.
  int adjustedMax = 0;
  for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
    final int doc = docsIt.nextDoc();
    if (doc >= adjustedMax) {
      do {
        // Iterator.next() never returns null; it throws NoSuchElementException when
        // exhausted, so exhaustion has to be detected with hasNext().
        if (!ctxIt.hasNext()) {
          // should be impossible
          throw new RuntimeException("INTERNAL FACET ERROR");
        }
        ctx = ctxIt.next();
        segBase = ctx.docBase;
        adjustedMax = segBase + ctx.reader().maxDoc();
      } while (doc >= adjustedMax);
      assert doc >= ctx.docBase;
      setNextReader(ctx);
    }
    count++;
    // per-seg collectors
    collect(doc - segBase, slot);
  }
  return count;
}
use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.
the class PivotFacetProcessor method processSingle.
/**
 * Process a single branch of refinement values for a specific pivot.
 *
 * @param pivotFields the ordered list of fields in this pivot
 * @param refinements the comma separated list of refinement values corresponding to each
 *                    field in the pivot, or null if there are no refinements
 * @param statsFields List of {@link StatsField} instances to compute for each pivot value
 * @param parsed the parsed pivot parameters; supplies the base doc set and the response key
 * @param facetQueries the list of facet queries hung under this pivot
 * @param facetRanges the list of facet ranges hung under this pivot
 * @return a map holding the computed pivot values under {@code parsed.key}
 * @throws IOException if computing the counts or the nested pivots fails
 */
private SimpleOrderedMap<List<NamedList<Object>>> processSingle(List<String> pivotFields, String refinements, List<StatsField> statsFields, final ParsedParams parsed, List<FacetComponent.FacetBase> facetQueries, List<RangeFacetRequest> facetRanges) throws IOException {
  final SolrIndexSearcher searcher = rb.req.getSearcher();
  final String field = pivotFields.get(0);
  final SchemaField sfield = searcher.getSchema().getField(field);

  // Fields from index 2 onwards, pushed back-to-front so the head of the deque is
  // pivotFields[2]. Index 1 (the sub field) is handed to doPivots separately.
  final Deque<String> fnames = new LinkedList<>();
  for (int idx = pivotFields.size() - 1; idx >= 2; idx--) {
    fnames.push(pivotFields.get(idx));
  }

  final Deque<String> vnames = new LinkedList<>();
  final NamedList<Integer> facetCounts;
  if (refinements == null) {
    // no refinements needed
    facetCounts = this.getTermCountsForPivots(field, parsed);
  } else {
    // All values, split by the field they should go to
    final List<String> valuePath = PivotFacetHelper.decodeRefinementValuePath(refinements);
    // Only for [1] and on; [0] belongs to the top-level field and is counted below
    for (int idx = valuePath.size() - 1; idx >= 1; idx--) {
      vnames.push(valuePath.get(idx));
    }
    final String topValue = valuePath.get(0);
    facetCounts = new NamedList<>();
    facetCounts.add(topValue, getSubsetSize(parsed.docs, sfield, topValue));
  }

  // A single-field pivot has no sub field to descend into.
  final String subField = pivotFields.size() > 1 ? pivotFields.get(1) : null;

  final SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<>();
  pivotResponse.add(parsed.key, doPivots(facetCounts, field, subField, fnames, vnames, parsed, statsFields, facetQueries, facetRanges));
  return pivotResponse;
}
use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.
the class QueryComponent method process.
/**
 * Actually run the query.
 *
 * <p>Depending on the request this method takes one of several paths, each of which
 * returns as soon as it has populated the response:
 * <ul>
 *   <li>the component is disabled via {@code COMPONENT_NAME}: nothing is done;</li>
 *   <li>the shard purpose asks for term stats: local stats are returned;</li>
 *   <li>{@link ShardParams#IDS} is present: the listed unique keys are resolved to
 *       internal doc ids and returned as the result;</li>
 *   <li>a grouping spec is present: distributed first phase, distributed second phase,
 *       or regular grouping is executed;</li>
 *   <li>otherwise a normal search is run.</li>
 * </ul>
 *
 * @param rb the response builder carrying the request, the response and the query state
 * @throws IOException if the underlying search fails
 * @throws SolrException with {@code BAD_REQUEST} if a cursor mark is combined with a
 *         positive {@code timeAllowed}, or if a grouping command has a syntax error
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
  LOG.debug("process: {}", rb.req.getParams());
  SolrQueryRequest req = rb.req;
  SolrParams params = req.getParams();
  if (!params.getBool(COMPONENT_NAME, true)) {
    return;
  }
  SolrIndexSearcher searcher = req.getSearcher();
  StatsCache statsCache = req.getCore().getStatsCache();
  int purpose = params.getInt(ShardParams.SHARDS_PURPOSE, ShardRequest.PURPOSE_GET_TOP_IDS);
  if ((purpose & ShardRequest.PURPOSE_GET_TERM_STATS) != 0) {
    statsCache.returnLocalStats(rb, searcher);
    return;
  }
  // check if we need to update the local copy of global dfs
  if ((purpose & ShardRequest.PURPOSE_SET_TERM_STATS) != 0) {
    // retrieve from request and update local cache
    statsCache.receiveGlobalStats(req);
  }
  SolrQueryResponse rsp = rb.rsp;
  IndexSchema schema = searcher.getSchema();
  // Optional: This could also be implemented by the top-level searcher sending
  // a filter that lists the ids... that would be transparent to
  // the request handler, but would be more expensive (and would preserve score
  // too if desired).
  String ids = params.get(ShardParams.IDS);
  if (ids != null) {
    SchemaField idField = schema.getUniqueKeyField();
    List<String> idArr = StrUtils.splitSmart(ids, ",", true);
    // Sized for the worst case; only the first "docs" entries are meaningful.
    int[] luceneIds = new int[idArr.size()];
    int docs = 0;
    if (idField.getType().isPointField()) {
      for (int i = 0; i < idArr.size(); i++) {
        // A key that matches no document yields an empty scoreDocs array; indexing [0]
        // unconditionally would throw, so such keys are skipped just like on the
        // non-point path below.
        final org.apache.lucene.search.ScoreDoc[] hits = searcher.search(idField.getType().getFieldQuery(null, idField, idArr.get(i)), 1).scoreDocs;
        if (hits.length > 0 && hits[0].doc >= 0) {
          luceneIds[docs++] = hits[0].doc;
        }
      }
    } else {
      for (int i = 0; i < idArr.size(); i++) {
        int id = searcher.getFirstMatch(new Term(idField.getName(), idField.getType().toInternal(idArr.get(i))));
        if (id >= 0) {
          luceneIds[docs++] = id;
        }
      }
    }
    DocListAndSet res = new DocListAndSet();
    res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
    if (rb.isNeedDocSet()) {
      // TODO: create a cache for this!
      List<Query> queries = new ArrayList<>();
      queries.add(rb.getQuery());
      List<Query> filters = rb.getFilters();
      if (filters != null) {
        queries.addAll(filters);
      }
      res.docSet = searcher.getDocSet(queries);
    }
    rb.setResults(res);
    ResultContext ctx = new BasicResultContext(rb);
    rsp.addResponse(ctx);
    return;
  }
  // -1 as flag if not set.
  long timeAllowed = params.getLong(CommonParams.TIME_ALLOWED, -1L);
  if (null != rb.getCursorMark() && 0 < timeAllowed) {
    // fundamentally incompatible
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not search using both " + CursorMarkParams.CURSOR_MARK_PARAM + " and " + CommonParams.TIME_ALLOWED);
  }
  QueryCommand cmd = rb.getQueryCommand();
  cmd.setTimeAllowed(timeAllowed);
  req.getContext().put(SolrIndexSearcher.STATS_SOURCE, statsCache.get(req));
  QueryResult result = new QueryResult();
  cmd.setSegmentTerminateEarly(params.getBool(CommonParams.SEGMENT_TERMINATE_EARLY, CommonParams.SEGMENT_TERMINATE_EARLY_DEFAULT));
  if (cmd.getSegmentTerminateEarly()) {
    result.setSegmentTerminatedEarly(Boolean.FALSE);
  }
  //
  // grouping / field collapsing
  //
  GroupingSpecification groupingSpec = rb.getGroupingSpec();
  if (groupingSpec != null) {
    // not supported, silently ignore any segmentTerminateEarly flag
    cmd.setSegmentTerminateEarly(false);
    try {
      boolean needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
      if (params.getBool(GroupParams.GROUP_DISTRIBUTED_FIRST, false)) {
        // Distributed grouping, first phase: collect the top groups per field.
        CommandHandler.Builder topsGroupsActionBuilder = new CommandHandler.Builder()
            .setQueryCommand(cmd)
            .setNeedDocSet(false) // Order matters here
            .setIncludeHitCount(true)
            .setSearcher(searcher);
        for (String field : groupingSpec.getFields()) {
          topsGroupsActionBuilder.addCommandField(new SearchGroupsFieldCommand.Builder().setField(schema.getField(field)).setGroupSort(groupingSpec.getGroupSort()).setTopNGroups(cmd.getOffset() + cmd.getLen()).setIncludeGroupCount(groupingSpec.isIncludeGroupCount()).build());
        }
        CommandHandler commandHandler = topsGroupsActionBuilder.build();
        commandHandler.execute();
        SearchGroupsResultTransformer serializer = new SearchGroupsResultTransformer(searcher);
        rsp.add("firstPhase", commandHandler.processResult(result, serializer));
        rsp.add("totalHitCount", commandHandler.getTotalHitCount());
        rb.setResult(result);
        return;
      } else if (params.getBool(GroupParams.GROUP_DISTRIBUTED_SECOND, false)) {
        // Distributed grouping, second phase: collect the docs of the groups that were
        // passed in through the per-field top-groups request parameters.
        CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder().setQueryCommand(cmd).setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0).setSearcher(searcher);
        int docsToCollect = Grouping.getMax(groupingSpec.getWithinGroupOffset(), groupingSpec.getWithinGroupLimit(), searcher.maxDoc());
        docsToCollect = Math.max(docsToCollect, 1);
        for (String field : groupingSpec.getFields()) {
          SchemaField schemaField = schema.getField(field);
          String[] topGroupsParam = params.getParams(GroupParams.GROUP_DISTRIBUTED_TOPGROUPS_PREFIX + field);
          if (topGroupsParam == null) {
            topGroupsParam = new String[0];
          }
          List<SearchGroup<BytesRef>> topGroups = new ArrayList<>(topGroupsParam.length);
          for (String topGroup : topGroupsParam) {
            SearchGroup<BytesRef> searchGroup = new SearchGroup<>();
            // The null-group marker leaves groupValue unset.
            if (!topGroup.equals(TopGroupsShardRequestFactory.GROUP_NULL_VALUE)) {
              BytesRefBuilder builder = new BytesRefBuilder();
              schemaField.getType().readableToIndexed(topGroup, builder);
              searchGroup.groupValue = builder.get();
            }
            topGroups.add(searchGroup);
          }
          secondPhaseBuilder.addCommandField(new TopGroupsFieldCommand.Builder().setField(schemaField).setGroupSort(groupingSpec.getGroupSort()).setSortWithinGroup(groupingSpec.getSortWithinGroup()).setFirstPhaseGroups(topGroups).setMaxDocPerGroup(docsToCollect).setNeedScores(needScores).setNeedMaxScore(needScores).build());
        }
        for (String query : groupingSpec.getQueries()) {
          secondPhaseBuilder.addCommandField(new Builder().setDocsToCollect(docsToCollect).setSort(groupingSpec.getGroupSort()).setQuery(query, rb.req).setDocSet(searcher).build());
        }
        CommandHandler commandHandler = secondPhaseBuilder.build();
        commandHandler.execute();
        TopGroupsResultTransformer serializer = new TopGroupsResultTransformer(rb);
        rsp.add("secondPhase", commandHandler.processResult(result, serializer));
        rb.setResult(result);
        return;
      }
      // Regular grouping.
      int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
      // Caching is only enabled for a percentage within 1..100.
      boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
      Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ? Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
      // this is normally from "rows"
      int limitDefault = cmd.getLen();
      Grouping grouping = new Grouping(searcher, result, cmd, cacheSecondPassSearch, maxDocsPercentageToCache, groupingSpec.isMain());
      grouping.setGroupSort(groupingSpec.getGroupSort()).setWithinGroupSort(groupingSpec.getSortWithinGroup()).setDefaultFormat(groupingSpec.getResponseFormat()).setLimitDefault(limitDefault).setDefaultTotalCount(defaultTotalCount).setDocsPerGroupDefault(groupingSpec.getWithinGroupLimit()).setGroupOffsetDefault(groupingSpec.getWithinGroupOffset()).setGetGroupedDocSet(groupingSpec.isTruncateGroups());
      if (groupingSpec.getFields() != null) {
        for (String field : groupingSpec.getFields()) {
          grouping.addFieldCommand(field, rb.req);
        }
      }
      if (groupingSpec.getFunctions() != null) {
        for (String groupByStr : groupingSpec.getFunctions()) {
          grouping.addFunctionCommand(groupByStr, rb.req);
        }
      }
      if (groupingSpec.getQueries() != null) {
        for (String groupByStr : groupingSpec.getQueries()) {
          grouping.addQueryCommand(groupByStr, rb.req);
        }
      }
      if (rb.isNeedDocList() || rb.isDebug()) {
        // we need a single list of the returned docs
        cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
      }
      grouping.execute();
      if (grouping.isSignalCacheWarning()) {
        rsp.add("cacheWarning", String.format(Locale.ROOT, "Cache limit of %d percent relative to maxdoc has exceeded. Please increase cache size or disable caching.", maxDocsPercentageToCache));
      }
      rb.setResult(result);
      if (grouping.mainResult != null) {
        ResultContext ctx = new BasicResultContext(rb, grouping.mainResult);
        rsp.addResponse(ctx);
        rsp.getToLog().add("hits", grouping.mainResult.matches());
      } else if (!grouping.getCommands().isEmpty()) {
        // Can never be empty since grouping.execute() checks for this.
        rsp.add("grouped", result.groupedResults);
        rsp.getToLog().add("hits", grouping.getCommands().get(0).getMatches());
      }
      return;
    } catch (SyntaxError e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }
  }
  // normal search result
  searcher.search(result, cmd);
  rb.setResult(result);
  ResultContext ctx = new BasicResultContext(rb);
  rsp.addResponse(ctx);
  rsp.getToLog().add("hits", rb.getResults().docList.matches());
  // The next cursor mark is only added to the response for non-shard requests.
  if (!rb.req.getParams().getBool(ShardParams.IS_SHARD, false)) {
    if (null != rb.getNextCursorMark()) {
      rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, rb.getNextCursorMark().getSerializedTotem());
    }
  }
  if (rb.mergeFieldHandler != null) {
    rb.mergeFieldHandler.handleMergeFields(rb, searcher);
  } else {
    doFieldSortValues(rb, searcher);
  }
  doPrefetch(rb);
}
use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.
the class PivotFacetProcessor method process.
/**
 * Processes all of the specified {@link FacetParams#FACET_PIVOT} strings, generating
 * a complete response tree for each pivot. The values in this response will either
 * be the complete tree of fields and values for the specified pivot in the local index,
 * or the requested refinements if the pivot params include the {@link PivotFacet#REFINE_PARAM}
 *
 * @param pivots the raw pivot parameter values, one per requested pivot; may be null
 * @return the combined pivot response, or null if faceting is disabled or
 *         {@code pivots} is null
 * @throws IOException if computing a pivot fails
 * @throws SolrException with {@code BAD_REQUEST} for unparsable pivot params, a pivot
 *         without fields, an unknown field, or a comma separated tag list in the query
 *         or range local param; with {@code SERVER_ERROR} if the faceting context is
 *         not available
 */
public SimpleOrderedMap<List<NamedList<Object>>> process(String[] pivots) throws IOException {
  if (!rb.doFacets || pivots == null) {
    return null;
  }
  // rb._statsInfo may be null if stats=false, ie: refine requests
  // if that's the case, but we need to refine w/stats, then we'll lazy init our
  // own instance of StatsInfo
  StatsInfo statsInfo = rb._statsInfo;
  SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<>();
  for (String pivotList : pivots) {
    final ParsedParams parsed;
    try {
      parsed = this.parseParams(FacetParams.FACET_PIVOT, pivotList);
    } catch (SyntaxError e) {
      throw new SolrException(ErrorCode.BAD_REQUEST, e);
    }
    List<String> pivotFields = StrUtils.splitSmart(parsed.facetValue, ",", true);
    if (pivotFields.size() < 1) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Pivot Facet needs at least one field name: " + pivotList);
    } else {
      // Validate every field name up front, before any counting work is done.
      SolrIndexSearcher searcher = rb.req.getSearcher();
      for (String fieldName : pivotFields) {
        SchemaField sfield = searcher.getSchema().getField(fieldName);
        if (sfield == null) {
          throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + fieldName + "\" is not a valid field name in pivot: " + pivotList);
        }
      }
    }
    // start by assuming no local params...
    // no local => no refinement
    String refineKey = null;
    // no local => no stats
    List<StatsField> statsFields = Collections.emptyList();
    List<FacetComponent.FacetBase> facetQueries = Collections.emptyList();
    List<RangeFacetRequest> facetRanges = Collections.emptyList();
    if (null != parsed.localParams) {
      // we might be refining..
      refineKey = parsed.localParams.get(PivotFacet.REFINE_PARAM);
      String statsLocalParam = parsed.localParams.get(StatsParams.STATS);
      if (null != refineKey && null != statsLocalParam && null == statsInfo) {
        // we are refining and need to compute stats,
        // but stats component hasn't inited StatsInfo (because we
        // don't need/want top level stats when refining) so we lazy init
        // our own copy of StatsInfo
        statsInfo = new StatsInfo(rb);
      }
      statsFields = getTaggedStatsFields(statsInfo, statsLocalParam);
      try {
        FacetComponent.FacetContext facetContext = FacetComponent.FacetContext.getFacetContext(req);
        String taggedQueries = parsed.localParams.get(QUERY);
        if (StringUtils.isEmpty(taggedQueries)) {
          facetQueries = Collections.emptyList();
        } else {
          List<String> localParamValue = StrUtils.splitSmart(taggedQueries, ',');
          if (localParamValue.size() > 1) {
            String msg = QUERY + " local param of " + FacetParams.FACET_PIVOT + " may not include tags separated by a comma - please use a common tag on all " + FacetParams.FACET_QUERY + " params you wish to compute under this pivot";
            throw new SolrException(ErrorCode.BAD_REQUEST, msg);
          }
          taggedQueries = localParamValue.get(0);
          facetQueries = facetContext.getQueryFacetsForTag(taggedQueries);
        }
        String taggedRanges = parsed.localParams.get(RANGE);
        if (StringUtils.isEmpty(taggedRanges)) {
          facetRanges = Collections.emptyList();
        } else {
          List<String> localParamValue = StrUtils.splitSmart(taggedRanges, ',');
          if (localParamValue.size() > 1) {
            String msg = RANGE + " local param of " + FacetParams.FACET_PIVOT + " may not include tags separated by a comma - please use a common tag on all " + FacetParams.FACET_RANGE + " params you wish to compute under this pivot";
            throw new SolrException(ErrorCode.BAD_REQUEST, msg);
          }
          taggedRanges = localParamValue.get(0);
          facetRanges = facetContext.getRangeFacetRequestsForTag(taggedRanges);
        }
      } catch (IllegalStateException e) {
        // Keep the original exception as the cause so the failure can be traced.
        throw new SolrException(ErrorCode.SERVER_ERROR, "Faceting context not set, cannot calculate pivot values", e);
      }
    }
    if (null != refineKey) {
      // NOTE(review): getParams may return null when no refinement values were sent for
      // this key, which would make the loop below throw - confirm callers always send them.
      String[] refinementValuesByField = params.getParams(PivotFacet.REFINE_PARAM + refineKey);
      for (String refinements : refinementValuesByField) {
        pivotResponse.addAll(processSingle(pivotFields, refinements, statsFields, parsed, facetQueries, facetRanges));
      }
    } else {
      pivotResponse.addAll(processSingle(pivotFields, null, statsFields, parsed, facetQueries, facetRanges));
    }
  }
  return pivotResponse;
}
Aggregations