Use of org.apache.solr.search.Filter in project lucene-solr by apache.
The class FacetFieldProcessorByArrayDV, method collectDocs.
@Override
protected void collectDocs() throws IOException {
  int domainSize = fcontext.base.size();
  if (nTerms <= 0 || domainSize < effectiveMincount) {
    // TODO: what about allBuckets? missing bucket?
    return;
  }

  // TODO: refactor some of this logic into a base class
  boolean countOnly = collectAcc == null && allBucketsAcc == null;
  boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

  // Are we expecting many hits per bucket?
  // FUTURE: pro-rate for nTerms?
  // FUTURE: better take into account the number of values in multi-valued fields. This info is available for indexed fields.
  // FUTURE: take into account that bigger ord maps are more expensive than smaller ones.
  // One test: on a 5M-doc index, faceting on a single-valued field with almost 1M unique values, the crossover point
  // where global counting became slower than per-segment counting was a domain of 658k docs. At that point, the top 10
  // buckets had 6-7 matches each. This was for heap docvalues produced by UninvertingReader.
  // Since those values were randomly distributed, let's round our domain multiplier up to account for less random real-world data.
  long domainMultiplier = multiValuedField ? 4L : 2L;
  // +3 to increase test coverage with small tests
  boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3);

  // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
  // then collect per-segment before mapping to global ords at the end. This saves redundant seg->global ord mappings.
  // FUTURE: there are probably some other non-"countOnly" cases where we can use this as well (i.e. those where
  // the docid is not used).
  boolean canDoPerSeg = countOnly && fullRange;
  boolean accumSeg = manyHitsPerBucket && canDoPerSeg;

  // internal - override the perSeg heuristic
  if (freq.perSeg != null) {
    accumSeg = canDoPerSeg && freq.perSeg;
  }

  final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
  Filter filter = fcontext.base.getTopFilter();

  for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
    LeafReaderContext subCtx = leaves.get(subIdx);
    setNextReaderFirstPhase(subCtx);

    // Solr DocSets already exclude any deleted docs
    DocIdSet dis = filter.getDocIdSet(subCtx, null);
    DocIdSetIterator disi = dis.iterator();

    SortedDocValues singleDv = null;
    SortedSetDocValues multiDv = null;
    if (multiValuedField) {
      // TODO: get sub from multi?
      multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
      if (multiDv == null) {
        multiDv = DocValues.emptySortedSet();
      }
      // this will be null unless multiDv wraps a single-valued docvalues instance
      if (unwrap_singleValued_multiDv) {
        singleDv = DocValues.unwrapSingleton(multiDv);
      }
    } else {
      singleDv = subCtx.reader().getSortedDocValues(sf.getName());
      if (singleDv == null) {
        singleDv = DocValues.emptySorted();
      }
    }

    LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
    if (singleDv != null) {
      if (accumSeg) {
        collectPerSeg(singleDv, disi, toGlobal);
      } else if (canDoPerSeg && toGlobal != null) {
        collectCounts(singleDv, disi, toGlobal);
      } else {
        collectDocs(singleDv, disi, toGlobal);
      }
    } else {
      if (accumSeg) {
        collectPerSeg(multiDv, disi, toGlobal);
      } else if (canDoPerSeg && toGlobal != null) {
        collectCounts(multiDv, disi, toGlobal);
      } else {
        collectDocs(multiDv, disi, toGlobal);
      }
    }
  }

  // drop the reusable per-segment buffer for better GC
  reuse = null;
}
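Every example on this page repeats the same skeleton before doing its specific work: turn a Solr DocSet into a top-level Filter with getTopFilter(), then ask that filter for a per-segment DocIdSet on each index leaf. A minimal sketch of just that skeleton, assuming a SolrIndexSearcher named searcher and a DocSet named docs are in scope (package names per the Solr 6.x/7.x source tree shown here):

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.Filter;
import org.apache.solr.search.SolrIndexSearcher;

// Minimal sketch: visit every matching doc, one segment at a time.
static void visitMatches(SolrIndexSearcher searcher, DocSet docs) throws IOException {
  Filter filter = docs.getTopFilter();
  for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
    // Solr DocSets already exclude deleted docs, so no live-docs Bits are passed.
    DocIdSet dis = filter.getDocIdSet(leaf, null);
    if (dis == null) continue; // no matches in this segment
    DocIdSetIterator disi = dis.iterator();
    if (disi == null) continue;
    for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
      // `doc` is segment-local; add leaf.docBase for a top-level docid
    }
  }
}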
Use of org.apache.solr.search.Filter in project lucene-solr by apache.
The class AnalyticsStats, method execute.
/**
 * Calculates the analytics requested in the Parameters.
 *
 * @return List of results formatted to mirror the input XML.
 * @throws IOException if execution fails
 */
public NamedList<?> execute() throws IOException {
  statsCollector.startRequest();
  NamedList<Object> res = new NamedList<>();

  List<AnalyticsRequest> requests = AnalyticsRequestFactory.parse(searcher.getSchema(), params);
  if (requests == null || requests.size() == 0) {
    return res;
  }
  statsCollector.addRequests(requests.size());

  // Get a filter over all matching docs
  Filter filter = docs.getTopFilter();

  // Compute each analytics request separately
  for (AnalyticsRequest areq : requests) {
    // The accumulator that drives statistics generation for the entire analytics request
    ValueAccumulator accumulator;
    // The total number of facet requests
    int facets = areq.getFieldFacets().size() + areq.getRangeFacets().size() + areq.getQueryFacets().size();
    try {
      if (facets == 0) {
        accumulator = BasicAccumulator.create(searcher, docs, areq);
      } else {
        accumulator = FacetingAccumulator.create(searcher, docs, areq, req);
      }
    } catch (IOException e) {
      log.warn("Analytics request '" + areq.getName() + "' failed", e);
      continue;
    }

    statsCollector.addStatsCollected(((BasicAccumulator) accumulator).getNumStatsCollectors());
    statsCollector.addStatsRequests(areq.getExpressions().size());
    statsCollector.addFieldFacets(areq.getFieldFacets().size());
    statsCollector.addRangeFacets(areq.getRangeFacets().size());
    statsCollector.addQueryFacets(areq.getQueryFacets().size());
    statsCollector.addQueries(((BasicAccumulator) accumulator).getNumQueries());

    // Loop through the documents returned by the query and feed them to the accumulator
    List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
    for (int leafNum = 0; leafNum < contexts.size(); leafNum++) {
      LeafReaderContext context = contexts.get(leafNum);
      // Solr DocSets already exclude any deleted docs
      DocIdSet dis = filter.getDocIdSet(context, null);
      DocIdSetIterator disi = (dis == null) ? null : dis.iterator();
      if (disi != null) {
        accumulator.getLeafCollector(context);
        int doc = disi.nextDoc();
        while (doc != DocIdSetIterator.NO_MORE_DOCS) {
          // Add a document to the statistics being generated
          accumulator.collect(doc);
          doc = disi.nextDoc();
        }
      }
    }

    // Post-process, compute the stats, and export the results
    accumulator.postProcess();
    accumulator.compute();
    res.add(areq.getName(), accumulator.export());
  }

  statsCollector.endRequest();
  return res;
}
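The doc-iteration loop here is the same null-safe pattern as in the faceting example above. A hypothetical helper that factors it out might look like the following; the names forEachDoc and DocConsumer are illustrative, not part of Solr:

import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.solr.search.Filter;

// Illustrative callback; not a Solr interface.
interface DocConsumer {
  void setNextReader(LeafReaderContext context) throws IOException;
  void accept(int segmentLocalDoc) throws IOException;
}

// Drives a DocConsumer over every doc matched by the filter, one segment at a time.
static void forEachDoc(Filter filter, List<LeafReaderContext> leaves, DocConsumer consumer) throws IOException {
  for (LeafReaderContext context : leaves) {
    DocIdSet dis = filter.getDocIdSet(context, null); // null Bits: Solr DocSets already exclude deletions
    if (dis == null) continue;
    DocIdSetIterator disi = dis.iterator();
    if (disi == null) continue;
    consumer.setNextReader(context);
    for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
      consumer.accept(doc);
    }
  }
}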
Use of org.apache.solr.search.Filter in project lucene-solr by apache.
The class CurrencyValue, method getRangeQuery.
public Query getRangeQuery(QParser parser, SchemaField field, final CurrencyValue p1, final CurrencyValue p2,
                           final boolean minInclusive, final boolean maxInclusive) {
  String currencyCode = (p1 != null) ? p1.getCurrencyCode() : (p2 != null) ? p2.getCurrencyCode() : defaultCurrency;

  // ValueSourceRangeFilter doesn't check exists(), so we have to restrict to docs that actually have a value
  final Filter docsWithValues = new QueryWrapperFilter(new FieldValueQuery(getAmountField(field).getName()));
  final Filter vsRangeFilter = new ValueSourceRangeFilter(
      new RawCurrencyValueSource(field, currencyCode, parser),
      p1 == null ? null : p1.getAmount() + "",
      p2 == null ? null : p2.getAmount() + "",
      minInclusive, maxInclusive);

  final BooleanQuery.Builder docsInRange = new BooleanQuery.Builder();
  docsInRange.add(docsWithValues, Occur.FILTER);
  docsInRange.add(vsRangeFilter, Occur.FILTER);
  return new SolrConstantScoreQuery(new QueryWrapperFilter(docsInRange.build()));
}
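The composition at the end is a generic pattern: intersect two Filters as non-scoring FILTER clauses, then wrap the conjunction so it matches with a constant score. A sketch of just that step, working over any two org.apache.solr.search.Filter instances (the helper name intersectAsConstantScore is illustrative):

import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.search.Filter;
import org.apache.solr.search.QueryWrapperFilter;
import org.apache.solr.search.SolrConstantScoreQuery;

// Sketch: AND two filters without scoring either, exposed as a constant-scoring Solr query.
static Query intersectAsConstantScore(Filter a, Filter b) {
  BooleanQuery.Builder both = new BooleanQuery.Builder();
  both.add(a, Occur.FILTER); // FILTER clauses must match but contribute nothing to the score
  both.add(b, Occur.FILTER);
  return new SolrConstantScoreQuery(new QueryWrapperFilter(both.build()));
}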
Use of org.apache.solr.search.Filter in project lucene-solr by apache.
The class BlockJoinDocSetFacetComponent, method process.
@Override
public void process(ResponseBuilder rb) throws IOException {
  final BlockJoinParentQParser.AllParentsAware bjq =
      (BlockJoinParentQParser.AllParentsAware) rb.req.getContext().get(bjqKey);
  if (bjq != null) {
    final DocSet parentResult = rb.getResults().docSet;
    final BitDocSet allParentsBitsDocSet = rb.req.getSearcher().getDocSetBits(bjq.getParentQuery());

    // all children of the matched parents...
    final DocSet allChildren = BlockJoin.toChildren(parentResult, allParentsBitsDocSet,
        rb.req.getSearcher().getDocSetBits(new MatchAllDocsQuery()),
        QueryContext.newContext(rb.req.getSearcher()));
    // ...intersected with the docs matching the child query
    final DocSet childQueryDocSet = rb.req.getSearcher().getDocSet(bjq.getChildQuery());
    final DocSet selectedChildren = allChildren.intersection(childQueryDocSet);

    // don't include parents in the facet counts
    // childResult = childResult.union(parentResult); // just to mimic the current logic

    final List<LeafReaderContext> leaves = rb.req.getSearcher().getIndexReader().leaves();
    Filter filter = selectedChildren.getTopFilter();
    final BlockJoinFacetAccsHolder facetCounter = new BlockJoinFacetAccsHolder(rb.req);

    for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
      LeafReaderContext subCtx = leaves.get(subIdx);
      // Solr DocSets already exclude any deleted docs
      DocIdSet dis = filter.getDocIdSet(subCtx, null);
      AggregatableDocIter iter = new SegmentChildren(subCtx, dis, allParentsBitsDocSet);
      if (iter.hasNext()) {
        facetCounter.doSetNextReader(subCtx);
        facetCounter.countFacets(iter);
      }
    }

    facetCounter.finish();
    rb.req.getContext().put(COLLECTOR_CONTEXT_PARAM, facetCounter);
    super.process(rb);
  }
}
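The heart of this method is DocSet algebra: map the matched parents to all of their children, then intersect with the child query's matches. For the intersection step in isolation, a minimal sketch (the method name and query parameters are illustrative, not taken from the component above):

import java.io.IOException;
import org.apache.lucene.search.Query;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;

// Sketch: how many docs match both queries? DocSets already exclude deletions
// and know their cardinality, so no per-segment iteration is needed here.
static int countIntersection(SolrIndexSearcher searcher, Query q1, Query q2) throws IOException {
  DocSet a = searcher.getDocSet(q1);
  DocSet b = searcher.getDocSet(q2);
  return a.intersection(b).size();
}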
Use of org.apache.solr.search.Filter in project lucene-solr by apache.
The class BlockJoinParentQParser, method getCachedFilter.
static BitDocIdSetFilterWrapper getCachedFilter(final SolrQueryRequest request, Query parentList) {
  SolrCache parentCache = request.getSearcher().getCache(CACHE_NAME);

  // lazily retrieve from solr cache
  Filter filter = null;
  if (parentCache != null) {
    filter = (Filter) parentCache.get(parentList);
  }

  BitDocIdSetFilterWrapper result;
  if (filter instanceof BitDocIdSetFilterWrapper) {
    result = (BitDocIdSetFilterWrapper) filter;
  } else {
    result = new BitDocIdSetFilterWrapper(createParentFilter(parentList));
    if (parentCache != null) {
      parentCache.put(parentList, result);
    }
  }
  return result;
}
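The shape here is a classic cache-aside lookup: probe the SolrCache, type-check whatever came back, and rebuild and re-cache on a miss or a foreign entry. A generalized sketch; getOrBuild and its Supplier hook are hypothetical helpers, not Solr API:

import java.util.function.Supplier;
import org.apache.solr.search.SolrCache;

// Cache-aside sketch: return the cached value if it has the expected type,
// otherwise build a fresh one and overwrite the missing/stale/foreign entry.
static <K, V> V getOrBuild(SolrCache<K, Object> cache, K key, Class<V> type, Supplier<V> build) {
  Object cached = (cache != null) ? cache.get(key) : null;
  if (type.isInstance(cached)) {
    return type.cast(cached); // hit, and it is the type we expect
  }
  V fresh = build.get();
  if (cache != null) {
    cache.put(key, fresh);
  }
  return fresh;
}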