Use of org.apache.solr.search.DocSet in the Apache lucene-solr project.
Class StatsInfo, method process.
/**
 * Computes the local statistics for every stats field registered on the request and adds them to
 * the response under the {@code "stats"} key.
 *
 * <p>Does nothing when {@code rb.doStats} is false. Results are kept in a {@link LinkedHashMap},
 * so they are handed to {@code convertToResponse} in the iteration order of
 * {@code rb._statsInfo.getStatsFields()}.
 *
 * @param rb the response builder carrying the stats configuration and the response to populate
 * @throws IOException declared by this method; propagated from the calls it makes
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
  if (rb.doStats) {
    final Map<String, StatsValues> perField = new LinkedHashMap<>();
    for (StatsField field : rb._statsInfo.getStatsFields()) {
      // Each field computes its own base doc set before its values are gathered.
      final DocSet base = field.computeBaseDocSet();
      final String outputKey = field.getOutputKey();
      final StatsValues localValues = field.computeLocalStatsValues(base);
      perField.put(outputKey, localValues);
    }
    rb.rsp.add("stats", convertToResponse(perField));
  }
}
Use of org.apache.solr.search.DocSet in the Apache lucene-solr project.
Class FacetFieldProcessorByEnumTermsStream, method _nextBucket.
/**
 * Advances the term enumeration until it finds the next term that qualifies as a bucket, and
 * returns that bucket.
 *
 * <p>A term is skipped when its document frequency, or its count within {@code docs}, is below
 * {@code effectiveMincount}, or while {@code bucketsToSkip} is still positive (offset handling).
 * Iteration ends when the terms are exhausted, when the current term no longer starts with
 * {@code startTermBytes}, or when {@code freq.limit} has been exceeded.
 *
 * <p>The method reads and updates state held outside of it: {@code term}, {@code termsEnum},
 * {@code postingsEnum}, {@code deState}, {@code fastForRandomSet}, {@code bucketsToSkip} and
 * {@code bucketsReturned}. On a successful return {@code term} has already been advanced to the
 * following term.
 *
 * @return a map holding the bucket value under {@code "val"}, the stats added by
 *     {@code addStats} and, when sub-facets exist, whatever {@code processSubs} adds; or
 *     {@code null} when there are no more buckets
 * @throws IOException declared by this method; propagated from the calls it makes
 */
private SimpleOrderedMap<Object> _nextBucket() throws IOException {
// Doc set of the current term intersected with the domain; only populated on the getDocSet path.
DocSet termSet = null;
try {
while (term != null) {
// Stop as soon as the current term falls outside the requested prefix.
if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes)) {
break;
}
// Pre-filter on the term's raw document frequency before doing any per-document work.
int df = termsEnum.docFreq();
if (df < effectiveMincount) {
term = termsEnum.next();
continue;
}
// Drop the set left over from a previously examined term.
if (termSet != null) {
// termSet.decref(); // OFF-HEAP
termSet = null;
}
// Number of documents in docs that contain the current term.
int c = 0;
// Two strategies: go through the searcher (sub-facets needed, or df at or above
// minDfFilterCache), or walk the postings directly and probe the domain per document.
if (hasSubFacets || df >= minDfFilterCache) {
// deState is created once and then reused for subsequent terms.
if (deState == null) {
deState = new SolrIndexSearcher.DocsEnumState();
deState.fieldName = sf.getName();
deState.liveDocs = fcontext.searcher.getSlowAtomicReader().getLiveDocs();
deState.termsEnum = termsEnum;
deState.postingsEnum = postingsEnum;
deState.minSetSizeCached = minDfFilterCache;
}
// A materialized doc set is needed for sub-facets or stats collection; otherwise a count suffices.
if (hasSubFacets || !countOnly) {
DocSet termsAll = fcontext.searcher.getDocSet(deState);
termSet = docs.intersection(termsAll);
// termsAll.decref(); // OFF-HEAP
c = termSet.size();
} else {
c = fcontext.searcher.numDocs(docs, deState);
}
// Take the postings enum back from deState (presumably the searcher may have replaced it).
postingsEnum = deState.postingsEnum;
resetStats();
if (!countOnly) {
collect(termSet, 0);
}
} else {
// We don't need the docset here (meaning no sub-facets).
// if countOnly, then we are calculating some other stats...
resetStats();
// lazy convert to fastForRandomSet
// Assigned only while null, so the conversion happens once and is reused for later terms.
if (fastForRandomSet == null) {
fastForRandomSet = docs;
if (docs instanceof SortedIntDocSet) {
// OFF-HEAP todo: also check for native version
SortedIntDocSet sset = (SortedIntDocSet) docs;
fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
}
}
// iterate over TermDocs to calculate the intersection
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
if (postingsEnum instanceof MultiPostingsEnum) {
// Walk each sub-enum separately; its doc ids are offset by the slice start before the
// membership test against fastForRandomSet.
MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
for (int subindex = 0; subindex < numSubs; subindex++) {
MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
if (sub.postingsEnum == null)
continue;
int base = sub.slice.start;
int docid;
if (countOnly) {
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid + base))
c++;
}
} else {
// Select the reader for this slice before collecting; collect receives the un-offset docid.
setNextReader(leaves[sub.slice.readerIndex]);
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid + base)) {
c++;
collect(docid, 0);
}
}
}
}
} else {
// Not a multi-enum: doc ids are used without an offset and leaves[0] is the reader
// (presumably the single-segment case; confirm against the index reader setup).
int docid;
if (countOnly) {
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid))
c++;
}
} else {
setNextReader(leaves[0]);
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid)) {
c++;
collect(docid, 0);
}
}
}
}
}
// Second mincount check, this time against the count within the domain.
if (c < effectiveMincount) {
term = termsEnum.next();
continue;
}
// handle offset and limit
if (bucketsToSkip > 0) {
bucketsToSkip--;
term = termsEnum.next();
continue;
}
// A negative limit disables the check; bucketsReturned is only incremented when it is evaluated.
if (freq.limit >= 0 && ++bucketsReturned > freq.limit) {
return null;
}
// set count in case other stats depend on it
countAcc.incrementCount(0, c);
// OK, we have a good bucket to return... first get bucket value before moving to next term
Object bucketVal = sf.getType().toObject(sf, term);
// The bucket query is only built when sub-facets will use it.
TermQuery bucketQuery = hasSubFacets ? new TermQuery(new Term(freq.field, term)) : null;
term = termsEnum.next();
SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
bucket.add("val", bucketVal);
addStats(bucket, 0);
if (hasSubFacets) {
processSubs(bucket, bucketQuery, termSet, false, null);
}
return bucket;
}
} finally {
// Runs on every exit path; the actual release call is commented out, so this only clears the local.
if (termSet != null) {
// termSet.decref(); // OFF-HEAP
termSet = null;
}
}
// end of the iteration
return null;
}
Use of org.apache.solr.search.DocSet in the Apache lucene-solr project.
Class FacetFieldProcessor, method calculateNumBuckets.
/**
 * Writes a {@code "numBuckets"} entry into {@code target}, computed by running an
 * {@link HLLAgg} over {@code freq.field} on the facet domain.
 *
 * <p>The domain is {@code fcontext.base}; when {@code freq.prefix} is set it is first narrowed
 * with the field type's prefix query.
 *
 * @param target the map that receives the {@code "numBuckets"} value
 * @throws IOException declared by this method; propagated from the calls it makes
 */
private void calculateNumBuckets(SimpleOrderedMap<Object> target) throws IOException {
  final DocSet base = fcontext.base;
  final DocSet domain;
  if (freq.prefix == null) {
    domain = base;
  } else {
    // Restrict the domain to documents matching the requested prefix.
    final Query prefixFilter = sf.getType().getPrefixQuery(null, sf, freq.prefix);
    domain = fcontext.searcher.getDocSet(prefixFilter, base);
  }

  // Single-slot accumulator: everything is collected into slot 0 and read back from slot 0.
  final SlotAcc numBucketsAcc = new HLLAgg(freq.field).createSlotAcc(fcontext, domain.size(), 1);
  numBucketsAcc.collect(domain, 0);
  numBucketsAcc.key = "numBuckets";
  numBucketsAcc.setValues(target, 0);
}
Use of org.apache.solr.search.DocSet in the Apache lucene-solr project.
Class FacetProcessor, method getFieldMissing.
/**
 * Returns the subset of {@code docs} that is not matched by an unbounded range query on
 * {@code fieldName} (both endpoints {@code null}).
 *
 * @param searcher the searcher used to resolve the schema field and to execute the range query
 * @param docs the document set to filter
 * @param fieldName the name of the field to look up in the searcher's schema
 * @return {@code docs} minus the documents matched by the range query
 * @throws IOException declared by this method; propagated from the calls it makes
 */
@SuppressWarnings("unused")
static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
  final SchemaField field = searcher.getSchema().getField(fieldName);
  // Open-ended range on both sides; presumably matches every document that has a value.
  final DocSet withValue = searcher.getDocSet(
      field.getType().getRangeQuery(null, field, null, null, false, false));
  // OFF-HEAP: withValue would need a decref() once the difference has been computed.
  return docs.andNot(withValue);
}
Use of org.apache.solr.search.DocSet in the Apache lucene-solr project.
Class UnInvertedField, method visitTerm.
/**
 * Callback invoked for each term during un-inversion.
 *
 * <p>Grows {@code maxTermCounts} when {@code termNum} is past its end. For a term whose document
 * frequency exceeds {@code maxTermDocFreq}, registers a {@code TopTerm} in {@code bigTerms} and
 * stores the size of the term's doc set in {@code maxTermCounts[termNum]}. Other terms leave
 * their slot untouched.
 *
 * @param te the terms enum positioned on the term being visited
 * @param termNum the ordinal of the term, used as the index into {@code maxTermCounts}
 * @throws IOException declared by this method; propagated from the calls it makes
 */
@Override
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
  if (termNum >= maxTermCounts.length) {
    // resize by doubling - for very large number of unique terms, expanding
    // by 4K and resultant GC will dominate uninvert times. Resize at end if material
    //
    // The doubling is done in long arithmetic: in int arithmetic, length * 2 wraps negative once
    // the array passes 2^30 entries, which defeats the cap and makes the allocation throw
    // NegativeArraySizeException. The new length is also at least termNum + 1 so that the write
    // to maxTermCounts[termNum] below fits even when the array starts out empty.
    final int oldLength = maxTermCounts.length;
    final long wanted = Math.max(2L * oldLength, (long) termNum + 1L);
    final int newLength = (int) Math.min((long) (Integer.MAX_VALUE - 16), wanted);
    int[] newMaxTermCounts = new int[newLength];
    // Never copy more entries than the old array actually holds.
    System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, Math.min(termNum, oldLength));
    maxTermCounts = newMaxTermCounts;
  }
  final BytesRef term = te.term();
  if (te.docFreq() > maxTermDocFreq) {
    // this makes a deep copy of the term bytes
    Term t = new Term(field, term);
    TopTerm topTerm = new TopTerm();
    topTerm.term = t.bytes();
    topTerm.termNum = termNum;
    topTerm.termQuery = new TermQuery(t);
    bigTerms.put(topTerm.termNum, topTerm);
    // deState is created on the first big term and reused afterwards.
    if (deState == null) {
      deState = new SolrIndexSearcher.DocsEnumState();
      deState.fieldName = field;
      deState.liveDocs = searcher.getSlowAtomicReader().getLiveDocs();
      // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail?
      deState.termsEnum = te;
      deState.postingsEnum = postingsEnum;
      deState.minSetSizeCached = maxTermDocFreq;
    }
    // NOTE(review): postingsEnum is read back from deState before getDocSet runs, so it does not
    // observe an enum the searcher might install during that call - confirm this is intended.
    postingsEnum = deState.postingsEnum;
    DocSet set = searcher.getDocSet(deState);
    maxTermCounts[termNum] = set.size();
  }
}
Aggregations