Example 11 with DocSet

Use of org.apache.solr.search.DocSet in project lucene-solr by apache.

From the class StatsInfo, method process:

@Override
public void process(ResponseBuilder rb) throws IOException {
    if (!rb.doStats)
        return;
    Map<String, StatsValues> statsValues = new LinkedHashMap<>();
    for (StatsField statsField : rb._statsInfo.getStatsFields()) {
        DocSet docs = statsField.computeBaseDocSet();
        statsValues.put(statsField.getOutputKey(), statsField.computeLocalStatsValues(docs));
    }
    rb.rsp.add("stats", convertToResponse(statsValues));
}
Also used: DocSet (org.apache.solr.search.DocSet), LinkedHashMap (java.util.LinkedHashMap)
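
For context, each StatsField first materializes its domain as a DocSet of internal Lucene doc ids, and computeLocalStatsValues then accumulates statistics over that set. The accumulation itself lives elsewhere in Solr, but the basic iteration pattern over a DocSet looks like the following minimal sketch (the method name is hypothetical):

import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;

// Sketch: walk a DocSet by internal doc id, in ascending order.
static int walk(DocSet docs) {
    int n = 0;
    DocIterator it = docs.iterator();
    while (it.hasNext()) {
        // the id can be used to read stored fields or docValues
        int docId = it.nextDoc();
        n++;
    }
    return n; // equals docs.size()
}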

Example 12 with DocSet

Use of org.apache.solr.search.DocSet in project lucene-solr by apache.

From the class FacetFieldProcessorByEnumTermsStream, method _nextBucket:

private SimpleOrderedMap<Object> _nextBucket() throws IOException {
    DocSet termSet = null;
    try {
        while (term != null) {
            if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes)) {
                break;
            }
            int df = termsEnum.docFreq();
            if (df < effectiveMincount) {
                term = termsEnum.next();
                continue;
            }
            if (termSet != null) {
                // termSet.decref(); // OFF-HEAP
                termSet = null;
            }
            int c = 0;
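            // Frequent terms (df >= minDfFilterCache) and sub-facet parents go
            // through the searcher, which can answer from (and populate) the
            // filter cache instead of re-walking postings here.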
            if (hasSubFacets || df >= minDfFilterCache) {
                if (deState == null) {
                    deState = new SolrIndexSearcher.DocsEnumState();
                    deState.fieldName = sf.getName();
                    deState.liveDocs = fcontext.searcher.getSlowAtomicReader().getLiveDocs();
                    deState.termsEnum = termsEnum;
                    deState.postingsEnum = postingsEnum;
                    deState.minSetSizeCached = minDfFilterCache;
                }
                if (hasSubFacets || !countOnly) {
                    DocSet termsAll = fcontext.searcher.getDocSet(deState);
                    termSet = docs.intersection(termsAll);
                    // termsAll.decref(); // OFF-HEAP
                    c = termSet.size();
                } else {
                    c = fcontext.searcher.numDocs(docs, deState);
                }
                postingsEnum = deState.postingsEnum;
                resetStats();
                if (!countOnly) {
                    collect(termSet, 0);
                }
            } else {
                // We don't need the docset here (meaning no sub-facets).
                // if countOnly, then we are calculating some other stats...
                resetStats();
                // Lazily convert docs to a form with fast random-access exists():
                // a HashDocSet hashes doc ids, while a SortedIntDocSet would need
                // a binary search per lookup.
                if (fastForRandomSet == null) {
                    fastForRandomSet = docs;
                    if (docs instanceof SortedIntDocSet) {
                        // OFF-HEAP todo: also check for native version
                        SortedIntDocSet sset = (SortedIntDocSet) docs;
                        fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
                    }
                }
                // iterate over TermDocs to calculate the intersection
                postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
                if (postingsEnum instanceof MultiPostingsEnum) {
                    MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
                    int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
                    for (int subindex = 0; subindex < numSubs; subindex++) {
                        MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
                        if (sub.postingsEnum == null)
                            continue;
                        int base = sub.slice.start;
                        int docid;
                        if (countOnly) {
                            while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                                if (fastForRandomSet.exists(docid + base))
                                    c++;
                            }
                        } else {
                            setNextReader(leaves[sub.slice.readerIndex]);
                            while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                                if (fastForRandomSet.exists(docid + base)) {
                                    c++;
                                    collect(docid, 0);
                                }
                            }
                        }
                    }
                } else {
                    int docid;
                    if (countOnly) {
                        while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                            if (fastForRandomSet.exists(docid))
                                c++;
                        }
                    } else {
                        setNextReader(leaves[0]);
                        while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                            if (fastForRandomSet.exists(docid)) {
                                c++;
                                collect(docid, 0);
                            }
                        }
                    }
                }
            }
            if (c < effectiveMincount) {
                term = termsEnum.next();
                continue;
            }
            // handle offset and limit
            if (bucketsToSkip > 0) {
                bucketsToSkip--;
                term = termsEnum.next();
                continue;
            }
            if (freq.limit >= 0 && ++bucketsReturned > freq.limit) {
                return null;
            }
            // set count in case other stats depend on it
            countAcc.incrementCount(0, c);
            // OK, we have a good bucket to return... first get bucket value before moving to next term
            Object bucketVal = sf.getType().toObject(sf, term);
            TermQuery bucketQuery = hasSubFacets ? new TermQuery(new Term(freq.field, term)) : null;
            term = termsEnum.next();
            SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
            bucket.add("val", bucketVal);
            addStats(bucket, 0);
            if (hasSubFacets) {
                processSubs(bucket, bucketQuery, termSet, false, null);
            }
            return bucket;
        }
    } finally {
        if (termSet != null) {
            // termSet.decref();  // OFF-HEAP
            termSet = null;
        }
    }
    // end of the iteration
    return null;
}
Also used: SortedIntDocSet (org.apache.solr.search.SortedIntDocSet), HashDocSet (org.apache.solr.search.HashDocSet), TermQuery (org.apache.lucene.search.TermQuery), SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher), Term (org.apache.lucene.index.Term), SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap), MultiPostingsEnum (org.apache.lucene.index.MultiPostingsEnum), DocSet (org.apache.solr.search.DocSet)
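
The branching above exists to avoid materializing a per-term DocSet when only a count is needed: searcher.numDocs(docs, deState) counts the intersection directly. When both sets are already in hand, DocSet itself offers the same trade-off; a minimal sketch, assuming precomputed sets and a hypothetical method name:

import org.apache.solr.search.DocSet;

// Sketch: intersection() materializes the bucket set, which collect() needs;
// intersectionSize() only counts it, which is cheaper in the count-only case.
static int bucketCount(DocSet base, DocSet termDocs, boolean needDocSet) {
    if (needDocSet) {
        return base.intersection(termDocs).size();
    }
    return base.intersectionSize(termDocs);
}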

Example 13 with DocSet

Use of org.apache.solr.search.DocSet in project lucene-solr by apache.

From the class FacetFieldProcessor, method calculateNumBuckets:

private void calculateNumBuckets(SimpleOrderedMap<Object> target) throws IOException {
    DocSet domain = fcontext.base;
    if (freq.prefix != null) {
        Query prefixFilter = sf.getType().getPrefixQuery(null, sf, freq.prefix);
        domain = fcontext.searcher.getDocSet(prefixFilter, domain);
    }
    HLLAgg agg = new HLLAgg(freq.field);
    SlotAcc acc = agg.createSlotAcc(fcontext, domain.size(), 1);
    acc.collect(domain, 0);
    acc.key = "numBuckets";
    acc.setValues(target, 0);
}
Also used: Query (org.apache.lucene.search.Query), DocSet (org.apache.solr.search.DocSet)
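
HLLAgg makes numBuckets a HyperLogLog estimate of the number of distinct values in the domain, keeping memory roughly constant regardless of field cardinality. For contrast, an exact count can be sketched with docValues ordinals; this is illustrative only (hypothetical method name, single-valued field, iterator-style docValues API, and the slow composite reader):

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;

// Sketch: exact distinct-value count over a DocSet, for comparison with
// the HLL estimate. Not production code: it uses the slow composite reader.
static int exactNumBuckets(SolrIndexSearcher searcher, DocSet domain, String field) throws IOException {
    SortedDocValues dv = DocValues.getSorted(searcher.getSlowAtomicReader(), field);
    FixedBitSet seenOrds = new FixedBitSet(dv.getValueCount());
    DocIterator it = domain.iterator(); // ascending ids, as advanceExact requires
    while (it.hasNext()) {
        int doc = it.nextDoc();
        if (dv.advanceExact(doc)) {
            seenOrds.set(dv.ordValue());
        }
    }
    return seenOrds.cardinality();
}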

Example 14 with DocSet

Use of org.apache.solr.search.DocSet in project lucene-solr by apache.

From the class FacetProcessor, method getFieldMissing:

@SuppressWarnings("unused")
static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
    SchemaField sf = searcher.getSchema().getField(fieldName);
    DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
    DocSet answer = docs.andNot(hasVal);
    // hasVal.decref(); // OFF-HEAP
    return answer;
}
Also used: SchemaField (org.apache.solr.schema.SchemaField), BitDocSet (org.apache.solr.search.BitDocSet), DocSet (org.apache.solr.search.DocSet)
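
The open-ended range query (null to null, i.e. [* TO *]) matches every document that has a value in the field, so docs.andNot(hasVal) is exactly the subset of docs where the field is missing. The complementary helper follows the same shape; a minimal sketch with a hypothetical method name:

import java.io.IOException;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;

// Sketch: the complement of getFieldMissing - documents that DO have a value.
static DocSet getFieldPresent(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
    SchemaField sf = searcher.getSchema().getField(fieldName);
    DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
    return docs.intersection(hasVal); // intersection instead of andNot
}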

Example 15 with DocSet

Use of org.apache.solr.search.DocSet in project lucene-solr by apache.

From the class UnInvertedField, method visitTerm:

@Override
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
    if (termNum >= maxTermCounts.length) {
        // resize by doubling - for very large number of unique terms, expanding
        // by 4K and resultant GC will dominate uninvert times.  Resize at end if material
        int[] newMaxTermCounts = new int[Math.min(Integer.MAX_VALUE - 16, maxTermCounts.length * 2)];
        System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
        maxTermCounts = newMaxTermCounts;
    }
    final BytesRef term = te.term();
    if (te.docFreq() > maxTermDocFreq) {
        // this makes a deep copy of the term bytes
        Term t = new Term(field, term);
        TopTerm topTerm = new TopTerm();
        topTerm.term = t.bytes();
        topTerm.termNum = termNum;
        topTerm.termQuery = new TermQuery(t);
        bigTerms.put(topTerm.termNum, topTerm);
        if (deState == null) {
            deState = new SolrIndexSearcher.DocsEnumState();
            deState.fieldName = field;
            deState.liveDocs = searcher.getSlowAtomicReader().getLiveDocs();
            // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail?
            deState.termsEnum = te;
            deState.postingsEnum = postingsEnum;
            deState.minSetSizeCached = maxTermDocFreq;
        }
        postingsEnum = deState.postingsEnum;
        DocSet set = searcher.getDocSet(deState);
        maxTermCounts[termNum] = set.size();
    }
}
Also used: TermQuery (org.apache.lucene.search.TermQuery), Term (org.apache.lucene.index.Term), SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher), BytesRef (org.apache.lucene.util.BytesRef), BitDocSet (org.apache.solr.search.BitDocSet), DocSet (org.apache.solr.search.DocSet)
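
Terms whose docFreq exceeds maxTermDocFreq are deliberately left out of the uninverted structure: they are remembered in bigTerms with a prebuilt TermQuery and resolved at counting time instead, where the filter cache can serve them cheaply. A hedged sketch of how such a term might later be counted against a facet domain (searcher, topTerm, and docs assumed from surrounding state):

// Sketch: resolving a cached "big term" at query time.
int c = searcher.numDocs(topTerm.termQuery, docs); // intersection size, answered via the filter cache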

Aggregations

DocSet (org.apache.solr.search.DocSet): 24
BitDocSet (org.apache.solr.search.BitDocSet): 12
Query (org.apache.lucene.search.Query): 9
HashDocSet (org.apache.solr.search.HashDocSet): 6
SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher): 6
SortedIntDocSet (org.apache.solr.search.SortedIntDocSet): 6
SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap): 5
FieldType (org.apache.solr.schema.FieldType): 5
ArrayList (java.util.ArrayList): 4
BytesRef (org.apache.lucene.util.BytesRef): 4
NamedList (org.apache.solr.common.util.NamedList): 4
SchemaField (org.apache.solr.schema.SchemaField): 4
IdentityHashMap (java.util.IdentityHashMap): 3
Map (java.util.Map): 3
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 3
Term (org.apache.lucene.index.Term): 3
SolrException (org.apache.solr.common.SolrException): 3
SolrParams (org.apache.solr.common.params.SolrParams): 3
QParser (org.apache.solr.search.QParser): 3
SyntaxError (org.apache.solr.search.SyntaxError): 3
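
Taken together, the examples lean on a small set-algebra surface that every DocSet implementation (BitDocSet, HashDocSet, SortedIntDocSet) provides. A minimal sketch of those operations, with all sets assumed precomputed:

import org.apache.solr.search.DocSet;

// Sketch: the core DocSet operations used throughout the examples above.
static void docSetAlgebra(DocSet a, DocSet b, int docId) {
    DocSet both = a.intersection(b);      // Example 12: per-term bucket domain
    int bothSize = a.intersectionSize(b); // count-only fast path
    DocSet onlyA = a.andNot(b);           // Example 14: "field missing" domain
    DocSet either = a.union(b);
    boolean member = a.exists(docId);     // Example 12: fastForRandomSet checks
    int size = a.size();                  // Examples 11 and 13: domain sizes
}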