Search in sources :

Example 6 with DocSet

use of org.apache.solr.search.DocSet in project lucene-solr by apache.

the class UnInvertedField method collectDocsGeneric.

// called from FieldFacetProcessor
// TODO: do a callback version that can be specialized!
public void collectDocsGeneric(FacetFieldProcessorByArrayUIF processor) throws IOException {
    int startTermIndex = processor.startTermIndex;
    int endTermIndex = processor.endTermIndex;
    int nTerms = processor.nTerms;
    DocSet docs = processor.fcontext.base;
    int uniqueTerms = 0;
    final CountSlotAcc countAcc = processor.countAcc;
    for (TopTerm tt : bigTerms.values()) {
        if (tt.termNum >= startTermIndex && tt.termNum < endTermIndex) {
            // handle the biggest terms
            try (DocSet intersection = searcher.getDocSet(tt.termQuery, docs)) {
                int collected = processor.collectFirstPhase(intersection, tt.termNum - startTermIndex);
                countAcc.incrementCount(tt.termNum - startTermIndex, collected);
                if (collected > 0) {
    if (termInstances > 0) {
        final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
        final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
        LeafReaderContext ctx = null;
        int segBase = 0;
        int segMax;
        int adjustedMax = 0;
        // TODO: handle facet.prefix here!!!
        DocIterator iter = docs.iterator();
        while (iter.hasNext()) {
            int doc = iter.nextDoc();
            if (doc >= adjustedMax) {
                do {
                    ctx =;
                    if (ctx == null) {
                        // should be impossible
                        throw new RuntimeException("INTERNAL FACET ERROR");
                    segBase = ctx.docBase;
                    segMax = ctx.reader().maxDoc();
                    adjustedMax = segBase + segMax;
                } while (doc >= adjustedMax);
                assert doc >= ctx.docBase;
            int segDoc = doc - segBase;
            int code = index[doc];
            if ((code & 0xff) == 1) {
                int pos = code >>> 8;
                int whichArray = (doc >>> 16) & 0xff;
                byte[] arr = tnums[whichArray];
                int tnum = 0;
                for (; ; ) {
                    int delta = 0;
                    for (; ; ) {
                        byte b = arr[pos++];
                        delta = (delta << 7) | (b & 0x7f);
                        if ((b & 0x80) == 0)
                    if (delta == 0)
                    tnum += delta - TNUM_OFFSET;
                    int arrIdx = tnum - startTermIndex;
                    if (arrIdx < 0)
                    if (arrIdx >= nTerms)
                    countAcc.incrementCount(arrIdx, 1);
                    processor.collectFirstPhase(segDoc, arrIdx);
            } else {
                int tnum = 0;
                int delta = 0;
                for (; ; ) {
                    delta = (delta << 7) | (code & 0x7f);
                    if ((code & 0x80) == 0) {
                        if (delta == 0)
                        tnum += delta - TNUM_OFFSET;
                        int arrIdx = tnum - startTermIndex;
                        if (arrIdx >= 0) {
                            if (arrIdx >= nTerms)
                            countAcc.incrementCount(arrIdx, 1);
                            processor.collectFirstPhase(segDoc, arrIdx);
                        delta = 0;
                    code >>>= 8;
Also used : DocIterator(org.apache.solr.search.DocIterator) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BitDocSet(org.apache.solr.search.BitDocSet) DocSet(org.apache.solr.search.DocSet)

Example 7 with DocSet

use of org.apache.solr.search.DocSet in project lucene-solr by apache.

the class UnInvertedField method getCounts.

private void getCounts(FacetFieldProcessorByArrayUIF processor, CountSlotAcc counts) throws IOException {
    DocSet docs = processor.fcontext.base;
    int baseSize = docs.size();
    int maxDoc = searcher.maxDoc();
    // what about allBuckets?
    if (baseSize < processor.effectiveMincount) {
    final int[] index = this.index;
    boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0 && docs instanceof BitDocSet;
    if (doNegative) {
        FixedBitSet bs = ((BitDocSet) docs).getBits().clone();
        bs.flip(0, maxDoc);
        // TODO: when iterator across negative elements is available, use that
        // instead of creating a new bitset and inverting.
        docs = new BitDocSet(bs, maxDoc - baseSize);
    // simply negating will mean that we have deleted docs in the set.
    // that should be OK, as their entries in our table should be empty.
    // For the biggest terms, do straight set intersections
    for (TopTerm tt : bigTerms.values()) {
        // TODO: counts could be deferred if sorting by index order
        counts.incrementCount(tt.termNum, searcher.numDocs(tt.termQuery, docs));
    if (termInstances > 0) {
        DocIterator iter = docs.iterator();
        while (iter.hasNext()) {
            int doc = iter.nextDoc();
            int code = index[doc];
            if ((code & 0xff) == 1) {
                int pos = code >>> 8;
                int whichArray = (doc >>> 16) & 0xff;
                byte[] arr = tnums[whichArray];
                int tnum = 0;
                for (; ; ) {
                    int delta = 0;
                    for (; ; ) {
                        byte b = arr[pos++];
                        delta = (delta << 7) | (b & 0x7f);
                        if ((b & 0x80) == 0)
                    if (delta == 0)
                    tnum += delta - TNUM_OFFSET;
                    counts.incrementCount(tnum, 1);
            } else {
                int tnum = 0;
                int delta = 0;
                for (; ; ) {
                    delta = (delta << 7) | (code & 0x7f);
                    if ((code & 0x80) == 0) {
                        if (delta == 0)
                        tnum += delta - TNUM_OFFSET;
                        counts.incrementCount(tnum, 1);
                        delta = 0;
                    code >>>= 8;
    if (doNegative) {
        for (int i = 0; i < numTermsInField; i++) {
            //       counts[i] = maxTermCounts[i] - counts[i];
            counts.incrementCount(i, maxTermCounts[i] - counts.getCount(i) * 2);
/*** TODO - future optimization to handle allBuckets
    if (processor.allBucketsSlot >= 0) {
      int all = 0;  // overflow potential
      for (int i=0; i<numTermsInField; i++) {
        all += counts.getCount(i);
      counts.incrementCount(processor.allBucketsSlot, all);
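Also used : BitDocSet(org.apache.solr.search.BitDocSet) DocIterator(org.apache.solr.search.DocIterator) FixedBitSet(org.apache.lucene.util.FixedBitSet) BitDocSet(org.apache.solr.search.BitDocSet) DocSet(org.apache.solr.search.DocSet)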

Example 8 with DocSet

use of org.apache.solr.search.DocSet in project lucene-solr by apache.

the class SimpleFacets method getFacetTermEnumCounts.

  /**
   * Returns a list of terms in the specified field along with the 
   * corresponding count of documents in the set that match that constraint.
   * This method uses the FilterCache to get the intersection count between <code>docs</code>
   * and the DocSet for each term in the filter.
   * @see FacetParams#FACET_LIMIT
   * @see FacetParams#FACET_ZEROS
   * @see FacetParams#FACET_MISSING
   */
public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing, String sort, String prefix, Predicate<BytesRef> termFilter, boolean intersectsCheck) throws IOException {
    /* :TODO: potential optimization...
    * cache the Terms with the highest docFreq and try them first
    * don't enum if we get our max from them
    // Minimum term docFreq in order to use the filterCache for that term.
    int minDfFilterCache = global.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);
    // make sure we have a set that is fast for random access, if we will use it for that
    DocSet fastForRandomSet = docs;
    if (minDfFilterCache > 0 && docs instanceof SortedIntDocSet) {
        SortedIntDocSet sset = (SortedIntDocSet) docs;
        fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
    IndexSchema schema = searcher.getSchema();
    FieldType ft = schema.getFieldType(field);
    assert !ft.isPointField() : "Point Fields don't support enum method";
    LeafReader r = searcher.getSlowAtomicReader();
    boolean sortByCount = sort.equals("count") || sort.equals("true");
    final int maxsize = limit >= 0 ? offset + limit : Integer.MAX_VALUE - 1;
    final BoundedTreeSet<CountPair<BytesRef, Integer>> queue = sortByCount ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(maxsize) : null;
    final NamedList<Integer> res = new NamedList<>();
    // the smallest value in the top 'N' values    
    int min = mincount - 1;
    int off = offset;
    int lim = limit >= 0 ? limit : Integer.MAX_VALUE;
    BytesRef prefixTermBytes = null;
    if (prefix != null) {
        String indexedPrefix = ft.toInternal(prefix);
        prefixTermBytes = new BytesRef(indexedPrefix);
    Fields fields = r.fields();
    Terms terms = fields == null ? null : fields.terms(field);
    TermsEnum termsEnum = null;
    SolrIndexSearcher.DocsEnumState deState = null;
    BytesRef term = null;
    if (terms != null) {
        termsEnum = terms.iterator();
        if (prefixTermBytes != null) {
            if (termsEnum.seekCeil(prefixTermBytes) == TermsEnum.SeekStatus.END) {
                termsEnum = null;
            } else {
                term = termsEnum.term();
        } else {
            // position termsEnum on first term
            term =;
    PostingsEnum postingsEnum = null;
    CharsRefBuilder charsRef = new CharsRefBuilder();
    if (docs.size() >= mincount) {
        while (term != null) {
            if (prefixTermBytes != null && !StringHelper.startsWith(term, prefixTermBytes))
            if (termFilter == null || termFilter.test(term)) {
                int df = termsEnum.docFreq();
                // make a large difference (for example, many terms with df=1).
                if (df > 0 && df > min) {
                    int c;
                    if (df >= minDfFilterCache) {
                        if (deState == null) {
                            deState = new SolrIndexSearcher.DocsEnumState();
                            deState.fieldName = field;
                            deState.liveDocs = r.getLiveDocs();
                            deState.termsEnum = termsEnum;
                            deState.postingsEnum = postingsEnum;
                        if (intersectsCheck) {
                            c = searcher.intersects(docs, deState) ? 1 : 0;
                        } else {
                            c = searcher.numDocs(docs, deState);
                        postingsEnum = deState.postingsEnum;
                    } else {
                        // iterate over TermDocs to calculate the intersection
                        // TODO: specialize when base docset is a bitset or hash set (skipDocs)?  or does it matter for this?
                        // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
                        // TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet?
                        postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
                        c = 0;
                        if (postingsEnum instanceof MultiPostingsEnum) {
                            MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
                            int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
                            SEGMENTS_LOOP: for (int subindex = 0; subindex < numSubs; subindex++) {
                                MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
                                if (sub.postingsEnum == null)
                                int base = sub.slice.start;
                                int docid;
                                while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                                    if (fastForRandomSet.exists(docid + base)) {
                                        if (intersectsCheck) {
                                            assert c == 1;
                                            break SEGMENTS_LOOP;
                        } else {
                            int docid;
                            while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                                if (fastForRandomSet.exists(docid)) {
                                    if (intersectsCheck) {
                                        assert c == 1;
                    if (sortByCount) {
                        if (c > min) {
                            BytesRef termCopy = BytesRef.deepCopyOf(term);
                            queue.add(new CountPair<>(termCopy, c));
                            if (queue.size() >= maxsize)
                                min = queue.last().val;
                    } else {
                        if (c >= mincount && --off < 0) {
                            if (--lim < 0)
                            ft.indexedToReadable(term, charsRef);
                            res.add(charsRef.toString(), c);
            term =;
    if (sortByCount) {
        for (CountPair<BytesRef, Integer> p : queue) {
            if (--off >= 0)
            if (--lim < 0)
            ft.indexedToReadable(p.key, charsRef);
            res.add(charsRef.toString(), p.val);
    if (missing) {
        res.add(null, getFieldMissingCount(searcher, docs, field));
    return res;
Also used : SortedIntDocSet(org.apache.solr.search.SortedIntDocSet) HashDocSet(org.apache.solr.search.HashDocSet) TermsEnum(org.apache.lucene.index.TermsEnum) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) MultiPostingsEnum(org.apache.lucene.index.MultiPostingsEnum) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) LeafReader(org.apache.lucene.index.LeafReader) NamedList(org.apache.solr.common.util.NamedList) Terms(org.apache.lucene.index.Terms) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) FieldType(org.apache.solr.schema.FieldType) Fields(org.apache.lucene.index.Fields) MultiPostingsEnum(org.apache.lucene.index.MultiPostingsEnum) IndexSchema(org.apache.solr.schema.IndexSchema) HashDocSet(org.apache.solr.search.HashDocSet) DocSet(org.apache.solr.search.DocSet) SortedIntDocSet(org.apache.solr.search.SortedIntDocSet) BitDocSet(org.apache.solr.search.BitDocSet)

Example 9 with DocSet

use of org.apache.solr.search.DocSet in project lucene-solr by apache.

the class SimpleFacets method parseParams.

protected ParsedParams parseParams(String type, String param) throws SyntaxError, IOException {
    SolrParams localParams = QueryParsing.getLocalParams(param, req.getParams());
    DocSet docs = docsOrig;
    String facetValue = param;
    String key = param;
    List<String> tags = Collections.emptyList();
    int threads = -1;
    if (localParams == null) {
        SolrParams params = global;
        SolrParams required = new RequiredSolrParams(params);
        return new ParsedParams(localParams, params, required, facetValue, docs, key, tags, threads);
    SolrParams params = SolrParams.wrapDefaults(localParams, global);
    SolrParams required = new RequiredSolrParams(params);
    // remove local params unless it's a query
    if (type != FacetParams.FACET_QUERY) {
        // TODO Cut over to an Enum here
        facetValue = localParams.get(CommonParams.VALUE);
    // reset set the default key now that localParams have been removed
    key = facetValue;
    // allow explicit set of the key
    key = localParams.get(CommonParams.OUTPUT_KEY, key);
    String tagStr = localParams.get(CommonParams.TAG);
    tags = tagStr == null ? Collections.<String>emptyList() : StrUtils.splitSmart(tagStr, ',');
    String threadStr = localParams.get(CommonParams.THREADS);
    if (threadStr != null) {
        threads = Integer.parseInt(threadStr);
    // figure out if we need a new base DocSet
    String excludeStr = localParams.get(CommonParams.EXCLUDE);
    if (excludeStr == null)
        return new ParsedParams(localParams, params, required, facetValue, docs, key, tags, threads);
    List<String> excludeTagList = StrUtils.splitSmart(excludeStr, ',');
    docs = computeDocSet(docs, excludeTagList);
    return new ParsedParams(localParams, params, required, facetValue, docs, key, tags, threads);
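Also used : RequiredSolrParams(org.apache.solr.common.params.RequiredSolrParams) RequiredSolrParams(org.apache.solr.common.params.RequiredSolrParams) SolrParams(org.apache.solr.common.params.SolrParams) HashDocSet(org.apache.solr.search.HashDocSet) DocSet(org.apache.solr.search.DocSet) SortedIntDocSet(org.apache.solr.search.SortedIntDocSet) BitDocSet(org.apache.solr.search.BitDocSet)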

Example 10 with DocSet

use of org.apache.solr.search.DocSet in project lucene-solr by apache.

the class HighlighterTest method payloadFilteringSpanQuery.

public void payloadFilteringSpanQuery() throws IOException {
    String FIELD_NAME = "payloadDelimited";
    assertU(adoc("id", "0", FIELD_NAME, "word|7 word|2"));
    //We search at a lower level than typical Solr tests because there's no QParser for payloads
    //Create query matching this payload
    Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "word")), //bytes for integer 7
    Collections.singletonList(new BytesRef(new byte[] { 0, 0, 0, 7 })));
    //invoke highlight component... the hard way
    final SearchComponent hlComp = h.getCore().getSearchComponent("highlight");
    SolrQueryRequest req = req("hl", "true", "hl.fl", FIELD_NAME, HighlightParams.USE_PHRASE_HIGHLIGHTER, "true");
    try {
        SolrQueryResponse resp = new SolrQueryResponse();
        ResponseBuilder rb = new ResponseBuilder(req, resp, Collections.singletonList(hlComp));
        rb.setResults(req.getSearcher().getDocListAndSet(query, (DocSet) null, null, 0, 1));
        //inspect response
        final String[] snippets = (String[]) resp.getValues().findRecursive("highlighting", "0", FIELD_NAME);
        assertEquals("<em>word|7</em> word|2", snippets[0]);
    } finally {
Also used : SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) LocalSolrQueryRequest(org.apache.solr.request.LocalSolrQueryRequest) SolrQueryResponse(org.apache.solr.response.SolrQueryResponse) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanPayloadCheckQuery(org.apache.lucene.queries.payloads.SpanPayloadCheckQuery) SpanPayloadCheckQuery(org.apache.lucene.queries.payloads.SpanPayloadCheckQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SearchComponent(org.apache.solr.handler.component.SearchComponent) Term(org.apache.lucene.index.Term) ResponseBuilder(org.apache.solr.handler.component.ResponseBuilder) BytesRef(org.apache.lucene.util.BytesRef) DocSet(org.apache.solr.search.DocSet) Test(org.junit.Test)


DocSet ( BitDocSet ( Query ( HashDocSet ( SolrIndexSearcher ( SortedIntDocSet ( SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)5 FieldType (org.apache.solr.schema.FieldType)5 ArrayList (java.util.ArrayList)4 BytesRef (org.apache.lucene.util.BytesRef)4 NamedList (org.apache.solr.common.util.NamedList)4 SchemaField (org.apache.solr.schema.SchemaField)4 IdentityHashMap (java.util.IdentityHashMap)3 Map (java.util.Map)3 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)3 Term (org.apache.lucene.index.Term)3 SolrException (org.apache.solr.common.SolrException)3 SolrParams (org.apache.solr.common.params.SolrParams)3 QParser ( SyntaxError (