Search in sources :

Example 1 with DocIterator

Use of org.apache.solr.search.DocIterator in project lucene-solr by apache.

the class IntervalFacets method getCountNumeric.

/**
 * Walks every document in {@code docs}, locates the index segment (leaf) whose global doc-id
 * range contains it, and positions a per-segment {@link NumericDocValues} iterator for the
 * schema field on that document.
 *
 * NOTE(review): this excerpt is incomplete. Closing braces, any {@code break} statements in the
 * switch, and the body of the final {@code if} are not visible, so what is done with each value
 * cannot be determined from here.
 *
 * @throws IOException declared on the signature; presumably raised by the doc-values reads (confirm)
 * @throws IllegalStateException if the field type reports no number type
 */
private void getCountNumeric() throws IOException {
    final FieldType ft = schemaField.getType();
    final String fieldName = schemaField.getName();
    final NumberType numericType = ft.getNumberType();
    // A field whose type has no number type cannot be handled by this method.
    if (numericType == null) {
        throw new IllegalStateException();
    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    // Current segment and its doc-values iterator; both are (re)assigned below whenever a
    // document falls outside the current segment's range.
    LeafReaderContext ctx = null;
    NumericDocValues longs = null;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
        final int doc = docsIt.nextDoc();
        // No segment selected yet, or doc is at/after the end of the current segment's
        // global range [docBase, docBase + maxDoc): move to a later segment.
        if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
            do {
                // NOTE(review): the right-hand side is missing in this excerpt; presumably it
                // takes the next leaf from ctxIt. Confirm against the upstream source.
                ctx =;
            } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
            assert doc >= ctx.docBase;
            // Open a fresh doc-values iterator on the new segment, chosen by numeric type.
            // NOTE(review): no break statements are visible between the cases in this excerpt.
            switch(numericType) {
                case LONG:
                case DATE:
                case INTEGER:
                    longs = DocValues.getNumeric(ctx.reader(), fieldName);
                case FLOAT:
                    // TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
                    longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {

                        // For negative raw values, flip every bit except the sign bit.
                        public long longValue() throws IOException {
                            long bits = super.longValue();
                            if (bits < 0)
                                bits ^= 0x7fffffffffffffffL;
                            return bits;
                case DOUBLE:
                    // TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
                    longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {

                        // Same transformation as the FLOAT case above.
                        public long longValue() throws IOException {
                            long bits = super.longValue();
                            if (bits < 0)
                                bits ^= 0x7fffffffffffffffL;
                            return bits;
                    // NOTE(review): presumably the switch's default branch; its label is not
                    // visible in this excerpt.
                    throw new AssertionError();
        // Doc values are addressed by segment-local id (global id minus docBase); only advance
        // when the iterator is still behind the target document.
        int valuesDocID = longs.docID();
        if (valuesDocID < doc - ctx.docBase) {
            valuesDocID = longs.advance(doc - ctx.docBase);
        // The iterator is positioned exactly on this document.
        // NOTE(review): the handling code for this case is not visible in this excerpt.
        if (valuesDocID == doc - ctx.docBase) {
Also used : FilterNumericDocValues(org.apache.lucene.index.FilterNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) DocIterator(org.apache.solr.search.DocIterator) IOException(java.io.IOException) FilterNumericDocValues(org.apache.lucene.index.FilterNumericDocValues) FieldType(org.apache.solr.schema.FieldType) NumberType(org.apache.solr.schema.NumberType) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)

Example 2 with DocIterator

Use of org.apache.solr.search.DocIterator in project lucene-solr by apache.

the class NumericFacets method getCountsSingleValue.

/**
 * Builds facet counts for a single-valued numeric field over the documents in {@code docs}.
 * The visible code proceeds in stages: (1) accumulate per-value counts into a hash table,
 * (2) select the top entries with a priority queue, (4) build the resulting {@link NamedList},
 * optionally merging in zero-count values read from the terms dictionary.
 *
 * NOTE(review): this excerpt is incomplete. Closing braces, {@code break} statements and several
 * right-hand sides (e.g. {@code ctx =;}, {@code term =;}) are missing, so some branches cannot be
 * read in full here.
 *
 * @param searcher  searcher supplying the schema, the index leaves and the terms dictionary
 * @param docs      documents to count over
 * @param fieldName name of the field to facet on; must resolve to a field with a number type
 * @param offset    number of leading entries to skip
 * @param limit     maximum number of entries; a negative value is treated as "no limit"
 * @param mincount  minimum count for a value to be kept; values {@code <= 0} request zero-count
 *                  entries and are then clamped to 1 for the accumulation phase
 * @param missing   when true, an entry with a {@code null} name and {@code missingCount} is appended
 * @param sort      compared against {@code FacetParams.FACET_SORT_COUNT} and its legacy form to
 *                  choose count ordering; any other value selects ordering by value bits
 * @return the facet values and their counts
 * @throws IOException declared on the signature; presumably raised by index reads (confirm)
 * @throws IllegalStateException if the field has no number type, or if the requested
 *         mincount/sort combination is not supported by the field (see the messages below)
 */
private static NamedList<Integer> getCountsSingleValue(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
    // Remember whether zero-count entries were requested before clamping mincount to >= 1.
    boolean zeros = mincount <= 0;
    mincount = Math.max(mincount, 1);
    final SchemaField sf = searcher.getSchema().getField(fieldName);
    final FieldType ft = sf.getType();
    final NumberType numericType = ft.getNumberType();
    if (numericType == null) {
        throw new IllegalStateException();
    // We don't return zeros when using PointFields or when index=false
    zeros = zeros && !ft.isPointField() && sf.indexed();
    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    // 1. accumulate
    final HashTable hashTable = new HashTable(true);
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    // Current segment and its doc-values iterator; reassigned when a doc leaves the segment.
    LeafReaderContext ctx = null;
    NumericDocValues longs = null;
    // NOTE(review): no increment of missingCount is visible in this excerpt; presumably it
    // happens in the truncated else-branch of the accumulation loop. Confirm upstream.
    int missingCount = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
        final int doc = docsIt.nextDoc();
        // No segment selected yet, or doc is at/after the end of the current segment's
        // global range [docBase, docBase + maxDoc): move to a later segment.
        if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
            do {
                // NOTE(review): right-hand side missing in this excerpt; presumably the next
                // leaf from ctxIt. Confirm against the upstream source.
                ctx =;
            } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
            assert doc >= ctx.docBase;
            // Open a fresh doc-values iterator on the new segment, chosen by numeric type.
            // NOTE(review): no break statements are visible between the cases in this excerpt.
            switch(numericType) {
                case LONG:
                case DATE:
                case INTEGER:
                    // Long, Date and Integer
                    longs = DocValues.getNumeric(ctx.reader(), fieldName);
                case FLOAT:
                    // TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
                    longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {

                        // For negative raw values, flip every bit except the sign bit.
                        public long longValue() throws IOException {
                            long bits = super.longValue();
                            if (bits < 0)
                                bits ^= 0x7fffffffffffffffL;
                            return bits;
                case DOUBLE:
                    // TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
                    longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {

                        // Same transformation as the FLOAT case above.
                        public long longValue() throws IOException {
                            long bits = super.longValue();
                            if (bits < 0)
                                bits ^= 0x7fffffffffffffffL;
                            return bits;
                    // NOTE(review): presumably the switch's default branch; its label is not
                    // visible in this excerpt.
                    throw new AssertionError("Unexpected type: " + numericType);
        // Doc values are addressed by segment-local id (global id minus docBase); only advance
        // when the iterator is still behind the target document.
        int valuesDocID = longs.docID();
        if (valuesDocID < doc - ctx.docBase) {
            valuesDocID = longs.advance(doc - ctx.docBase);
        if (valuesDocID == doc - ctx.docBase) {
            // The iterator is positioned on this doc: record one occurrence of its value,
            // passing the global doc id along with it.
            hashTable.add(doc, longs.longValue(), 1);
        } else {
    // 2. select top-k facet values
    // A negative limit keeps every accumulated value; otherwise at most offset + limit entries.
    final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
    final PriorityQueue<Entry> pq;
    if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        pq = new PriorityQueue<Entry>(pqSize) {

            // Count ordering: a is "less" when its count is lower, or on equal counts when its
            // bits are greater.
            protected boolean lessThan(Entry a, Entry b) {
                if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
                    return true;
                } else {
                    return false;
    } else {
        pq = new PriorityQueue<Entry>(pqSize) {

            // Value ordering: a is "less" when its bits are greater.
            protected boolean lessThan(Entry a, Entry b) {
                return a.bits > b.bits;
    // Feed every hash-table slot that meets mincount into the queue. The Entry handed back by
    // insertWithOverflow is kept in e and reused as the next scratch object when non-null.
    Entry e = null;
    for (int i = 0; i < hashTable.bits.length; ++i) {
        if (hashTable.counts[i] >= mincount) {
            if (e == null) {
                e = new Entry();
            e.bits = hashTable.bits[i];
            e.count = hashTable.counts[i];
            e.docID = hashTable.docIDs[i];
            e = pq.insertWithOverflow(e);
    // 4. build the NamedList
    final ValueSource vs = ft.getValueSource(sf, null);
    final NamedList<Integer> result = new NamedList<>();
    // to be merged with terms from the terms dict
    if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // Only keep items we're interested in
        final Deque<Entry> counts = new ArrayDeque<>();
        // NOTE(review): the loop body is not visible in this excerpt; presumably it moves
        // entries from pq into counts. Confirm upstream.
        while (pq.size() > offset) {
        // Entries from the PQ first, then using the terms dictionary
        for (Entry entry : counts) {
            // Resolve the entry's stored doc id to its segment and read the value's string
            // form through the field's value source, using the segment-local doc id.
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        if (zeros && (limit < 0 || result.size() < limit)) {
            // need to merge with the term dict
            if (!sf.indexed() && !sf.hasDocValues()) {
                throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is neither indexed nor docValues");
            // Add zeros until there are limit results
            final Set<String> alreadySeen = new HashSet<>();
            // Record the string form of every value still in the queue so it is not emitted
            // again as a zero-count term below.
            while (pq.size() > 0) {
                Entry entry = pq.pop();
                final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
                final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
                alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
            // NOTE(review): the loop body is not visible in this excerpt; presumably it adds
            // the names already in result to alreadySeen. Confirm upstream.
            for (int i = 0; i < result.size(); ++i) {
            final Terms terms = searcher.getSlowAtomicReader().terms(fieldName);
            if (terms != null) {
                // Restrict the term scan to terms starting with the field type's main-value
                // prefix; an empty prefix is used when the type reports none.
                final String prefixStr = TrieField.getMainValuePrefix(ft);
                final BytesRef prefix;
                if (prefixStr != null) {
                    prefix = new BytesRef(prefixStr);
                } else {
                    prefix = new BytesRef();
                final TermsEnum termsEnum = terms.iterator();
                BytesRef term;
                // Position on the first term at or after the prefix.
                // NOTE(review): break statements and the default label are not visible here.
                switch(termsEnum.seekCeil(prefix)) {
                    case FOUND:
                    case NOT_FOUND:
                        term = termsEnum.term();
                    case END:
                        term = null;
                        throw new AssertionError();
                final CharsRefBuilder spare = new CharsRefBuilder();
                // Skip phase: runs while skipped < offset, with skipped starting at
                // hashTable.size. NOTE(review): the increment of skipped and the right-hand
                // side of "term =" are missing from this excerpt.
                for (int skipped = hashTable.size; skipped < offset && term != null && StringHelper.startsWith(term, prefix); ) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                    term =;
                // Emit phase: add unseen terms with a count of 0 until the limit is reached or
                // the prefixed terms run out. NOTE(review): the loop's update expression is
                // truncated in this excerpt.
                for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        result.add(termStr, 0);
    } else {
        // => Merge the PQ and the terms dictionary on the fly
        if (!sf.indexed()) {
            throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "=" + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
        // Drain the queue into a map from the value's string form to its count.
        final Map<String, Integer> counts = new HashMap<>();
        while (pq.size() > 0) {
            final Entry entry = pq.pop();
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        final Terms terms = searcher.getSlowAtomicReader().terms(fieldName);
        if (terms != null) {
            // Same prefix handling as in the count-sorted branch above.
            final String prefixStr = TrieField.getMainValuePrefix(ft);
            final BytesRef prefix;
            if (prefixStr != null) {
                prefix = new BytesRef(prefixStr);
            } else {
                prefix = new BytesRef();
            final TermsEnum termsEnum = terms.iterator();
            BytesRef term;
            // Position on the first term at or after the prefix.
            // NOTE(review): break statements and the default label are not visible here.
            switch(termsEnum.seekCeil(prefix)) {
                case FOUND:
                case NOT_FOUND:
                    term = termsEnum.term();
                case END:
                    term = null;
                    throw new AssertionError();
            final CharsRefBuilder spare = new CharsRefBuilder();
            // Skip the first `offset` prefixed terms.
            // NOTE(review): the right-hand side of "term =" is missing from this excerpt.
            for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
                term =;
            // Emit terms in dictionary order with their accumulated count, or 0 when the term
            // has no entry in counts. NOTE(review): the loop's update expression is truncated.
            for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = {
                ft.indexedToReadable(term, spare);
                final String termStr = spare.toString();
                Integer count = counts.get(termStr);
                if (count == null) {
                    count = 0;
                result.add(termStr, count);
    // Optionally append the "missing" bucket under a null name.
    if (missing) {
        result.add(null, missingCount);
    return result;
Also used : FilterNumericDocValues(org.apache.lucene.index.FilterNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) DocIterator(org.apache.solr.search.DocIterator) HashMap(java.util.HashMap) TermsEnum(org.apache.lucene.index.TermsEnum) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef) HashSet(java.util.HashSet) NamedList(org.apache.solr.common.util.NamedList) Terms(org.apache.lucene.index.Terms) IOException(java.io.IOException) FilterNumericDocValues(org.apache.lucene.index.FilterNumericDocValues) ArrayDeque(java.util.ArrayDeque) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) NumberType(org.apache.solr.schema.NumberType) ValueSource(org.apache.lucene.queries.function.ValueSource) FunctionValues(org.apache.lucene.queries.function.FunctionValues)

Example 3 with DocIterator

Use of org.apache.solr.search.DocIterator in project lucene-solr by apache.

the class FacetProcessor method collect.

/**
 * Iterates over {@code docs}, maps each global doc id to the index segment that contains it,
 * and calls the two-int {@code collect} overload with the segment-local doc id and {@code slot}.
 *
 * NOTE(review): this excerpt is incomplete (closing braces and the right-hand side of
 * {@code ctx =;} are missing). No increment of {@code count} is visible either, so as shown the
 * method returns 0; presumably the upstream source counts collected docs. Confirm.
 *
 * @param docs documents to collect
 * @param slot slot value forwarded unchanged to the per-document collect call
 * @return the value of {@code count}
 * @throws IOException declared on the signature; presumably raised by the collect call (confirm)
 */
int collect(DocSet docs, int slot) throws IOException {
    int count = 0;
    SolrIndexSearcher searcher = fcontext.searcher;
    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    // segBase: global doc id of the current segment's first document.
    // adjustedMax: one past the current segment's last global doc id (segBase + segMax).
    // It starts at 0, so the first document always triggers segment selection.
    int segBase = 0;
    int segMax;
    int adjustedMax = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
        final int doc = docsIt.nextDoc();
        // doc is beyond the current segment: advance until a segment's range covers it.
        if (doc >= adjustedMax) {
            do {
                // NOTE(review): right-hand side missing in this excerpt; presumably the next
                // leaf from ctxIt. Confirm against the upstream source.
                ctx =;
                if (ctx == null) {
                    // should be impossible
                    throw new RuntimeException("INTERNAL FACET ERROR");
                segBase = ctx.docBase;
                segMax = ctx.reader().maxDoc();
                adjustedMax = segBase + segMax;
            } while (doc >= adjustedMax);
            assert doc >= ctx.docBase;
        // per-seg collectors
        // Translate the global doc id to a segment-local id before collecting.
        collect(doc - segBase, slot);
    return count;
Also used : DocIterator(org.apache.solr.search.DocIterator) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher)

Example 4 with DocIterator

Use of org.apache.solr.search.DocIterator in project lucene-solr by apache.

the class UnInvertedField method collectDocsGeneric.

// called from FieldFacetProcessor
// TODO: do a callback version that can be specialized!
/**
 * Counts and collects, for every document in the processor's base set, the term numbers that
 * fall inside the processor's window {@code [startTermIndex, endTermIndex)}.
 *
 * Terms in {@code bigTerms} are handled by intersecting their query's doc set with the base set.
 * All other terms are decoded per document from {@code index} / {@code tnums}.
 *
 * NOTE(review): this excerpt is incomplete. Closing braces and the statements that follow
 * several bare {@code if (...)} lines (presumably break/continue) are missing, so the exact
 * loop exits cannot be read from here.
 *
 * @param processor supplies the term window, the base doc set, the count accumulator and the
 *                  {@code collectFirstPhase} callbacks
 * @throws IOException declared on the signature; presumably raised by searcher/collector calls (confirm)
 */
public void collectDocsGeneric(FacetFieldProcessorByArrayUIF processor) throws IOException {
    int startTermIndex = processor.startTermIndex;
    int endTermIndex = processor.endTermIndex;
    int nTerms = processor.nTerms;
    DocSet docs = processor.fcontext.base;
    // NOTE(review): no use of uniqueTerms is visible in this excerpt; presumably it is updated
    // in the truncated "collected > 0" branch below. Confirm upstream.
    int uniqueTerms = 0;
    final CountSlotAcc countAcc = processor.countAcc;
    for (TopTerm tt : bigTerms.values()) {
        if (tt.termNum >= startTermIndex && tt.termNum < endTermIndex) {
            // handle the biggest terms
            // Slots are addressed relative to the window start (termNum - startTermIndex).
            try (DocSet intersection = searcher.getDocSet(tt.termQuery, docs)) {
                int collected = processor.collectFirstPhase(intersection, tt.termNum - startTermIndex);
                countAcc.incrementCount(tt.termNum - startTermIndex, collected);
                if (collected > 0) {
    if (termInstances > 0) {
        final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
        final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
        LeafReaderContext ctx = null;
        // segBase: global doc id of the current segment's first document.
        // adjustedMax: one past the current segment's last global doc id; starts at 0 so the
        // first document always triggers segment selection.
        int segBase = 0;
        int segMax;
        int adjustedMax = 0;
        // TODO: handle facet.prefix here!!!
        DocIterator iter = docs.iterator();
        while (iter.hasNext()) {
            int doc = iter.nextDoc();
            // doc is beyond the current segment: advance until a segment's range covers it.
            if (doc >= adjustedMax) {
                do {
                    // NOTE(review): right-hand side missing in this excerpt; presumably the
                    // next leaf from ctxIt. Confirm against the upstream source.
                    ctx =;
                    if (ctx == null) {
                        // should be impossible
                        throw new RuntimeException("INTERNAL FACET ERROR");
                    segBase = ctx.docBase;
                    segMax = ctx.reader().maxDoc();
                    adjustedMax = segBase + segMax;
                } while (doc >= adjustedMax);
                assert doc >= ctx.docBase;
            // The collector receives segment-local ids, while index[] is read with the global id.
            int segDoc = doc - segBase;
            int code = index[doc];
            if ((code & 0xff) == 1) {
                // Low byte == 1: the upper 24 bits are a start position inside a byte array
                // selected from tnums by bits 16..23 of the doc id.
                int pos = code >>> 8;
                int whichArray = (doc >>> 16) & 0xff;
                byte[] arr = tnums[whichArray];
                int tnum = 0;
                for (; ; ) {
                    // Read one variable-length delta: 7 payload bits per byte; a clear high bit
                    // (0x80) is tested after each byte.
                    // NOTE(review): the statements following the bare "if" lines below are
                    // missing from this excerpt (presumably break/continue).
                    int delta = 0;
                    for (; ; ) {
                        byte b = arr[pos++];
                        delta = (delta << 7) | (b & 0x7f);
                        if ((b & 0x80) == 0)
                    if (delta == 0)
                    // Term numbers are stored as running deltas offset by TNUM_OFFSET.
                    tnum += delta - TNUM_OFFSET;
                    int arrIdx = tnum - startTermIndex;
                    if (arrIdx < 0)
                    if (arrIdx >= nTerms)
                    countAcc.incrementCount(arrIdx, 1);
                    processor.collectFirstPhase(segDoc, arrIdx);
            } else {
                // Otherwise the delta bytes are packed directly into the int, consumed one
                // byte at a time starting from the low byte.
                int tnum = 0;
                int delta = 0;
                for (; ; ) {
                    delta = (delta << 7) | (code & 0x7f);
                    if ((code & 0x80) == 0) {
                        // NOTE(review): the statements following the bare "if" lines below are
                        // missing from this excerpt (presumably break).
                        if (delta == 0)
                        tnum += delta - TNUM_OFFSET;
                        int arrIdx = tnum - startTermIndex;
                        if (arrIdx >= 0) {
                            if (arrIdx >= nTerms)
                            countAcc.incrementCount(arrIdx, 1);
                            processor.collectFirstPhase(segDoc, arrIdx);
                        delta = 0;
                    code >>>= 8;
Also used : DocIterator(org.apache.solr.search.DocIterator) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BitDocSet(org.apache.solr.search.BitDocSet) DocSet(org.apache.solr.search.DocSet)

Example 5 with DocIterator

Use of org.apache.solr.search.DocIterator in project lucene-solr by apache.

the class UnInvertedField method getCounts.

/**
 * Accumulates per-term counts into {@code counts} for the processor's base doc set.
 *
 * When the base set covers more than half of {@code maxDoc} (and is a {@link BitDocSet} with
 * term instances present), the set is inverted and the complement is counted instead; the final
 * loop then adjusts each slot so that it ends up holding {@code maxTermCounts[i] - count}.
 *
 * NOTE(review): this excerpt is incomplete. Closing braces, the body of the early mincount
 * check and the statements following several bare {@code if (...)} lines are missing.
 *
 * @param processor supplies the base doc set and {@code effectiveMincount}
 * @param counts    accumulator that receives the per-term counts
 * @throws IOException declared on the signature; presumably raised by searcher calls (confirm)
 */
private void getCounts(FacetFieldProcessorByArrayUIF processor, CountSlotAcc counts) throws IOException {
    DocSet docs = processor.fcontext.base;
    int baseSize = docs.size();
    int maxDoc = searcher.maxDoc();
    // what about allBuckets?
    // NOTE(review): the body of this check is not visible in this excerpt; presumably an early
    // return, since no term could reach the minimum count. Confirm upstream.
    if (baseSize < processor.effectiveMincount) {
    final int[] index = this.index;
    // Count over the complement when the base set is larger than half the index.
    boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0 && docs instanceof BitDocSet;
    if (doNegative) {
        // Work on a clone so the caller's bit set is not modified by the flip.
        FixedBitSet bs = ((BitDocSet) docs).getBits().clone();
        bs.flip(0, maxDoc);
        // TODO: when iterator across negative elements is available, use that
        // instead of creating a new bitset and inverting.
        docs = new BitDocSet(bs, maxDoc - baseSize);
    // simply negating will mean that we have deleted docs in the set.
    // that should be OK, as their entries in our table should be empty.
    // For the biggest terms, do straight set intersections
    for (TopTerm tt : bigTerms.values()) {
        // TODO: counts could be deferred if sorting by index order
        counts.incrementCount(tt.termNum, searcher.numDocs(tt.termQuery, docs));
    if (termInstances > 0) {
        DocIterator iter = docs.iterator();
        while (iter.hasNext()) {
            int doc = iter.nextDoc();
            int code = index[doc];
            if ((code & 0xff) == 1) {
                // Low byte == 1: the upper 24 bits are a start position inside a byte array
                // selected from tnums by bits 16..23 of the doc id.
                int pos = code >>> 8;
                int whichArray = (doc >>> 16) & 0xff;
                byte[] arr = tnums[whichArray];
                int tnum = 0;
                for (; ; ) {
                    // Read one variable-length delta: 7 payload bits per byte; a clear high bit
                    // (0x80) is tested after each byte.
                    // NOTE(review): the statements following the bare "if" lines below are
                    // missing from this excerpt (presumably break).
                    int delta = 0;
                    for (; ; ) {
                        byte b = arr[pos++];
                        delta = (delta << 7) | (b & 0x7f);
                        if ((b & 0x80) == 0)
                    if (delta == 0)
                    // Term numbers are stored as running deltas offset by TNUM_OFFSET.
                    tnum += delta - TNUM_OFFSET;
                    counts.incrementCount(tnum, 1);
            } else {
                // Otherwise the delta bytes are packed directly into the int, consumed one
                // byte at a time starting from the low byte.
                int tnum = 0;
                int delta = 0;
                for (; ; ) {
                    delta = (delta << 7) | (code & 0x7f);
                    if ((code & 0x80) == 0) {
                        // NOTE(review): the statement following the bare "if" below is missing
                        // from this excerpt (presumably break).
                        if (delta == 0)
                        tnum += delta - TNUM_OFFSET;
                        counts.incrementCount(tnum, 1);
                        delta = 0;
                    code >>>= 8;
    if (doNegative) {
        for (int i = 0; i < numTermsInField; i++) {
            //       counts[i] = maxTermCounts[i] - counts[i];
            // Adding (max - 2 * c) to a slot currently holding c leaves it at (max - c).
            counts.incrementCount(i, maxTermCounts[i] - counts.getCount(i) * 2);
/*** TODO - future optimization to handle allBuckets
    if (processor.allBucketsSlot >= 0) {
      int all = 0;  // overflow potential
      for (int i=0; i<numTermsInField; i++) {
        all += counts.getCount(i);
      counts.incrementCount(processor.allBucketsSlot, all);
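Also used : BitDocSet(org.apache.solr.search.BitDocSet) DocIterator(org.apache.solr.search.DocIterator) FixedBitSet(org.apache.lucene.util.FixedBitSet) BitDocSet(org.apache.solr.search.BitDocSet) DocSet(org.apache.solr.search.DocSet)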


DocIterator (org.apache.solr.search.DocIterator) LeafReaderContext (org.apache.lucene.index.LeafReaderContext)10 SchemaField (org.apache.solr.schema.SchemaField)9 DocList (org.apache.solr.search.DocList) NamedList (org.apache.solr.common.util.NamedList)7 FieldType (org.apache.solr.schema.FieldType)7 SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher) Document (org.apache.lucene.document.Document)6 IOException (java.io.IOException) SolrParams (org.apache.solr.common.params.SolrParams)5 SolrQueryRequest (org.apache.solr.request.SolrQueryRequest)5 ArrayList (java.util.ArrayList)4 IndexableField (org.apache.lucene.index.IndexableField)4 SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues)4 FixedBitSet (org.apache.lucene.util.FixedBitSet)4 NumericDocValues (org.apache.lucene.index.NumericDocValues)3 Query (org.apache.lucene.search.Query) Sort (org.apache.lucene.search.Sort) SolrException (org.apache.solr.common.SolrException)3 SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)3