
Example 1 with FilterNumericDocValues

Use of org.apache.lucene.index.FilterNumericDocValues in the apache/lucene-solr project.

In the class IntervalFacets, the method getCountNumeric:

private void getCountNumeric() throws IOException {
    final FieldType ft = schemaField.getType();
    final String fieldName = schemaField.getName();
    final NumberType numericType = ft.getNumberType();
    if (numericType == null) {
        throw new IllegalStateException();
    }
    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    NumericDocValues longs = null;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
        final int doc = docsIt.nextDoc();
        if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
            do {
                ctx = ctxIt.next();
            } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
            assert doc >= ctx.docBase;
            switch(numericType) {
                case LONG:
                case DATE:
                case INTEGER:
                    longs = DocValues.getNumeric(ctx.reader(), fieldName);
                    break;
                case FLOAT:
                    // TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
                    longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {

                        @Override
                        public long longValue() throws IOException {
                            long bits = super.longValue();
                            if (bits < 0)
                                bits ^= 0x7fffffffffffffffL;
                            return bits;
                        }
                    };
                    break;
                case DOUBLE:
                    // TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
                    longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {

                        @Override
                        public long longValue() throws IOException {
                            long bits = super.longValue();
                            if (bits < 0)
                                bits ^= 0x7fffffffffffffffL;
                            return bits;
                        }
                    };
                    break;
                default:
                    throw new AssertionError();
            }
        }
        int valuesDocID = longs.docID();
        if (valuesDocID < doc - ctx.docBase) {
            valuesDocID = longs.advance(doc - ctx.docBase);
        }
        if (valuesDocID == doc - ctx.docBase) {
            accumIntervalWithValue(longs.longValue());
        }
    }
}
Also used: FilterNumericDocValues (org.apache.lucene.index.FilterNumericDocValues), NumericDocValues (org.apache.lucene.index.NumericDocValues), SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues), DocIterator (org.apache.solr.search.DocIterator), IOException (java.io.IOException), FieldType (org.apache.solr.schema.FieldType), NumberType (org.apache.solr.schema.NumberType), LeafReaderContext (org.apache.lucene.index.LeafReaderContext)
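
The FLOAT and DOUBLE branches above perform the same transform as NumericUtils.sortableDoubleBits: negative bit patterns have their lower 63 bits inverted so that plain long comparisons order the encoded values like the original floating-point numbers. Below is a minimal stand-alone sketch, not taken from lucene-solr (the class name SortableBitsDemo is made up; it only assumes lucene-core on the classpath), that checks both properties against Double.doubleToLongBits output.

import org.apache.lucene.util.NumericUtils;

public class SortableBitsDemo {

    // Same operation as the anonymous FilterNumericDocValues overrides above.
    static long flipNegative(long bits) {
        if (bits < 0)
            bits ^= 0x7fffffffffffffffL;
        return bits;
    }

    public static void main(String[] args) {
        double[] ascending = { -3.5, -0.25, 0.0, 1.5, 42.0 };
        long previous = Long.MIN_VALUE;
        for (double value : ascending) {
            long raw = Double.doubleToLongBits(value);
            long encoded = flipNegative(raw);
            // identical result to Lucene's helper
            if (encoded != NumericUtils.sortableDoubleBits(raw)) {
                throw new AssertionError("mismatch for " + value);
            }
            // encoded longs come out in ascending order, like the doubles
            if (encoded < previous) {
                throw new AssertionError("ordering broken at " + value);
            }
            previous = encoded;
        }
        System.out.println("bit flip matches NumericUtils.sortableDoubleBits and preserves ordering");
    }
}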

Example 2 with FilterNumericDocValues

Use of org.apache.lucene.index.FilterNumericDocValues in the apache/lucene-solr project.

In the class NumericFacets, the method getCountsSingleValue:

private static NamedList<Integer> getCountsSingleValue(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
    boolean zeros = mincount <= 0;
    mincount = Math.max(mincount, 1);
    final SchemaField sf = searcher.getSchema().getField(fieldName);
    final FieldType ft = sf.getType();
    final NumberType numericType = ft.getNumberType();
    if (numericType == null) {
        throw new IllegalStateException();
    }
    // We don't return zeros when using PointFields or when index=false
    zeros = zeros && !ft.isPointField() && sf.indexed();
    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    // 1. accumulate
    final HashTable hashTable = new HashTable(true);
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    NumericDocValues longs = null;
    int missingCount = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
        final int doc = docsIt.nextDoc();
        if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
            do {
                ctx = ctxIt.next();
            } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
            assert doc >= ctx.docBase;
            switch(numericType) {
                case LONG:
                case DATE:
                case INTEGER:
                    // Long, Date and Integer
                    longs = DocValues.getNumeric(ctx.reader(), fieldName);
                    break;
                case FLOAT:
                    // TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
                    longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {

                        @Override
                        public long longValue() throws IOException {
                            long bits = super.longValue();
                            if (bits < 0)
                                bits ^= 0x7fffffffffffffffL;
                            return bits;
                        }
                    };
                    break;
                case DOUBLE:
                    // TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
                    longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {

                        @Override
                        public long longValue() throws IOException {
                            long bits = super.longValue();
                            if (bits < 0)
                                bits ^= 0x7fffffffffffffffL;
                            return bits;
                        }
                    };
                    break;
                default:
                    throw new AssertionError("Unexpected type: " + numericType);
            }
        }
        int valuesDocID = longs.docID();
        if (valuesDocID < doc - ctx.docBase) {
            valuesDocID = longs.advance(doc - ctx.docBase);
        }
        if (valuesDocID == doc - ctx.docBase) {
            hashTable.add(doc, longs.longValue(), 1);
        } else {
            ++missingCount;
        }
    }
    // 2. select top-k facet values
    final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
    final PriorityQueue<Entry> pq;
    if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        pq = new PriorityQueue<Entry>(pqSize) {

            @Override
            protected boolean lessThan(Entry a, Entry b) {
                if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
                    return true;
                } else {
                    return false;
                }
            }
        };
    } else {
        pq = new PriorityQueue<Entry>(pqSize) {

            @Override
            protected boolean lessThan(Entry a, Entry b) {
                return a.bits > b.bits;
            }
        };
    }
    Entry e = null;
    for (int i = 0; i < hashTable.bits.length; ++i) {
        if (hashTable.counts[i] >= mincount) {
            if (e == null) {
                e = new Entry();
            }
            e.bits = hashTable.bits[i];
            e.count = hashTable.counts[i];
            e.docID = hashTable.docIDs[i];
            e = pq.insertWithOverflow(e);
        }
    }
    // 4. build the NamedList
    final ValueSource vs = ft.getValueSource(sf, null);
    final NamedList<Integer> result = new NamedList<>();
    // to be merged with terms from the terms dict
    if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // Only keep items we're interested in
        final Deque<Entry> counts = new ArrayDeque<>();
        while (pq.size() > offset) {
            counts.addFirst(pq.pop());
        }
        // Entries from the PQ first, then using the terms dictionary
        for (Entry entry : counts) {
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }
        if (zeros && (limit < 0 || result.size() < limit)) {
            // need to merge with the term dict
            if (!sf.indexed() && !sf.hasDocValues()) {
                throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is neither indexed nor docValues");
            }
            // Add zeros until there are limit results
            final Set<String> alreadySeen = new HashSet<>();
            while (pq.size() > 0) {
                Entry entry = pq.pop();
                final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
                final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
                alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
            }
            for (int i = 0; i < result.size(); ++i) {
                alreadySeen.add(result.getName(i));
            }
            final Terms terms = searcher.getSlowAtomicReader().terms(fieldName);
            if (terms != null) {
                final String prefixStr = TrieField.getMainValuePrefix(ft);
                final BytesRef prefix;
                if (prefixStr != null) {
                    prefix = new BytesRef(prefixStr);
                } else {
                    prefix = new BytesRef();
                }
                final TermsEnum termsEnum = terms.iterator();
                BytesRef term;
                switch(termsEnum.seekCeil(prefix)) {
                    case FOUND:
                    case NOT_FOUND:
                        term = termsEnum.term();
                        break;
                    case END:
                        term = null;
                        break;
                    default:
                        throw new AssertionError();
                }
                final CharsRefBuilder spare = new CharsRefBuilder();
                for (int skipped = hashTable.size; skipped < offset && term != null && StringHelper.startsWith(term, prefix); ) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        ++skipped;
                    }
                    term = termsEnum.next();
                }
                for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        result.add(termStr, 0);
                    }
                }
            }
        }
    } else {
        // => Merge the PQ and the terms dictionary on the fly
        if (!sf.indexed()) {
            throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "=" + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
        }
        final Map<String, Integer> counts = new HashMap<>();
        while (pq.size() > 0) {
            final Entry entry = pq.pop();
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }
        final Terms terms = searcher.getSlowAtomicReader().terms(fieldName);
        if (terms != null) {
            final String prefixStr = TrieField.getMainValuePrefix(ft);
            final BytesRef prefix;
            if (prefixStr != null) {
                prefix = new BytesRef(prefixStr);
            } else {
                prefix = new BytesRef();
            }
            final TermsEnum termsEnum = terms.iterator();
            BytesRef term;
            switch(termsEnum.seekCeil(prefix)) {
                case FOUND:
                case NOT_FOUND:
                    term = termsEnum.term();
                    break;
                case END:
                    term = null;
                    break;
                default:
                    throw new AssertionError();
            }
            final CharsRefBuilder spare = new CharsRefBuilder();
            for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
                term = termsEnum.next();
            }
            for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                ft.indexedToReadable(term, spare);
                final String termStr = spare.toString();
                Integer count = counts.get(termStr);
                if (count == null) {
                    count = 0;
                }
                result.add(termStr, count);
            }
        }
    }
    if (missing) {
        result.add(null, missingCount);
    }
    return result;
}
Also used: FilterNumericDocValues (org.apache.lucene.index.FilterNumericDocValues), NumericDocValues (org.apache.lucene.index.NumericDocValues), SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues), DocIterator (org.apache.solr.search.DocIterator), HashMap (java.util.HashMap), TermsEnum (org.apache.lucene.index.TermsEnum), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder), BytesRef (org.apache.lucene.util.BytesRef), HashSet (java.util.HashSet), NamedList (org.apache.solr.common.util.NamedList), Terms (org.apache.lucene.index.Terms), IOException (java.io.IOException), ArrayDeque (java.util.ArrayDeque), FieldType (org.apache.solr.schema.FieldType), SchemaField (org.apache.solr.schema.SchemaField), NumberType (org.apache.solr.schema.NumberType), ValueSource (org.apache.lucene.queries.function.ValueSource), FunctionValues (org.apache.lucene.queries.function.FunctionValues)
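
Step 2 above ("select top-k facet values") relies on Lucene's bounded org.apache.lucene.util.PriorityQueue: the least competitive entry sits on top, insertWithOverflow evicts it, and the evicted Entry object is recycled for the next candidate instead of being reallocated. A minimal sketch of that reuse idiom follows; it is not part of lucene-solr, and the flat bits/counts/docIDs arrays simply stand in for the hash table.

import org.apache.lucene.util.PriorityQueue;

class TopCounts {

    static final class Entry {
        long bits;
        int count;
        int docID;
    }

    static PriorityQueue<Entry> topK(int k, long[] bits, int[] counts, int[] docIDs) {
        PriorityQueue<Entry> pq = new PriorityQueue<Entry>(k) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                // the "least" entry sits on top and is evicted first:
                // lower count loses, larger bits breaks count ties
                return a.count < b.count || (a.count == b.count && a.bits > b.bits);
            }
        };
        Entry e = null;
        for (int i = 0; i < bits.length; ++i) {
            if (e == null) {
                e = new Entry();
            }
            e.bits = bits[i];
            e.count = counts[i];
            e.docID = docIDs[i];
            // returns the displaced (or rejected) entry so it can be reused
            e = pq.insertWithOverflow(e);
        }
        return pq;
    }
}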

Example 3 with FilterNumericDocValues

Use of org.apache.lucene.index.FilterNumericDocValues in the apache/lucene-solr project.

In the class ToParentBlockJoinSortField, the method getDoubleComparator:

private FieldComparator<?> getDoubleComparator(int numHits) {
    return new FieldComparator.DoubleComparator(numHits, getField(), (Double) missingValue) {

        @Override
        protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
            SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field);
            final BlockJoinSelector.Type type = order ? BlockJoinSelector.Type.MAX : BlockJoinSelector.Type.MIN;
            final BitSet parents = parentFilter.getBitSet(context);
            final BitSet children = childFilter.getBitSet(context);
            if (children == null) {
                return DocValues.emptyNumeric();
            }
            return new FilterNumericDocValues(BlockJoinSelector.wrap(sortedNumeric, type, parents, children)) {

                @Override
                public long longValue() throws IOException {
                    // undo the numericutils sortability
                    return NumericUtils.sortableDoubleBits(super.longValue());
                }
            };
        }
    };
}
Also used: SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues), BitSet (org.apache.lucene.util.BitSet), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), FilterNumericDocValues (org.apache.lucene.index.FilterNumericDocValues)

Example 4 with FilterNumericDocValues

Use of org.apache.lucene.index.FilterNumericDocValues in the apache/lucene-solr project.

In the class SortedNumericSelector, the method wrap:

/** 
   * Wraps a multi-valued SortedNumericDocValues as a single-valued view, using the specified selector 
   * and numericType.
   */
public static NumericDocValues wrap(SortedNumericDocValues sortedNumeric, Type selector, SortField.Type numericType) {
    if (numericType != SortField.Type.INT && numericType != SortField.Type.LONG && numericType != SortField.Type.FLOAT && numericType != SortField.Type.DOUBLE) {
        throw new IllegalArgumentException("numericType must be a numeric type");
    }
    final NumericDocValues view;
    NumericDocValues singleton = DocValues.unwrapSingleton(sortedNumeric);
    if (singleton != null) {
        // it's actually single-valued in practice, but indexed as multi-valued,
        // so just sort on the underlying single-valued dv directly.
        // regardless of selector type, this optimization is safe!
        view = singleton;
    } else {
        switch(selector) {
            case MIN:
                view = new MinValue(sortedNumeric);
                break;
            case MAX:
                view = new MaxValue(sortedNumeric);
                break;
            default:
                throw new AssertionError();
        }
    }
    // undo the numericutils sortability
    switch(numericType) {
        case FLOAT:
            return new FilterNumericDocValues(view) {

                @Override
                public long longValue() throws IOException {
                    return NumericUtils.sortableFloatBits((int) in.longValue());
                }
            };
        case DOUBLE:
            return new FilterNumericDocValues(view) {

                @Override
                public long longValue() throws IOException {
                    return NumericUtils.sortableDoubleBits(in.longValue());
                }
            };
        default:
            return view;
    }
}
Also used: FilterNumericDocValues (org.apache.lucene.index.FilterNumericDocValues), NumericDocValues (org.apache.lucene.index.NumericDocValues), SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues)
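
A hypothetical usage sketch of the wrap method shown above (not from the original source): the field name "price", the LeafReader argument, and the assumption that the field was indexed with NumericUtils.floatToSortableInt, as Lucene's float sort fields expect, are all illustrative. It collapses a multi-valued field to its per-document minimum and decodes the result back to a float.

import java.io.IOException;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;

public class MinFloatPerDoc {

    // Assumes "price" holds values encoded with NumericUtils.floatToSortableInt,
    // the convention SortField.Type.FLOAT expects.
    static void printMinPrices(LeafReader reader) throws IOException {
        SortedNumericDocValues multi = DocValues.getSortedNumeric(reader, "price");
        NumericDocValues single = SortedNumericSelector.wrap(
                multi, SortedNumericSelector.Type.MIN, SortField.Type.FLOAT);
        for (int doc = single.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = single.nextDoc()) {
            // wrap() already undid the sortable encoding, so longValue() holds raw float bits
            float min = Float.intBitsToFloat((int) single.longValue());
            System.out.println("doc " + doc + ": min price = " + min);
        }
    }
}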

Example 5 with FilterNumericDocValues

Use of org.apache.lucene.index.FilterNumericDocValues in the apache/lucene-solr project.

In the class ToParentBlockJoinSortField, the method getFloatComparator:

private FieldComparator<?> getFloatComparator(int numHits) {
    return new FieldComparator.FloatComparator(numHits, getField(), (Float) missingValue) {

        @Override
        protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
            SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), field);
            final BlockJoinSelector.Type type = order ? BlockJoinSelector.Type.MAX : BlockJoinSelector.Type.MIN;
            final BitSet parents = parentFilter.getBitSet(context);
            final BitSet children = childFilter.getBitSet(context);
            if (children == null) {
                return DocValues.emptyNumeric();
            }
            return new FilterNumericDocValues(BlockJoinSelector.wrap(sortedNumeric, type, parents, children)) {

                @Override
                public long longValue() throws IOException {
                    // undo the numericutils sortability
                    return NumericUtils.sortableFloatBits((int) super.longValue());
                }
            };
        }
    };
}
Also used: SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues), BitSet (org.apache.lucene.util.BitSet), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), FilterNumericDocValues (org.apache.lucene.index.FilterNumericDocValues)
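
A hypothetical usage sketch tying Examples 3 and 5 together (the "docType" marker field, the "price" field, and the queries are illustrative assumptions, not from the original source): it builds a ToParentBlockJoinSortField so that parent documents are sorted by the largest "price" found among their children, exercising the getDoubleComparator shown in Example 3.

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ToParentBlockJoinSortField;

public class BlockJoinSortSketch {

    // With this constructor, reverse=true is expected to both sort descending and
    // select the maximum child value (the "order" flag used in getDoubleComparator).
    static TopDocs parentsByMaxChildPrice(IndexSearcher searcher) throws IOException {
        BitSetProducer parents = new QueryBitSetProducer(new TermQuery(new Term("docType", "parent")));
        BitSetProducer children = new QueryBitSetProducer(new TermQuery(new Term("docType", "child")));
        SortField byChildPrice = new ToParentBlockJoinSortField(
                "price", SortField.Type.DOUBLE, /* reverse= */ true, parents, children);
        // query only parent documents, sorted by the block-join sort field
        return searcher.search(new TermQuery(new Term("docType", "parent")), 10, new Sort(byChildPrice));
    }
}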

Aggregations

FilterNumericDocValues (org.apache.lucene.index.FilterNumericDocValues): 5 usages
SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues): 5 usages
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 4 usages
NumericDocValues (org.apache.lucene.index.NumericDocValues): 3 usages
IOException (java.io.IOException): 2 usages
BitSet (org.apache.lucene.util.BitSet): 2 usages
FieldType (org.apache.solr.schema.FieldType): 2 usages
NumberType (org.apache.solr.schema.NumberType): 2 usages
DocIterator (org.apache.solr.search.DocIterator): 2 usages
ArrayDeque (java.util.ArrayDeque): 1 usage
HashMap (java.util.HashMap): 1 usage
HashSet (java.util.HashSet): 1 usage
Terms (org.apache.lucene.index.Terms): 1 usage
TermsEnum (org.apache.lucene.index.TermsEnum): 1 usage
FunctionValues (org.apache.lucene.queries.function.FunctionValues): 1 usage
ValueSource (org.apache.lucene.queries.function.ValueSource): 1 usage
BytesRef (org.apache.lucene.util.BytesRef): 1 usage
CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder): 1 usage
NamedList (org.apache.solr.common.util.NamedList): 1 usage
SchemaField (org.apache.solr.schema.SchemaField): 1 usage