Search in sources:

Example 11 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class Lucene54DocValuesProducer method getNumeric.

/**
 * Returns an iterator over the numeric doc values stored for {@code field}.
 *
 * <p>The implementation is chosen from the field's {@code NumericEntry}:
 * <ul>
 *   <li>{@code format == SPARSE_COMPRESSED}: delegated to {@code getSparseNumericDocValues};</li>
 *   <li>{@code missingOffset == ALL_MISSING}: an empty iterator;</li>
 *   <li>{@code missingOffset == ALL_LIVE}: a dense iterator that visits every doc id in
 *       {@code [0, maxDoc)};</li>
 *   <li>otherwise: an iterator that skips docs whose stored value is 0 and whose
 *       bit in the "docs with field" bitset is not set.</li>
 * </ul>
 *
 * @param field the field to read; its entry is looked up by {@code field.name}
 * @return a fresh iterator positioned before the first document (doc id -1), or an empty iterator
 * @throws IOException if reading the underlying values fails
 */
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
    NumericEntry entry = numerics.get(field.name);
    Bits docsWithField;
    if (entry.format == SPARSE_COMPRESSED) {
        return getSparseNumericDocValues(entry);
    } else {
        if (entry.missingOffset == ALL_MISSING) {
            return DocValues.emptyNumeric();
        } else if (entry.missingOffset == ALL_LIVE) {
            final LongValues values = getNumeric(entry);
            // Dense case: every document in [0, maxDoc) has a value, so iteration is plain counting.
            return new NumericDocValues() {

                private int docID = -1;

                @Override
                public int docID() {
                    return docID;
                }

                @Override
                public int nextDoc() {
                    docID++;
                    if (docID == maxDoc) {
                        docID = NO_MORE_DOCS;
                    }
                    return docID;
                }

                @Override
                public int advance(int target) {
                    if (target >= maxDoc) {
                        docID = NO_MORE_DOCS;
                    } else {
                        docID = target;
                    }
                    return docID;
                }

                @Override
                public boolean advanceExact(int target) throws IOException {
                    // No bitset to consult: all documents are live in this branch.
                    docID = target;
                    return true;
                }

                @Override
                public long cost() {
                    // This iterator matches every doc in [0, maxDoc), so maxDoc is the number of
                    // matches; this also agrees with the bitset-backed iterator below.
                    return maxDoc;
                }

                @Override
                public long longValue() {
                    return values.get(docID);
                }
            };
        } else {
            docsWithField = getLiveBits(entry.missingOffset, maxDoc);
        }
    }
    final LongValues values = getNumeric(entry);
    return new NumericDocValues() {

        int doc = -1;

        // Value of the current document, cached by advance()/advanceExact().
        long value;

        @Override
        public long longValue() throws IOException {
            return value;
        }

        @Override
        public int docID() {
            return doc;
        }

        @Override
        public int nextDoc() throws IOException {
            return advance(doc + 1);
        }

        @Override
        public int advance(int target) throws IOException {
            // The loop variable intentionally shadows the field; the field is only updated
            // once a matching document is found (or the scan is exhausted).
            for (int doc = target; doc < maxDoc; ++doc) {
                value = values.get(doc);
                // A non-zero value short-circuits the bitset lookup; only a stored 0 needs
                // the bitset to tell "really 0" apart from "missing".
                if (value != 0 || docsWithField.get(doc)) {
                    return this.doc = doc;
                }
            }
            return doc = NO_MORE_DOCS;
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
            doc = target;
            value = values.get(doc);
            return value != 0 || docsWithField.get(doc);
        }

        @Override
        public long cost() {
            return maxDoc;
        }
    };
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) Bits(org.apache.lucene.util.Bits) LongValues(org.apache.lucene.util.LongValues) IOException(java.io.IOException)

Example 12 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class Lucene54DocValuesProducer method getSortedSetTable.

/**
 * Builds sorted-set doc values for a field encoded with a shared ordinal table.
 *
 * <p>Each document stores a single index; {@code ss.tableOffsets[index]} and
 * {@code ss.tableOffsets[index + 1]} delimit the slice of {@code ss.table} holding that
 * document's ordinals. Term bytes are resolved through the field's legacy binary doc values.
 *
 * @param field the field to read; entries are looked up by {@code field.name}
 * @param ss    the sorted-set entry supplying {@code table} and {@code tableOffsets}
 * @return the legacy implementation wrapped by {@code LegacySortedSetDocValuesWrapper}
 * @throws IOException if reading the underlying values fails
 */
private SortedSetDocValues getSortedSetTable(FieldInfo field, SortedSetEntry ss) throws IOException {
    final long numValues = binaries.get(field.name).count;
    final LongBinaryDocValues terms = (LongBinaryDocValues) getLegacyBinary(field);
    final LongValues docToTableIndex = getNumeric(ords.get(field.name));
    final long[] ordTable = ss.table;
    final int[] ordTableOffsets = ss.tableOffsets;

    final LegacySortedSetDocValues legacy = new LegacySortedSetDocValues() {

        // cursor walks the table slice [begin, limit) of the current document.
        int cursor, begin, limit;

        @Override
        public void setDocument(int docID) {
            final int tableIndex = (int) docToTableIndex.get(docID);
            begin = ordTableOffsets[tableIndex];
            cursor = begin;
            limit = ordTableOffsets[tableIndex + 1];
        }

        @Override
        public long nextOrd() {
            return cursor == limit ? NO_MORE_ORDS : ordTable[cursor++];
        }

        @Override
        public BytesRef lookupOrd(long ord) {
            return terms.get(ord);
        }

        @Override
        public long getValueCount() {
            return numValues;
        }

        @Override
        public long lookupTerm(BytesRef key) {
            // Only the compressed binary format has its own term lookup; otherwise use the default.
            if (!(terms instanceof CompressedBinaryDocValues)) {
                return super.lookupTerm(key);
            }
            return ((CompressedBinaryDocValues) terms).lookupTerm(key);
        }

        @Override
        public TermsEnum termsEnum() throws IOException {
            // Only the compressed binary format has its own terms enum; otherwise use the default.
            if (!(terms instanceof CompressedBinaryDocValues)) {
                return super.termsEnum();
            }
            return ((CompressedBinaryDocValues) terms).getTermsEnum();
        }
    };
    return new LegacySortedSetDocValuesWrapper(legacy, maxDoc);
}
Also used : IOException(java.io.IOException) LongValues(org.apache.lucene.util.LongValues) BytesRef(org.apache.lucene.util.BytesRef)

Example 13 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class Lucene54DocValuesProducer method getSortedNumeric.

/**
 * Returns an iterator over the sorted-numeric doc values stored for {@code field}.
 *
 * <p>The implementation is chosen from the field's {@code SortedSetEntry} format:
 * <ul>
 *   <li>{@code SORTED_SINGLE_VALUED}: at most one value per document, backed either by the
 *       sparse numeric iterator or by a "docs with field" bitset plus random-access values;</li>
 *   <li>{@code SORTED_WITH_ADDRESSES}: {@code ordIndex.get(doc)} and
 *       {@code ordIndex.get(doc + 1)} delimit the document's slice of the value stream;</li>
 *   <li>{@code SORTED_SET_TABLE}: each document stores an index into {@code ss.tableOffsets},
 *       which delimits the document's slice of {@code ss.table}.</li>
 * </ul>
 *
 * <p>In the two multi-valued formats, both {@code nextDoc()} and {@code advanceExact()}
 * position the value cursor at the start of the current document's slice so that
 * {@code nextValue()} returns that document's values.
 *
 * @param field the field to read; entries are looked up by {@code field.name}
 * @return a fresh iterator positioned before the first document (doc id -1)
 * @throws IOException if reading the underlying values fails
 * @throws AssertionError if the entry has an unknown format
 */
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
    SortedSetEntry ss = sortedNumerics.get(field.name);
    if (ss.format == SORTED_SINGLE_VALUED) {
        NumericEntry numericEntry = numerics.get(field.name);
        final LongValues values = getNumeric(numericEntry);
        if (numericEntry.format == SPARSE_COMPRESSED) {
            // For the sparse format the random-access wrapper exposes the underlying iterator;
            // every method simply delegates to it.
            SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) values).values;
            return new SortedNumericDocValues() {

                @Override
                public long nextValue() throws IOException {
                    return sparseValues.longValue();
                }

                @Override
                public int docValueCount() {
                    return 1;
                }

                @Override
                public int docID() {
                    return sparseValues.docID();
                }

                @Override
                public int nextDoc() throws IOException {
                    return sparseValues.nextDoc();
                }

                @Override
                public int advance(int target) throws IOException {
                    return sparseValues.advance(target);
                }

                @Override
                public boolean advanceExact(int target) throws IOException {
                    return sparseValues.advanceExact(target);
                }

                @Override
                public long cost() {
                    return sparseValues.cost();
                }
            };
        }
        final Bits docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc);
        return new SortedNumericDocValues() {

            int docID = -1;

            @Override
            public int docID() {
                return docID;
            }

            @Override
            public int nextDoc() {
                // Linear scan for the next document whose bit is set.
                while (true) {
                    docID++;
                    if (docID == maxDoc) {
                        docID = NO_MORE_DOCS;
                        break;
                    }
                    if (docsWithField.get(docID)) {
                        // TODO: use .nextSetBit here, at least!!
                        break;
                    }
                }
                return docID;
            }

            @Override
            public int advance(int target) {
                if (target >= maxDoc) {
                    docID = NO_MORE_DOCS;
                    return docID;
                } else {
                    // Step back one so that nextDoc() starts its scan at target itself.
                    docID = target - 1;
                    return nextDoc();
                }
            }

            @Override
            public boolean advanceExact(int target) throws IOException {
                docID = target;
                return docsWithField.get(docID);
            }

            @Override
            public long cost() {
                // TODO
                return 0;
            }

            @Override
            public int docValueCount() {
                return 1;
            }

            @Override
            public long nextValue() {
                return values.get(docID);
            }
        };
    } else if (ss.format == SORTED_WITH_ADDRESSES) {
        NumericEntry numericEntry = numerics.get(field.name);
        final LongValues values = getNumeric(numericEntry);
        final LongValues ordIndex = getOrdIndexInstance(field, ordIndexes.get(field.name));
        return new SortedNumericDocValues() {

            // [startOffset, endOffset) is the current document's slice of the value stream.
            long startOffset;

            long endOffset;

            int docID = -1;

            // Position of the next value to return from nextValue().
            long upto;

            @Override
            public int docID() {
                return docID;
            }

            @Override
            public int nextDoc() {
                // Skip documents whose slice is empty (no values).
                while (true) {
                    docID++;
                    if (docID == maxDoc) {
                        docID = NO_MORE_DOCS;
                        return docID;
                    }
                    startOffset = ordIndex.get(docID);
                    endOffset = ordIndex.get(docID + 1L);
                    if (endOffset > startOffset) {
                        break;
                    }
                }
                upto = startOffset;
                return docID;
            }

            @Override
            public int advance(int target) {
                if (target >= maxDoc) {
                    docID = NO_MORE_DOCS;
                    return docID;
                } else {
                    // Step back one so that nextDoc() starts its scan at target itself.
                    docID = target - 1;
                    return nextDoc();
                }
            }

            @Override
            public boolean advanceExact(int target) throws IOException {
                docID = target;
                startOffset = ordIndex.get(docID);
                endOffset = ordIndex.get(docID + 1L);
                // Reset the value cursor; without this nextValue() would keep reading from
                // wherever the previously visited document left off.
                upto = startOffset;
                return endOffset > startOffset;
            }

            @Override
            public long cost() {
                // TODO
                return 0;
            }

            @Override
            public int docValueCount() {
                return (int) (endOffset - startOffset);
            }

            @Override
            public long nextValue() {
                return values.get(upto++);
            }
        };
    } else if (ss.format == SORTED_SET_TABLE) {
        NumericEntry entry = ords.get(field.name);
        final LongValues ordinals = getNumeric(entry);
        final long[] table = ss.table;
        final int[] offsets = ss.tableOffsets;
        return new SortedNumericDocValues() {

            // [startOffset, endOffset) is the current document's slice of the shared table.
            int startOffset;

            int endOffset;

            int docID = -1;

            // Position of the next value to return from nextValue().
            int upto;

            @Override
            public int docID() {
                return docID;
            }

            @Override
            public int nextDoc() {
                // Skip documents whose table slice is empty (no values).
                while (true) {
                    docID++;
                    if (docID == maxDoc) {
                        docID = NO_MORE_DOCS;
                        return docID;
                    }
                    int ord = (int) ordinals.get(docID);
                    startOffset = offsets[ord];
                    endOffset = offsets[ord + 1];
                    if (endOffset > startOffset) {
                        break;
                    }
                }
                upto = startOffset;
                return docID;
            }

            @Override
            public int advance(int target) {
                if (target >= maxDoc) {
                    docID = NO_MORE_DOCS;
                    return docID;
                } else {
                    // Step back one so that nextDoc() starts its scan at target itself.
                    docID = target - 1;
                    return nextDoc();
                }
            }

            @Override
            public boolean advanceExact(int target) throws IOException {
                docID = target;
                int ord = (int) ordinals.get(docID);
                startOffset = offsets[ord];
                endOffset = offsets[ord + 1];
                // Reset the value cursor; without this nextValue() would keep reading from
                // wherever the previously visited document left off.
                upto = startOffset;
                return endOffset > startOffset;
            }

            @Override
            public long cost() {
                // TODO
                return 0;
            }

            @Override
            public int docValueCount() {
                return endOffset - startOffset;
            }

            @Override
            public long nextValue() {
                return table[upto++];
            }
        };
    } else {
        throw new AssertionError();
    }
}
Also used : IOException(java.io.IOException) LongValues(org.apache.lucene.util.LongValues) Bits(org.apache.lucene.util.Bits)

Example 14 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class TestLucene54DocValuesFormat method testSparseLongValues.

/**
 * Randomized test of {@code SparseNumericDocValues} and its random-access wrapper.
 *
 * <p>Each iteration builds a strictly increasing array of doc ids with random values, then
 * checks: sequential {@code nextDoc()}, {@code advance()} after a reset, {@code advanceExact()}
 * (including a repeated call on the same target), and random and mostly-forward access
 * through {@code SparseNumericDocValuesRandomAccessWrapper}, which must return the
 * configured missing value for doc ids that have no entry.
 */
public void testSparseLongValues() throws IOException {
    final int rounds = atLeast(5);
    for (int round = 0; round < rounds; ++round) {
        final int count = TestUtil.nextInt(random(), 0, 100);
        final int[] docIds = new int[count];
        final long[] values = new long[count];
        final int maxDoc;
        if (count != 0) {
            // Strictly increasing doc ids with random gaps.
            docIds[0] = random().nextInt(10);
            for (int k = 1; k < docIds.length; ++k) {
                docIds[k] = docIds[k - 1] + 1 + random().nextInt(100);
            }
            maxDoc = docIds[count - 1] + 1 + random().nextInt(10);
        } else {
            maxDoc = 1 + random().nextInt(10);
        }
        for (int k = 0; k < values.length; ++k) {
            values[k] = random().nextLong();
        }
        final long missingValue = random().nextLong();
        final LongValues docIdsValues = new LongValues() {

            @Override
            public long get(long index) {
                return docIds[Math.toIntExact(index)];
            }
        };
        final LongValues valuesValues = new LongValues() {

            @Override
            public long get(long index) {
                return values[Math.toIntExact(index)];
            }
        };
        final SparseNumericDocValues sparse = new SparseNumericDocValues(count, docIdsValues, valuesValues);

        // sequential access
        assertEquals(-1, sparse.docID());
        for (int expectedDoc : docIds) {
            assertEquals(expectedDoc, sparse.nextDoc());
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparse.nextDoc());

        // advance
        for (int attempt = 0; attempt < 2000; ++attempt) {
            final int target = TestUtil.nextInt(random(), 0, maxDoc);
            final int found = Arrays.binarySearch(docIds, target);
            // Index of the first doc id >= target (the insertion point when target is absent).
            final int ceiling = found >= 0 ? found : -(found + 1);
            sparse.reset();
            if (ceiling > 0) {
                // First move onto the doc just before the expected hit.
                final int previous = docIds[ceiling - 1];
                assertEquals(previous, sparse.advance(previous));
            }
            final int expected = ceiling == docIds.length ? DocIdSetIterator.NO_MORE_DOCS : docIds[ceiling];
            assertEquals(expected, sparse.advance(target));
        }

        // advanceExact
        for (int attempt = 0; attempt < 2000; ++attempt) {
            sparse.reset();
            if (random().nextBoolean() && docIds.length > 0) {
                sparse.advance(docIds[TestUtil.nextInt(random(), 0, docIds.length - 1)]);
            }
            final int lowest = Math.max(0, sparse.docID());
            final int target = TestUtil.nextInt(random(), lowest, maxDoc - 1);
            final boolean first = sparse.advanceExact(target);
            final int found = Arrays.binarySearch(docIds, target);
            final boolean present = found >= 0;
            assertEquals(present, first);
            assertEquals(target, sparse.docID());
            // Calling advanceExact again on the same target must give the same answer.
            final boolean second = sparse.advanceExact(target);
            assertEquals(present, second);
            assertEquals(target, sparse.docID());
            final int following = present ? found + 1 : -(found + 1);
            final int expectedNext = following >= docIds.length ? DocIdSetIterator.NO_MORE_DOCS : docIds[following];
            assertEquals(expectedNext, sparse.nextDoc());
        }

        final SparseNumericDocValuesRandomAccessWrapper wrapper = new SparseNumericDocValuesRandomAccessWrapper(sparse, missingValue);

        // random-access
        for (int attempt = 0; attempt < 2000; ++attempt) {
            final int doc = TestUtil.nextInt(random(), 0, maxDoc - 1);
            final int found = Arrays.binarySearch(docIds, doc);
            final long actual = wrapper.get(doc);
            final long expected = found >= 0 ? values[found] : missingValue;
            assertEquals(expected, actual);
        }

        // sequential access (the step may be 0, so the same doc can be read more than once)
        int doc = 0;
        while (doc < maxDoc) {
            final int found = Arrays.binarySearch(docIds, doc);
            final long actual = wrapper.get(doc);
            final long expected = found >= 0 ? values[found] : missingValue;
            assertEquals(expected, actual);
            doc += random().nextInt(3);
        }
    }
}
Also used : LongValues(org.apache.lucene.util.LongValues) SparseNumericDocValuesRandomAccessWrapper(org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseNumericDocValuesRandomAccessWrapper) SparseNumericDocValues(org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseNumericDocValues)

Example 15 with LongValues

use of org.apache.lucene.util.LongValues in project lucene-solr by apache.

the class FacetFieldProcessorByArrayDV method collectDocs.

/**
 * Collects facet data for every index segment of the current domain.
 *
 * <p>Returns immediately when there are no terms or the domain is smaller than the effective
 * mincount. Otherwise it decides once, up front, whether to accumulate per segment, and then
 * for each leaf obtains the single- or multi-valued doc values and dispatches to
 * {@code collectPerSeg}, {@code collectCounts} or the per-leaf {@code collectDocs}.
 *
 * @throws IOException if reading doc values or the domain filter fails
 */
@Override
protected void collectDocs() throws IOException {
    final int domainSize = fcontext.base.size();
    if (nTerms <= 0 || domainSize < effectiveMincount) {
        // TODO: what about allBuckets? missing bucket?
        return;
    }

    // TODO: refactor some of this logic into a base class
    final boolean countOnly = collectAcc == null && allBucketsAcc == null;
    final boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

    // Heuristic: do we expect many hits per bucket?
    // FUTURE: pro-rate for nTerms?
    // FUTURE: better take into account number of values in multi-valued fields. This info is available for indexed fields.
    // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
    // One measurement: 5M doc index, faceting on a single-valued field with almost 1M unique values; global counting
    // became slower than per-segment counting at a domain of 658k docs, where the top 10 buckets had 6-7 matches each
    // (heap docvalues produced by UninvertingReader). Those values were randomly distributed, so the multiplier is
    // rounded up to account for less random real-world data.
    final long domainMultiplier = multiValuedField ? 4L : 2L;
    // +3 to increase test coverage with small tests
    final boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3);

    // When only counts are needed over the full term range and many documents are expected per unique value,
    // collect per segment and map to global ords once at the end, saving redundant seg->global ord mappings.
    // FUTURE: there are probably other non "countOnly" cases where this applies (i.e. those where the docid is not used)
    final boolean canDoPerSeg = countOnly && fullRange;
    // internal - freq.perSeg, when set, overrides the heuristic
    final boolean accumSeg = freq.perSeg != null
        ? (canDoPerSeg && freq.perSeg)
        : (manyHitsPerBucket && canDoPerSeg);

    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    final Filter filter = fcontext.base.getTopFilter();

    for (int leafOrd = 0; leafOrd < leaves.size(); leafOrd++) {
        final LeafReaderContext leaf = leaves.get(leafOrd);
        setNextReaderFirstPhase(leaf);

        // solr docsets already exclude any deleted docs
        final DocIdSet leafDocs = filter.getDocIdSet(leaf, null);
        final DocIdSetIterator disi = leafDocs.iterator();

        SortedDocValues singleDv = null;
        SortedSetDocValues multiDv = null;
        if (!multiValuedField) {
            singleDv = leaf.reader().getSortedDocValues(sf.getName());
            if (singleDv == null) {
                singleDv = DocValues.emptySorted();
            }
        } else {
            // TODO: get sub from multi?
            multiDv = leaf.reader().getSortedSetDocValues(sf.getName());
            if (multiDv == null) {
                multiDv = DocValues.emptySortedSet();
            }
            // singleDv stays null unless multiDv is a wrapped single-valued docvalues.
            if (unwrap_singleValued_multiDv) {
                singleDv = DocValues.unwrapSingleton(multiDv);
            }
        }

        final LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(leafOrd);
        final boolean countsOnly = canDoPerSeg && toGlobal != null;

        if (singleDv != null) {
            if (accumSeg) {
                collectPerSeg(singleDv, disi, toGlobal);
            } else if (countsOnly) {
                collectCounts(singleDv, disi, toGlobal);
            } else {
                collectDocs(singleDv, disi, toGlobal);
            }
        } else {
            if (accumSeg) {
                collectPerSeg(multiDv, disi, toGlobal);
            } else if (countsOnly) {
                collectCounts(multiDv, disi, toGlobal);
            } else {
                collectDocs(multiDv, disi, toGlobal);
            }
        }
    }

    // better GC
    reuse = null;
}
Also used : SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Filter(org.apache.solr.search.Filter) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSet(org.apache.lucene.search.DocIdSet) LongValues(org.apache.lucene.util.LongValues) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Aggregations

LongValues (org.apache.lucene.util.LongValues)31 IOException (java.io.IOException)8 RandomAccessInput (org.apache.lucene.store.RandomAccessInput)8 IndexInput (org.apache.lucene.store.IndexInput)7 BytesRef (org.apache.lucene.util.BytesRef)6 IndexOutput (org.apache.lucene.store.IndexOutput)5 Directory (org.apache.lucene.store.Directory)4 ArrayList (java.util.ArrayList)3 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)3 NumericDocValues (org.apache.lucene.index.NumericDocValues)3 SortedDocValues (org.apache.lucene.index.SortedDocValues)3 Bits (org.apache.lucene.util.Bits)3 MultiDocValues (org.apache.lucene.index.MultiDocValues)2 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)2 DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator)2 ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput)2 DirectWriter (org.apache.lucene.util.packed.DirectWriter)2 FieldFacetStats (org.apache.solr.handler.component.FieldFacetStats)2 IntHashSet (com.carrotsearch.hppc.IntHashSet)1 IntObjectHashMap (com.carrotsearch.hppc.IntObjectHashMap)1