Search in sources:

Example 41 with SortedNumericDocValues

use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.

the class Lucene70DocValuesConsumer method writeValues.

/**
 * Writes the values of a sorted-numeric field to {@code data} and the header describing them to {@code meta}.
 *
 * <p>A first pass over the values gathers the statistics that select the encoding: overall and per-block
 * min/max (tracked by two {@code MinMaxTracker} instances), a running GCD of {@code v - minMax.min}, and the
 * set of distinct values for as long as it holds at most 256 entries. {@code valuesProducer.getSortedNumeric(field)}
 * is then called again for every section that needs another pass over the values.
 *
 * @param field the field whose values are written
 * @param valuesProducer source of the values; queried once per pass
 * @return a two-element array: {@code [0]} is the number of documents returned by the iterator,
 *         {@code [1]} is {@code minMax.numValues} (presumably the total number of values - confirm
 *         against {@code MinMaxTracker})
 * @throws IOException if reading the values or writing to {@code meta}/{@code data} fails
 */
private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
    int numDocsWithValue = 0;
    MinMaxTracker minMax = new MinMaxTracker();
    MinMaxTracker blockMinMax = new MinMaxTracker();
    // running GCD of (v - minMax.min); stays 0 until a delta has been folded in
    long gcd = 0;
    // distinct values seen so far; replaced by null once more than 256 have been collected
    Set<Long> uniqueValues = new HashSet<>();
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        for (int i = 0, count = values.docValueCount(); i < count; ++i) {
            long v = values.nextValue();
            // once the GCD has dropped to 1 there is nothing left to refine
            if (gcd != 1) {
                if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                } else if (minMax.numValues != 0) {
                    // minValue needs to be set first
                    gcd = MathUtil.gcd(gcd, v - minMax.min);
                }
            }
            minMax.update(v);
            blockMinMax.update(v);
            // NUMERIC_BLOCK_SIZE values accumulated: tell the block tracker to move on
            // (presumably it closes the current block's statistics - confirm in MinMaxTracker)
            if (blockMinMax.numValues == NUMERIC_BLOCK_SIZE) {
                blockMinMax.nextBlock();
            }
            // give up tracking distinct values as soon as a new value pushes the set past 256 entries
            if (uniqueValues != null && uniqueValues.add(v) && uniqueValues.size() > 256) {
                uniqueValues = null;
            }
        }
        // counted once per document returned by the iterator
        numDocsWithValue++;
    }
    minMax.finish();
    blockMinMax.finish();
    final long numValues = minMax.numValues;
    long min = minMax.min;
    final long max = minMax.max;
    assert blockMinMax.spaceInBits <= minMax.spaceInBits;
    // Header part 1: which documents have a value, written as two longs.
    if (numDocsWithValue == 0) {
        // no document has a value: marker -2, length 0
        meta.writeLong(-2);
        meta.writeLong(0L);
    } else if (numDocsWithValue == maxDoc) {
        // every document has a value: marker -1, length 0
        meta.writeLong(-1);
        meta.writeLong(0L);
    } else {
        // some documents have a value: write the doc-id set to data and record its offset and length
        long offset = data.getFilePointer();
        meta.writeLong(offset);
        values = valuesProducer.getSortedNumeric(field);
        IndexedDISI.writeBitSet(values, data);
        meta.writeLong(data.getFilePointer() - offset);
    }
    meta.writeLong(numValues);
    // Header part 2: an int that is the table size (table encoding), -1 (no table, no blocks)
    // or -2 - NUMERIC_BLOCK_SHIFT (multiple blocks); the table entries follow it when present.
    final int numBitsPerValue;
    boolean doBlocks = false;
    Map<Long, Integer> encode = null;
    if (min >= max) {
        // no range to encode: zero bits per value, and no value data is written below
        numBitsPerValue = 0;
        meta.writeInt(-1);
    } else {
        // table encoding is chosen when indexes into the distinct values need fewer bits than the GCD-scaled deltas
        if (uniqueValues != null && uniqueValues.size() > 1 && DirectWriter.unsignedBitsRequired(uniqueValues.size() - 1) < DirectWriter.unsignedBitsRequired((max - min) / gcd)) {
            numBitsPerValue = DirectWriter.unsignedBitsRequired(uniqueValues.size() - 1);
            final Long[] sortedUniqueValues = uniqueValues.toArray(new Long[0]);
            Arrays.sort(sortedUniqueValues);
            meta.writeInt(sortedUniqueValues.length);
            for (Long v : sortedUniqueValues) {
                meta.writeLong(v);
            }
            // map each distinct value to its position in the sorted table
            encode = new HashMap<>();
            for (int i = 0; i < sortedUniqueValues.length; ++i) {
                encode.put(sortedUniqueValues[i], i);
            }
            // the header carries min = 0 and gcd = 1 when the table is used
            min = 0;
            gcd = 1;
        } else {
            uniqueValues = null;
            // we do blocks if that appears to save 10+% storage
            doBlocks = minMax.spaceInBits > 0 && (double) blockMinMax.spaceInBits / minMax.spaceInBits <= 0.9;
            if (doBlocks) {
                // 0xFF is written as the bits-per-value byte in the multiple-blocks case
                numBitsPerValue = 0xFF;
                meta.writeInt(-2 - NUMERIC_BLOCK_SHIFT);
            } else {
                numBitsPerValue = DirectWriter.unsignedBitsRequired((max - min) / gcd);
                // when subtracting min saves no bits (and there is no GCD scaling), store values without an offset
                if (gcd == 1 && min > 0 && DirectWriter.unsignedBitsRequired(max) == DirectWriter.unsignedBitsRequired(max - min)) {
                    min = 0;
                }
                meta.writeInt(-1);
            }
        }
    }
    // Header part 3: bits per value, min, gcd, then offset and length of the value data.
    meta.writeByte((byte) numBitsPerValue);
    meta.writeLong(min);
    meta.writeLong(gcd);
    long startOffset = data.getFilePointer();
    meta.writeLong(startOffset);
    if (doBlocks) {
        writeValuesMultipleBlocks(valuesProducer.getSortedNumeric(field), gcd);
    } else if (numBitsPerValue != 0) {
        writeValuesSingleBlock(valuesProducer.getSortedNumeric(field), numValues, numBitsPerValue, min, gcd, encode);
    }
    // length is 0 when neither branch above wrote anything
    meta.writeLong(data.getFilePointer() - startOffset);
    return new long[] { numDocsWithValue, numValues };
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) HashSet(java.util.HashSet)

Example 42 with SortedNumericDocValues

use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.

the class Lucene70DocValuesProducer method getSortedNumeric.

/**
 * Returns an iterator over the sorted-numeric values recorded for {@code field}.
 *
 * <p>Three shapes are possible, chosen from the field's {@code SortedNumericEntry}:
 * a singleton wrapper around the numeric view when the entry has as many values as documents,
 * a dense iterator when {@code docsWithFieldOffset} is {@code -1}, and otherwise a sparse
 * iterator driven by an {@code IndexedDISI}.
 *
 * @param field the field to read
 * @return a new iterator positioned before the first document
 * @throws IOException if the underlying data cannot be read
 */
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
    final SortedNumericEntry entry = sortedNumerics.get(field.name);
    if (entry.numValues == entry.numDocsWithField) {
        // as many values as documents: expose the numeric view as a singleton
        return DocValues.singleton(getNumeric(entry));
    }

    final RandomAccessInput addressSlice = data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
    final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressSlice);
    final LongValues values = getNumericValues(entry);

    if (entry.docsWithFieldOffset != -1) {
        // sparse: only the documents listed in the IndexedDISI carry values
        final IndexedDISI docsWithField = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField);
        return new SortedNumericDocValues() {

            /** Whether {@code cursor} and {@code valueCount} reflect the current document. */
            private boolean positioned;

            private long cursor;

            private int valueCount;

            /** Lazily resolves the value range of the current document from its index in the doc-id set. */
            private void ensurePositioned() {
                if (positioned) {
                    return;
                }
                final int ordinal = docsWithField.index();
                cursor = addresses.get(ordinal);
                final long limit = addresses.get(ordinal + 1L);
                valueCount = (int) (limit - cursor);
                positioned = true;
            }

            @Override
            public int docID() {
                return docsWithField.docID();
            }

            @Override
            public long cost() {
                return docsWithField.cost();
            }

            @Override
            public int nextDoc() throws IOException {
                positioned = false;
                return docsWithField.nextDoc();
            }

            @Override
            public int advance(int target) throws IOException {
                positioned = false;
                return docsWithField.advance(target);
            }

            @Override
            public boolean advanceExact(int target) throws IOException {
                positioned = false;
                return docsWithField.advanceExact(target);
            }

            @Override
            public int docValueCount() {
                ensurePositioned();
                return valueCount;
            }

            @Override
            public long nextValue() throws IOException {
                ensurePositioned();
                final long value = values.get(cursor);
                cursor++;
                return value;
            }
        };
    }

    // dense: the document id itself indexes into the addresses
    return new SortedNumericDocValues() {

        private int currentDoc = -1;

        private long cursor;

        private int valueCount;

        /** Moves to {@code target} and resolves the range of its values. */
        private void position(int target) {
            cursor = addresses.get(target);
            final long limit = addresses.get(target + 1L);
            valueCount = (int) (limit - cursor);
            currentDoc = target;
        }

        @Override
        public int docID() {
            return currentDoc;
        }

        @Override
        public long cost() {
            return maxDoc;
        }

        @Override
        public int nextDoc() throws IOException {
            return advance(currentDoc + 1);
        }

        @Override
        public int advance(int target) throws IOException {
            if (target < maxDoc) {
                position(target);
                return currentDoc;
            }
            currentDoc = NO_MORE_DOCS;
            return currentDoc;
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
            position(target);
            return true;
        }

        @Override
        public int docValueCount() {
            return valueCount;
        }

        @Override
        public long nextValue() throws IOException {
            final long value = values.get(cursor);
            cursor++;
            return value;
        }
    };
}
Also used : RandomAccessInput(org.apache.lucene.store.RandomAccessInput) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) LongValues(org.apache.lucene.util.LongValues)

Example 43 with SortedNumericDocValues

use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.

the class Lucene70DocValuesConsumer method addSortedNumericField.

/**
 * Writes a sorted-numeric field: the field header, the values (via {@code writeValues}) and, when at least
 * one document has more than one value, the per-document value addresses.
 *
 * @param field the field being written
 * @param valuesProducer source of the field's values; queried again for the addresses pass
 * @throws IOException if writing to {@code meta} or {@code data} fails
 */
@Override
public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    meta.writeInt(field.number);
    meta.writeByte(Lucene70DocValuesFormat.SORTED_NUMERIC);

    final long[] counts = writeValues(field, valuesProducer);
    final int numDocsWithField = Math.toIntExact(counts[0]);
    final long numValues = counts[1];
    assert numValues >= numDocsWithField;

    meta.writeInt(numDocsWithField);
    if (numValues <= numDocsWithField) {
        // no addresses section is written in this case
        return;
    }

    final long addressesStart = data.getFilePointer();
    meta.writeLong(addressesStart);
    meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);

    // one address per document with the field, plus the leading 0
    final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1L, DIRECT_MONOTONIC_BLOCK_SHIFT);
    long runningAddress = 0;
    addressesWriter.add(runningAddress);

    final SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
    while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        // each entry is the cumulative number of values up to and including this document
        runningAddress += values.docValueCount();
        addressesWriter.add(runningAddress);
    }
    addressesWriter.finish();

    meta.writeLong(data.getFilePointer() - addressesStart);
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) DirectMonotonicWriter(org.apache.lucene.util.packed.DirectMonotonicWriter)

Example 44 with SortedNumericDocValues

use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.

the class LatLonDocValuesDistanceQuery method createWeight.

/**
 * Creates a constant-score weight whose scorer matches a document when at least one of its
 * sorted-numeric values for {@code field} is accepted by the distance predicate built from
 * {@code latitude}, {@code longitude} and {@code radiusMeters}.
 *
 * @param searcher the searcher the weight is created for (not read here)
 * @param needsScores whether scores are needed (not read here)
 * @param boost the boost handed to the weight and to the scorer
 * @return the weight; its {@code scorer} returns {@code null} when the reader has no
 *         sorted-numeric doc values for {@code field}
 * @throws IOException declared by the overridden method
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        private final GeoEncodingUtils.DistancePredicate distancePredicate = GeoEncodingUtils.createDistancePredicate(latitude, longitude, radiusMeters);

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final SortedNumericDocValues values = context.reader().getSortedNumericDocValues(field);
            if (values == null) {
                return null;
            }
            final TwoPhaseIterator iterator = new TwoPhaseIterator(values) {

                @Override
                public boolean matches() throws IOException {
                    for (int i = 0, count = values.docValueCount(); i < count; ++i) {
                        final long value = values.nextValue();
                        // upper 32 bits go to lat, lower 32 bits to lon
                        final int lat = (int) (value >>> 32);
                        // The narrowing cast alone keeps the low 32 bits. The former
                        // "& 0xFFFFFFFF" used an int literal (-1) that is widened to -1L,
                        // so it never masked anything.
                        final int lon = (int) value;
                        if (distancePredicate.test(lat, lon)) {
                            return true;
                        }
                    }
                    return false;
                }

                @Override
                public float matchCost() {
                    // TODO: what should it be?
                    return 100f;
                }
            };
            return new ConstantScoreScorer(this, boost, iterator);
        }
    };
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) TwoPhaseIterator(org.apache.lucene.search.TwoPhaseIterator) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight)

Aggregations

SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues) 44; LeafReaderContext (org.apache.lucene.index.LeafReaderContext) 23; NumericDocValues (org.apache.lucene.index.NumericDocValues) 13; LeafReader (org.apache.lucene.index.LeafReader) 10; BytesRef (org.apache.lucene.util.BytesRef) 7; Document (org.apache.lucene.document.Document) 6; SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField) 6; Directory (org.apache.lucene.store.Directory) 6; IndexService (org.elasticsearch.index.IndexService) 6; Searcher (org.elasticsearch.index.engine.Engine.Searcher) 6; QueryShardContext (org.elasticsearch.index.query.QueryShardContext) 6; IOException (java.io.IOException) 5; BinaryDocValues (org.apache.lucene.index.BinaryDocValues) 5; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter) 5; SortedDocValues (org.apache.lucene.index.SortedDocValues) 5; SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues) 5; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer) 4; DirectoryReader (org.apache.lucene.index.DirectoryReader) 4; IndexWriter (org.apache.lucene.index.IndexWriter) 4; BitSet (org.apache.lucene.util.BitSet) 4