Search in sources :

Example 1 with DirectWriter

use of org.apache.lucene.util.packed.DirectWriter in project lucene-solr by apache.

the class Lucene70DocValuesConsumer method writeValuesSingleBlock.

/**
 * Writes every value of every document in {@code values} into a single packed block on
 * {@code data}.
 *
 * <p>Each value is either looked up in {@code encode} (when a table is supplied) or stored as
 * {@code (value - min) / gcd}.
 *
 * @param values          iterator over the per-document values to encode; consumed by this call
 * @param numValues       number of values announced to the packed writer
 * @param numBitsPerValue bit width handed to the packed writer
 * @param min             value subtracted from each input when no table is used
 * @param gcd             divisor applied after subtracting {@code min} when no table is used
 * @param encode          value-to-code table, or {@code null} to use min/gcd encoding
 * @throws IOException if the underlying writer fails
 */
private void writeValuesSingleBlock(SortedNumericDocValues values, long numValues, int numBitsPerValue, long min, long gcd, Map<Long, Integer> encode) throws IOException {
    final DirectWriter packed = DirectWriter.getInstance(data, numValues, numBitsPerValue);
    // The table is either present for the whole block or absent for the whole block.
    final boolean useTable = encode != null;
    while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        // docValueCount() is read once per document, then counted down.
        int remaining = values.docValueCount();
        while (remaining > 0) {
            final long raw = values.nextValue();
            final long encoded;
            if (useTable) {
                encoded = encode.get(raw);
            } else {
                encoded = (raw - min) / gcd;
            }
            packed.add(encoded);
            remaining--;
        }
    }
    packed.finish();
}
Also used : DirectWriter(org.apache.lucene.util.packed.DirectWriter)

Example 2 with DirectWriter

use of org.apache.lucene.util.packed.DirectWriter in project lucene-solr by apache.

the class Lucene70DocValuesConsumer method writeBlock.

/**
 * Encodes the first {@code length} entries of {@code values} as one block on {@code data}.
 *
 * <p>Layout written to {@code data}:
 * <ul>
 *   <li>all entries equal: a {@code 0} byte followed by the common value;</li>
 *   <li>otherwise: the bit width as a byte, the block minimum, the byte length of the packed
 *       payload, then the payload itself, where each entry is stored as
 *       {@code (value - min) / gcd}.</li>
 * </ul>
 *
 * @param values block values; only indices {@code [0, length)} are read
 * @param length number of valid entries, must be positive
 * @param gcd    divisor applied to each {@code value - min}
 * @param buffer scratch output that is reset and reused to stage the packed payload
 * @throws IOException if writing fails
 */
private void writeBlock(long[] values, int length, long gcd, GrowableByteArrayDataOutput buffer) throws IOException {
    assert length > 0;
    long lowest = values[0];
    long highest = values[0];
    int idx = 1;
    while (idx < length) {
        final long current = values[idx];
        // Checked against the running minimum as it stood before this entry is folded in.
        assert Math.floorMod(values[idx] - lowest, gcd) == 0;
        if (current < lowest) {
            lowest = current;
        }
        if (current > highest) {
            highest = current;
        }
        idx++;
    }

    if (lowest == highest) {
        // Constant block: a zero bit width followed by the single value.
        data.writeByte((byte) 0);
        data.writeLong(lowest);
        return;
    }

    final int bitsPerValue = DirectWriter.unsignedBitsRequired(highest - lowest);
    buffer.reset();
    assert buffer.getPosition() == 0;
    // Stage the packed payload in the scratch buffer so its length can be written before it.
    final DirectWriter packed = DirectWriter.getInstance(buffer, length, bitsPerValue);
    for (int i = 0; i < length; i++) {
        packed.add((values[i] - lowest) / gcd);
    }
    packed.finish();

    data.writeByte((byte) bitsPerValue);
    data.writeLong(lowest);
    data.writeInt(buffer.getPosition());
    data.writeBytes(buffer.getBytes(), buffer.getPosition());
}
Also used : DirectWriter(org.apache.lucene.util.packed.DirectWriter)

Example 3 with DirectWriter

use of org.apache.lucene.util.packed.DirectWriter in project lucene-solr by apache.

the class Lucene70DocValuesConsumer method doAddSortedField.

/**
 * Writes a sorted doc-values field: which documents have a value, the per-document ordinals,
 * and finally the terms dictionary.
 *
 * <p>Metadata written to {@code meta}, in order:
 * <ol>
 *   <li>docs-with-field location: {@code (-2, 0)} when no document has a value,
 *       {@code (-1, 0)} when {@code numDocsWithField == maxDoc}, otherwise the offset and
 *       length of a bit set written to {@code data};</li>
 *   <li>the number of documents with a value;</li>
 *   <li>bits per ordinal, then the offset and length of the packed ordinals on {@code data}
 *       ({@code 0, 0, 0} when the field has at most one distinct value).</li>
 * </ol>
 *
 * @param field          the field being written
 * @param valuesProducer source of the values; a fresh iterator is requested for every pass
 * @throws IOException if writing fails
 */
private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    // Pass 1: count the documents that carry a value.
    SortedDocValues values = valuesProducer.getSorted(field);
    int numDocsWithField = 0;
    while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        numDocsWithField++;
    }

    if (numDocsWithField != 0 && numDocsWithField != maxDoc) {
        // Some, but not all, documents have a value: persist an explicit bit set.
        final long bitsetStart = data.getFilePointer();
        meta.writeLong(bitsetStart);
        values = valuesProducer.getSorted(field);
        IndexedDISI.writeBitSet(values, data);
        meta.writeLong(data.getFilePointer() - bitsetStart);
    } else {
        // Sentinel offsets: -2 when no document has a value, -1 when every document has one.
        meta.writeLong(numDocsWithField == 0 ? -2L : -1L);
        meta.writeLong(0L);
    }
    meta.writeInt(numDocsWithField);

    if (values.getValueCount() > 1) {
        final int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1);
        meta.writeByte((byte) numberOfBitsPerOrd);
        final long ordsStart = data.getFilePointer();
        meta.writeLong(ordsStart);
        final DirectWriter ordsWriter = DirectWriter.getInstance(data, numDocsWithField, numberOfBitsPerOrd);
        // Pass over a fresh iterator and pack one ordinal per document.
        values = valuesProducer.getSorted(field);
        while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            ordsWriter.add(values.ordValue());
        }
        ordsWriter.finish();
        meta.writeLong(data.getFilePointer() - ordsStart);
    } else {
        // At most one distinct value: no ordinals are written.
        meta.writeByte((byte) 0);
        meta.writeLong(0L);
        meta.writeLong(0L);
    }

    addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
}
Also used : DirectWriter(org.apache.lucene.util.packed.DirectWriter) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 4 with DirectWriter

use of org.apache.lucene.util.packed.DirectWriter in project lucene-solr by apache.

the class Lucene54DocValuesConsumer method addNumericField.

/**
 * Writes one numeric (or ordinal) field: gathers statistics over {@code values}, chooses a
 * compression format, then writes the metadata to {@code meta} and the encoded values to
 * {@code data}.
 *
 * <p>Statistics pass: for {@code NumberType.VALUE} it tracks count, min, max, a running GCD of
 * {@code v - minValue}, the number of {@code null} (missing) entries, the number of zeros
 * (missing entries count as zero) and up to 256 distinct values. For any other number type it
 * tracks count, min, max and the number of entries equal to {@code -1}.
 *
 * <p>Format choice, first match wins: {@code CONST_COMPRESSED}, {@code SPARSE_COMPRESSED},
 * {@code TABLE_COMPRESSED}, {@code GCD_COMPRESSED} (only if it needs fewer bits than plain
 * delta), {@code DELTA_COMPRESSED}.
 *
 * <p>Metadata order: field number, the {@code NUMERIC} type byte, format, missing-values
 * information, current {@code data} file pointer, value count, format-specific header, and
 * finally the {@code data} file pointer after the values.
 *
 * @param field      the field being written
 * @param values     one entry per document; iterated several times. {@code null} entries are
 *                   only tolerated for {@code NumberType.VALUE}
 * @param numberType whether the entries are plain values or ordinals
 * @throws IOException if writing fails
 */
void addNumericField(FieldInfo field, Iterable<Number> values, NumberType numberType) throws IOException {
    long count = 0;
    long minValue = Long.MAX_VALUE;
    long maxValue = Long.MIN_VALUE;
    // 0 = nothing accumulated yet; 1 = GCD compression ruled out (see the format choice below)
    long gcd = 0;
    long missingCount = 0;
    long zeroCount = 0;
    // TODO: more efficient?
    // Distinct values seen so far; stays null for ordinals and is reset to null once it
    // exceeds 256 entries.
    HashSet<Long> uniqueValues = null;
    long missingOrdCount = 0;
    if (numberType == NumberType.VALUE) {
        uniqueValues = new HashSet<>();
        for (Number nv : values) {
            final long v;
            if (nv == null) {
                // a missing entry is treated as 0 and counted as both missing and zero
                v = 0;
                missingCount++;
                zeroCount++;
            } else {
                v = nv.longValue();
                if (v == 0) {
                    zeroCount++;
                }
            }
            if (gcd != 1) {
                if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                } else if (count != 0) {
                    // minValue needs to be set first
                    gcd = MathUtil.gcd(gcd, v - minValue);
                }
            }
            minValue = Math.min(minValue, v);
            maxValue = Math.max(maxValue, v);
            if (uniqueValues != null) {
                if (uniqueValues.add(v)) {
                    // give up on table/constant detection past 256 distinct values
                    if (uniqueValues.size() > 256) {
                        uniqueValues = null;
                    }
                }
            }
            ++count;
        }
    } else {
        // Non-VALUE path: entries are dereferenced directly, so null entries are not supported
        // here; an entry of -1 is counted as a missing ordinal.
        for (Number nv : values) {
            long v = nv.longValue();
            if (v == -1L) {
                missingOrdCount++;
            }
            minValue = Math.min(minValue, v);
            maxValue = Math.max(maxValue, v);
            ++count;
        }
    }
    final long delta = maxValue - minValue;
    final int deltaBitsRequired = DirectWriter.unsignedBitsRequired(delta);
    // Integer.MAX_VALUE when no set of distinct values is available
    final int tableBitsRequired = uniqueValues == null ? Integer.MAX_VALUE : DirectWriter.bitsRequired(uniqueValues.size() - 1);
    // 1% of docs or less have a value
    final boolean sparse;
    switch(numberType) {
        case VALUE:
            sparse = (double) missingCount / count >= 0.99;
            break;
        case ORDINAL:
            sparse = (double) missingOrdCount / count >= 0.99;
            break;
        default:
            throw new AssertionError();
    }
    final int format;
    // zeroCount == missingCount means every zero came from a missing entry, i.e. the two
    // distinct values are "missing" (recorded as 0) and one real constant.
    if (uniqueValues != null && count <= Integer.MAX_VALUE && (uniqueValues.size() == 1 || (uniqueValues.size() == 2 && missingCount > 0 && zeroCount == missingCount))) {
        // either one unique value C or two unique values: "missing" and C
        format = CONST_COMPRESSED;
    } else if (sparse && count >= 1024) {
        // require at least 1024 docs to avoid flipping back and forth when doing NRT search
        format = SPARSE_COMPRESSED;
    } else if (uniqueValues != null && tableBitsRequired < deltaBitsRequired) {
        format = TABLE_COMPRESSED;
    } else if (gcd != 0 && gcd != 1) {
        // use GCD compression only when it actually saves bits over plain delta encoding
        final long gcdDelta = (maxValue - minValue) / gcd;
        final long gcdBitsRequired = DirectWriter.unsignedBitsRequired(gcdDelta);
        format = gcdBitsRequired < deltaBitsRequired ? GCD_COMPRESSED : DELTA_COMPRESSED;
    } else {
        format = DELTA_COMPRESSED;
    }
    meta.writeVInt(field.number);
    meta.writeByte(Lucene54DocValuesFormat.NUMERIC);
    meta.writeVInt(format);
    // Missing-values information: a sparse bitset, one of the ALL_LIVE / ALL_MISSING
    // constants, or the data offset of a missing bitset.
    if (format == SPARSE_COMPRESSED) {
        meta.writeLong(data.getFilePointer());
        final long numDocsWithValue;
        switch(numberType) {
            case VALUE:
                numDocsWithValue = count - missingCount;
                break;
            case ORDINAL:
                numDocsWithValue = count - missingOrdCount;
                break;
            default:
                throw new AssertionError();
        }
        final long maxDoc = writeSparseMissingBitset(values, numberType, numDocsWithValue);
        assert maxDoc == count;
    } else if (missingCount == 0) {
        meta.writeLong(ALL_LIVE);
    } else if (missingCount == count) {
        meta.writeLong(ALL_MISSING);
    } else {
        meta.writeLong(data.getFilePointer());
        writeMissingBitset(values);
    }
    // data position at which this field's encoded values begin
    meta.writeLong(data.getFilePointer());
    meta.writeVLong(count);
    switch(format) {
        case CONST_COMPRESSED:
            // write the constant (nonzero value in the n=2 case, singleton value otherwise)
            // With two values {0, C}: if C is negative it is the minimum, otherwise the maximum.
            meta.writeLong(minValue < 0 ? Collections.min(uniqueValues) : Collections.max(uniqueValues));
            break;
        case GCD_COMPRESSED:
            // header: minimum, gcd, bit width; each value is stored as (value - minValue) / gcd
            meta.writeLong(minValue);
            meta.writeLong(gcd);
            final long maxDelta = (maxValue - minValue) / gcd;
            final int bits = DirectWriter.unsignedBitsRequired(maxDelta);
            meta.writeVInt(bits);
            final DirectWriter quotientWriter = DirectWriter.getInstance(data, count, bits);
            for (Number nv : values) {
                long value = nv == null ? 0 : nv.longValue();
                quotientWriter.add((value - minValue) / gcd);
            }
            quotientWriter.finish();
            break;
        case DELTA_COMPRESSED:
            // delta < 0 can only result from maxValue - minValue overflowing; in that case
            // values are stored without subtracting a minimum.
            final long minDelta = delta < 0 ? 0 : minValue;
            meta.writeLong(minDelta);
            meta.writeVInt(deltaBitsRequired);
            final DirectWriter writer = DirectWriter.getInstance(data, count, deltaBitsRequired);
            for (Number nv : values) {
                long v = nv == null ? 0 : nv.longValue();
                writer.add(v - minDelta);
            }
            writer.finish();
            break;
        case TABLE_COMPRESSED:
            // header: sorted table of distinct values, then bit width; each value is stored
            // as its index in the sorted table
            final Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
            Arrays.sort(decode);
            final HashMap<Long, Integer> encode = new HashMap<>();
            meta.writeVInt(decode.length);
            for (int i = 0; i < decode.length; i++) {
                meta.writeLong(decode[i]);
                encode.put(decode[i], i);
            }
            meta.writeVInt(tableBitsRequired);
            final DirectWriter ordsWriter = DirectWriter.getInstance(data, count, tableBitsRequired);
            for (Number nv : values) {
                ordsWriter.add(encode.get(nv == null ? 0 : nv.longValue()));
            }
            ordsWriter.finish();
            break;
        case SPARSE_COMPRESSED:
            // Writes a number-type byte (0 for VALUE, 1 for ORDINAL), then re-enters this
            // method with a view of the input that skips missing entries.
            final Iterable<Number> filteredMissingValues;
            switch(numberType) {
                case VALUE:
                    meta.writeByte((byte) 0);
                    filteredMissingValues = new Iterable<Number>() {

                        @Override
                        public Iterator<Number> iterator() {
                            // drop null (missing) entries
                            return StreamSupport.stream(values.spliterator(), false).filter(value -> value != null).iterator();
                        }
                    };
                    break;
                case ORDINAL:
                    meta.writeByte((byte) 1);
                    filteredMissingValues = new Iterable<Number>() {

                        @Override
                        public Iterator<Number> iterator() {
                            // drop entries equal to -1 (missing ordinals)
                            return StreamSupport.stream(values.spliterator(), false).filter(value -> value.longValue() != -1L).iterator();
                        }
                    };
                    break;
                default:
                    throw new AssertionError();
            }
            // Write non-missing values as a numeric field
            addNumericField(field, filteredMissingValues, numberType);
            break;
        default:
            throw new AssertionError();
    }
    // data position just past this field's encoded values
    meta.writeLong(data.getFilePointer());
}
Also used : HashMap(java.util.HashMap) Iterator(java.util.Iterator) DirectWriter(org.apache.lucene.util.packed.DirectWriter)

Example 5 with DirectWriter

use of org.apache.lucene.util.packed.DirectWriter in project lucene-solr by apache.

the class Lucene70DocValuesConsumer method addSortedSetField.

/**
 * Writes a sorted-set doc-values field.
 *
 * <p>After the field number and the {@code SORTED_SET} type byte, a first pass counts the
 * documents with a value and the total number of ordinals. When the two counts are equal the
 * field is written through {@link #doAddSortedField} (flag byte {@code 0}) using the minimum
 * ordinal of each document. Otherwise (flag byte {@code 1}) this method writes, in order: the
 * docs-with-field information, the packed ordinals, the number of documents with a value, the
 * monotonic per-document start addresses into the ordinal stream, and the terms dictionary.
 *
 * @param field          the field being written
 * @param valuesProducer source of the values; a fresh iterator is requested for every pass
 * @throws IOException if writing fails
 */
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    meta.writeInt(field.number);
    meta.writeByte(Lucene70DocValuesFormat.SORTED_SET);

    // Pass 1: count documents with a value and the total number of ordinals.
    SortedSetDocValues values = valuesProducer.getSortedSet(field);
    int numDocsWithField = 0;
    long numOrds = 0;
    while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        ++numDocsWithField;
        while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
            ++numOrds;
        }
    }

    final boolean singleValued = numDocsWithField == numOrds;
    meta.writeByte((byte) (singleValued ? 0 : 1));
    if (singleValued) {
        // One ordinal per document: delegate to the sorted-field writer.
        final DocValuesProducer minSelecting = new EmptyDocValuesProducer() {

            @Override
            public SortedDocValues getSorted(FieldInfo field) throws IOException {
                return SortedSetSelector.wrap(valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN);
            }
        };
        doAddSortedField(field, minSelecting);
        return;
    }

    assert numDocsWithField != 0;
    if (numDocsWithField != maxDoc) {
        // Only some documents have a value: persist an explicit bit set.
        final long bitsetStart = data.getFilePointer();
        meta.writeLong(bitsetStart);
        values = valuesProducer.getSortedSet(field);
        IndexedDISI.writeBitSet(values, data);
        meta.writeLong(data.getFilePointer() - bitsetStart);
    } else {
        // Every document has a value: sentinel offset -1 with zero length.
        meta.writeLong(-1);
        meta.writeLong(0L);
    }

    // Pass 2: pack every ordinal of every document, in iteration order.
    final int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1);
    meta.writeByte((byte) numberOfBitsPerOrd);
    final long ordsStart = data.getFilePointer();
    meta.writeLong(ordsStart);
    final DirectWriter ordsWriter = DirectWriter.getInstance(data, numOrds, numberOfBitsPerOrd);
    values = valuesProducer.getSortedSet(field);
    while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        long ord;
        while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            ordsWriter.add(ord);
        }
    }
    ordsWriter.finish();
    meta.writeLong(data.getFilePointer() - ordsStart);

    meta.writeInt(numDocsWithField);

    // Pass 3: cumulative ordinal counts, one entry per document plus a leading zero.
    final long addressesStart = data.getFilePointer();
    meta.writeLong(addressesStart);
    meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
    final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1, DIRECT_MONOTONIC_BLOCK_SHIFT);
    long runningTotal = 0;
    addressesWriter.add(runningTotal);
    values = valuesProducer.getSortedSet(field);
    while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        // The first ordinal of a document is consumed and counted unconditionally.
        values.nextOrd();
        do {
            runningTotal++;
        } while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS);
        addressesWriter.add(runningTotal);
    }
    addressesWriter.finish();
    meta.writeLong(data.getFilePointer() - addressesStart);

    addTermsDict(values);
}
Also used : SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) DirectWriter(org.apache.lucene.util.packed.DirectWriter) IOException(java.io.IOException) DirectMonotonicWriter(org.apache.lucene.util.packed.DirectMonotonicWriter) FieldInfo(org.apache.lucene.index.FieldInfo) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Aggregations

DirectWriter (org.apache.lucene.util.packed.DirectWriter): 8, IndexOutput (org.apache.lucene.store.IndexOutput): 3, SortedDocValues (org.apache.lucene.index.SortedDocValues): 2, Directory (org.apache.lucene.store.Directory): 2, IndexInput (org.apache.lucene.store.IndexInput): 2, LongValues (org.apache.lucene.util.LongValues): 2, IOException (java.io.IOException): 1, HashMap (java.util.HashMap): 1, Iterator (java.util.Iterator): 1, EmptyDocValuesProducer (org.apache.lucene.index.EmptyDocValuesProducer): 1, FieldInfo (org.apache.lucene.index.FieldInfo): 1, SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues): 1, DirectMonotonicWriter (org.apache.lucene.util.packed.DirectMonotonicWriter): 1