Search in sources :

Example 1 with DirectMonotonicWriter

use of org.apache.lucene.util.packed.DirectMonotonicWriter in project lucene-solr by apache.

the class Lucene54DocValuesConsumer method addOrdIndex.

/**
 * Writes addressing information as a MONOTONIC_COMPRESSED numeric entry.
 *
 * <p>The stored sequence is the running sum of {@code values}, preceded by an initial 0,
 * which is why {@code maxDoc + 1} entries are announced to the monotonic writer.
 *
 * @param field field whose number is recorded in the metadata stream
 * @param values per-entry increments that are accumulated into addresses
 * @throws IOException declared by the underlying output calls
 */
private void addOrdIndex(FieldInfo field, Iterable<Number> values) throws IOException {
    // Entry header in the metadata stream.
    meta.writeVInt(field.number);
    meta.writeByte(Lucene54DocValuesFormat.NUMERIC);
    meta.writeVInt(MONOTONIC_COMPRESSED);
    meta.writeLong(-1L);

    // Where the address data begins in the data stream.
    final long addressesStart = data.getFilePointer();
    meta.writeLong(addressesStart);
    meta.writeVLong(maxDoc);
    meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);

    final DirectMonotonicWriter addresses =
        DirectMonotonicWriter.getInstance(meta, data, maxDoc + 1, DIRECT_MONOTONIC_BLOCK_SHIFT);
    long runningTotal = 0;
    addresses.add(runningTotal);
    for (Number increment : values) {
        runningTotal = runningTotal + increment.longValue();
        addresses.add(runningTotal);
    }
    addresses.finish();

    // Where the address data ends in the data stream.
    final long addressesEnd = data.getFilePointer();
    meta.writeLong(addressesEnd);
}
Also used : DirectMonotonicWriter(org.apache.lucene.util.packed.DirectMonotonicWriter)

Example 2 with DirectMonotonicWriter

use of org.apache.lucene.util.packed.DirectMonotonicWriter in project lucene-solr by apache.

the class Lucene54DocValuesConsumer method addBinaryField.

/**
 * Writes a binary field: the concatenated value bytes go to the data stream, and the
 * description needed to locate them goes to the metadata stream.
 *
 * <p>A {@code null} element counts as a missing value of length 0. When all values share one
 * length the field is tagged BINARY_FIXED_UNCOMPRESSED and no per-value addresses are written;
 * otherwise it is tagged BINARY_VARIABLE_UNCOMPRESSED and the cumulative end offsets are
 * written through a {@link DirectMonotonicWriter}.
 *
 * @param field field whose number is recorded in the metadata stream
 * @param values the values to write; iterated more than once
 * @throws IOException declared by the underlying output calls
 */
private void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
    meta.writeVInt(field.number);
    meta.writeByte(Lucene54DocValuesFormat.BINARY);

    // First pass: append the raw bytes while gathering length and missing statistics.
    final long bytesStart = data.getFilePointer();
    int shortest = Integer.MAX_VALUE;
    int longest = Integer.MIN_VALUE;
    long total = 0;
    long absent = 0;
    for (BytesRef value : values) {
        int len = 0;
        if (value == null) {
            absent++;
        } else {
            len = value.length;
            data.writeBytes(value.bytes, value.offset, value.length);
        }
        shortest = Math.min(shortest, len);
        longest = Math.max(longest, len);
        total++;
    }

    final boolean fixedLength = shortest == longest;
    if (fixedLength) {
        meta.writeVInt(BINARY_FIXED_UNCOMPRESSED);
    } else {
        meta.writeVInt(BINARY_VARIABLE_UNCOMPRESSED);
    }

    // Missing-value marker: a sentinel when nothing or everything is missing, else a bitset.
    if (absent == 0) {
        meta.writeLong(ALL_LIVE);
    } else if (absent == total) {
        meta.writeLong(ALL_MISSING);
    } else {
        final long bitsetStart = data.getFilePointer();
        meta.writeLong(bitsetStart);
        writeMissingBitset(values);
    }

    meta.writeVInt(shortest);
    meta.writeVInt(longest);
    meta.writeVLong(total);
    meta.writeLong(bytesStart);

    if (fixedLength) {
        // Fixed-length values need no address table.
        return;
    }

    // Variable-length values: record the cumulative end offset of every value.
    final long addressesStart = data.getFilePointer();
    meta.writeLong(addressesStart);
    meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
    final DirectMonotonicWriter addresses =
        DirectMonotonicWriter.getInstance(meta, data, total + 1, DIRECT_MONOTONIC_BLOCK_SHIFT);
    long offset = 0;
    addresses.add(offset);
    for (BytesRef value : values) {
        offset += (value == null) ? 0 : value.length;
        addresses.add(offset);
    }
    addresses.finish();
    meta.writeLong(data.getFilePointer());
}
Also used : DirectMonotonicWriter(org.apache.lucene.util.packed.DirectMonotonicWriter) BytesRef(org.apache.lucene.util.BytesRef)

Example 3 with DirectMonotonicWriter

use of org.apache.lucene.util.packed.DirectMonotonicWriter in project lucene-solr by apache.

the class Lucene54DocValuesConsumer method writeSparseMissingBitset.

/**
 * Writes the IDs of the documents that have a value, as a monotonic sequence.
 *
 * <p>The position of an element in {@code values} is used as its document ID. Whether an
 * element counts as "has a value" depends on {@code numberType}: for VALUE the element must
 * be non-null, for ORDINAL its long value must differ from -1.
 *
 * @param values one element per document, in document order
 * @param numberType how to decide whether an element represents a present value
 * @param numDocsWithValue number of IDs announced to the monotonic writer
 * @return the number of elements iterated
 * @throws IOException declared by the underlying output calls
 */
long writeSparseMissingBitset(Iterable<Number> values, NumberType numberType, long numDocsWithValue) throws IOException {
    meta.writeVLong(numDocsWithValue);
    meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
    final DirectMonotonicWriter presentDocs =
        DirectMonotonicWriter.getInstance(meta, data, numDocsWithValue, DIRECT_MONOTONIC_BLOCK_SHIFT);

    long position = 0;
    for (Number element : values) {
        boolean present;
        switch (numberType) {
            case VALUE:
                present = element != null;
                break;
            case ORDINAL:
                present = element.longValue() != -1L;
                break;
            default:
                throw new AssertionError();
        }
        if (present) {
            presentDocs.add(position);
        }
        position++;
    }
    presentDocs.finish();
    return position;
}
Also used : DirectMonotonicWriter(org.apache.lucene.util.packed.DirectMonotonicWriter)

Example 4 with DirectMonotonicWriter

use of org.apache.lucene.util.packed.DirectMonotonicWriter in project lucene-solr by apache.

the class Lucene70DocValuesConsumer method addTermsDict.

/**
 * Writes the terms dictionary of {@code values} to the data stream and describes it in the
 * metadata stream.
 *
 * <p>Terms are read in the order returned by {@code values.termsEnum()} and grouped into
 * blocks. The first term of each block is written in full; every other term is written as a
 * shared-prefix length plus the differing suffix, relative to the previous term. The start
 * offset of each block is collected in an in-memory buffer and appended after the term data.
 *
 * @param values source of the value count and the terms enumeration
 * @throws IOException declared by the underlying output calls
 */
private void addTermsDict(SortedSetDocValues values) throws IOException {
    final long size = values.getValueCount();
    meta.writeVLong(size);
    meta.writeInt(Lucene70DocValuesFormat.TERMS_DICT_BLOCK_SHIFT);
    // Block addresses are buffered here and copied into data once all terms are written.
    RAMOutputStream addressBuffer = new RAMOutputStream();
    meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
    // Presumably a ceiling division of size by the block size; this assumes
    // TERMS_DICT_BLOCK_MASK == (1 << TERMS_DICT_BLOCK_SHIFT) - 1 — TODO confirm.
    long numBlocks = (size + Lucene70DocValuesFormat.TERMS_DICT_BLOCK_MASK) >>> Lucene70DocValuesFormat.TERMS_DICT_BLOCK_SHIFT;
    DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(meta, addressBuffer, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT);
    BytesRefBuilder previous = new BytesRefBuilder();
    long ord = 0;
    // Block addresses are stored relative to this position.
    long start = data.getFilePointer();
    int maxLength = 0;
    TermsEnum iterator = values.termsEnum();
    for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
        if ((ord & Lucene70DocValuesFormat.TERMS_DICT_BLOCK_MASK) == 0) {
            // First term of a block: record the block's relative offset, then write the
            // term uncompressed (vInt length followed by its bytes).
            writer.add(data.getFilePointer() - start);
            data.writeVInt(term.length);
            data.writeBytes(term.bytes, term.offset, term.length);
        } else {
            // Any other term: encode it against the previous term.
            final int prefixLength = StringHelper.bytesDifference(previous.get(), term);
            final int suffixLength = term.length - prefixLength;
            // terms are unique
            assert suffixLength > 0;
            // Header byte: low 4 bits hold min(prefixLength, 15), high 4 bits hold
            // min(suffixLength - 1, 15).
            data.writeByte((byte) (Math.min(prefixLength, 15) | (Math.min(15, suffixLength - 1) << 4)));
            // A saturated prefix nibble is followed by the remainder as a vInt.
            if (prefixLength >= 15) {
                data.writeVInt(prefixLength - 15);
            }
            // Likewise for the suffix: the nibble stores suffixLength - 1, so it saturates at 16.
            if (suffixLength >= 16) {
                data.writeVInt(suffixLength - 16);
            }
            // Only the bytes after the shared prefix are written.
            data.writeBytes(term.bytes, term.offset + prefixLength, term.length - prefixLength);
        }
        maxLength = Math.max(maxLength, term.length);
        previous.copyBytes(term);
        ++ord;
    }
    writer.finish();
    // Metadata for the term data: longest term length, start offset, byte length.
    meta.writeInt(maxLength);
    meta.writeLong(start);
    meta.writeLong(data.getFilePointer() - start);
    // Append the buffered block addresses to data and record their start offset and byte length.
    start = data.getFilePointer();
    addressBuffer.writeTo(data);
    meta.writeLong(start);
    meta.writeLong(data.getFilePointer() - start);
    // Now write the reverse terms index
    writeTermsIndex(values);
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) RAMOutputStream(org.apache.lucene.store.RAMOutputStream) DirectMonotonicWriter(org.apache.lucene.util.packed.DirectMonotonicWriter) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 5 with DirectMonotonicWriter

use of org.apache.lucene.util.packed.DirectMonotonicWriter in project lucene-solr by apache.

the class Lucene70DocValuesConsumer method addSortedSetField.

/**
 * Writes a sorted-set field.
 *
 * <p>A first pass counts the documents that have the field and the total number of ordinals.
 * If the two counts are equal, a 0 marker byte is written and the field is handed to
 * {@code doAddSortedField} through a single-valued view. Otherwise a 1 marker byte is written,
 * followed by: which documents have the field, the packed ordinals, the per-document
 * cumulative ordinal counts, and finally the terms dictionary.
 *
 * @param field the field being written
 * @param valuesProducer source of the field's values; {@code getSortedSet} is called once per pass
 * @throws IOException declared by the underlying output calls
 */
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    meta.writeInt(field.number);
    meta.writeByte(Lucene70DocValuesFormat.SORTED_SET);
    SortedSetDocValues values = valuesProducer.getSortedSet(field);
    // Pass 1: count documents with the field and the total number of ordinals.
    int numDocsWithField = 0;
    long numOrds = 0;
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        numDocsWithField++;
        for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
            numOrds++;
        }
    }
    if (numDocsWithField == numOrds) {
        // As many ordinals as documents: write marker 0 and delegate to the sorted-field
        // writer, exposing the values through SortedSetSelector with Type.MIN.
        meta.writeByte((byte) 0);
        doAddSortedField(field, new EmptyDocValuesProducer() {

            @Override
            public SortedDocValues getSorted(FieldInfo field) throws IOException {
                return SortedSetSelector.wrap(valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN);
            }
        });
        return;
    }
    // Marker 1: the multi-valued layout follows.
    meta.writeByte((byte) 1);
    assert numDocsWithField != 0;
    if (numDocsWithField == maxDoc) {
        // Every document has the field: write -1 and 0 in place of a bitset offset and length.
        meta.writeLong(-1);
        meta.writeLong(0L);
    } else {
        // Otherwise write the set of documents that have the field, with its offset and length.
        long offset = data.getFilePointer();
        meta.writeLong(offset);
        // A new instance is requested before each pass — presumably because the
        // iterator is forward-only; confirm against SortedSetDocValues.
        values = valuesProducer.getSortedSet(field);
        IndexedDISI.writeBitSet(values, data);
        meta.writeLong(data.getFilePointer() - offset);
    }
    // Bit width is sized for the largest ordinal, getValueCount() - 1.
    int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1);
    meta.writeByte((byte) numberOfBitsPerOrd);
    long start = data.getFilePointer();
    meta.writeLong(start);
    // Pass 2: write every ordinal of every document, in iteration order.
    DirectWriter writer = DirectWriter.getInstance(data, numOrds, numberOfBitsPerOrd);
    values = valuesProducer.getSortedSet(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
            writer.add(ord);
        }
    }
    writer.finish();
    meta.writeLong(data.getFilePointer() - start);
    meta.writeInt(numDocsWithField);
    start = data.getFilePointer();
    meta.writeLong(start);
    meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
    // Pass 3: write cumulative ordinal counts, starting with 0 and adding one entry per
    // document that has the field (numDocsWithField + 1 entries in total).
    final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1, DIRECT_MONOTONIC_BLOCK_SHIFT);
    long addr = 0;
    addressesWriter.add(addr);
    values = valuesProducer.getSortedSet(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        // The first ordinal is consumed and counted unconditionally; the loop counts the rest.
        values.nextOrd();
        addr++;
        while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
            addr++;
        }
        addressesWriter.add(addr);
    }
    addressesWriter.finish();
    meta.writeLong(data.getFilePointer() - start);
    addTermsDict(values);
}
Also used : SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) EmptyDocValuesProducer(org.apache.lucene.index.EmptyDocValuesProducer) DirectWriter(org.apache.lucene.util.packed.DirectWriter) IOException(java.io.IOException) DirectMonotonicWriter(org.apache.lucene.util.packed.DirectMonotonicWriter) FieldInfo(org.apache.lucene.index.FieldInfo) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Aggregations

DirectMonotonicWriter (org.apache.lucene.util.packed.DirectMonotonicWriter): 8, BytesRef (org.apache.lucene.util.BytesRef): 4, TermsEnum (org.apache.lucene.index.TermsEnum): 2, RAMOutputStream (org.apache.lucene.store.RAMOutputStream): 2, BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 2, IOException (java.io.IOException): 1, BinaryDocValues (org.apache.lucene.index.BinaryDocValues): 1, EmptyDocValuesProducer (org.apache.lucene.index.EmptyDocValuesProducer): 1, FieldInfo (org.apache.lucene.index.FieldInfo): 1, SortedDocValues (org.apache.lucene.index.SortedDocValues): 1, SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues): 1, SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues): 1, DirectWriter (org.apache.lucene.util.packed.DirectWriter): 1