use of org.apache.lucene.util.packed.DirectMonotonicWriter in project lucene-solr by apache.
the class Lucene54DocValuesConsumer method addOrdIndex.
// writes addressing information as MONOTONIC_COMPRESSED integer
private void addOrdIndex(FieldInfo field, Iterable<Number> values) throws IOException {
meta.writeVInt(field.number);
meta.writeByte(Lucene54DocValuesFormat.NUMERIC);
meta.writeVInt(MONOTONIC_COMPRESSED);
meta.writeLong(-1L);
meta.writeLong(data.getFilePointer());
meta.writeVLong(maxDoc);
meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(meta, data, maxDoc + 1, DIRECT_MONOTONIC_BLOCK_SHIFT);
long addr = 0;
writer.add(addr);
for (Number v : values) {
addr += v.longValue();
writer.add(addr);
}
writer.finish();
meta.writeLong(data.getFilePointer());
}
use of org.apache.lucene.util.packed.DirectMonotonicWriter in project lucene-solr by apache.
the class Lucene54DocValuesConsumer method addBinaryField.
private void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
// write the byte[] data
meta.writeVInt(field.number);
meta.writeByte(Lucene54DocValuesFormat.BINARY);
int minLength = Integer.MAX_VALUE;
int maxLength = Integer.MIN_VALUE;
final long startFP = data.getFilePointer();
long count = 0;
long missingCount = 0;
for (BytesRef v : values) {
final int length;
if (v == null) {
length = 0;
missingCount++;
} else {
length = v.length;
}
minLength = Math.min(minLength, length);
maxLength = Math.max(maxLength, length);
if (v != null) {
data.writeBytes(v.bytes, v.offset, v.length);
}
count++;
}
meta.writeVInt(minLength == maxLength ? BINARY_FIXED_UNCOMPRESSED : BINARY_VARIABLE_UNCOMPRESSED);
if (missingCount == 0) {
meta.writeLong(ALL_LIVE);
} else if (missingCount == count) {
meta.writeLong(ALL_MISSING);
} else {
meta.writeLong(data.getFilePointer());
writeMissingBitset(values);
}
meta.writeVInt(minLength);
meta.writeVInt(maxLength);
meta.writeVLong(count);
meta.writeLong(startFP);
// otherwise, we need to record the length fields...
if (minLength != maxLength) {
meta.writeLong(data.getFilePointer());
meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(meta, data, count + 1, DIRECT_MONOTONIC_BLOCK_SHIFT);
long addr = 0;
writer.add(addr);
for (BytesRef v : values) {
if (v != null) {
addr += v.length;
}
writer.add(addr);
}
writer.finish();
meta.writeLong(data.getFilePointer());
}
}
use of org.apache.lucene.util.packed.DirectMonotonicWriter in project lucene-solr by apache.
the class Lucene54DocValuesConsumer method writeSparseMissingBitset.
long writeSparseMissingBitset(Iterable<Number> values, NumberType numberType, long numDocsWithValue) throws IOException {
meta.writeVLong(numDocsWithValue);
// Write doc IDs that have a value
meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
final DirectMonotonicWriter docIdsWriter = DirectMonotonicWriter.getInstance(meta, data, numDocsWithValue, DIRECT_MONOTONIC_BLOCK_SHIFT);
long docID = 0;
for (Number nv : values) {
switch(numberType) {
case VALUE:
if (nv != null) {
docIdsWriter.add(docID);
}
break;
case ORDINAL:
if (nv.longValue() != -1L) {
docIdsWriter.add(docID);
}
break;
default:
throw new AssertionError();
}
docID++;
}
docIdsWriter.finish();
return docID;
}
use of org.apache.lucene.util.packed.DirectMonotonicWriter in project lucene-solr by apache.
the class Lucene70DocValuesConsumer method addTermsDict.
private void addTermsDict(SortedSetDocValues values) throws IOException {
final long size = values.getValueCount();
meta.writeVLong(size);
meta.writeInt(Lucene70DocValuesFormat.TERMS_DICT_BLOCK_SHIFT);
RAMOutputStream addressBuffer = new RAMOutputStream();
meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
long numBlocks = (size + Lucene70DocValuesFormat.TERMS_DICT_BLOCK_MASK) >>> Lucene70DocValuesFormat.TERMS_DICT_BLOCK_SHIFT;
DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(meta, addressBuffer, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT);
BytesRefBuilder previous = new BytesRefBuilder();
long ord = 0;
long start = data.getFilePointer();
int maxLength = 0;
TermsEnum iterator = values.termsEnum();
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
if ((ord & Lucene70DocValuesFormat.TERMS_DICT_BLOCK_MASK) == 0) {
writer.add(data.getFilePointer() - start);
data.writeVInt(term.length);
data.writeBytes(term.bytes, term.offset, term.length);
} else {
final int prefixLength = StringHelper.bytesDifference(previous.get(), term);
final int suffixLength = term.length - prefixLength;
// terms are unique
assert suffixLength > 0;
data.writeByte((byte) (Math.min(prefixLength, 15) | (Math.min(15, suffixLength - 1) << 4)));
if (prefixLength >= 15) {
data.writeVInt(prefixLength - 15);
}
if (suffixLength >= 16) {
data.writeVInt(suffixLength - 16);
}
data.writeBytes(term.bytes, term.offset + prefixLength, term.length - prefixLength);
}
maxLength = Math.max(maxLength, term.length);
previous.copyBytes(term);
++ord;
}
writer.finish();
meta.writeInt(maxLength);
meta.writeLong(start);
meta.writeLong(data.getFilePointer() - start);
start = data.getFilePointer();
addressBuffer.writeTo(data);
meta.writeLong(start);
meta.writeLong(data.getFilePointer() - start);
// Now write the reverse terms index
writeTermsIndex(values);
}
use of org.apache.lucene.util.packed.DirectMonotonicWriter in project lucene-solr by apache.
the class Lucene70DocValuesConsumer method addSortedSetField.
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
meta.writeInt(field.number);
meta.writeByte(Lucene70DocValuesFormat.SORTED_SET);
SortedSetDocValues values = valuesProducer.getSortedSet(field);
int numDocsWithField = 0;
long numOrds = 0;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
numDocsWithField++;
for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
numOrds++;
}
}
if (numDocsWithField == numOrds) {
meta.writeByte((byte) 0);
doAddSortedField(field, new EmptyDocValuesProducer() {
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
return SortedSetSelector.wrap(valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN);
}
});
return;
}
meta.writeByte((byte) 1);
assert numDocsWithField != 0;
if (numDocsWithField == maxDoc) {
meta.writeLong(-1);
meta.writeLong(0L);
} else {
long offset = data.getFilePointer();
meta.writeLong(offset);
values = valuesProducer.getSortedSet(field);
IndexedDISI.writeBitSet(values, data);
meta.writeLong(data.getFilePointer() - offset);
}
int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1);
meta.writeByte((byte) numberOfBitsPerOrd);
long start = data.getFilePointer();
meta.writeLong(start);
DirectWriter writer = DirectWriter.getInstance(data, numOrds, numberOfBitsPerOrd);
values = valuesProducer.getSortedSet(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
writer.add(ord);
}
}
writer.finish();
meta.writeLong(data.getFilePointer() - start);
meta.writeInt(numDocsWithField);
start = data.getFilePointer();
meta.writeLong(start);
meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1, DIRECT_MONOTONIC_BLOCK_SHIFT);
long addr = 0;
addressesWriter.add(addr);
values = valuesProducer.getSortedSet(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
values.nextOrd();
addr++;
while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
addr++;
}
addressesWriter.add(addr);
}
addressesWriter.finish();
meta.writeLong(data.getFilePointer() - start);
addTermsDict(values);
}
Aggregations