use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.
the class Lucene70DocValuesConsumer method writeValues.
private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
int numDocsWithValue = 0;
MinMaxTracker minMax = new MinMaxTracker();
MinMaxTracker blockMinMax = new MinMaxTracker();
long gcd = 0;
Set<Long> uniqueValues = new HashSet<>();
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
for (int i = 0, count = values.docValueCount(); i < count; ++i) {
long v = values.nextValue();
if (gcd != 1) {
if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) {
// in that case v - minValue might overflow and make the GCD computation return
// wrong results. Since these extreme values are unlikely, we just discard
// GCD computation for them
gcd = 1;
} else if (minMax.numValues != 0) {
// minValue needs to be set first
gcd = MathUtil.gcd(gcd, v - minMax.min);
}
}
minMax.update(v);
blockMinMax.update(v);
if (blockMinMax.numValues == NUMERIC_BLOCK_SIZE) {
blockMinMax.nextBlock();
}
if (uniqueValues != null && uniqueValues.add(v) && uniqueValues.size() > 256) {
uniqueValues = null;
}
}
numDocsWithValue++;
}
minMax.finish();
blockMinMax.finish();
final long numValues = minMax.numValues;
long min = minMax.min;
final long max = minMax.max;
assert blockMinMax.spaceInBits <= minMax.spaceInBits;
if (numDocsWithValue == 0) {
meta.writeLong(-2);
meta.writeLong(0L);
} else if (numDocsWithValue == maxDoc) {
meta.writeLong(-1);
meta.writeLong(0L);
} else {
long offset = data.getFilePointer();
meta.writeLong(offset);
values = valuesProducer.getSortedNumeric(field);
IndexedDISI.writeBitSet(values, data);
meta.writeLong(data.getFilePointer() - offset);
}
meta.writeLong(numValues);
final int numBitsPerValue;
boolean doBlocks = false;
Map<Long, Integer> encode = null;
if (min >= max) {
numBitsPerValue = 0;
meta.writeInt(-1);
} else {
if (uniqueValues != null && uniqueValues.size() > 1 && DirectWriter.unsignedBitsRequired(uniqueValues.size() - 1) < DirectWriter.unsignedBitsRequired((max - min) / gcd)) {
numBitsPerValue = DirectWriter.unsignedBitsRequired(uniqueValues.size() - 1);
final Long[] sortedUniqueValues = uniqueValues.toArray(new Long[0]);
Arrays.sort(sortedUniqueValues);
meta.writeInt(sortedUniqueValues.length);
for (Long v : sortedUniqueValues) {
meta.writeLong(v);
}
encode = new HashMap<>();
for (int i = 0; i < sortedUniqueValues.length; ++i) {
encode.put(sortedUniqueValues[i], i);
}
min = 0;
gcd = 1;
} else {
uniqueValues = null;
// we do blocks if that appears to save 10+% storage
doBlocks = minMax.spaceInBits > 0 && (double) blockMinMax.spaceInBits / minMax.spaceInBits <= 0.9;
if (doBlocks) {
numBitsPerValue = 0xFF;
meta.writeInt(-2 - NUMERIC_BLOCK_SHIFT);
} else {
numBitsPerValue = DirectWriter.unsignedBitsRequired((max - min) / gcd);
if (gcd == 1 && min > 0 && DirectWriter.unsignedBitsRequired(max) == DirectWriter.unsignedBitsRequired(max - min)) {
min = 0;
}
meta.writeInt(-1);
}
}
}
meta.writeByte((byte) numBitsPerValue);
meta.writeLong(min);
meta.writeLong(gcd);
long startOffset = data.getFilePointer();
meta.writeLong(startOffset);
if (doBlocks) {
writeValuesMultipleBlocks(valuesProducer.getSortedNumeric(field), gcd);
} else if (numBitsPerValue != 0) {
writeValuesSingleBlock(valuesProducer.getSortedNumeric(field), numValues, numBitsPerValue, min, gcd, encode);
}
meta.writeLong(data.getFilePointer() - startOffset);
return new long[] { numDocsWithValue, numValues };
}
use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.
the class Lucene70DocValuesProducer method getSortedNumeric.
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
SortedNumericEntry entry = sortedNumerics.get(field.name);
if (entry.numValues == entry.numDocsWithField) {
return DocValues.singleton(getNumeric(entry));
}
final RandomAccessInput addressesInput = data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesInput);
final LongValues values = getNumericValues(entry);
if (entry.docsWithFieldOffset == -1) {
// dense
return new SortedNumericDocValues() {
int doc = -1;
long start, end;
int count;
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
@Override
public int docID() {
return doc;
}
@Override
public long cost() {
return maxDoc;
}
@Override
public int advance(int target) throws IOException {
if (target >= maxDoc) {
return doc = NO_MORE_DOCS;
}
start = addresses.get(target);
end = addresses.get(target + 1L);
count = (int) (end - start);
return doc = target;
}
@Override
public boolean advanceExact(int target) throws IOException {
start = addresses.get(target);
end = addresses.get(target + 1L);
count = (int) (end - start);
doc = target;
return true;
}
@Override
public long nextValue() throws IOException {
return values.get(start++);
}
@Override
public int docValueCount() {
return count;
}
};
} else {
// sparse
final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField);
return new SortedNumericDocValues() {
boolean set;
long start, end;
int count;
@Override
public int nextDoc() throws IOException {
set = false;
return disi.nextDoc();
}
@Override
public int docID() {
return disi.docID();
}
@Override
public long cost() {
return disi.cost();
}
@Override
public int advance(int target) throws IOException {
set = false;
return disi.advance(target);
}
@Override
public boolean advanceExact(int target) throws IOException {
set = false;
return disi.advanceExact(target);
}
@Override
public long nextValue() throws IOException {
set();
return values.get(start++);
}
@Override
public int docValueCount() {
set();
return count;
}
private void set() {
if (set == false) {
final int index = disi.index();
start = addresses.get(index);
end = addresses.get(index + 1L);
count = (int) (end - start);
set = true;
}
}
};
}
}
use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.
the class Lucene70DocValuesConsumer method addSortedNumericField.
@Override
public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
meta.writeInt(field.number);
meta.writeByte(Lucene70DocValuesFormat.SORTED_NUMERIC);
long[] stats = writeValues(field, valuesProducer);
int numDocsWithField = Math.toIntExact(stats[0]);
long numValues = stats[1];
assert numValues >= numDocsWithField;
meta.writeInt(numDocsWithField);
if (numValues > numDocsWithField) {
long start = data.getFilePointer();
meta.writeLong(start);
meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1L, DIRECT_MONOTONIC_BLOCK_SHIFT);
long addr = 0;
addressesWriter.add(addr);
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
addr += values.docValueCount();
addressesWriter.add(addr);
}
addressesWriter.finish();
meta.writeLong(data.getFilePointer() - start);
}
}
use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.
the class LatLonDocValuesDistanceQuery method createWeight.
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
private final GeoEncodingUtils.DistancePredicate distancePredicate = GeoEncodingUtils.createDistancePredicate(latitude, longitude, radiusMeters);
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
final SortedNumericDocValues values = context.reader().getSortedNumericDocValues(field);
if (values == null) {
return null;
}
final TwoPhaseIterator iterator = new TwoPhaseIterator(values) {
@Override
public boolean matches() throws IOException {
for (int i = 0, count = values.docValueCount(); i < count; ++i) {
final long value = values.nextValue();
final int lat = (int) (value >>> 32);
final int lon = (int) (value & 0xFFFFFFFF);
if (distancePredicate.test(lat, lon)) {
return true;
}
}
return false;
}
@Override
public float matchCost() {
// TODO: what should it be?
return 100f;
}
};
return new ConstantScoreScorer(this, boost, iterator);
}
};
}
Aggregations