use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class TestPointQueries method verifyLongs.
/**
 * Indexes the given long values as points and cross-checks random range queries, run from
 * several threads, against a brute-force evaluation of the same range.
 *
 * <p>Each value is indexed twice: as a {@code LongPoint} in field "sn_value" and, encoded with
 * {@code NumericUtils.longToSortableBytes}, as a {@code BinaryPoint} in field "ss_value". A
 * randomly chosen percentage of ids is left without any point value ("missing") and a randomly
 * chosen percentage of add operations is followed by a delete; both sets are recorded so the
 * expected outcome can be computed per document.
 *
 * @param values the values to index
 * @param ids optional id for each ordinal of {@code values}; when {@code null} the ordinal itself
 *     is used as the id. Consecutive ordinals carrying the same id are added to the same document.
 * @throws Exception propagated from indexing, reader handling, or joining the query threads
 */
private static void verifyLongs(long[] values, int[] ids) throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig();
// Else we can get O(N^2) merging:
int mbd = iwc.getMaxBufferedDocs();
if (mbd != -1 && mbd < values.length / 100) {
iwc.setMaxBufferedDocs(values.length / 100);
}
iwc.setCodec(getCodec());
Directory dir;
// Large inputs get a directory rooted in a temp dir; smaller ones use the default test directory.
if (values.length > 100000) {
dir = newMaybeVirusCheckingFSDirectory(createTempDir("TestRangeTree"));
} else {
dir = newMaybeVirusCheckingDirectory();
}
// Percent chance (0..99) that an id gets no point value / that an add is followed by a delete.
int missingPct = random().nextInt(100);
int deletedPct = random().nextInt(100);
if (VERBOSE) {
System.out.println(" missingPct=" + missingPct);
System.out.println(" deletedPct=" + deletedPct);
}
// Both bit sets are indexed by id (not by Lucene docID).
BitSet missing = new BitSet();
BitSet deleted = new BitSet();
// The document currently being built; it is only added to the writer once the id changes.
Document doc = null;
int lastID = -1;
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
for (int ord = 0; ord < values.length; ord++) {
int id;
if (ids == null) {
id = ord;
} else {
id = ids[ord];
}
// A change of id starts a new document: decide whether it is "missing", flush the previous
// document, and possibly delete some earlier id.
if (id != lastID) {
if (random().nextInt(100) < missingPct) {
missing.set(id);
if (VERBOSE) {
System.out.println(" missing id=" + id);
}
}
if (doc != null) {
w.addDocument(doc);
if (random().nextInt(100) < deletedPct) {
// Picks a victim in [0, id).
// NOTE(review): nextInt(id) requires id > 0 here, i.e. presumably ids are ascending and
// start at 0 -- confirm against callers.
int idToDelete = random().nextInt(id);
w.deleteDocuments(new Term("id", "" + idToDelete));
deleted.set(idToDelete);
if (VERBOSE) {
System.out.println(" delete id=" + idToDelete);
}
}
}
doc = new Document();
// "id" is indexed as a term (used for deletes) and as numeric doc values (used below to map a
// docID back to its id).
doc.add(newStringField("id", "" + id, Field.Store.NO));
doc.add(new NumericDocValuesField("id", id));
lastID = id;
}
if (missing.get(id) == false) {
// NOTE(review): the value is looked up by id, not by ord, so repeated ids add the same value
// again -- this matches the values[id] lookup in the verification loop below.
doc.add(new LongPoint("sn_value", values[id]));
byte[] bytes = new byte[8];
NumericUtils.longToSortableBytes(values[id], bytes, 0);
doc.add(new BinaryPoint("ss_value", bytes));
}
}
// Flush the last document, which the loop above never adds.
w.addDocument(doc);
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" forceMerge(1)");
}
w.forceMerge(1);
}
final IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r, false);
int numThreads = TestUtil.nextInt(random(), 2, 5);
if (VERBOSE) {
System.out.println("TEST: use " + numThreads + " query threads; searcher=" + s);
}
List<Thread> threads = new ArrayList<>();
final int iters = atLeast(100);
// All query threads block on this latch so that they start querying at the same time.
final CountDownLatch startingGun = new CountDownLatch(1);
// Set by a thread that hit an exception; the other threads stop at their next iteration.
final AtomicBoolean failed = new AtomicBoolean();
for (int i = 0; i < numThreads; i++) {
Thread thread = new Thread() {
@Override
public void run() {
try {
_run();
} catch (Exception e) {
// NOTE(review): fail()/assertEquals() presumably throw AssertionError, which is not an
// Exception and therefore bypasses this handler (failed stays false) -- confirm.
failed.set(true);
throw new RuntimeException(e);
}
}
private void _run() throws Exception {
startingGun.await();
for (int iter = 0; iter < iters && failed.get() == false; iter++) {
// Pick a random inclusive range [lower, upper], swapping the bounds if needed.
Long lower = randomValue();
Long upper = randomValue();
if (upper < lower) {
long x = lower;
lower = upper;
upper = x;
}
Query query;
if (VERBOSE) {
System.out.println("\n" + Thread.currentThread().getName() + ": TEST: iter=" + iter + " value=" + lower + " TO " + upper);
byte[] tmp = new byte[8];
// Both bounds were already unboxed by the comparison above, so these null checks cannot
// fail at this point.
if (lower != null) {
NumericUtils.longToSortableBytes(lower, tmp, 0);
System.out.println(" lower bytes=" + Arrays.toString(tmp));
}
if (upper != null) {
NumericUtils.longToSortableBytes(upper, tmp, 0);
System.out.println(" upper bytes=" + Arrays.toString(tmp));
}
}
// Randomly exercise either the long-typed field or the binary field holding the same values
// as sortable bytes.
if (random().nextBoolean()) {
query = LongPoint.newRangeQuery("sn_value", lower, upper);
} else {
byte[] lowerBytes = new byte[8];
NumericUtils.longToSortableBytes(lower, lowerBytes, 0);
byte[] upperBytes = new byte[8];
NumericUtils.longToSortableBytes(upper, upperBytes, 0);
query = BinaryPoint.newRangeQuery("ss_value", lowerBytes, upperBytes);
}
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": using query: " + query);
}
// Collect every matching document as a top-level docID (segment docBase + segment-local doc).
final BitSet hits = new BitSet();
s.search(query, new SimpleCollector() {
private int docBase;
@Override
public boolean needsScores() {
return false;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase + doc);
}
});
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": hitCount: " + hits.cardinality());
}
// Walk every docID up to maxDoc, map it back to its id through the "id" doc values, and
// compare the query result with the brute-force expectation.
NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
for (int docID = 0; docID < r.maxDoc(); docID++) {
// Every document is expected to carry an "id" doc value, so the iterator must advance
// exactly one docID at a time.
assertEquals(docID, docIDToID.nextDoc());
int id = (int) docIDToID.longValue();
boolean expected = missing.get(id) == false && deleted.get(id) == false && values[id] >= lower && values[id] <= upper;
if (hits.get(docID) != expected) {
// We do exact quantized comparison so the bbox query should never disagree:
fail(Thread.currentThread().getName() + ": iter=" + iter + " id=" + id + " docID=" + docID + " value=" + values[id] + " (range: " + lower + " TO " + upper + ") expected " + expected + " but got: " + hits.get(docID) + " deleted?=" + deleted.get(id) + " query=" + query);
}
}
}
}
};
thread.setName("T" + i);
thread.start();
threads.add(thread);
}
// Release all query threads at once, then wait for each of them to finish.
startingGun.countDown();
for (Thread thread : threads) {
thread.join();
}
IOUtils.close(r, dir);
}
use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class SimpleTextDocValuesWriter method addNumericField.
/**
 * Writes one numeric field as fixed-width decimal text.
 *
 * <p>The output consists of the minimum value, a zero pattern whose width fits the largest delta
 * from that minimum, and then, for every document in {@code [0, numDocs)}, the zero-padded delta
 * on one line followed by a line holding "T" (document has a value) or "F" (it does not).
 * Documents without a value are written with the value 0.
 *
 * @param field the field being written; asserted to be NUMERIC doc values or to have norms
 * @param valuesProducer source of the values; it is asked for the field's values twice, once per
 *     pass
 * @throws IOException if reading the values or writing the output fails
 */
@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  assert fieldSeen(field.name);
  assert field.getDocValuesType() == DocValuesType.NUMERIC || field.hasNorms();
  writeFieldEntry(field, DocValuesType.NUMERIC);

  // Pass 1: determine the value range and how many documents actually have a value.
  long minValue = Long.MAX_VALUE;
  long maxValue = Long.MIN_VALUE;
  int docsWithValue = 0;
  final NumericDocValues firstPass = valuesProducer.getNumeric(field);
  while (firstPass.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    final long current = firstPass.longValue();
    if (current < minValue) {
      minValue = current;
    }
    if (current > maxValue) {
      maxValue = current;
    }
    docsWithValue++;
  }
  if (docsWithValue != numDocs) {
    // Some documents have no value and will be written as 0, so 0 must lie inside the range.
    if (minValue > 0) {
      minValue = 0;
    }
    if (maxValue < 0) {
      maxValue = 0;
    }
  }

  // The minimum goes to the .dat first; every entry below is a delta from it.
  SimpleTextUtil.write(data, MINVALUE);
  SimpleTextUtil.write(data, Long.toString(minValue), scratch);
  SimpleTextUtil.writeNewline(data);

  // Fixed-width "simple text packed ints": one '0' per decimal digit of the largest delta.
  // BigInteger is used because max - min may not fit in a long.
  final BigInteger minAsBig = BigInteger.valueOf(minValue);
  final int digits = BigInteger.valueOf(maxValue).subtract(minAsBig).toString().length();
  final StringBuilder zeros = new StringBuilder();
  while (zeros.length() < digits) {
    zeros.append('0');
  }
  final String patternString = zeros.toString();

  SimpleTextUtil.write(data, PATTERN);
  SimpleTextUtil.write(data, patternString, scratch);
  SimpleTextUtil.writeNewline(data);

  final DecimalFormat encoder = new DecimalFormat(patternString, new DecimalFormatSymbols(Locale.ROOT));

  // Pass 2: emit one (delta, T/F) pair of lines per document, in docID order.
  final NumericDocValues secondPass = valuesProducer.getNumeric(field);
  int numDocsWritten = 0;
  for (int docID = 0; docID < numDocs; docID++) {
    if (secondPass.docID() < docID) {
      secondPass.nextDoc();
      assert secondPass.docID() >= docID;
    }
    final boolean hasValue = secondPass.docID() == docID;
    final long value = hasValue ? secondPass.longValue() : 0;
    assert value >= minValue;

    final String encoded = encoder.format(BigInteger.valueOf(value).subtract(minAsBig));
    assert encoded.length() == patternString.length();
    SimpleTextUtil.write(data, encoded, scratch);
    SimpleTextUtil.writeNewline(data);

    if (hasValue) {
      SimpleTextUtil.write(data, "T", scratch);
    } else {
      SimpleTextUtil.write(data, "F", scratch);
    }
    SimpleTextUtil.writeNewline(data);

    numDocsWritten++;
    assert numDocsWritten <= numDocs;
  }
  assert numDocs == numDocsWritten : "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten;
}
use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class LegacyDocValuesIterables method numericIterable.
/**
 * Exposes the {@link NumericDocValues} of a field as an {@code Iterable<Number>} with exactly one
 * entry per document in {@code [0, maxDoc)}; a document without a value yields {@code null}.
 *
 * <p>Every call to {@code iterator()} pulls a fresh {@link NumericDocValues} from the producer.
 * Any {@link IOException} is rethrown wrapped in a {@link RuntimeException}.
 *
 * @deprecated Consume {@link NumericDocValues} instead.
 */
@Deprecated
public static Iterable<Number> numericIterable(final FieldInfo field, final DocValuesProducer valuesProducer, final int maxDoc) {
  return new Iterable<Number>() {
    @Override
    public Iterator<Number> iterator() {
      final NumericDocValues docValues;
      try {
        docValues = valuesProducer.getNumeric(field);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }

      return new Iterator<Number>() {
        /** The document whose entry the next call to {@code next()} returns. */
        private int nextDocID = 0;

        @Override
        public boolean hasNext() {
          return nextDocID < maxDoc;
        }

        @Override
        public Number next() {
          final int target = nextDocID++;
          try {
            // Step the underlying iterator only when it is still behind the requested document.
            if (docValues.docID() < target) {
              docValues.nextDoc();
            }
            if (docValues.docID() != target) {
              // The iterator is already past this document, so it has no value.
              return null;
            }
            return docValues.longValue();
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }
      };
    }
  };
}
use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class LegacyDocValuesIterables method normsIterable.
/**
 * Exposes the norms of a field as an {@code Iterable<Number>} with exactly one entry per document
 * in {@code [0, maxDoc)}; a document without a norm yields {@code 0}.
 *
 * <p>Every call to {@code iterator()} pulls a fresh {@link NumericDocValues} from the producer.
 * Any {@link IOException} is rethrown wrapped in a {@link RuntimeException}.
 *
 * @deprecated Consume {@link NumericDocValues} instead.
 */
@Deprecated
public static Iterable<Number> normsIterable(final FieldInfo field, final NormsProducer normsProducer, final int maxDoc) {
  return new Iterable<Number>() {
    @Override
    public Iterator<Number> iterator() {
      final NumericDocValues norms;
      try {
        norms = normsProducer.getNorms(field);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }

      return new Iterator<Number>() {
        /** The document whose entry the next call to {@code next()} returns. */
        private int nextDocID = 0;

        @Override
        public boolean hasNext() {
          return nextDocID < maxDoc;
        }

        @Override
        public Number next() {
          final int target = nextDocID++;
          try {
            // Step the underlying iterator only when it is still behind the requested document.
            if (norms.docID() < target) {
              norms.nextDoc();
            }
            if (norms.docID() == target) {
              return norms.longValue();
            }
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
          // Unlike NumericDocValues, norms used to return 0 for missing values.
          // Kept as a plain int literal (not a ternary with the long above) so it boxes to Integer.
          return 0;
        }
      };
    }
  };
}
use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class SortedNumericSelector method wrap.
/**
 * Presents a multi-valued {@link SortedNumericDocValues} as a single-valued
 * {@link NumericDocValues}, choosing one value per document with {@code selector} and decoding it
 * according to {@code numericType}.
 *
 * @param sortedNumeric the multi-valued source
 * @param selector which value of each document to expose (MIN or MAX); not consulted when the
 *     source unwraps to a singleton
 * @param numericType must be INT, LONG, FLOAT or DOUBLE; for FLOAT and DOUBLE the selected value
 *     is passed through {@code NumericUtils.sortableFloatBits} / {@code sortableDoubleBits}
 * @return the single-valued view
 * @throws IllegalArgumentException if {@code numericType} is not one of the four numeric types
 */
public static NumericDocValues wrap(SortedNumericDocValues sortedNumeric, Type selector, SortField.Type numericType) {
  final boolean isNumeric = numericType == SortField.Type.INT
      || numericType == SortField.Type.LONG
      || numericType == SortField.Type.FLOAT
      || numericType == SortField.Type.DOUBLE;
  if (isNumeric == false) {
    throw new IllegalArgumentException("numericType must be a numeric type");
  }

  // If the field is single-valued in practice (merely indexed as multi-valued), use the
  // underlying single-valued doc values directly; this shortcut is safe for every selector.
  NumericDocValues selected = DocValues.unwrapSingleton(sortedNumeric);
  if (selected == null) {
    switch (selector) {
      case MIN:
        selected = new MinValue(sortedNumeric);
        break;
      case MAX:
        selected = new MaxValue(sortedNumeric);
        break;
      default:
        throw new AssertionError();
    }
  }

  // Undo the NumericUtils sortable encoding for floating-point types; INT and LONG pass through.
  if (numericType == SortField.Type.FLOAT) {
    return new FilterNumericDocValues(selected) {
      @Override
      public long longValue() throws IOException {
        return NumericUtils.sortableFloatBits((int) in.longValue());
      }
    };
  }
  if (numericType == SortField.Type.DOUBLE) {
    return new FilterNumericDocValues(selected) {
      @Override
      public long longValue() throws IOException {
        return NumericUtils.sortableDoubleBits(in.longValue());
      }
    };
  }
  return selected;
}
Aggregations