Use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
In the class TestLucene70DocValuesFormat, the method doTestSparseDocValuesVsStoredFields:
private void doTestSparseDocValuesVsStoredFields() throws Exception {
  final long[] values = new long[TestUtil.nextInt(random(), 1, 500)];
  for (int i = 0; i < values.length; ++i) {
    values[i] = random().nextLong();
  }
  Directory dir = newFSDirectory(createTempDir());
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMergeScheduler(new SerialMergeScheduler());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  // sparse compression is only enabled if less than 1% of docs have a value
  final int avgGap = 100;
  final int numDocs = atLeast(200);
  for (int i = random().nextInt(avgGap * 2); i >= 0; --i) {
    writer.addDocument(new Document());
  }
  final int maxNumValuesPerDoc = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 5);
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    // single-valued
    long docValue = values[random().nextInt(values.length)];
    doc.add(new NumericDocValuesField("numeric", docValue));
    doc.add(new SortedDocValuesField("sorted", new BytesRef(Long.toString(docValue))));
    doc.add(new BinaryDocValuesField("binary", new BytesRef(Long.toString(docValue))));
    doc.add(new StoredField("value", docValue));
    // multi-valued
    final int numValues = TestUtil.nextInt(random(), 1, maxNumValuesPerDoc);
    for (int j = 0; j < numValues; ++j) {
      docValue = values[random().nextInt(values.length)];
      doc.add(new SortedNumericDocValuesField("sorted_numeric", docValue));
      doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Long.toString(docValue))));
      doc.add(new StoredField("values", docValue));
    }
    writer.addDocument(doc);
    // add a gap
    for (int j = TestUtil.nextInt(random(), 0, avgGap * 2); j >= 0; --j) {
      writer.addDocument(new Document());
    }
  }
  if (random().nextBoolean()) {
    writer.forceMerge(1);
  }
  final IndexReader indexReader = writer.getReader();
  writer.close();
  for (LeafReaderContext context : indexReader.leaves()) {
    final LeafReader reader = context.reader();
    final NumericDocValues numeric = DocValues.getNumeric(reader, "numeric");
    final SortedDocValues sorted = DocValues.getSorted(reader, "sorted");
    final BinaryDocValues binary = DocValues.getBinary(reader, "binary");
    final SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(reader, "sorted_numeric");
    final SortedSetDocValues sortedSet = DocValues.getSortedSet(reader, "sorted_set");
    for (int i = 0; i < reader.maxDoc(); ++i) {
      final Document doc = reader.document(i);
      final IndexableField valueField = doc.getField("value");
      final Long value = valueField == null ? null : valueField.numericValue().longValue();
      if (value == null) {
        assertTrue(numeric.docID() + " vs " + i, numeric.docID() < i);
      } else {
        assertEquals(i, numeric.nextDoc());
        assertEquals(i, binary.nextDoc());
        assertEquals(i, sorted.nextDoc());
        assertEquals(value.longValue(), numeric.longValue());
        assertTrue(sorted.ordValue() >= 0);
        assertEquals(new BytesRef(Long.toString(value)), sorted.lookupOrd(sorted.ordValue()));
        assertEquals(new BytesRef(Long.toString(value)), binary.binaryValue());
      }
      final IndexableField[] valuesFields = doc.getFields("values");
      if (valuesFields.length == 0) {
        assertTrue(sortedNumeric.docID() + " vs " + i, sortedNumeric.docID() < i);
      } else {
        final Set<Long> valueSet = new HashSet<>();
        for (IndexableField sf : valuesFields) {
          valueSet.add(sf.numericValue().longValue());
        }
        assertEquals(i, sortedNumeric.nextDoc());
        assertEquals(valuesFields.length, sortedNumeric.docValueCount());
        for (int j = 0; j < sortedNumeric.docValueCount(); ++j) {
          assertTrue(valueSet.contains(sortedNumeric.nextValue()));
        }
        assertEquals(i, sortedSet.nextDoc());
        int sortedSetCount = 0;
        while (true) {
          long ord = sortedSet.nextOrd();
          if (ord == SortedSetDocValues.NO_MORE_ORDS) {
            break;
          }
          assertTrue(valueSet.contains(Long.parseLong(sortedSet.lookupOrd(ord).utf8ToString())));
          sortedSetCount++;
        }
        assertEquals(valueSet.size(), sortedSetCount);
      }
    }
  }
  indexReader.close();
  dir.close();
}
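The test above exercises the iterator-style doc-values access introduced in Lucene 7.0, where NumericDocValues is a DocIdSetIterator and only documents that have a value are ever positioned on. As a point of reference, here is a minimal sketch (not taken from the test) of how a consumer typically reads such a sparse field from one segment; the helper name sumNumericField is illustrative, while the field name "numeric" matches the test.

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;

/** Sums every value of the "numeric" field in one segment (sketch only). */
static long sumNumericField(LeafReader reader) throws IOException {
  // DocValues.getNumeric returns an empty iterator rather than null when the field has no numeric doc values.
  NumericDocValues numeric = DocValues.getNumeric(reader, "numeric");
  long sum = 0;
  // Only documents that actually have a value are returned by nextDoc().
  for (int doc = numeric.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = numeric.nextDoc()) {
    sum += numeric.longValue();
  }
  return sum;
}

For access by increasing docID, advanceExact(target) can be used instead of the loop; it returns false when the target document has no value, which is the same condition the assertions above detect by comparing docID() with the stored-fields document number.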
Use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
In the class TestPointQueries, the method verifyLongs:
// verify for long values
private static void verifyLongs(long[] values, int[] ids) throws Exception {
  IndexWriterConfig iwc = newIndexWriterConfig();
  // Else we can get O(N^2) merging:
  int mbd = iwc.getMaxBufferedDocs();
  if (mbd != -1 && mbd < values.length / 100) {
    iwc.setMaxBufferedDocs(values.length / 100);
  }
  iwc.setCodec(getCodec());
  Directory dir;
  if (values.length > 100000) {
    dir = newMaybeVirusCheckingFSDirectory(createTempDir("TestRangeTree"));
  } else {
    dir = newMaybeVirusCheckingDirectory();
  }
  int missingPct = random().nextInt(100);
  int deletedPct = random().nextInt(100);
  if (VERBOSE) {
    System.out.println(" missingPct=" + missingPct);
    System.out.println(" deletedPct=" + deletedPct);
  }
  BitSet missing = new BitSet();
  BitSet deleted = new BitSet();
  Document doc = null;
  int lastID = -1;
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  for (int ord = 0; ord < values.length; ord++) {
    int id;
    if (ids == null) {
      id = ord;
    } else {
      id = ids[ord];
    }
    if (id != lastID) {
      if (random().nextInt(100) < missingPct) {
        missing.set(id);
        if (VERBOSE) {
          System.out.println(" missing id=" + id);
        }
      }
      if (doc != null) {
        w.addDocument(doc);
        if (random().nextInt(100) < deletedPct) {
          int idToDelete = random().nextInt(id);
          w.deleteDocuments(new Term("id", "" + idToDelete));
          deleted.set(idToDelete);
          if (VERBOSE) {
            System.out.println(" delete id=" + idToDelete);
          }
        }
      }
      doc = new Document();
      doc.add(newStringField("id", "" + id, Field.Store.NO));
      doc.add(new NumericDocValuesField("id", id));
      lastID = id;
    }
    if (missing.get(id) == false) {
      doc.add(new LongPoint("sn_value", values[id]));
      byte[] bytes = new byte[8];
      NumericUtils.longToSortableBytes(values[id], bytes, 0);
      doc.add(new BinaryPoint("ss_value", bytes));
    }
  }
  w.addDocument(doc);
  if (random().nextBoolean()) {
    if (VERBOSE) {
      System.out.println(" forceMerge(1)");
    }
    w.forceMerge(1);
  }
  final IndexReader r = w.getReader();
  w.close();
  IndexSearcher s = newSearcher(r, false);
  int numThreads = TestUtil.nextInt(random(), 2, 5);
  if (VERBOSE) {
    System.out.println("TEST: use " + numThreads + " query threads; searcher=" + s);
  }
  List<Thread> threads = new ArrayList<>();
  final int iters = atLeast(100);
  final CountDownLatch startingGun = new CountDownLatch(1);
  final AtomicBoolean failed = new AtomicBoolean();
  for (int i = 0; i < numThreads; i++) {
    Thread thread = new Thread() {

      @Override
      public void run() {
        try {
          _run();
        } catch (Exception e) {
          failed.set(true);
          throw new RuntimeException(e);
        }
      }

      private void _run() throws Exception {
        startingGun.await();
        for (int iter = 0; iter < iters && failed.get() == false; iter++) {
          Long lower = randomValue();
          Long upper = randomValue();
          if (upper < lower) {
            long x = lower;
            lower = upper;
            upper = x;
          }
          Query query;
          if (VERBOSE) {
            System.out.println("\n" + Thread.currentThread().getName() + ": TEST: iter=" + iter + " value=" + lower + " TO " + upper);
            byte[] tmp = new byte[8];
            if (lower != null) {
              NumericUtils.longToSortableBytes(lower, tmp, 0);
              System.out.println(" lower bytes=" + Arrays.toString(tmp));
            }
            if (upper != null) {
              NumericUtils.longToSortableBytes(upper, tmp, 0);
              System.out.println(" upper bytes=" + Arrays.toString(tmp));
            }
          }
          if (random().nextBoolean()) {
            query = LongPoint.newRangeQuery("sn_value", lower, upper);
          } else {
            byte[] lowerBytes = new byte[8];
            NumericUtils.longToSortableBytes(lower, lowerBytes, 0);
            byte[] upperBytes = new byte[8];
            NumericUtils.longToSortableBytes(upper, upperBytes, 0);
            query = BinaryPoint.newRangeQuery("ss_value", lowerBytes, upperBytes);
          }
          if (VERBOSE) {
            System.out.println(Thread.currentThread().getName() + ": using query: " + query);
          }
          final BitSet hits = new BitSet();
          s.search(query, new SimpleCollector() {

            private int docBase;

            @Override
            public boolean needsScores() {
              return false;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
              docBase = context.docBase;
            }

            @Override
            public void collect(int doc) {
              hits.set(docBase + doc);
            }
          });
          if (VERBOSE) {
            System.out.println(Thread.currentThread().getName() + ": hitCount: " + hits.cardinality());
          }
          NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
          for (int docID = 0; docID < r.maxDoc(); docID++) {
            assertEquals(docID, docIDToID.nextDoc());
            int id = (int) docIDToID.longValue();
            boolean expected = missing.get(id) == false && deleted.get(id) == false && values[id] >= lower && values[id] <= upper;
            if (hits.get(docID) != expected) {
              // We do exact quantized comparison so the bbox query should never disagree:
              fail(Thread.currentThread().getName() + ": iter=" + iter + " id=" + id + " docID=" + docID + " value=" + values[id] + " (range: " + lower + " TO " + upper + ") expected " + expected + " but got: " + hits.get(docID) + " deleted?=" + deleted.get(id) + " query=" + query);
            }
          }
        }
      }
    };
    thread.setName("T" + i);
    thread.start();
    threads.add(thread);
  }
  startingGun.countDown();
  for (Thread thread : threads) {
    thread.join();
  }
  IOUtils.close(r, dir);
}
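One detail worth calling out from the test above: the random choice between LongPoint.newRangeQuery and BinaryPoint.newRangeQuery only works because NumericUtils.longToSortableBytes produces an 8-byte encoding whose lexicographic byte order matches the signed long order. A minimal sketch of that equivalence, reusing the field names from the test (the helper name longRange is ours):

import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.NumericUtils;

/** Builds the same inclusive long range either as a LongPoint or a BinaryPoint query (sketch only). */
static Query longRange(long lower, long upper, boolean asBytes) {
  if (asBytes == false) {
    return LongPoint.newRangeQuery("sn_value", lower, upper);
  }
  byte[] lowerBytes = new byte[Long.BYTES];
  byte[] upperBytes = new byte[Long.BYTES];
  // Sortable-bytes encoding: comparing the byte arrays lexicographically orders the longs correctly.
  NumericUtils.longToSortableBytes(lower, lowerBytes, 0);
  NumericUtils.longToSortableBytes(upper, upperBytes, 0);
  return BinaryPoint.newRangeQuery("ss_value", lowerBytes, upperBytes);
}

Since the test indexes both fields from the same values, the two query forms must select exactly the same documents, which is what the per-thread verification loop checks against the "id" NumericDocValues.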
Use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
In the class SimpleTextDocValuesWriter, the method addNumericField:
@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  assert fieldSeen(field.name);
  assert field.getDocValuesType() == DocValuesType.NUMERIC || field.hasNorms();
  writeFieldEntry(field, DocValuesType.NUMERIC);
  // first pass to find min/max
  long minValue = Long.MAX_VALUE;
  long maxValue = Long.MIN_VALUE;
  NumericDocValues values = valuesProducer.getNumeric(field);
  int numValues = 0;
  for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
    long v = values.longValue();
    minValue = Math.min(minValue, v);
    maxValue = Math.max(maxValue, v);
    numValues++;
  }
  if (numValues != numDocs) {
    minValue = Math.min(minValue, 0);
    maxValue = Math.max(maxValue, 0);
  }
  // write our minimum value to the .dat, all entries are deltas from that
  SimpleTextUtil.write(data, MINVALUE);
  SimpleTextUtil.write(data, Long.toString(minValue), scratch);
  SimpleTextUtil.writeNewline(data);
  // build up our fixed-width "simple text packed ints"
  // format
  BigInteger maxBig = BigInteger.valueOf(maxValue);
  BigInteger minBig = BigInteger.valueOf(minValue);
  BigInteger diffBig = maxBig.subtract(minBig);
  int maxBytesPerValue = diffBig.toString().length();
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < maxBytesPerValue; i++) {
    sb.append('0');
  }
  // write our pattern to the .dat
  SimpleTextUtil.write(data, PATTERN);
  SimpleTextUtil.write(data, sb.toString(), scratch);
  SimpleTextUtil.writeNewline(data);
  final String patternString = sb.toString();
  final DecimalFormat encoder = new DecimalFormat(patternString, new DecimalFormatSymbols(Locale.ROOT));
  int numDocsWritten = 0;
  // second pass to write the values
  values = valuesProducer.getNumeric(field);
  for (int i = 0; i < numDocs; ++i) {
    if (values.docID() < i) {
      values.nextDoc();
      assert values.docID() >= i;
    }
    long value = values.docID() != i ? 0 : values.longValue();
    assert value >= minValue;
    Number delta = BigInteger.valueOf(value).subtract(BigInteger.valueOf(minValue));
    String s = encoder.format(delta);
    assert s.length() == patternString.length();
    SimpleTextUtil.write(data, s, scratch);
    SimpleTextUtil.writeNewline(data);
    if (values.docID() != i) {
      SimpleTextUtil.write(data, "F", scratch);
    } else {
      SimpleTextUtil.write(data, "T", scratch);
    }
    SimpleTextUtil.writeNewline(data);
    numDocsWritten++;
    assert numDocsWritten <= numDocs;
  }
  assert numDocs == numDocsWritten : "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten;
}
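The writer above makes two passes over the NumericDocValues pulled from the producer: the first pass finds minValue and maxValue, the second writes one fixed-width entry per document. Each entry is the delta (value - minValue) printed through a zero-padded DecimalFormat whose width is the number of decimal digits of the largest possible delta, followed by a "T" or "F" line recording whether the document really has a value. A tiny standalone sketch of that encoding, with made-up numbers:

import java.math.BigInteger;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Locale;

public class FixedWidthDeltaDemo {
  public static void main(String[] args) {
    // With minValue = -5 and maxValue = 130 the largest delta is 135, so the pattern
    // is "000" and every delta is written as exactly three characters (fixed-width lines).
    long minValue = -5;
    DecimalFormat encoder = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
    String encoded = encoder.format(BigInteger.valueOf(42L).subtract(BigInteger.valueOf(minValue)));
    System.out.println(encoded); // prints 047
  }
}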
Use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
In the class LegacyDocValuesIterables, the method numericIterable:
/** Converts values from {@link NumericDocValues} into {@code Iterable<Number>}.
 *
 * @deprecated Consume {@link NumericDocValues} instead. */
@Deprecated
public static Iterable<Number> numericIterable(final FieldInfo field, final DocValuesProducer valuesProducer, final int maxDoc) {
  return new Iterable<Number>() {

    @Override
    public Iterator<Number> iterator() {
      final NumericDocValues values;
      try {
        values = valuesProducer.getNumeric(field);
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }
      return new Iterator<Number>() {

        private int docIDUpto = -1;

        @Override
        public boolean hasNext() {
          return docIDUpto + 1 < maxDoc;
        }

        @Override
        public Number next() {
          docIDUpto++;
          if (docIDUpto > values.docID()) {
            try {
              values.nextDoc();
            } catch (IOException ioe) {
              throw new RuntimeException(ioe);
            }
          }
          Number result;
          if (docIDUpto == values.docID()) {
            try {
              result = values.longValue();
            } catch (IOException ioe) {
              throw new RuntimeException(ioe);
            }
          } else {
            result = null;
          }
          return result;
        }
      };
    }
  };
}
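The iterable above adapts the Lucene 7 iterator back to the dense, docID-ordered view that pre-7.0 codec write APIs expected: exactly maxDoc elements, with null standing in for documents that have no value. A hedged sketch of a consumer, assuming fieldInfo, producer, and maxDoc are already in scope:

// Legacy-style consumption: one element per document, null means "no value for this doc".
Iterable<Number> legacy = LegacyDocValuesIterables.numericIterable(fieldInfo, producer, maxDoc);
int docID = 0;
for (Number v : legacy) {
  if (v != null) {
    long value = v.longValue();
    // ... write value for docID
  }
  docID++;
}

Because each call to iterator() asks the producer for a fresh NumericDocValues, the Iterable can be traversed more than once, but every traversal is strictly forward over docIDs.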
Use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
In the class LegacyDocValuesIterables, the method normsIterable:
/** Converts norms into {@code Iterable<Number>}.
 *
 * @deprecated Consume {@link NumericDocValues} instead. */
@Deprecated
public static Iterable<Number> normsIterable(final FieldInfo field, final NormsProducer normsProducer, final int maxDoc) {
  return new Iterable<Number>() {

    @Override
    public Iterator<Number> iterator() {
      final NumericDocValues values;
      try {
        values = normsProducer.getNorms(field);
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }
      return new Iterator<Number>() {

        private int docIDUpto = -1;

        @Override
        public boolean hasNext() {
          return docIDUpto + 1 < maxDoc;
        }

        @Override
        public Number next() {
          docIDUpto++;
          if (docIDUpto > values.docID()) {
            try {
              values.nextDoc();
            } catch (IOException ioe) {
              throw new RuntimeException(ioe);
            }
          }
          Number result;
          if (docIDUpto == values.docID()) {
            try {
              result = values.longValue();
            } catch (IOException ioe) {
              throw new RuntimeException(ioe);
            }
          } else {
            // Unlike NumericDocValues, norms used to return 0 for missing values:
            result = 0;
          }
          return result;
        }
      };
    }
  };
}
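normsIterable is identical to numericIterable except for missing values: the legacy norms API reported 0 rather than null for a document without a norm. For new code, the deprecation note points at reading norms directly as a NumericDocValues; a minimal sketch, with an illustrative field name "body" and an assumed in-scope leafReader and docID:

// Lucene 7.0+ norms access; getNormValues returns null if the field indexes no norms.
NumericDocValues norms = leafReader.getNormValues("body");
if (norms != null && norms.advanceExact(docID)) {
  long norm = norms.longValue();
  // ... use norm; documents skipped by the iterator simply have no norm
}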