use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class TestFieldCache method testIntFieldCache.
// Make sure that the use of GrowableWriter doesn't prevent from using the full int range
public void testIntFieldCache() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
cfg.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
Document doc = new Document();
IntPoint field = new IntPoint("f", 0);
doc.add(field);
final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
Set<Integer> missing = new HashSet<>();
for (int i = 0; i < values.length; ++i) {
final int v;
switch(random().nextInt(10)) {
case 0:
v = Integer.MIN_VALUE;
break;
case 1:
v = 0;
break;
case 2:
v = Integer.MAX_VALUE;
break;
default:
v = TestUtil.nextInt(random(), -10, 10);
break;
}
values[i] = v;
if (v == 0 && random().nextBoolean()) {
// missing
iw.addDocument(new Document());
missing.add(i);
} else {
field.setIntValue(v);
iw.addDocument(doc);
}
}
iw.forceMerge(1);
final DirectoryReader reader = iw.getReader();
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.INT_POINT_PARSER);
for (int i = 0; i < values.length; ++i) {
if (missing.contains(i) == false) {
assertEquals(i, ints.nextDoc());
assertEquals(values[i], ints.longValue());
}
}
assertEquals(NO_MORE_DOCS, ints.nextDoc());
reader.close();
iw.close();
dir.close();
}
use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class TestFieldCacheWithThreads method test2.
public void test2() throws Exception {
Random random = random();
final int NUM_DOCS = atLeast(100);
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
final boolean allowDups = random.nextBoolean();
final Set<String> seen = new HashSet<>();
if (VERBOSE) {
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
}
int numDocs = 0;
final List<BytesRef> docValues = new ArrayList<>();
// TODO: deletions
while (numDocs < NUM_DOCS) {
final String s;
if (random.nextBoolean()) {
s = TestUtil.randomSimpleString(random);
} else {
s = TestUtil.randomUnicodeString(random);
}
final BytesRef br = new BytesRef(s);
if (!allowDups) {
if (seen.contains(s)) {
continue;
}
seen.add(s);
}
if (VERBOSE) {
System.out.println(" " + numDocs + ": s=" + s);
}
final Document doc = new Document();
doc.add(new SortedDocValuesField("stringdv", br));
doc.add(new NumericDocValuesField("id", numDocs));
docValues.add(br);
writer.addDocument(doc);
numDocs++;
if (random.nextInt(40) == 17) {
// force flush
writer.getReader().close();
}
}
writer.forceMerge(1);
final DirectoryReader r = writer.getReader();
writer.close();
final LeafReader sr = getOnlyLeafReader(r);
final long END_TIME = System.nanoTime() + TimeUnit.NANOSECONDS.convert((TEST_NIGHTLY ? 30 : 1), TimeUnit.SECONDS);
final int NUM_THREADS = TestUtil.nextInt(random(), 1, 10);
Thread[] threads = new Thread[NUM_THREADS];
for (int thread = 0; thread < NUM_THREADS; thread++) {
threads[thread] = new Thread() {
@Override
public void run() {
Random random = random();
final SortedDocValues stringDVDirect;
final NumericDocValues docIDToID;
try {
stringDVDirect = sr.getSortedDocValues("stringdv");
docIDToID = sr.getNumericDocValues("id");
assertNotNull(stringDVDirect);
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
int[] docIDToIDArray = new int[sr.maxDoc()];
for (int i = 0; i < sr.maxDoc(); i++) {
try {
assertEquals(i, docIDToID.nextDoc());
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
try {
docIDToIDArray[i] = (int) docIDToID.longValue();
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
}
while (System.nanoTime() < END_TIME) {
for (int iter = 0; iter < 100; iter++) {
final int docID = random.nextInt(sr.maxDoc());
try {
SortedDocValues dvs = sr.getSortedDocValues("stringdv");
assertEquals(docID, dvs.advance(docID));
assertEquals(docValues.get(docIDToIDArray[docID]), dvs.binaryValue());
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
}
}
}
};
threads[thread].start();
}
for (Thread thread : threads) {
thread.join();
}
r.close();
dir.close();
}
use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class Lucene54DocValuesProducer method getNumeric.
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
NumericEntry entry = numerics.get(field.name);
Bits docsWithField;
if (entry.format == SPARSE_COMPRESSED) {
return getSparseNumericDocValues(entry);
} else {
if (entry.missingOffset == ALL_MISSING) {
return DocValues.emptyNumeric();
} else if (entry.missingOffset == ALL_LIVE) {
LongValues values = getNumeric(entry);
return new NumericDocValues() {
private int docID = -1;
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() {
docID++;
if (docID == maxDoc) {
docID = NO_MORE_DOCS;
}
return docID;
}
@Override
public int advance(int target) {
if (target >= maxDoc) {
docID = NO_MORE_DOCS;
} else {
docID = target;
}
return docID;
}
@Override
public boolean advanceExact(int target) throws IOException {
docID = target;
return true;
}
@Override
public long cost() {
// TODO
return 0;
}
@Override
public long longValue() {
return values.get(docID);
}
};
} else {
docsWithField = getLiveBits(entry.missingOffset, maxDoc);
}
}
final LongValues values = getNumeric(entry);
return new NumericDocValues() {
int doc = -1;
long value;
@Override
public long longValue() throws IOException {
return value;
}
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
@Override
public int advance(int target) throws IOException {
for (int doc = target; doc < maxDoc; ++doc) {
value = values.get(doc);
if (value != 0 || docsWithField.get(doc)) {
return this.doc = doc;
}
}
return doc = NO_MORE_DOCS;
}
@Override
public boolean advanceExact(int target) throws IOException {
doc = target;
value = values.get(doc);
return value != 0 || docsWithField.get(doc);
}
@Override
public long cost() {
return maxDoc;
}
};
}
use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class TestLucene54DocValuesFormat method doTestSparseDocValuesVsStoredFields.
private void doTestSparseDocValuesVsStoredFields() throws Exception {
final long[] values = new long[TestUtil.nextInt(random(), 1, 500)];
for (int i = 0; i < values.length; ++i) {
values[i] = random().nextLong();
}
Directory dir = newFSDirectory(createTempDir());
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setMergeScheduler(new SerialMergeScheduler());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// sparse compression is only enabled if less than 1% of docs have a value
final int avgGap = 100;
final int numDocs = atLeast(200);
for (int i = random().nextInt(avgGap * 2); i >= 0; --i) {
writer.addDocument(new Document());
}
final int maxNumValuesPerDoc = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 5);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
// single-valued
long docValue = values[random().nextInt(values.length)];
doc.add(new NumericDocValuesField("numeric", docValue));
doc.add(new SortedDocValuesField("sorted", new BytesRef(Long.toString(docValue))));
doc.add(new BinaryDocValuesField("binary", new BytesRef(Long.toString(docValue))));
doc.add(new StoredField("value", docValue));
// multi-valued
final int numValues = TestUtil.nextInt(random(), 1, maxNumValuesPerDoc);
for (int j = 0; j < numValues; ++j) {
docValue = values[random().nextInt(values.length)];
doc.add(new SortedNumericDocValuesField("sorted_numeric", docValue));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Long.toString(docValue))));
doc.add(new StoredField("values", docValue));
}
writer.addDocument(doc);
// add a gap
for (int j = TestUtil.nextInt(random(), 0, avgGap * 2); j >= 0; --j) {
writer.addDocument(new Document());
}
}
if (random().nextBoolean()) {
writer.forceMerge(1);
}
final IndexReader indexReader = writer.getReader();
TestUtil.checkReader(indexReader);
writer.close();
for (LeafReaderContext context : indexReader.leaves()) {
final LeafReader reader = context.reader();
final NumericDocValues numeric = DocValues.getNumeric(reader, "numeric");
final SortedDocValues sorted = DocValues.getSorted(reader, "sorted");
final BinaryDocValues binary = DocValues.getBinary(reader, "binary");
final SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(reader, "sorted_numeric");
final SortedSetDocValues sortedSet = DocValues.getSortedSet(reader, "sorted_set");
for (int i = 0; i < reader.maxDoc(); ++i) {
final Document doc = reader.document(i);
final IndexableField valueField = doc.getField("value");
final Long value = valueField == null ? null : valueField.numericValue().longValue();
if (value == null) {
assertTrue(numeric.docID() + " vs " + i, numeric.docID() < i);
} else {
assertEquals(i, numeric.nextDoc());
assertEquals(i, binary.nextDoc());
assertEquals(i, sorted.nextDoc());
assertEquals(value.longValue(), numeric.longValue());
assertTrue(sorted.ordValue() >= 0);
assertEquals(new BytesRef(Long.toString(value)), sorted.lookupOrd(sorted.ordValue()));
assertEquals(new BytesRef(Long.toString(value)), binary.binaryValue());
}
final IndexableField[] valuesFields = doc.getFields("values");
if (valuesFields.length == 0) {
assertTrue(sortedNumeric.docID() + " vs " + i, sortedNumeric.docID() < i);
} else {
final Set<Long> valueSet = new HashSet<>();
for (IndexableField sf : valuesFields) {
valueSet.add(sf.numericValue().longValue());
}
assertEquals(i, sortedNumeric.nextDoc());
assertEquals(valuesFields.length, sortedNumeric.docValueCount());
for (int j = 0; j < sortedNumeric.docValueCount(); ++j) {
assertTrue(valueSet.contains(sortedNumeric.nextValue()));
}
assertEquals(i, sortedSet.nextDoc());
int sortedSetCount = 0;
while (true) {
long ord = sortedSet.nextOrd();
if (ord == SortedSetDocValues.NO_MORE_ORDS) {
break;
}
assertTrue(valueSet.contains(Long.parseLong(sortedSet.lookupOrd(ord).utf8ToString())));
sortedSetCount++;
}
assertEquals(valueSet.size(), sortedSetCount);
}
}
}
indexReader.close();
dir.close();
}
use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class SortedNumericDocValuesRangeQuery method createWeight.
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
SortedNumericDocValues values = getValues(context.reader(), field);
if (values == null) {
return null;
}
final NumericDocValues singleton = DocValues.unwrapSingleton(values);
final TwoPhaseIterator iterator;
if (singleton != null) {
iterator = new TwoPhaseIterator(singleton) {
@Override
public boolean matches() throws IOException {
final long value = singleton.longValue();
return value >= lowerValue && value <= upperValue;
}
@Override
public float matchCost() {
// 2 comparisons
return 2;
}
};
} else {
iterator = new TwoPhaseIterator(values) {
@Override
public boolean matches() throws IOException {
for (int i = 0, count = values.docValueCount(); i < count; ++i) {
final long value = values.nextValue();
if (value < lowerValue) {
continue;
}
// Values are sorted, so the first value that is >= lowerValue is our best candidate
return value <= upperValue;
}
// all values were < lowerValue
return false;
}
@Override
public float matchCost() {
// 2 comparisons
return 2;
}
};
}
return new ConstantScoreScorer(this, score(), iterator);
}
};
}
Aggregations