use of org.apache.lucene.index.NumericDocValues in project stargate-core by tuplejump.
the class IndexEntryCollector method getIndexEntry.
IndexEntry getIndexEntry(int slot, int doc, float score) throws IOException {
String pkName = LuceneUtils.primaryKeyName(pkNames, doc);
ByteBuffer primaryKey = LuceneUtils.byteBufferDocValue(primaryKeys, doc);
ByteBuffer rowKey = LuceneUtils.byteBufferDocValue(rowKeys, doc);
Map<String, Number> numericDocValues = new HashMap<>();
Map<String, String> binaryDocValues = new HashMap<>();
for (Map.Entry<String, NumericDocValues> entry : numericDocValuesMap.entrySet()) {
Type type = AggregateFunction.getLuceneType(options, entry.getKey());
Number number = LuceneUtils.numericDocValue(entry.getValue(), doc, type);
numericDocValues.put(entry.getKey(), number);
}
for (Map.Entry<String, SortedDocValues> entry : stringDocValues.entrySet()) {
binaryDocValues.put(entry.getKey(), LuceneUtils.stringDocValue(entry.getValue(), doc));
}
return new IndexEntry(rowKey, pkName, primaryKey, slot, docBase + doc, score, numericDocValues, binaryDocValues);
}
use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class TestFieldCache method test.
public void test() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(i, doubles.nextDoc());
assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.longValue());
}
NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER);
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(i, longs.nextDoc());
assertEquals(Long.MAX_VALUE - i, longs.longValue());
}
NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER);
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(i, ints.nextDoc());
assertEquals(Integer.MAX_VALUE - i, ints.longValue());
}
NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER);
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(i, floats.nextDoc());
assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.longValue());
}
Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertTrue(docsWithField.get(i));
}
docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i % 2 == 0, docsWithField.get(i));
}
// getTermsIndex
SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
for (int i = 0; i < NUM_DOCS; i++) {
final String s;
if (i > termsIndex.docID()) {
termsIndex.advance(i);
}
if (i == termsIndex.docID()) {
s = termsIndex.binaryValue().utf8ToString();
} else {
s = null;
}
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
}
int nTerms = termsIndex.getValueCount();
TermsEnum tenum = termsIndex.termsEnum();
for (int i = 0; i < nTerms; i++) {
BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
final BytesRef val = termsIndex.lookupOrd(i);
// System.out.println("i="+i);
assertEquals(val, val1);
}
// seek the enum around (note this isn't a great test here)
int num = atLeast(100);
for (int i = 0; i < num; i++) {
int k = random().nextInt(nTerms);
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
assertEquals(val, tenum.term());
}
for (int i = 0; i < nTerms; i++) {
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
assertEquals(val, tenum.term());
}
// test bad field
termsIndex = cache.getTermsIndex(reader, "bogusfield");
// getTerms
BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString");
for (int i = 0; i < NUM_DOCS; i++) {
if (terms.docID() < i) {
terms.nextDoc();
}
if (terms.docID() == i) {
assertEquals(unicodeStrings[i], terms.binaryValue().utf8ToString());
} else {
assertNull(unicodeStrings[i]);
}
}
// test bad field
terms = cache.getTerms(reader, "bogusfield");
// getDocTermOrds
SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
int numEntries = cache.getCacheEntries().length;
// ask for it again, and check that we didnt create any additional entries:
termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
assertEquals(numEntries, cache.getCacheEntries().length);
for (int i = 0; i < NUM_DOCS; i++) {
// This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
for (BytesRef v : values) {
if (v == null) {
// why does this test use null values... instead of an empty list: confusing
break;
}
if (i > termOrds.docID()) {
assertEquals(i, termOrds.nextDoc());
}
long ord = termOrds.nextOrd();
assert ord != SortedSetDocValues.NO_MORE_ORDS;
BytesRef scratch = termOrds.lookupOrd(ord);
assertEquals(v, scratch);
}
if (i == termOrds.docID()) {
assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
}
}
// test bad field
termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
assertTrue(termOrds.getValueCount() == 0);
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheHelper().getKey());
}
use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class TestFieldCache method testNonexistantFields.
public void testNonexistantFields() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
assertEquals(NO_MORE_DOCS, ints.nextDoc());
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
assertEquals(NO_MORE_DOCS, longs.nextDoc());
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
assertEquals(NO_MORE_DOCS, floats.nextDoc());
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
assertEquals(NO_MORE_DOCS, doubles.nextDoc());
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
assertEquals(NO_MORE_DOCS, binaries.nextDoc());
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
assertEquals(NO_MORE_DOCS, sorted.nextDoc());
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());
Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
assertFalse(bits.get(0));
// check that we cached nothing
assertEquals(0, cache.getCacheEntries().length);
ir.close();
dir.close();
}
use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class TestFieldCache method testNonIndexedFields.
public void testNonIndexedFields() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new StoredField("bogusbytes", "bogus"));
doc.add(new StoredField("bogusshorts", "bogus"));
doc.add(new StoredField("bogusints", "bogus"));
doc.add(new StoredField("boguslongs", "bogus"));
doc.add(new StoredField("bogusfloats", "bogus"));
doc.add(new StoredField("bogusdoubles", "bogus"));
doc.add(new StoredField("bogusterms", "bogus"));
doc.add(new StoredField("bogustermsindex", "bogus"));
doc.add(new StoredField("bogusmultivalued", "bogus"));
doc.add(new StoredField("bogusbits", "bogus"));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
assertEquals(NO_MORE_DOCS, ints.nextDoc());
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
assertEquals(NO_MORE_DOCS, longs.nextDoc());
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
assertEquals(NO_MORE_DOCS, floats.nextDoc());
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
assertEquals(NO_MORE_DOCS, doubles.nextDoc());
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
assertEquals(NO_MORE_DOCS, binaries.nextDoc());
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
assertEquals(NO_MORE_DOCS, sorted.nextDoc());
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());
Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
assertFalse(bits.get(0));
// check that we cached nothing
assertEquals(0, cache.getCacheEntries().length);
ir.close();
dir.close();
}
use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
the class TestLucene70DocValuesFormat method doTestSparseNumericBlocksOfVariousBitsPerValue.
private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setMaxBufferedDocs(atLeast(Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE));
conf.setRAMBufferSizeMB(-1);
conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
IndexWriter writer = new IndexWriter(dir, conf);
Document doc = new Document();
Field storedField = newStringField("stored", "", Field.Store.YES);
Field dvField = new NumericDocValuesField("dv", 0);
doc.add(storedField);
doc.add(dvField);
final int numDocs = atLeast(Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE * 3);
final LongSupplier longs = blocksOfVariousBPV();
for (int i = 0; i < numDocs; i++) {
if (random().nextDouble() > density) {
writer.addDocument(new Document());
continue;
}
long value = longs.getAsLong();
storedField.setStringValue(Long.toString(value));
dvField.setLongValue(value);
writer.addDocument(doc);
}
writer.forceMerge(1);
writer.close();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
NumericDocValues docValues = DocValues.getNumeric(r, "dv");
docValues.nextDoc();
for (int i = 0; i < r.maxDoc(); i++) {
String storedValue = r.document(i).get("stored");
if (storedValue == null) {
assertTrue(docValues.docID() > i);
} else {
assertEquals(i, docValues.docID());
assertEquals(Long.parseLong(storedValue), docValues.longValue());
docValues.nextDoc();
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
}
ir.close();
dir.close();
}
Aggregations