use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class TestMemoryIndex method testDocValues.
public void testDocValues() throws Exception {
Document doc = new Document();
doc.add(new NumericDocValuesField("numeric", 29L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 33L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 31L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 30L));
doc.add(new BinaryDocValuesField("binary", new BytesRef("a")));
doc.add(new SortedDocValuesField("sorted", new BytesRef("b")));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("f")));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("c")));
MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
assertEquals(0, numericDocValues.nextDoc());
assertEquals(29L, numericDocValues.longValue());
SortedNumericDocValues sortedNumericDocValues = leafReader.getSortedNumericDocValues("sorted_numeric");
assertEquals(0, sortedNumericDocValues.nextDoc());
assertEquals(5, sortedNumericDocValues.docValueCount());
assertEquals(30L, sortedNumericDocValues.nextValue());
assertEquals(31L, sortedNumericDocValues.nextValue());
assertEquals(32L, sortedNumericDocValues.nextValue());
assertEquals(32L, sortedNumericDocValues.nextValue());
assertEquals(33L, sortedNumericDocValues.nextValue());
BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
assertEquals(0, binaryDocValues.nextDoc());
assertEquals("a", binaryDocValues.binaryValue().utf8ToString());
SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
assertEquals(0, sortedDocValues.nextDoc());
assertEquals("b", sortedDocValues.binaryValue().utf8ToString());
assertEquals(0, sortedDocValues.ordValue());
assertEquals("b", sortedDocValues.lookupOrd(0).utf8ToString());
SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
assertEquals(3, sortedSetDocValues.getValueCount());
assertEquals(0, sortedSetDocValues.nextDoc());
assertEquals(0L, sortedSetDocValues.nextOrd());
assertEquals(1L, sortedSetDocValues.nextOrd());
assertEquals(2L, sortedSetDocValues.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());
assertEquals("c", sortedSetDocValues.lookupOrd(0L).utf8ToString());
assertEquals("d", sortedSetDocValues.lookupOrd(1L).utf8ToString());
assertEquals("f", sortedSetDocValues.lookupOrd(2L).utf8ToString());
}
use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class TestFieldCacheVsDocValues method doTestSortedVsFieldCache.
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field indexedField = new StringField("indexed", "", Field.Store.NO);
Field dvField = new SortedDocValuesField("dv", new BytesRef());
doc.add(idField);
doc.add(indexedField);
doc.add(dvField);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
final int length;
if (minLength == maxLength) {
// fixed length
length = minLength;
} else {
length = TestUtil.nextInt(random(), minLength, maxLength);
}
String value = TestUtil.randomSimpleString(random(), length);
indexedField.setStringValue(value);
dvField.setBytesValue(new BytesRef(value));
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs / 10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.close();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
SortedDocValues actual = r.getSortedDocValues("dv");
assertEquals(r.maxDoc(), expected, actual);
}
ir.close();
dir.close();
}
use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class TestFieldCacheWithThreads method test.
public void test() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
final List<Long> numbers = new ArrayList<>();
final List<BytesRef> binary = new ArrayList<>();
final List<BytesRef> sorted = new ArrayList<>();
final int numDocs = atLeast(100);
for (int i = 0; i < numDocs; i++) {
Document d = new Document();
long number = random().nextLong();
d.add(new NumericDocValuesField("number", number));
BytesRef bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
d.add(new BinaryDocValuesField("bytes", bytes));
binary.add(bytes);
bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
d.add(new SortedDocValuesField("sorted", bytes));
sorted.add(bytes);
w.addDocument(d);
numbers.add(number);
}
w.forceMerge(1);
final IndexReader r = DirectoryReader.open(w);
w.close();
assertEquals(1, r.leaves().size());
final LeafReader ar = r.leaves().get(0).reader();
int numThreads = TestUtil.nextInt(random(), 2, 5);
List<Thread> threads = new ArrayList<>();
final CountDownLatch startingGun = new CountDownLatch(1);
for (int t = 0; t < numThreads; t++) {
final Random threadRandom = new Random(random().nextLong());
Thread thread = new Thread() {
@Override
public void run() {
try {
startingGun.await();
int iters = atLeast(1000);
for (int iter = 0; iter < iters; iter++) {
int docID = threadRandom.nextInt(numDocs);
switch(threadRandom.nextInt(4)) {
case 0:
{
NumericDocValues values = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.INT_POINT_PARSER);
assertEquals(docID, values.advance(docID));
assertEquals(numbers.get(docID).longValue(), values.longValue());
}
break;
case 1:
{
NumericDocValues values = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.LONG_POINT_PARSER);
assertEquals(docID, values.advance(docID));
assertEquals(numbers.get(docID).longValue(), values.longValue());
}
break;
case 2:
{
NumericDocValues values = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.FLOAT_POINT_PARSER);
assertEquals(docID, values.advance(docID));
assertEquals(numbers.get(docID).longValue(), values.longValue());
}
break;
case 3:
{
NumericDocValues values = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.DOUBLE_POINT_PARSER);
assertEquals(docID, values.advance(docID));
assertEquals(numbers.get(docID).longValue(), values.longValue());
}
break;
}
BinaryDocValues bdv = FieldCache.DEFAULT.getTerms(ar, "bytes");
assertEquals(docID, bdv.advance(docID));
assertEquals(binary.get(docID), bdv.binaryValue());
SortedDocValues sdv = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
assertEquals(docID, sdv.advance(docID));
assertEquals(sorted.get(docID), sdv.binaryValue());
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
thread.start();
threads.add(thread);
}
startingGun.countDown();
for (Thread thread : threads) {
thread.join();
}
r.close();
dir.close();
}
use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class TestJoinUtil method testOrdinalsJoinExplainNoMatches.
public void testOrdinalsJoinExplainNoMatches() throws Exception {
final String idField = "id";
final String productIdField = "productId";
// A field indicating to what type a document belongs, which is then used to distinques between documents during joining.
final String typeField = "type";
// A single sorted doc values field that holds the join values for all document types.
// Typically during indexing a schema will automatically create this field with the values
final String joinField = idField + productIdField;
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
// 0
Document doc = new Document();
doc.add(new TextField(idField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
if (random().nextBoolean()) {
w.flush();
}
// 4
doc = new Document();
doc.add(new TextField(productIdField, "3", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "5.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField("field", "value", Field.Store.NO));
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
IndexSearcher indexSearcher = new IndexSearcher(r);
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
for (int i = 0; i < values.length; i++) {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
Query toQuery = new TermQuery(new Term("price", "5.0"));
Query fromQuery = new TermQuery(new Term("name", "name2"));
for (ScoreMode scoreMode : ScoreMode.values()) {
Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, scoreMode, ordinalMap);
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(1, result.totalHits);
// doc with price: 5.0
assertEquals(4, result.scoreDocs[0].doc);
Explanation explanation = indexSearcher.explain(joinQuery, 4);
assertTrue(explanation.isMatch());
assertEquals(explanation.getDescription(), "A match, join value 2");
explanation = indexSearcher.explain(joinQuery, 3);
assertFalse(explanation.isMatch());
assertEquals(explanation.getDescription(), "Not a match, join value 1");
explanation = indexSearcher.explain(joinQuery, 5);
assertFalse(explanation.isMatch());
assertEquals(explanation.getDescription(), "Not a match");
}
w.close();
indexSearcher.getIndexReader().close();
dir.close();
}
use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class TestJoinUtil method testEquals_globalOrdinalsJoin.
public void testEquals_globalOrdinalsJoin() throws Exception {
final int numDocs = atLeast(random(), 50);
try (final Directory dir = newDirectory()) {
try (final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()))) {
String joinField = "field";
for (int id = 0; id < numDocs; id++) {
Document doc = new Document();
doc.add(new TextField("id", "" + id, Field.Store.NO));
doc.add(new TextField("name", "name" + (id % 7), Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("" + random().nextInt(13))));
w.addDocument(doc);
}
Set<ScoreMode> scoreModes = EnumSet.allOf(ScoreMode.class);
ScoreMode scoreMode1 = RandomPicks.randomFrom(random(), scoreModes);
scoreModes.remove(scoreMode1);
ScoreMode scoreMode2 = RandomPicks.randomFrom(random(), scoreModes);
final Query x;
try (IndexReader r = w.getReader()) {
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
for (int i = 0; i < values.length; i++) {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
IndexSearcher indexSearcher = new IndexSearcher(r);
x = JoinUtil.createJoinQuery(joinField, new TermQuery(new Term("name", "name5")), new MatchAllDocsQuery(), indexSearcher, scoreMode1, ordinalMap);
assertEquals("identical calls to createJoinQuery", x, JoinUtil.createJoinQuery(joinField, new TermQuery(new Term("name", "name5")), new MatchAllDocsQuery(), indexSearcher, scoreMode1, ordinalMap));
assertFalse("score mode (" + scoreMode1 + " != " + scoreMode2 + "), but queries are equal", x.equals(JoinUtil.createJoinQuery(joinField, new TermQuery(new Term("name", "name5")), new MatchAllDocsQuery(), indexSearcher, scoreMode2, ordinalMap)));
assertFalse("fromQuery (name:name5 != name:name6) but queries equals", x.equals(JoinUtil.createJoinQuery(joinField, new TermQuery(new Term("name", "name6")), new MatchAllDocsQuery(), indexSearcher, scoreMode1, ordinalMap)));
}
for (int i = 0; i < 13; i++) {
Document doc = new Document();
doc.add(new TextField("id", "new_id", Field.Store.NO));
doc.add(new TextField("name", "name5", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("" + i)));
w.addDocument(doc);
}
try (IndexReader r = w.getReader()) {
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
for (int i = 0; i < values.length; i++) {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
IndexSearcher indexSearcher = new IndexSearcher(r);
assertFalse("Query shouldn't be equal, because different index readers ", x.equals(JoinUtil.createJoinQuery(joinField, new TermQuery(new Term("name", "name5")), new MatchAllDocsQuery(), indexSearcher, scoreMode1, ordinalMap)));
}
}
}
}
Aggregations