use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.
the class SlowCompositeReaderWrapper method getSortedSetDocValues.
/**
 * Returns sorted-set doc values for {@code field} spanning every leaf of the wrapped reader.
 *
 * <p>If no {@link OrdinalMap} is cached for the field, the result of
 * {@code MultiDocValues.getSortedSetValues} is returned as-is; when that result is a
 * {@code MultiSortedSetDocValues} its mapping is stored in {@code cachedOrdMaps}, but only if the
 * mapping's owner is this reader's cache key. If a map is already cached, the per-leaf values are
 * re-collected and combined with the cached map.
 *
 * @param field name of the doc values field
 * @return the merged doc values, or {@code null} when a leaf declares the field with a doc values
 *     type other than {@code SORTED_SET} (the uncached path returns whatever
 *     {@code MultiDocValues.getSortedSetValues} returns)
 * @throws IOException if reading doc values from a leaf fails
 */
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
  ensureOpen();

  final OrdinalMap ordinalMap;
  synchronized (cachedOrdMaps) {
    final OrdinalMap cached = cachedOrdMaps.get(field);
    if (cached == null) {
      // Nothing cached yet, or the field is not backed by a multi doc values instance.
      final SortedSetDocValues merged = MultiDocValues.getSortedSetValues(in, field);
      if (merged instanceof MultiDocValues.MultiSortedSetDocValues) {
        final OrdinalMap fresh = ((MultiDocValues.MultiSortedSetDocValues) merged).mapping;
        final IndexReader.CacheHelper helper = getReaderCacheHelper();
        // Only remember the map when it is owned by this reader's cache key.
        if (helper != null && fresh.owner == helper.getKey()) {
          cachedOrdMaps.put(field, fresh);
        }
      }
      return merged;
    }
    ordinalMap = cached;
  }
  assert ordinalMap != null;

  // Cached map available: gather the per-leaf values and their doc bases.
  final int leafCount = in.leaves().size();
  final SortedSetDocValues[] perLeaf = new SortedSetDocValues[leafCount];
  final int[] docBases = new int[leafCount + 1];
  long totalCost = 0;
  int slot = 0;
  for (LeafReaderContext leaf : in.leaves()) {
    final LeafReader leafReader = leaf.reader();
    final FieldInfo info = leafReader.getFieldInfos().fieldInfo(field);
    if (info != null && info.getDocValuesType() != DocValuesType.SORTED_SET) {
      // The field exists in this leaf but with a different doc values type.
      return null;
    }
    final SortedSetDocValues raw = leafReader.getSortedSetDocValues(field);
    final SortedSetDocValues present = (raw == null) ? DocValues.emptySortedSet() : raw;
    perLeaf[slot] = present;
    docBases[slot] = leaf.docBase;
    totalCost += present.cost();
    slot++;
  }
  // The final entry closes the last leaf's doc range.
  docBases[leafCount] = maxDoc();
  return new MultiDocValues.MultiSortedSetDocValues(perLeaf, docBases, ordinalMap, totalCost);
}
use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.
the class TestJoinUtil method testRewrite.
/**
 * Indexes two documents with a sorted doc values join field, builds an ordinal-based join query
 * from two {@code MatchNoDocsQuery} instances and an empty ordinal map, and runs it. The test
 * passes as long as the search does not throw.
 */
public void testRewrite() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  Document firstDoc = new Document();
  firstDoc.add(new SortedDocValuesField("join_field", new BytesRef("abc")));
  writer.addDocument(firstDoc);

  Document secondDoc = new Document();
  secondDoc.add(new SortedDocValuesField("join_field", new BytesRef("abd")));
  writer.addDocument(secondDoc);

  IndexReader indexReader = writer.getReader();
  IndexSearcher searcher = newSearcher(indexReader);

  // Ordinal map built over zero segments.
  OrdinalMap emptyOrdinalMap = OrdinalMap.build(null, new SortedDocValues[0], 0f);
  Query fromQuery = new MatchNoDocsQuery();
  Query toQuery = new MatchNoDocsQuery();
  ScoreMode scoreMode = RandomPicks.randomFrom(random(), ScoreMode.values());
  Query joinQuery =
      JoinUtil.createJoinQuery(
          "join_field",
          fromQuery,
          toQuery,
          searcher,
          scoreMode,
          emptyOrdinalMap,
          0,
          Integer.MAX_VALUE);

  // no exception due to missing rewrites
  searcher.search(joinQuery, 1);

  indexReader.close();
  writer.close();
  directory.close();
}
use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.
the class TestJoinUtil method testSimpleOrdinalsJoin.
/**
 * Indexes two "product" documents and four "price" documents that share one sorted doc values
 * join field, then checks ordinal-based joins in both directions (product to prices and price to
 * products) against the expected doc ids.
 */
public void testSimpleOrdinalsJoin() throws Exception {
  final String idField = "id";
  final String productIdField = "productId";
  // Field telling which type a document belongs to; used to distinguish between documents
  // during joining.
  final String typeField = "type";
  // Single sorted doc values field holding the join value for every document type.
  // Typically a schema would create and populate this field automatically during indexing.
  final String joinField = idField + productIdField;

  Directory directory = newDirectory();
  RandomIndexWriter writer =
      new RandomIndexWriter(
          random(),
          directory,
          newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

  // doc 0: product "1"
  Document productOne = new Document();
  productOne.add(new TextField(idField, "1", Field.Store.NO));
  productOne.add(new TextField(typeField, "product", Field.Store.NO));
  productOne.add(new TextField("description", "random text", Field.Store.NO));
  productOne.add(new TextField("name", "name1", Field.Store.NO));
  productOne.add(new SortedDocValuesField(joinField, new BytesRef("1")));
  writer.addDocument(productOne);

  // doc 1: price 10.0 for product "1"
  Document productOneLowPrice = new Document();
  productOneLowPrice.add(new TextField(productIdField, "1", Field.Store.NO));
  productOneLowPrice.add(new TextField(typeField, "price", Field.Store.NO));
  productOneLowPrice.add(new TextField("price", "10.0", Field.Store.NO));
  productOneLowPrice.add(new SortedDocValuesField(joinField, new BytesRef("1")));
  writer.addDocument(productOneLowPrice);

  // doc 2: price 20.0 for product "1"
  Document productOneHighPrice = new Document();
  productOneHighPrice.add(new TextField(productIdField, "1", Field.Store.NO));
  productOneHighPrice.add(new TextField(typeField, "price", Field.Store.NO));
  productOneHighPrice.add(new TextField("price", "20.0", Field.Store.NO));
  productOneHighPrice.add(new SortedDocValuesField(joinField, new BytesRef("1")));
  writer.addDocument(productOneHighPrice);

  // doc 3: product "2"
  Document productTwo = new Document();
  productTwo.add(new TextField(idField, "2", Field.Store.NO));
  productTwo.add(new TextField(typeField, "product", Field.Store.NO));
  productTwo.add(new TextField("description", "more random text", Field.Store.NO));
  productTwo.add(new TextField("name", "name2", Field.Store.NO));
  productTwo.add(new SortedDocValuesField(joinField, new BytesRef("2")));
  writer.addDocument(productTwo);

  // Commit between the two batches (merging is disabled via NoMergePolicy); presumably this is
  // meant to leave the index with more than one segment.
  writer.commit();

  // doc 4: price 10.0 for product "2"
  Document productTwoLowPrice = new Document();
  productTwoLowPrice.add(new TextField(productIdField, "2", Field.Store.NO));
  productTwoLowPrice.add(new TextField(typeField, "price", Field.Store.NO));
  productTwoLowPrice.add(new TextField("price", "10.0", Field.Store.NO));
  productTwoLowPrice.add(new SortedDocValuesField(joinField, new BytesRef("2")));
  writer.addDocument(productTwoLowPrice);

  // doc 5: price 20.0 for product "2"
  Document productTwoHighPrice = new Document();
  productTwoHighPrice.add(new TextField(productIdField, "2", Field.Store.NO));
  productTwoHighPrice.add(new TextField(typeField, "price", Field.Store.NO));
  productTwoHighPrice.add(new TextField("price", "20.0", Field.Store.NO));
  productTwoHighPrice.add(new SortedDocValuesField(joinField, new BytesRef("2")));
  writer.addDocument(productTwoHighPrice);

  IndexSearcher indexSearcher = new IndexSearcher(writer.getReader());
  writer.close();

  // Collect the join field's sorted doc values from each leaf to build the global ordinal map.
  IndexReader topReader = indexSearcher.getIndexReader();
  SortedDocValues[] joinValuesPerLeaf = new SortedDocValues[topReader.leaves().size()];
  for (int leaf = 0; leaf < joinValuesPerLeaf.length; leaf++) {
    LeafReader leafReader = topReader.leaves().get(leaf).reader();
    joinValuesPerLeaf[leaf] = DocValues.getSorted(leafReader, joinField);
  }
  MultiDocValues.OrdinalMap ordinalMap =
      MultiDocValues.OrdinalMap.build(null, joinValuesPerLeaf, PackedInts.DEFAULT);

  Query priceTypeQuery = new TermQuery(new Term(typeField, "price"));

  // Search for product "name2" and return its prices
  Query productTwoByName = new TermQuery(new Term("name", "name2"));
  Query pricesOfProductTwo =
      JoinUtil.createJoinQuery(
          joinField, productTwoByName, priceTypeQuery, indexSearcher, ScoreMode.None, ordinalMap);
  TopDocs hits = indexSearcher.search(pricesOfProductTwo, 10);
  assertEquals(2, hits.totalHits);
  assertEquals(4, hits.scoreDocs[0].doc);
  assertEquals(5, hits.scoreDocs[1].doc);

  // Search for product "name1" and return its prices
  Query productOneByName = new TermQuery(new Term("name", "name1"));
  Query pricesOfProductOne =
      JoinUtil.createJoinQuery(
          joinField, productOneByName, priceTypeQuery, indexSearcher, ScoreMode.None, ordinalMap);
  hits = indexSearcher.search(pricesOfProductOne, 10);
  assertEquals(2, hits.totalHits);
  assertEquals(1, hits.scoreDocs[0].doc);
  assertEquals(2, hits.scoreDocs[1].doc);

  // Search for prices and return products
  Query highPriceQuery = new TermQuery(new Term("price", "20.0"));
  Query productTypeQuery = new TermQuery(new Term(typeField, "product"));
  Query productsWithHighPrice =
      JoinUtil.createJoinQuery(
          joinField, highPriceQuery, productTypeQuery, indexSearcher, ScoreMode.None, ordinalMap);
  hits = indexSearcher.search(productsWithHighPrice, 10);
  assertEquals(2, hits.totalHits);
  assertEquals(0, hits.scoreDocs[0].doc);
  assertEquals(3, hits.scoreDocs[1].doc);

  indexSearcher.getIndexReader().close();
  directory.close();
}
use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.
the class TestJoinUtil method testMinMaxScore.
/**
 * Indexes a random number of parent ("to") documents, each followed by a random number of child
 * ("from") documents carrying a random numeric price, and records the lowest and highest price
 * per parent. It then checks that ordinal-based joins with {@code ScoreMode.Min} and
 * {@code ScoreMode.Max} score every parent with exactly the recorded lowest / highest price.
 */
public void testMinMaxScore() throws Exception {
  String priceField = "price";
  Query priceQuery = numericDocValuesScoreQuery(priceField);

  Directory directory = newDirectory();
  RandomIndexWriter writer =
      new RandomIndexWriter(
          random(),
          directory,
          newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)));

  // Expected scores, keyed by the parent's stored "id" value.
  Map<String, Float> expectedMinByParent = new HashMap<>();
  Map<String, Float> expectedMaxByParent = new HashMap<>();

  int numParents = RandomNumbers.randomIntBetween(random(), 16, 64);
  for (int parent = 0; parent < numParents; parent++) {
    String parentId = Integer.toString(parent);
    Document parentDoc = new Document();
    parentDoc.add(new StringField("id", parentId, Field.Store.YES));
    parentDoc.add(new StringField("type", "to", Field.Store.NO));
    parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
    writer.addDocument(parentDoc);

    int numChildren = RandomNumbers.randomIntBetween(random(), 2, 16);
    int minPrice = Integer.MAX_VALUE;
    int maxPrice = Integer.MIN_VALUE;
    for (int child = 0; child < numChildren; child++) {
      String childId = Integer.toString(parent + child);
      Document childDoc = new Document();
      childDoc.add(new StringField("id", childId, Field.Store.YES));
      childDoc.add(new StringField("type", "from", Field.Store.NO));
      // Children join to their parent through the parent's id.
      childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
      int price = random().nextInt(1000);
      childDoc.add(new NumericDocValuesField(priceField, price));
      writer.addDocument(childDoc);

      if (price < minPrice) {
        minPrice = price;
      }
      if (price > maxPrice) {
        maxPrice = price;
      }
    }
    expectedMinByParent.put(parentId, (float) minPrice);
    expectedMaxByParent.put(parentId, (float) maxPrice);
  }
  writer.close();

  IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

  // One sorted doc values instance per leaf, placed at the leaf's ordinal.
  SortedDocValues[] joinValuesPerLeaf =
      new SortedDocValues[searcher.getIndexReader().leaves().size()];
  for (LeafReaderContext leafContext : searcher.getIndexReader().leaves()) {
    joinValuesPerLeaf[leafContext.ord] = DocValues.getSorted(leafContext.reader(), "join_field");
  }
  MultiDocValues.OrdinalMap ordinalMap =
      MultiDocValues.OrdinalMap.build(null, joinValuesPerLeaf, PackedInts.DEFAULT);

  BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
  fromQuery.add(priceQuery, BooleanClause.Occur.MUST);
  Query toQuery = new TermQuery(new Term("type", "to"));

  // Min: every parent must score its lowest child price.
  Query minJoinQuery =
      JoinUtil.createJoinQuery(
          "join_field", fromQuery.build(), toQuery, searcher, ScoreMode.Min, ordinalMap);
  TopDocs minHits = searcher.search(minJoinQuery, numParents);
  assertEquals(numParents, minHits.totalHits);
  for (ScoreDoc hit : minHits.scoreDocs) {
    String id = searcher.doc(hit.doc).get("id");
    assertEquals(expectedMinByParent.get(id), hit.score, 0f);
  }

  // Max: every parent must score its highest child price.
  Query maxJoinQuery =
      JoinUtil.createJoinQuery(
          "join_field", fromQuery.build(), toQuery, searcher, ScoreMode.Max, ordinalMap);
  TopDocs maxHits = searcher.search(maxJoinQuery, numParents);
  assertEquals(numParents, maxHits.totalHits);
  for (ScoreDoc hit : maxHits.scoreDocs) {
    String id = searcher.doc(hit.doc).get("id");
    assertEquals(expectedMaxByParent.get(id), hit.score, 0f);
  }

  searcher.getIndexReader().close();
  directory.close();
}
use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.
the class DefaultSortedSetDocValuesReaderState method getDocValues.
/**
 * Return top-level doc values.
 *
 * <p>If no {@link OrdinalMap} is cached for {@code field}, the result of
 * {@code MultiDocValues.getSortedSetValues} is returned directly; when that result is a
 * {@code MultiSortedSetDocValues} its mapping is cached, provided the mapping's owner is the
 * reader's cache key. Otherwise the per-leaf values are collected again and combined with the
 * cached map.
 *
 * @return the top-level doc values, or {@code null} when a leaf declares the field with a doc
 *     values type other than {@code SORTED_SET} (the uncached path returns whatever
 *     {@code MultiDocValues.getSortedSetValues} returns)
 * @throws IOException if reading doc values from a leaf fails
 */
@Override
public SortedSetDocValues getDocValues() throws IOException {
  // TODO: this is dup'd from slow composite reader wrapper ... can we factor it out to share?
  final OrdinalMap ordinalMap;
  // why are we using a map?
  synchronized (cachedOrdMaps) {
    final OrdinalMap cached = cachedOrdMaps.get(field);
    if (cached == null) {
      // Nothing cached yet, or the field is not backed by a multi doc values instance.
      final SortedSetDocValues merged = MultiDocValues.getSortedSetValues(reader, field);
      if (merged instanceof MultiDocValues.MultiSortedSetDocValues) {
        final OrdinalMap fresh = ((MultiDocValues.MultiSortedSetDocValues) merged).mapping;
        final IndexReader.CacheHelper helper = reader.getReaderCacheHelper();
        // Only remember the map when it is owned by the reader's cache key.
        if (helper != null && fresh.owner == helper.getKey()) {
          cachedOrdMaps.put(field, fresh);
        }
      }
      return merged;
    }
    ordinalMap = cached;
  }
  assert ordinalMap != null;

  // Cached map available: gather the per-leaf values and their doc bases.
  final int leafCount = reader.leaves().size();
  final SortedSetDocValues[] perLeaf = new SortedSetDocValues[leafCount];
  final int[] docBases = new int[leafCount + 1];
  long totalCost = 0;
  int slot = 0;
  for (LeafReaderContext leaf : reader.leaves()) {
    // Named leafReader so it does not shadow the top-level 'reader' field.
    final LeafReader leafReader = leaf.reader();
    final FieldInfo info = leafReader.getFieldInfos().fieldInfo(field);
    if (info != null && info.getDocValuesType() != DocValuesType.SORTED_SET) {
      // The field exists in this leaf but with a different doc values type.
      return null;
    }
    final SortedSetDocValues raw = leafReader.getSortedSetDocValues(field);
    final SortedSetDocValues present = (raw == null) ? DocValues.emptySortedSet() : raw;
    perLeaf[slot] = present;
    docBases[slot] = leaf.docBase;
    totalCost += present.cost();
    slot++;
  }
  // The final entry closes the last leaf's doc range.
  docBases[leafCount] = reader.maxDoc();
  return new MultiSortedSetDocValues(perLeaf, docBases, ordinalMap, totalCost);
}
Aggregations