Use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by Apache.
The class TestJoinUtil, method testOrdinalsJoinExplainNoMatches.
/**
 * Checks the explanations produced for a global-ordinals join query: a matching document reports
 * its join value, a non-matching document that has a join value reports that value, and a
 * document without any join value reports a plain "Not a match".
 */
public void testOrdinalsJoinExplainNoMatches() throws Exception {
  final String idField = "id";
  final String productIdField = "productId";
  // A field indicating to what type a document belongs, which is then used to distinguish between documents during joining.
  final String typeField = "type";
  // A single sorted doc values field that holds the join values for all document types.
  // Typically during indexing a schema will automatically create this field with the values
  final String joinField = idField + productIdField;

  // try-with-resources so that the reader, writer and directory are released even when an
  // assertion fails; resources are closed in reverse order (reader, writer, directory).
  try (Directory dir = newDirectory();
       IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE))) {
    // doc 0: product with join value "1"
    Document doc = new Document();
    doc.add(new TextField(idField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "product", Field.Store.NO));
    doc.add(new TextField("description", "random text", Field.Store.NO));
    doc.add(new TextField("name", "name1", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    w.addDocument(doc);

    // doc 1: product with join value "2"
    doc = new Document();
    doc.add(new TextField(idField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "product", Field.Store.NO));
    doc.add(new TextField("description", "random text", Field.Store.NO));
    doc.add(new TextField("name", "name2", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    w.addDocument(doc);

    // doc 2: price with join value "1"
    doc = new Document();
    doc.add(new TextField(productIdField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "10.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    w.addDocument(doc);

    // doc 3: price with join value "1"
    doc = new Document();
    doc.add(new TextField(productIdField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "20.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    w.addDocument(doc);

    // Randomly split the index into two segments so both single- and multi-segment
    // ordinal maps get exercised.
    if (random().nextBoolean()) {
      w.flush();
    }

    // doc 4: price with join value "2"
    doc = new Document();
    doc.add(new TextField(productIdField, "3", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "5.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    w.addDocument(doc);

    // doc 5: no join value at all
    doc = new Document();
    doc.add(new TextField("field", "value", Field.Store.NO));
    w.addDocument(doc);

    try (IndexReader r = DirectoryReader.open(w)) {
      IndexSearcher indexSearcher = new IndexSearcher(r);

      // Collect the per-segment sorted doc values and build the global ordinal map over them.
      SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
      for (int i = 0; i < values.length; i++) {
        LeafReader leafReader = r.leaves().get(i).reader();
        values[i] = DocValues.getSorted(leafReader, joinField);
      }
      MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);

      Query toQuery = new TermQuery(new Term("price", "5.0"));
      Query fromQuery = new TermQuery(new Term("name", "name2"));

      for (ScoreMode scoreMode : ScoreMode.values()) {
        Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, scoreMode, ordinalMap);
        TopDocs result = indexSearcher.search(joinQuery, 10);
        assertEquals(1, result.totalHits);
        // doc with price: 5.0
        assertEquals(4, result.scoreDocs[0].doc);

        // assertEquals takes (expected, actual); keep that order so failure messages read correctly.
        Explanation explanation = indexSearcher.explain(joinQuery, 4);
        assertTrue(explanation.isMatch());
        assertEquals("A match, join value 2", explanation.getDescription());

        explanation = indexSearcher.explain(joinQuery, 3);
        assertFalse(explanation.isMatch());
        assertEquals("Not a match, join value 1", explanation.getDescription());

        explanation = indexSearcher.explain(joinQuery, 5);
        assertFalse(explanation.isMatch());
        assertEquals("Not a match", explanation.getDescription());
      }
    }
  }
}
Use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by Apache.
The class TestJoinUtil, method testEquals_globalOrdinalsJoin.
/**
 * Checks equality of the queries returned by the global-ordinals variant of
 * JoinUtil.createJoinQuery: identical arguments give equal queries, while a different score
 * mode, a different from-query, or a different index reader give unequal ones.
 */
public void testEquals_globalOrdinalsJoin() throws Exception {
  final int docCount = atLeast(random(), 50);
  try (Directory directory = newDirectory();
       RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()))) {
    String joinFieldName = "field";

    // Initial corpus: names cycle through name0..name6, join values are random in [0, 13).
    int id = 0;
    while (id < docCount) {
      Document document = new Document();
      document.add(new TextField("id", "" + id, Field.Store.NO));
      document.add(new TextField("name", "name" + (id % 7), Field.Store.NO));
      document.add(new SortedDocValuesField(joinFieldName, new BytesRef("" + random().nextInt(13))));
      writer.addDocument(document);
      id++;
    }

    // Pick two distinct score modes.
    Set<ScoreMode> candidates = EnumSet.allOf(ScoreMode.class);
    ScoreMode firstMode = RandomPicks.randomFrom(random(), candidates);
    candidates.remove(firstMode);
    ScoreMode otherMode = RandomPicks.randomFrom(random(), candidates);

    final Query baseline;
    try (IndexReader reader = writer.getReader()) {
      int leafCount = reader.leaves().size();
      SortedDocValues[] perLeaf = new SortedDocValues[leafCount];
      for (int leaf = 0; leaf < leafCount; leaf++) {
        perLeaf[leaf] = DocValues.getSorted(reader.leaves().get(leaf).reader(), joinFieldName);
      }
      MultiDocValues.OrdinalMap globalOrds = MultiDocValues.OrdinalMap.build(null, perLeaf, PackedInts.DEFAULT);
      IndexSearcher searcher = new IndexSearcher(reader);

      baseline = JoinUtil.createJoinQuery(joinFieldName, new TermQuery(new Term("name", "name5")), new MatchAllDocsQuery(), searcher, firstMode, globalOrds);

      Query sameArguments = JoinUtil.createJoinQuery(joinFieldName, new TermQuery(new Term("name", "name5")), new MatchAllDocsQuery(), searcher, firstMode, globalOrds);
      assertEquals("identical calls to createJoinQuery", baseline, sameArguments);

      Query differentMode = JoinUtil.createJoinQuery(joinFieldName, new TermQuery(new Term("name", "name5")), new MatchAllDocsQuery(), searcher, otherMode, globalOrds);
      assertFalse("score mode (" + firstMode + " != " + otherMode + "), but queries are equal", baseline.equals(differentMode));

      Query differentFrom = JoinUtil.createJoinQuery(joinFieldName, new TermQuery(new Term("name", "name6")), new MatchAllDocsQuery(), searcher, firstMode, globalOrds);
      assertFalse("fromQuery (name:name5 != name:name6) but queries equals", baseline.equals(differentFrom));
    }

    // Add one more "name5" document per join value 0..12, so a fresh reader sees a changed index.
    for (int joinValue = 0; joinValue < 13; joinValue++) {
      Document extra = new Document();
      extra.add(new TextField("id", "new_id", Field.Store.NO));
      extra.add(new TextField("name", "name5", Field.Store.NO));
      extra.add(new SortedDocValuesField(joinFieldName, new BytesRef("" + joinValue)));
      writer.addDocument(extra);
    }

    try (IndexReader reader = writer.getReader()) {
      int leafCount = reader.leaves().size();
      SortedDocValues[] perLeaf = new SortedDocValues[leafCount];
      for (int leaf = 0; leaf < leafCount; leaf++) {
        perLeaf[leaf] = DocValues.getSorted(reader.leaves().get(leaf).reader(), joinFieldName);
      }
      MultiDocValues.OrdinalMap globalOrds = MultiDocValues.OrdinalMap.build(null, perLeaf, PackedInts.DEFAULT);
      IndexSearcher searcher = new IndexSearcher(reader);

      Query againstNewReader = JoinUtil.createJoinQuery(joinFieldName, new TermQuery(new Term("name", "name5")), new MatchAllDocsQuery(), searcher, firstMode, globalOrds);
      assertFalse("Query shouldn't be equal, because different index readers ", baseline.equals(againstNewReader));
    }
  }
}
Use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by Apache.
The class TestJoinUtil, method testMinMaxDocs.
/**
 * Indexes a random number of parent ("to") documents, each followed by a random number of child
 * ("from") documents sharing the parent's join value, then checks that a join query bounded by
 * min/max hits exactly as many documents as there are parents whose child count lies in
 * [min, max].
 */
public void testMinMaxDocs() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)));

  int minChildDocsPerParent = 2;
  int maxChildDocsPerParent = 16;
  int parentCount = RandomNumbers.randomIntBetween(random(), 16, 64);
  int[] childCounts = new int[parentCount];

  for (int parent = 0; parent < parentCount; parent++) {
    String parentId = Integer.toString(parent);

    Document parentDoc = new Document();
    parentDoc.add(new StringField("id", parentId, Field.Store.YES));
    parentDoc.add(new StringField("type", "to", Field.Store.NO));
    parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
    writer.addDocument(parentDoc);

    int children = RandomNumbers.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent);
    childCounts[parent] = children;
    for (int child = 0; child < children; child++) {
      Document childDoc = new Document();
      childDoc.add(new StringField("id", Integer.toString(parent + child), Field.Store.YES));
      childDoc.add(new StringField("type", "from", Field.Store.NO));
      // Children carry the parent's id as their join value.
      childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
      writer.addDocument(childDoc);
    }
  }
  writer.close();

  IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

  // One SortedDocValues per leaf, placed at the leaf's ordinal.
  SortedDocValues[] perLeaf = new SortedDocValues[searcher.getIndexReader().leaves().size()];
  for (LeafReaderContext leafContext : searcher.getIndexReader().leaves()) {
    perLeaf[leafContext.ord] = DocValues.getSorted(leafContext.reader(), "join_field");
  }
  MultiDocValues.OrdinalMap globalOrds = MultiDocValues.OrdinalMap.build(null, perLeaf, PackedInts.DEFAULT);

  Query fromQuery = new TermQuery(new Term("type", "from"));
  Query toQuery = new TermQuery(new Term("type", "to"));

  int iterations = RandomNumbers.randomIntBetween(random(), 3, 9);
  for (int iter = 1; iter <= iterations; iter++) {
    final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
    int min = RandomNumbers.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent - 1);
    int max = RandomNumbers.randomIntBetween(random(), min, maxChildDocsPerParent);
    if (VERBOSE) {
      System.out.println("iter=" + iter);
      System.out.println("scoreMode=" + scoreMode);
      System.out.println("min=" + min);
      System.out.println("max=" + max);
    }

    Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, searcher, scoreMode, globalOrds, min, max);
    TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    searcher.search(joinQuery, hitCounter);

    // Expected hits: parents whose number of children is within [min, max].
    int expectedHits = 0;
    for (int count : childCounts) {
      boolean outOfRange = count < min || count > max;
      if (!outOfRange) {
        expectedHits++;
      }
    }
    assertEquals(expectedHits, hitCounter.getTotalHits());
  }

  searcher.getIndexReader().close();
  directory.close();
}
Use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by Apache.
The class TestOrdinalMap, method testRamBytesUsed.
/**
 * Indexes documents with random sorted ("sdv") and sorted-set ("ssdv") doc values and, whenever
 * the multi-reader view is backed by an OrdinalMap, checks that the map's ramBytesUsed() equals
 * the size measured by RamUsageTester with ORDINAL_MAP_ACCUMULATOR.
 */
public void testRamBytesUsed() throws IOException {
  Directory directory = newDirectory();
  IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random())).setCodec(TestUtil.alwaysDocValuesFormat(TestUtil.getDefaultDocValuesFormat()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  final int docCount = TestUtil.nextInt(random(), 10, 1000);
  final int maxTermLength = TestUtil.nextInt(random(), 1, 4);

  for (int docIndex = 0; docIndex < docCount; docIndex++) {
    Document document = new Document();

    // Roughly half of the documents get a single sorted value.
    if (random().nextBoolean()) {
      BytesRef sortedTerm = new BytesRef(TestUtil.randomSimpleString(random(), maxTermLength));
      document.add(new SortedDocValuesField("sdv", sortedTerm));
    }

    // Each document gets 0 to 2 sorted-set values.
    int pendingSetValues = random().nextInt(3);
    while (pendingSetValues > 0) {
      BytesRef setTerm = new BytesRef(TestUtil.randomSimpleString(random(), maxTermLength));
      document.add(new SortedSetDocValuesField("ssdv", setTerm));
      pendingSetValues--;
    }

    writer.addDocument(document);

    // Occasionally open and immediately close a reader.
    if (rarely()) {
      writer.getReader().close();
    }
  }
  writer.commit();

  DirectoryReader reader = writer.getReader();

  SortedDocValues sorted = MultiDocValues.getSortedValues(reader, "sdv");
  if (sorted instanceof MultiDocValues.MultiSortedDocValues) {
    OrdinalMap sortedMap = ((MultiDocValues.MultiSortedDocValues) sorted).mapping;
    long measured = RamUsageTester.sizeOf(sortedMap, ORDINAL_MAP_ACCUMULATOR);
    assertEquals(measured, sortedMap.ramBytesUsed());
  }

  SortedSetDocValues sortedSet = MultiDocValues.getSortedSetValues(reader, "ssdv");
  if (sortedSet instanceof MultiDocValues.MultiSortedSetDocValues) {
    OrdinalMap sortedSetMap = ((MultiDocValues.MultiSortedSetDocValues) sortedSet).mapping;
    long measured = RamUsageTester.sizeOf(sortedSetMap, ORDINAL_MAP_ACCUMULATOR);
    assertEquals(measured, sortedSetMap.ramBytesUsed());
  }

  writer.close();
  reader.close();
  directory.close();
}
Use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by Apache.
The class DocValuesConsumer, method mergeSortedField.
/**
 * Merges the sorted docvalues of <code>fieldInfo</code> from all segments in
 * <code>mergeState</code>.
 * <p>
 * The default implementation calls {@link #addSortedField}, passing
 * a producer whose {@code getSorted} returns a view that merges ordinals and values
 * across the segments and filters deleted documents.
 *
 * @param fieldInfo  the merged field whose sorted doc values are written
 * @param mergeState the per-segment producers, field infos, live docs and doc maps to merge
 * @throws IOException if reading a segment's values or writing the merged field fails
 */
public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState) throws IOException {
  final int numReaders = mergeState.docValuesProducers.length;
  final SortedDocValues[] dvs = new SortedDocValues[numReaders];
  for (int i = 0; i < numReaders; i++) {
    dvs[i] = sortedValuesOrEmpty(fieldInfo, mergeState, i);
  }

  // step 1: iterate thru each sub and mark terms still in use
  TermsEnum[] liveTerms = new TermsEnum[dvs.length];
  long[] weights = new long[liveTerms.length];
  for (int sub = 0; sub < numReaders; sub++) {
    SortedDocValues dv = dvs[sub];
    Bits liveDocs = mergeState.liveDocs[sub];
    if (liveDocs == null) {
      // No live-docs bitset for this segment: every term is kept.
      liveTerms[sub] = dv.termsEnum();
      weights[sub] = dv.getValueCount();
    } else {
      // Only keep ordinals referenced by at least one live document.
      LongBitSet bitset = new LongBitSet(dv.getValueCount());
      int docID;
      while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
        if (liveDocs.get(docID)) {
          int ord = dv.ordValue();
          if (ord >= 0) {
            bitset.set(ord);
          }
        }
      }
      liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
      weights[sub] = bitset.cardinality();
    }
  }

  // step 2: create ordinal map (this conceptually does the "merging")
  final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT);

  // step 3: add field
  addSortedField(fieldInfo, new EmptyDocValuesProducer() {
    @Override
    public SortedDocValues getSorted(FieldInfo fieldInfoIn) throws IOException {
      if (fieldInfoIn != fieldInfo) {
        throw new IllegalArgumentException("wrong FieldInfo");
      }

      // We must make new iterators + DocIDMerger for each iterator:
      List<SortedDocValuesSub> subs = new ArrayList<>();
      long cost = 0;
      for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
        SortedDocValues values = sortedValuesOrEmpty(fieldInfo, mergeState, i);
        cost += values.cost();
        subs.add(new SortedDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
      }

      final long finalCost = cost;
      final DocIDMerger<SortedDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);

      return new SortedDocValues() {
        private int docID = -1;
        private int ord;

        @Override
        public int docID() {
          return docID;
        }

        @Override
        public int nextDoc() throws IOException {
          SortedDocValuesSub sub = docIDMerger.next();
          if (sub == null) {
            return docID = NO_MORE_DOCS;
          }
          int subOrd = sub.values.ordValue();
          assert subOrd != -1;
          // Translate the segment-local ordinal into the merged (global) ordinal.
          ord = (int) sub.map.get(subOrd);
          docID = sub.mappedDocID;
          return docID;
        }

        @Override
        public int ordValue() {
          return ord;
        }

        @Override
        public int advance(int target) {
          // Only forward iteration via nextDoc() is supported by this merged view.
          throw new UnsupportedOperationException();
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
          throw new UnsupportedOperationException();
        }

        @Override
        public long cost() {
          return finalCost;
        }

        @Override
        public int getValueCount() {
          return (int) map.getValueCount();
        }

        @Override
        public BytesRef lookupOrd(int ord) throws IOException {
          // Resolve the global ordinal through the first segment that contains the term.
          int segmentNumber = map.getFirstSegmentNumber(ord);
          int segmentOrd = (int) map.getFirstSegmentOrd(ord);
          return dvs[segmentNumber].lookupOrd(segmentOrd);
        }
      };
    }
  });
}

/**
 * Returns a fresh SORTED doc values iterator for the merged field in the given segment, or an
 * empty instance when the segment has no producer, does not have the field as SORTED, or the
 * producer returns null.
 * <p>
 * The segment's own {@link FieldInfo} (looked up by name) is passed to the producer, so both
 * passes of {@link #mergeSortedField} query each segment the same way.
 */
private static SortedDocValues sortedValuesOrEmpty(FieldInfo fieldInfo, MergeState mergeState, int segment) throws IOException {
  SortedDocValues values = null;
  DocValuesProducer docValuesProducer = mergeState.docValuesProducers[segment];
  if (docValuesProducer != null) {
    FieldInfo readerFieldInfo = mergeState.fieldInfos[segment].fieldInfo(fieldInfo.name);
    if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
      values = docValuesProducer.getSorted(readerFieldInfo);
    }
  }
  return values != null ? values : DocValues.emptySorted();
}
Aggregations