Search in sources :

Example 6 with OrdinalMap

use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.

the class SlowCompositeReaderWrapper method getSortedSetDocValues.

/**
 * Returns top-level sorted-set doc values for {@code field}, merged across the leaves of the
 * wrapped reader {@code in}.
 *
 * <p>When no ordinal map is cached for the field, the values come straight from
 * {@code MultiDocValues.getSortedSetValues}; if that result is a multi-segment view whose
 * ordinal map is owned by this reader's cache key, the map is stored in {@code cachedOrdMaps}.
 * When a map is already cached, a new merged view is assembled from the per-leaf values on top
 * of that map.
 *
 * @param field name of the doc-values field
 * @return the merged doc values; on the cached-map path, {@code null} if any leaf declares the
 *     field with a doc-values type other than {@code SORTED_SET}
 * @throws IOException propagated from the underlying doc-values lookups
 */
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    ensureOpen();
    final OrdinalMap ordinalMap;
    synchronized (cachedOrdMaps) {
        final OrdinalMap cached = cachedOrdMaps.get(field);
        if (cached == null) {
            // Nothing cached yet, or the field never yields a multi-segment view.
            final SortedSetDocValues merged = MultiDocValues.getSortedSetValues(in, field);
            if (merged instanceof MultiDocValues.MultiSortedSetDocValues) {
                final OrdinalMap fresh = ((MultiDocValues.MultiSortedSetDocValues) merged).mapping;
                final IndexReader.CacheHelper helper = getReaderCacheHelper();
                // Only keep maps whose owner is this reader's cache key.
                if (helper != null && fresh.owner == helper.getKey()) {
                    cachedOrdMaps.put(field, fresh);
                }
            }
            return merged;
        }
        ordinalMap = cached;
    }
    assert ordinalMap != null;

    // Cached map available: gather the per-leaf values and rebuild the merged view around it.
    final int leafCount = in.leaves().size();
    final SortedSetDocValues[] perLeaf = new SortedSetDocValues[leafCount];
    final int[] docStarts = new int[leafCount + 1];
    long totalCost = 0;
    for (int leaf = 0; leaf < leafCount; leaf++) {
        final LeafReaderContext leafContext = in.leaves().get(leaf);
        final LeafReader leafReader = leafContext.reader();
        final FieldInfo info = leafReader.getFieldInfos().fieldInfo(field);
        final boolean wrongType = info != null && info.getDocValuesType() != DocValuesType.SORTED_SET;
        if (wrongType) {
            return null;
        }
        final SortedSetDocValues fetched = leafReader.getSortedSetDocValues(field);
        // A leaf without values for the field contributes an empty instance.
        final SortedSetDocValues leafValues = fetched != null ? fetched : DocValues.emptySortedSet();
        perLeaf[leaf] = leafValues;
        docStarts[leaf] = leafContext.docBase;
        totalCost += leafValues.cost();
    }
    // Trailing entry holds this reader's maxDoc().
    docStarts[leafCount] = maxDoc();
    return new MultiDocValues.MultiSortedSetDocValues(perLeaf, docStarts, ordinalMap, totalCost);
}
Also used : OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap)

Example 7 with OrdinalMap

use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.

the class TestJoinUtil method testRewrite.

/**
 * Builds an ordinal-map join query from two {@code MatchNoDocsQuery} instances and an ordinal
 * map built over zero segments, then checks that searching it does not throw.
 */
public void testRewrite() throws IOException {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    // Two documents, each carrying only the join field, with distinct values.
    Document first = new Document();
    first.add(new SortedDocValuesField("join_field", new BytesRef("abc")));
    writer.addDocument(first);

    Document second = new Document();
    second.add(new SortedDocValuesField("join_field", new BytesRef("abd")));
    writer.addDocument(second);

    IndexReader indexReader = writer.getReader();
    IndexSearcher indexSearcher = newSearcher(indexReader);

    // Ordinal map over an empty array of per-segment values.
    OrdinalMap emptyMap = OrdinalMap.build(null, new SortedDocValues[0], 0f);
    ScoreMode pickedMode = RandomPicks.randomFrom(random(), ScoreMode.values());
    Query joinQuery = JoinUtil.createJoinQuery(
        "join_field",
        new MatchNoDocsQuery(),
        new MatchNoDocsQuery(),
        indexSearcher,
        pickedMode,
        emptyMap,
        0,
        Integer.MAX_VALUE);

    // no exception due to missing rewrites
    indexSearcher.search(joinQuery, 1);

    indexReader.close();
    writer.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) FieldValueQuery(org.apache.lucene.search.FieldValueQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) Directory(org.apache.lucene.store.Directory)

Example 8 with OrdinalMap

use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.

the class TestJoinUtil method testSimpleOrdinalsJoin.

/**
 * Indexes two products and four prices that share one sorted doc values join field, builds an
 * ordinal map over the per-leaf values, and checks the join hits in both directions
 * (product to prices, and price to products).
 */
public void testSimpleOrdinalsJoin() throws Exception {
    final String idField = "id";
    final String productIdField = "productId";
    // Names the kind of document ("product" or "price"); used to distinguish between
    // document types while joining.
    final String typeField = "type";
    // One sorted doc values field carries the join value for every document type.
    // Typically a schema would create and populate this field automatically at index time.
    final String joinField = idField + productIdField;

    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        random(),
        directory,
        newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    // doc 0: product "1"
    Document productOne = new Document();
    productOne.add(new TextField(idField, "1", Field.Store.NO));
    productOne.add(new TextField(typeField, "product", Field.Store.NO));
    productOne.add(new TextField("description", "random text", Field.Store.NO));
    productOne.add(new TextField("name", "name1", Field.Store.NO));
    productOne.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    writer.addDocument(productOne);

    // doc 1: price 10.0 of product "1"
    Document productOneLowPrice = new Document();
    productOneLowPrice.add(new TextField(productIdField, "1", Field.Store.NO));
    productOneLowPrice.add(new TextField(typeField, "price", Field.Store.NO));
    productOneLowPrice.add(new TextField("price", "10.0", Field.Store.NO));
    productOneLowPrice.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    writer.addDocument(productOneLowPrice);

    // doc 2: price 20.0 of product "1"
    Document productOneHighPrice = new Document();
    productOneHighPrice.add(new TextField(productIdField, "1", Field.Store.NO));
    productOneHighPrice.add(new TextField(typeField, "price", Field.Store.NO));
    productOneHighPrice.add(new TextField("price", "20.0", Field.Store.NO));
    productOneHighPrice.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    writer.addDocument(productOneHighPrice);

    // doc 3: product "2"
    Document productTwo = new Document();
    productTwo.add(new TextField(idField, "2", Field.Store.NO));
    productTwo.add(new TextField(typeField, "product", Field.Store.NO));
    productTwo.add(new TextField("description", "more random text", Field.Store.NO));
    productTwo.add(new TextField("name", "name2", Field.Store.NO));
    productTwo.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    writer.addDocument(productTwo);

    // Commit here; the remaining two documents are added after this commit.
    writer.commit();

    // doc 4: price 10.0 of product "2"
    Document productTwoLowPrice = new Document();
    productTwoLowPrice.add(new TextField(productIdField, "2", Field.Store.NO));
    productTwoLowPrice.add(new TextField(typeField, "price", Field.Store.NO));
    productTwoLowPrice.add(new TextField("price", "10.0", Field.Store.NO));
    productTwoLowPrice.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    writer.addDocument(productTwoLowPrice);

    // doc 5: price 20.0 of product "2"
    Document productTwoHighPrice = new Document();
    productTwoHighPrice.add(new TextField(productIdField, "2", Field.Store.NO));
    productTwoHighPrice.add(new TextField(typeField, "price", Field.Store.NO));
    productTwoHighPrice.add(new TextField("price", "20.0", Field.Store.NO));
    productTwoHighPrice.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    writer.addDocument(productTwoHighPrice);

    IndexSearcher searcher = new IndexSearcher(writer.getReader());
    writer.close();

    // Collect the join field's sorted doc values from every leaf, in leaf order.
    IndexReader topReader = searcher.getIndexReader();
    SortedDocValues[] perLeafValues = new SortedDocValues[topReader.leaves().size()];
    for (int leaf = 0; leaf < perLeafValues.length; leaf++) {
        LeafReader leafReader = topReader.leaves().get(leaf).reader();
        perLeafValues[leaf] = DocValues.getSorted(leafReader, joinField);
    }
    MultiDocValues.OrdinalMap ordinalMap =
        MultiDocValues.OrdinalMap.build(null, perLeafValues, PackedInts.DEFAULT);

    // Search for product and return prices
    Query priceTypeQuery = new TermQuery(new Term(typeField, "price"));
    Query nameTwoQuery = new TermQuery(new Term("name", "name2"));
    Query pricesOfNameTwo = JoinUtil.createJoinQuery(
        joinField, nameTwoQuery, priceTypeQuery, searcher, ScoreMode.None, ordinalMap);
    TopDocs nameTwoHits = searcher.search(pricesOfNameTwo, 10);
    assertEquals(2, nameTwoHits.totalHits);
    assertEquals(4, nameTwoHits.scoreDocs[0].doc);
    assertEquals(5, nameTwoHits.scoreDocs[1].doc);

    Query nameOneQuery = new TermQuery(new Term("name", "name1"));
    Query pricesOfNameOne = JoinUtil.createJoinQuery(
        joinField, nameOneQuery, priceTypeQuery, searcher, ScoreMode.None, ordinalMap);
    TopDocs nameOneHits = searcher.search(pricesOfNameOne, 10);
    assertEquals(2, nameOneHits.totalHits);
    assertEquals(1, nameOneHits.scoreDocs[0].doc);
    assertEquals(2, nameOneHits.scoreDocs[1].doc);

    // Search for prices and return products
    Query twentyPriceQuery = new TermQuery(new Term("price", "20.0"));
    Query productTypeQuery = new TermQuery(new Term(typeField, "product"));
    Query productsWithTwentyPrice = JoinUtil.createJoinQuery(
        joinField, twentyPriceQuery, productTypeQuery, searcher, ScoreMode.None, ordinalMap);
    TopDocs productHits = searcher.search(productsWithTwentyPrice, 10);
    assertEquals(2, productHits.totalHits);
    assertEquals(0, productHits.scoreDocs[0].doc);
    assertEquals(3, productHits.scoreDocs[1].doc);

    searcher.getIndexReader().close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) LeafReader(org.apache.lucene.index.LeafReader) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) FieldValueQuery(org.apache.lucene.search.FieldValueQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MultiDocValues(org.apache.lucene.index.MultiDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap)

Example 9 with OrdinalMap

use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.

the class TestJoinUtil method testMinMaxScore.

/**
 * Indexes a random number of parent documents, each followed by a random number of children
 * carrying a random price, and checks that joins with {@code ScoreMode.Min} and
 * {@code ScoreMode.Max} score every parent with the lowest and highest price recorded for it.
 */
public void testMinMaxScore() throws Exception {
    String priceField = "price";
    Query priceQuery = numericDocValuesScoreQuery(priceField);
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        random(),
        directory,
        newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)));

    // Expected scores, keyed by parent id.
    Map<String, Float> expectedMin = new HashMap<>();
    Map<String, Float> expectedMax = new HashMap<>();

    int parentCount = RandomNumbers.randomIntBetween(random(), 16, 64);
    for (int parent = 0; parent < parentCount; parent++) {
        String parentId = Integer.toString(parent);
        Document parentDoc = new Document();
        parentDoc.add(new StringField("id", parentId, Field.Store.YES));
        parentDoc.add(new StringField("type", "to", Field.Store.NO));
        parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
        writer.addDocument(parentDoc);

        int childCount = RandomNumbers.randomIntBetween(random(), 2, 16);
        int minPrice = Integer.MAX_VALUE;
        int maxPrice = Integer.MIN_VALUE;
        for (int child = 0; child < childCount; child++) {
            String childId = Integer.toString(parent + child);
            Document childDoc = new Document();
            childDoc.add(new StringField("id", childId, Field.Store.YES));
            childDoc.add(new StringField("type", "from", Field.Store.NO));
            // Children point at their parent through the shared join field.
            childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
            int price = random().nextInt(1000);
            childDoc.add(new NumericDocValuesField(priceField, price));
            writer.addDocument(childDoc);
            if (price < minPrice) {
                minPrice = price;
            }
            if (price > maxPrice) {
                maxPrice = price;
            }
        }
        expectedMin.put(parentId, (float) minPrice);
        expectedMax.put(parentId, (float) maxPrice);
    }
    writer.close();

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

    // Per-leaf sorted doc values of the join field, placed at each leaf's ordinal.
    SortedDocValues[] perLeafValues = new SortedDocValues[searcher.getIndexReader().leaves().size()];
    for (LeafReaderContext leafContext : searcher.getIndexReader().leaves()) {
        perLeafValues[leafContext.ord] = DocValues.getSorted(leafContext.reader(), "join_field");
    }
    MultiDocValues.OrdinalMap ordinalMap =
        MultiDocValues.OrdinalMap.build(null, perLeafValues, PackedInts.DEFAULT);

    BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
    fromQuery.add(priceQuery, BooleanClause.Occur.MUST);
    Query toQuery = new TermQuery(new Term("type", "to"));

    // ScoreMode.Min: each parent's score must equal its lowest recorded price.
    Query minJoin = JoinUtil.createJoinQuery(
        "join_field", fromQuery.build(), toQuery, searcher, ScoreMode.Min, ordinalMap);
    TopDocs minHits = searcher.search(minJoin, parentCount);
    assertEquals(parentCount, minHits.totalHits);
    for (ScoreDoc hit : minHits.scoreDocs) {
        String id = searcher.doc(hit.doc).get("id");
        assertEquals(expectedMin.get(id), hit.score, 0f);
    }

    // ScoreMode.Max: each parent's score must equal its highest recorded price.
    Query maxJoin = JoinUtil.createJoinQuery(
        "join_field", fromQuery.build(), toQuery, searcher, ScoreMode.Max, ordinalMap);
    TopDocs maxHits = searcher.search(maxJoin, parentCount);
    assertEquals(parentCount, maxHits.totalHits);
    for (ScoreDoc hit : maxHits.scoreDocs) {
        String id = searcher.doc(hit.doc).get("id");
        assertEquals(expectedMax.get(id), hit.score, 0f);
    }

    searcher.getIndexReader().close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) FieldValueQuery(org.apache.lucene.search.FieldValueQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) HashMap(java.util.HashMap) Document(org.apache.lucene.document.Document) MultiDocValues(org.apache.lucene.index.MultiDocValues) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) SortedDocValues(org.apache.lucene.index.SortedDocValues) StringField(org.apache.lucene.document.StringField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 10 with OrdinalMap

use of org.apache.lucene.index.MultiDocValues.OrdinalMap in project lucene-solr by apache.

the class DefaultSortedSetDocValuesReaderState method getDocValues.

/**
 * Return top-level doc values.
 *
 * <p>When no ordinal map is cached for {@code field}, the result of
 * {@code MultiDocValues.getSortedSetValues} is returned directly; if it is a multi-segment view
 * whose ordinal map is owned by the reader's cache key, that map is stored in
 * {@code cachedOrdMaps} first. When a map is already cached, a new merged view is assembled from
 * the per-leaf values on top of it.
 *
 * @return the merged doc values; on the cached-map path, {@code null} if any leaf declares the
 *     field with a doc-values type other than {@code SORTED_SET}
 * @throws IOException propagated from the underlying doc-values lookups
 */
@Override
public SortedSetDocValues getDocValues() throws IOException {
    // TODO: this is dup'd from slow composite reader wrapper ... can we factor it out to share?
    OrdinalMap map = null;
    // why are we using a map?
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedSetDocValues dv = MultiDocValues.getSortedSetValues(reader, field);
            if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
                map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping;
                IndexReader.CacheHelper cacheHelper = reader.getReaderCacheHelper();
                // Only keep maps whose owner is the reader's cache key.
                if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }
    assert map != null;
    int size = reader.leaves().size();
    final SortedSetDocValues[] values = new SortedSetDocValues[size];
    final int[] starts = new int[size + 1];
    long cost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = reader.leaves().get(i);
        // Named leafReader so it does not shadow the outer reader, which the loop header
        // above and the maxDoc() call below both refer to.
        final LeafReader leafReader = context.reader();
        final FieldInfo fieldInfo = leafReader.getFieldInfos().fieldInfo(field);
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
            return null;
        }
        SortedSetDocValues v = leafReader.getSortedSetDocValues(field);
        if (v == null) {
            // A leaf without values for the field contributes an empty instance.
            v = DocValues.emptySortedSet();
        }
        values[i] = v;
        starts[i] = context.docBase;
        cost += v.cost();
    }
    // Trailing entry holds the outer reader's maxDoc().
    starts[size] = reader.maxDoc();
    return new MultiSortedSetDocValues(values, starts, map, cost);
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) MultiDocValues(org.apache.lucene.index.MultiDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) FieldInfo(org.apache.lucene.index.FieldInfo)

Aggregations

OrdinalMap (org.apache.lucene.index.MultiDocValues.OrdinalMap)15 BytesRef (org.apache.lucene.util.BytesRef)11 MultiDocValues (org.apache.lucene.index.MultiDocValues)8 SortedDocValues (org.apache.lucene.index.SortedDocValues)8 Document (org.apache.lucene.document.Document)7 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)7 Directory (org.apache.lucene.store.Directory)7 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)6 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)6 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)6 BooleanQuery (org.apache.lucene.search.BooleanQuery)6 FieldValueQuery (org.apache.lucene.search.FieldValueQuery)6 IndexSearcher (org.apache.lucene.search.IndexSearcher)6 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)6 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)6 Query (org.apache.lucene.search.Query)6 TermQuery (org.apache.lucene.search.TermQuery)6 DoublePoint (org.apache.lucene.document.DoublePoint)5 FloatPoint (org.apache.lucene.document.FloatPoint)5 IntPoint (org.apache.lucene.document.IntPoint)5