Examples with SortedDocValues - org.apache.lucene.index.SortedDocValues

Example 16 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class TestJoinUtil method testSimpleOrdinalsJoin.

public void testSimpleOrdinalsJoin() throws Exception {
    final String idField = "id";
    final String productIdField = "productId";
    // A field indicating to what type a document belongs, which is then used to distinques between documents during joining.
    final String typeField = "type";
    // A single sorted doc values field that holds the join values for all document types.
    // Typically during indexing a schema will automatically create this field with the values
    final String joinField = idField + productIdField;
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
    // 0
    Document doc = new Document();
    doc.add(new TextField(idField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "product", Field.Store.NO));
    doc.add(new TextField("description", "random text", Field.Store.NO));
    doc.add(new TextField("name", "name1", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    w.addDocument(doc);
    // 1
    doc = new Document();
    doc.add(new TextField(productIdField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "10.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    w.addDocument(doc);
    // 2
    doc = new Document();
    doc.add(new TextField(productIdField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "20.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    w.addDocument(doc);
    // 3
    doc = new Document();
    doc.add(new TextField(idField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "product", Field.Store.NO));
    doc.add(new TextField("description", "more random text", Field.Store.NO));
    doc.add(new TextField("name", "name2", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    w.addDocument(doc);
    w.commit();
    // 4
    doc = new Document();
    doc.add(new TextField(productIdField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "10.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    w.addDocument(doc);
    // 5
    doc = new Document();
    doc.add(new TextField(productIdField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "20.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    w.addDocument(doc);
    IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
    w.close();
    IndexReader r = indexSearcher.getIndexReader();
    SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
    for (int i = 0; i < values.length; i++) {
        LeafReader leafReader = r.leaves().get(i).reader();
        values[i] = DocValues.getSorted(leafReader, joinField);
    }
    MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
    Query toQuery = new TermQuery(new Term(typeField, "price"));
    Query fromQuery = new TermQuery(new Term("name", "name2"));
    // Search for product and return prices
    Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    TopDocs result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(4, result.scoreDocs[0].doc);
    assertEquals(5, result.scoreDocs[1].doc);
    fromQuery = new TermQuery(new Term("name", "name1"));
    joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(1, result.scoreDocs[0].doc);
    assertEquals(2, result.scoreDocs[1].doc);
    // Search for prices and return products
    fromQuery = new TermQuery(new Term("price", "20.0"));
    toQuery = new TermQuery(new Term(typeField, "product"));
    joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(0, result.scoreDocs[0].doc);
    assertEquals(3, result.scoreDocs[1].doc);
    indexSearcher.getIndexReader().close();
    dir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) LeafReader(org.apache.lucene.index.LeafReader) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) FieldValueQuery(org.apache.lucene.search.FieldValueQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MultiDocValues(org.apache.lucene.index.MultiDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap)

Example 17 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class TestJoinUtil method createContext.

private IndexIterationContext createContext(int nDocs, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) throws IOException {
    if (globalOrdinalJoin) {
        assertFalse("ordinal join doesn't support multiple join values per document", multipleValuesPerDocument);
    }
    Directory dir = newDirectory();
    final Random random = random();
    RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(new MockAnalyzer(random, MockTokenizer.KEYWORD, false)));
    IndexIterationContext context = new IndexIterationContext();
    int numRandomValues = nDocs / RandomNumbers.randomIntBetween(random, 1, 4);
    context.randomUniqueValues = new String[numRandomValues];
    Set<String> trackSet = new HashSet<>();
    context.randomFrom = new boolean[numRandomValues];
    for (int i = 0; i < numRandomValues; i++) {
        String uniqueRandomValue;
        do {
            // the trick is to generate values which will be ordered similarly for string, ints&longs, positive nums makes it easier
            final int nextInt = random.nextInt(Integer.MAX_VALUE);
            uniqueRandomValue = String.format(Locale.ROOT, "%08x", nextInt);
            assert nextInt == Integer.parseUnsignedInt(uniqueRandomValue, 16);
        } while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
        // Generate unique values and empty strings aren't allowed.
        trackSet.add(uniqueRandomValue);
        context.randomFrom[i] = random.nextBoolean();
        context.randomUniqueValues[i] = uniqueRandomValue;
    }
    List<String> randomUniqueValuesReplica = new ArrayList<>(Arrays.asList(context.randomUniqueValues));
    RandomDoc[] docs = new RandomDoc[nDocs];
    for (int i = 0; i < nDocs; i++) {
        String id = Integer.toString(i);
        int randomI = random.nextInt(context.randomUniqueValues.length);
        String value = context.randomUniqueValues[randomI];
        Document document = new Document();
        document.add(newTextField(random, "id", id, Field.Store.YES));
        document.add(newTextField(random, "value", value, Field.Store.NO));
        boolean from = context.randomFrom[randomI];
        int numberOfLinkValues = multipleValuesPerDocument ? Math.min(2 + random.nextInt(10), context.randomUniqueValues.length) : 1;
        docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
        if (globalOrdinalJoin) {
            document.add(newStringField("type", from ? "from" : "to", Field.Store.NO));
        }
        final List<String> subValues;
        {
            int start = randomUniqueValuesReplica.size() == numberOfLinkValues ? 0 : random.nextInt(randomUniqueValuesReplica.size() - numberOfLinkValues);
            subValues = randomUniqueValuesReplica.subList(start, start + numberOfLinkValues);
            Collections.shuffle(subValues, random);
        }
        for (String linkValue : subValues) {
            assert !docs[i].linkValues.contains(linkValue);
            docs[i].linkValues.add(linkValue);
            if (from) {
                if (!context.fromDocuments.containsKey(linkValue)) {
                    context.fromDocuments.put(linkValue, new ArrayList<>());
                }
                if (!context.randomValueFromDocs.containsKey(value)) {
                    context.randomValueFromDocs.put(value, new ArrayList<>());
                }
                context.fromDocuments.get(linkValue).add(docs[i]);
                context.randomValueFromDocs.get(value).add(docs[i]);
                addLinkFields(random, document, "from", linkValue, multipleValuesPerDocument, globalOrdinalJoin);
            } else {
                if (!context.toDocuments.containsKey(linkValue)) {
                    context.toDocuments.put(linkValue, new ArrayList<>());
                }
                if (!context.randomValueToDocs.containsKey(value)) {
                    context.randomValueToDocs.put(value, new ArrayList<>());
                }
                context.toDocuments.get(linkValue).add(docs[i]);
                context.randomValueToDocs.get(value).add(docs[i]);
                addLinkFields(random, document, "to", linkValue, multipleValuesPerDocument, globalOrdinalJoin);
            }
        }
        w.addDocument(document);
        if (random.nextInt(10) == 4) {
            w.commit();
        }
        if (VERBOSE) {
            System.out.println("Added document[" + docs[i].id + "]: " + document);
        }
    }
    if (random.nextBoolean()) {
        w.forceMerge(1);
    }
    w.close();
    // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
    // any ScoreMode.
    DirectoryReader topLevelReader = DirectoryReader.open(dir);
    IndexSearcher searcher = newSearcher(topLevelReader);
    for (int i = 0; i < context.randomUniqueValues.length; i++) {
        String uniqueRandomValue = context.randomUniqueValues[i];
        final String fromField;
        final String toField;
        final Map<String, Map<Integer, JoinScore>> queryVals;
        if (context.randomFrom[i]) {
            fromField = "from";
            toField = "to";
            queryVals = context.fromHitsToJoinScore;
        } else {
            fromField = "to";
            toField = "from";
            queryVals = context.toHitsToJoinScore;
        }
        final Map<BytesRef, JoinScore> joinValueToJoinScores = new HashMap<>();
        if (multipleValuesPerDocument) {
            searcher.search(new TermQuery(new Term("value", uniqueRandomValue)), new SimpleCollector() {

                private Scorer scorer;

                private SortedSetDocValues docTermOrds;

                @Override
                public void collect(int doc) throws IOException {
                    if (doc > docTermOrds.docID()) {
                        docTermOrds.advance(doc);
                    }
                    if (doc == docTermOrds.docID()) {
                        long ord;
                        while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                            final BytesRef joinValue = docTermOrds.lookupOrd(ord);
                            JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                            if (joinScore == null) {
                                joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
                            }
                            joinScore.addScore(scorer.score());
                        }
                    }
                }

                @Override
                protected void doSetNextReader(LeafReaderContext context) throws IOException {
                    docTermOrds = DocValues.getSortedSet(context.reader(), fromField);
                }

                @Override
                public void setScorer(Scorer scorer) {
                    this.scorer = scorer;
                }

                @Override
                public boolean needsScores() {
                    return true;
                }
            });
        } else {
            searcher.search(new TermQuery(new Term("value", uniqueRandomValue)), new SimpleCollector() {

                private Scorer scorer;

                private BinaryDocValues terms;

                @Override
                public void collect(int doc) throws IOException {
                    if (doc > terms.docID()) {
                        terms.advance(doc);
                    }
                    final BytesRef joinValue;
                    if (doc == terms.docID()) {
                        joinValue = terms.binaryValue();
                    } else {
                        // missing;
                        return;
                    }
                    JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                    if (joinScore == null) {
                        joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
                    }
                    if (VERBOSE) {
                        System.out.println("expected val=" + joinValue.utf8ToString() + " expected score=" + scorer.score());
                    }
                    joinScore.addScore(scorer.score());
                }

                @Override
                protected void doSetNextReader(LeafReaderContext context) throws IOException {
                    terms = DocValues.getBinary(context.reader(), fromField);
                }

                @Override
                public void setScorer(Scorer scorer) {
                    this.scorer = scorer;
                }

                @Override
                public boolean needsScores() {
                    return true;
                }
            });
        }
        final Map<Integer, JoinScore> docToJoinScore = new HashMap<>();
        if (multipleValuesPerDocument) {
            Terms terms = MultiFields.getTerms(topLevelReader, toField);
            if (terms != null) {
                PostingsEnum postingsEnum = null;
                SortedSet<BytesRef> joinValues = new TreeSet<>();
                joinValues.addAll(joinValueToJoinScores.keySet());
                for (BytesRef joinValue : joinValues) {
                    TermsEnum termsEnum = terms.iterator();
                    if (termsEnum.seekExact(joinValue)) {
                        postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
                        JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                        for (int doc = postingsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postingsEnum.nextDoc()) {
                            // Something to keep in mind for many-to-many relations.
                            if (!docToJoinScore.containsKey(doc)) {
                                docToJoinScore.put(doc, joinScore);
                            }
                        }
                    }
                }
            }
        } else {
            searcher.search(new MatchAllDocsQuery(), new SimpleCollector() {

                private BinaryDocValues terms;

                private int docBase;

                @Override
                public void collect(int doc) throws IOException {
                    if (doc > terms.docID()) {
                        terms.advance(doc);
                    }
                    final BytesRef joinValue;
                    if (doc == terms.docID()) {
                        joinValue = terms.binaryValue();
                    } else {
                        // missing;
                        joinValue = new BytesRef(BytesRef.EMPTY_BYTES);
                    }
                    JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                    if (joinScore == null) {
                        return;
                    }
                    docToJoinScore.put(docBase + doc, joinScore);
                }

                @Override
                protected void doSetNextReader(LeafReaderContext context) throws IOException {
                    terms = DocValues.getBinary(context.reader(), toField);
                    docBase = context.docBase;
                }

                @Override
                public void setScorer(Scorer scorer) {
                }

                @Override
                public boolean needsScores() {
                    return false;
                }
            });
        }
        queryVals.put(uniqueRandomValue, docToJoinScore);
    }
    if (globalOrdinalJoin) {
        SortedDocValues[] values = new SortedDocValues[topLevelReader.leaves().size()];
        for (LeafReaderContext leadContext : topLevelReader.leaves()) {
            values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
        }
        context.ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
    }
    context.searcher = searcher;
    context.dir = dir;
    return context;
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Scorer(org.apache.lucene.search.Scorer) FilterScorer(org.apache.lucene.search.FilterScorer) Document(org.apache.lucene.document.Document) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) TermsEnum(org.apache.lucene.index.TermsEnum) SimpleCollector(org.apache.lucene.search.SimpleCollector) Random(java.util.Random) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) TreeSet(java.util.TreeSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) Terms(org.apache.lucene.index.Terms) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) SortedDocValues(org.apache.lucene.index.SortedDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Map(java.util.Map) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) HashMap(java.util.HashMap) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 18 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class TestJoinUtil method testMinMaxScore.

public void testMinMaxScore() throws Exception {
    String priceField = "price";
    Query priceQuery = numericDocValuesScoreQuery(priceField);
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)));
    Map<String, Float> lowestScoresPerParent = new HashMap<>();
    Map<String, Float> highestScoresPerParent = new HashMap<>();
    int numParents = RandomNumbers.randomIntBetween(random(), 16, 64);
    for (int p = 0; p < numParents; p++) {
        String parentId = Integer.toString(p);
        Document parentDoc = new Document();
        parentDoc.add(new StringField("id", parentId, Field.Store.YES));
        parentDoc.add(new StringField("type", "to", Field.Store.NO));
        parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
        iw.addDocument(parentDoc);
        int numChildren = RandomNumbers.randomIntBetween(random(), 2, 16);
        int lowest = Integer.MAX_VALUE;
        int highest = Integer.MIN_VALUE;
        for (int c = 0; c < numChildren; c++) {
            String childId = Integer.toString(p + c);
            Document childDoc = new Document();
            childDoc.add(new StringField("id", childId, Field.Store.YES));
            childDoc.add(new StringField("type", "from", Field.Store.NO));
            childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
            int price = random().nextInt(1000);
            childDoc.add(new NumericDocValuesField(priceField, price));
            iw.addDocument(childDoc);
            lowest = Math.min(lowest, price);
            highest = Math.max(highest, price);
        }
        lowestScoresPerParent.put(parentId, (float) lowest);
        highestScoresPerParent.put(parentId, (float) highest);
    }
    iw.close();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    SortedDocValues[] values = new SortedDocValues[searcher.getIndexReader().leaves().size()];
    for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) {
        values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
    }
    MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
    BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
    fromQuery.add(priceQuery, BooleanClause.Occur.MUST);
    Query toQuery = new TermQuery(new Term("type", "to"));
    Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, searcher, ScoreMode.Min, ordinalMap);
    TopDocs topDocs = searcher.search(joinQuery, numParents);
    assertEquals(numParents, topDocs.totalHits);
    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = topDocs.scoreDocs[i];
        String id = searcher.doc(scoreDoc.doc).get("id");
        assertEquals(lowestScoresPerParent.get(id), scoreDoc.score, 0f);
    }
    joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, searcher, ScoreMode.Max, ordinalMap);
    topDocs = searcher.search(joinQuery, numParents);
    assertEquals(numParents, topDocs.totalHits);
    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = topDocs.scoreDocs[i];
        String id = searcher.doc(scoreDoc.doc).get("id");
        assertEquals(highestScoresPerParent.get(id), scoreDoc.score, 0f);
    }
    searcher.getIndexReader().close();
    dir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) FieldValueQuery(org.apache.lucene.search.FieldValueQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) HashMap(java.util.HashMap) Document(org.apache.lucene.document.Document) MultiDocValues(org.apache.lucene.index.MultiDocValues) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) SortedDocValues(org.apache.lucene.index.SortedDocValues) StringField(org.apache.lucene.document.StringField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 19 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class ExpandComponent method process.

@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {
    if (!rb.doExpand) {
        return;
    }
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    String field = params.get(ExpandParams.EXPAND_FIELD);
    String hint = null;
    if (field == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                    hint = cp.hint;
                }
            }
        }
    }
    if (field == null) {
        throw new IOException("Expand field is null.");
    }
    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
    Sort sort = null;
    if (sortParam != null) {
        sort = SortSpecParsing.parseSortSpec(sortParam, rb.req).getSort();
    }
    Query query;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    List<Query> newFilters = new ArrayList<>();
    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    SolrIndexSearcher searcher = req.getSearcher();
    LeafReader reader = searcher.getSlowAtomicReader();
    SchemaField schemaField = searcher.getSchema().getField(field);
    FieldType fieldType = schemaField.getType();
    SortedDocValues values = null;
    long nullValue = 0L;
    if (fieldType instanceof StrField) {
        //Get The Top Level SortedDocValues
        if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
            Map<String, UninvertingReader.Type> mapping = new HashMap();
            mapping.put(field, UninvertingReader.Type.SORTED);
            UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
            values = uninvertingReader.getSortedDocValues(field);
        } else {
            values = DocValues.getSorted(reader, field);
        }
    } else {
        //Get the nullValue for the numeric collapse field
        String defaultValue = searcher.getSchema().getField(field).getDefaultValue();
        final NumberType numType = fieldType.getNumberType();
        // we don't need to handle invalid 64-bit field types here.
        if (defaultValue != null) {
            if (numType == NumberType.INTEGER) {
                nullValue = Long.parseLong(defaultValue);
            } else if (numType == NumberType.FLOAT) {
                nullValue = Float.floatToIntBits(Float.parseFloat(defaultValue));
            }
        } else if (NumberType.FLOAT.equals(numType)) {
            // Integer case already handled by nullValue defaulting to 0
            nullValue = Float.floatToIntBits(0.0f);
        }
    }
    FixedBitSet groupBits = null;
    LongHashSet groupSet = null;
    DocList docList = rb.getResults().docList;
    IntHashSet collapsedSet = new IntHashSet(docList.size() * 2);
    //Gather the groups for the current page of documents
    DocIterator idit = docList.iterator();
    int[] globalDocs = new int[docList.size()];
    int docsIndex = -1;
    while (idit.hasNext()) {
        globalDocs[++docsIndex] = idit.nextDoc();
    }
    Arrays.sort(globalDocs);
    Query groupQuery = null;
    /*
    * This code gathers the group information for the current page.
    */
    List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
    if (contexts.size() == 0) {
        //When no context is available we can skip the expanding
        return;
    }
    int currentContext = 0;
    int currentDocBase = contexts.get(currentContext).docBase;
    int nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
    IntObjectHashMap<BytesRef> ordBytes = null;
    if (values != null) {
        groupBits = new FixedBitSet(values.getValueCount());
        MultiDocValues.OrdinalMap ordinalMap = null;
        SortedDocValues[] sortedDocValues = null;
        LongValues segmentOrdinalMap = null;
        SortedDocValues currentValues = null;
        if (values instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) values).mapping;
            sortedDocValues = ((MultiDocValues.MultiSortedDocValues) values).values;
            currentValues = sortedDocValues[currentContext];
            segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
        }
        int count = 0;
        ordBytes = new IntObjectHashMap<>();
        for (int i = 0; i < globalDocs.length; i++) {
            int globalDoc = globalDocs[i];
            while (globalDoc >= nextDocBase) {
                currentContext++;
                currentDocBase = contexts.get(currentContext).docBase;
                nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
                if (ordinalMap != null) {
                    currentValues = sortedDocValues[currentContext];
                    segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
                }
            }
            int contextDoc = globalDoc - currentDocBase;
            if (ordinalMap != null) {
                if (contextDoc > currentValues.docID()) {
                    currentValues.advance(contextDoc);
                }
                if (contextDoc == currentValues.docID()) {
                    int ord = currentValues.ordValue();
                    ++count;
                    BytesRef ref = currentValues.lookupOrd(ord);
                    ord = (int) segmentOrdinalMap.get(ord);
                    ordBytes.put(ord, BytesRef.deepCopyOf(ref));
                    groupBits.set(ord);
                    collapsedSet.add(globalDoc);
                }
            } else {
                if (globalDoc > values.docID()) {
                    values.advance(globalDoc);
                }
                if (globalDoc == values.docID()) {
                    int ord = values.ordValue();
                    ++count;
                    BytesRef ref = values.lookupOrd(ord);
                    ordBytes.put(ord, BytesRef.deepCopyOf(ref));
                    groupBits.set(ord);
                    collapsedSet.add(globalDoc);
                }
            }
        }
        if (count > 0 && count < 200) {
            try {
                groupQuery = getGroupQuery(field, count, ordBytes);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
    } else {
        groupSet = new LongHashSet(docList.size());
        NumericDocValues collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
        int count = 0;
        for (int i = 0; i < globalDocs.length; i++) {
            int globalDoc = globalDocs[i];
            while (globalDoc >= nextDocBase) {
                currentContext++;
                currentDocBase = contexts.get(currentContext).docBase;
                nextDocBase = currentContext + 1 < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
                collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
            }
            int contextDoc = globalDoc - currentDocBase;
            int valueDocID = collapseValues.docID();
            if (valueDocID < contextDoc) {
                valueDocID = collapseValues.advance(contextDoc);
            }
            long value;
            if (valueDocID == contextDoc) {
                value = collapseValues.longValue();
            } else {
                value = 0;
            }
            if (value != nullValue) {
                ++count;
                groupSet.add(value);
                collapsedSet.add(globalDoc);
            }
        }
        if (count > 0 && count < 200) {
            if (fieldType.isPointField()) {
                groupQuery = getPointGroupQuery(schemaField, count, groupSet);
            } else {
                groupQuery = getGroupQuery(field, fieldType, count, groupSet);
            }
        }
    }
    Collector collector;
    if (sort != null)
        sort = sort.rewrite(searcher);
    Collector groupExpandCollector = null;
    if (values != null) {
        //Get The Top Level SortedDocValues again so we can re-iterate:
        if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
            Map<String, UninvertingReader.Type> mapping = new HashMap();
            mapping.put(field, UninvertingReader.Type.SORTED);
            UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
            values = uninvertingReader.getSortedDocValues(field);
        } else {
            values = DocValues.getSorted(reader, field);
        }
        groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort);
    } else {
        groupExpandCollector = new NumericGroupExpandCollector(field, nullValue, groupSet, collapsedSet, limit, sort);
    }
    if (groupQuery != null) {
        //Limits the results to documents that are in the same group as the documents in the page.
        newFilters.add(groupQuery);
    }
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    if (pfilter.postFilter != null) {
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }
    if (pfilter.filter == null) {
        searcher.search(query, collector);
    } else {
        Query q = new BooleanQuery.Builder().add(query, Occur.MUST).add(pfilter.filter, Occur.FILTER).build();
        searcher.search(q, collector);
    }
    LongObjectMap<Collector> groups = ((GroupCollector) groupExpandCollector).getGroups();
    NamedList outMap = new SimpleOrderedMap();
    CharsRefBuilder charsRef = new CharsRefBuilder();
    for (LongObjectCursor<Collector> cursor : groups) {
        long groupValue = cursor.key;
        TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits, topDocs.getMaxScore());
            if (fieldType instanceof StrField) {
                final BytesRef bytesRef = ordBytes.get((int) groupValue);
                fieldType.indexedToReadable(bytesRef, charsRef);
                String group = charsRef.toString();
                outMap.add(group, slice);
            } else {
                outMap.add(numericToString(fieldType, groupValue), slice);
            }
        }
    }
    rb.rsp.add("expanded", outMap);
}

Also used : StrField(org.apache.solr.schema.StrField) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) SolrConstantScoreQuery(org.apache.solr.search.SolrConstantScoreQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) HashMap(java.util.HashMap) LongObjectHashMap(com.carrotsearch.hppc.LongObjectHashMap) IntObjectHashMap(com.carrotsearch.hppc.IntObjectHashMap) ArrayList(java.util.ArrayList) IntHashSet(com.carrotsearch.hppc.IntHashSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) DocSlice(org.apache.solr.search.DocSlice) ScoreDoc(org.apache.lucene.search.ScoreDoc) FixedBitSet(org.apache.lucene.util.FixedBitSet) Sort(org.apache.lucene.search.Sort) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) SortedDocValues(org.apache.lucene.index.SortedDocValues) LongHashSet(com.carrotsearch.hppc.LongHashSet) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) NumberType(org.apache.solr.schema.NumberType) QParser(org.apache.solr.search.QParser) SolrParams(org.apache.solr.common.params.SolrParams) NumericDocValues(org.apache.lucene.index.NumericDocValues) DocIterator(org.apache.solr.search.DocIterator) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) UninvertingReader(org.apache.solr.uninverting.UninvertingReader) TopDocs(org.apache.lucene.search.TopDocs) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) LeafCollector(org.apache.lucene.search.LeafCollector) Collector(org.apache.lucene.search.Collector) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) TopDocsCollector(org.apache.lucene.search.TopDocsCollector) BytesRef(org.apache.lucene.util.BytesRef) LeafReader(org.apache.lucene.index.LeafReader) FilterLeafReader(org.apache.lucene.index.FilterLeafReader) NamedList(org.apache.solr.common.util.NamedList) IOException(java.io.IOException) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) IOException(java.io.IOException) FieldType(org.apache.solr.schema.FieldType) CollapsingQParserPlugin(org.apache.solr.search.CollapsingQParserPlugin) SchemaField(org.apache.solr.schema.SchemaField) NumberType(org.apache.solr.schema.NumberType) FieldType(org.apache.solr.schema.FieldType) DocValuesType(org.apache.lucene.index.DocValuesType) LongValues(org.apache.lucene.util.LongValues) DocList(org.apache.solr.search.DocList)

Example 20 with SortedDocValues

use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.

the class BoolFieldSource method getValues.

@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final SortedDocValues sindex = DocValues.getSorted(readerContext.reader(), field);
    // figure out what ord maps to true
    int nord = sindex.getValueCount();
    // if no values in the segment, default trueOrd to something other then -1 (missing)
    int tord = -2;
    for (int i = 0; i < nord; i++) {
        final BytesRef br = sindex.lookupOrd(i);
        if (br.length == 1 && br.bytes[br.offset] == 'T') {
            tord = i;
            break;
        }
    }
    final int trueOrd = tord;
    return new BoolDocValues(this) {

        private int getOrdForDoc(int doc) throws IOException {
            if (doc > sindex.docID()) {
                sindex.advance(doc);
            }
            if (doc == sindex.docID()) {
                return sindex.ordValue();
            } else {
                return -1;
            }
        }

        @Override
        public boolean boolVal(int doc) throws IOException {
            return getOrdForDoc(doc) == trueOrd;
        }

        @Override
        public boolean exists(int doc) throws IOException {
            return getOrdForDoc(doc) != -1;
        }

        @Override
        public ValueFiller getValueFiller() {
            return new ValueFiller() {

                private final MutableValueBool mval = new MutableValueBool();

                @Override
                public MutableValue getValue() {
                    return mval;
                }

                @Override
                public void fillValue(int doc) throws IOException {
                    int ord = getOrdForDoc(doc);
                    mval.value = (ord == trueOrd);
                    mval.exists = (ord != -1);
                }
            };
        }
    };
}

Also used : BoolDocValues(org.apache.lucene.queries.function.docvalues.BoolDocValues) MutableValueBool(org.apache.lucene.util.mutable.MutableValueBool) SortedDocValues(org.apache.lucene.index.SortedDocValues) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

SortedDocValues (org.apache.lucene.index.SortedDocValues)66 BytesRef (org.apache.lucene.util.BytesRef)32 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)27 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)23 LeafReader (org.apache.lucene.index.LeafReader)22 Document (org.apache.lucene.document.Document)21 NumericDocValues (org.apache.lucene.index.NumericDocValues)15 Directory (org.apache.lucene.store.Directory)15 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)14 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)13 IOException (java.io.IOException)12 BinaryDocValues (org.apache.lucene.index.BinaryDocValues)12 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)10 MultiDocValues (org.apache.lucene.index.MultiDocValues)10 ArrayList (java.util.ArrayList)9 IndexReader (org.apache.lucene.index.IndexReader)9 OrdinalMap (org.apache.lucene.index.MultiDocValues.OrdinalMap)9 DoublePoint (org.apache.lucene.document.DoublePoint)8 FloatPoint (org.apache.lucene.document.FloatPoint)8 IntPoint (org.apache.lucene.document.IntPoint)8