Search in sources :

Example 31 with Facets

use of org.apache.lucene.facet.Facets in project jackrabbit-oak by apache.

the class LucenePropertyIndex method query.

/**
 * Creates a cursor over the Lucene results for the given index plan.
 *
 * The returned cursor wraps a lazy iterator that pulls rows from Lucene in
 * batches (see {@code loadDocs()}) and a size estimator that counts the total
 * hits of the plan's query.
 *
 * @param plan the index plan to execute; supplies the filter, sort and plan result
 * @param rootState not read anywhere in this method body
 * @return a {@code LucenePathCursor} built from the iterator, the plan, the
 *         query engine settings and the size estimator
 */
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    final Sort sort = getSort(plan);
    final PlanResult pr = getPlanResult(plan);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {

        // Rows already loaded from Lucene but not yet handed out by computeNext()
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();

        // Transformed paths already emitted; only populated when pr.isPathTransformed()
        private final Set<String> seenPaths = Sets.newHashSet();

        // Last document of the previous batch; used as the searchAfter() offset
        private ScoreDoc lastDoc;

        // Size of the next batch to request; doubled after each search (capped at 100000)
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;

        // Set once a spellcheck/suggest request was executed so it is not run again
        private boolean noDocs = false;

        // Searcher in use for the index node identified by indexNodeId
        private IndexSearcher indexSearcher;

        // Id of the index node the current searcher came from; -1 until first use
        private int indexNodeId = -1;

        /**
         * Returns the next queued row, loading another batch when the queue is
         * empty. Once no more rows can be loaded the searcher reference is
         * dropped and end-of-data is signalled.
         */
        @Override
        protected LuceneResultRow computeNext() {
            // The body always returns, so this loop runs at most once
            while (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            releaseSearcher();
            return endOfData();
        }

        /**
         * Converts a Lucene hit into a result row.
         *
         * @return the row, or {@code null} when the document has no path, the
         *         path transformation returns null, or the transformed path
         *         was already seen
         */
        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt, Facets facets, String explanation) throws IOException {
            IndexReader reader = searcher.getIndexReader();
            //TODO Look into usage of field cache for retrieving the path
            //instead of reading via reader if no of docs in index are limited
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                // An empty stored path is reported as "/"
                if ("".equals(path)) {
                    path = "/";
                }
                if (pr.isPathTransformed()) {
                    String originalPath = path;
                    path = pr.transformPath(path);
                    if (path == null) {
                        LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
                        return null;
                    }
                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
                        return null;
                    }
                    seenPaths.add(path);
                }
                LOG.trace("Matched path {}", path);
                return new LuceneResultRow(path, doc.score, excerpt, facets, explanation);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches.
         *
         * Handles three kinds of request: a regular {@code Query} (paged via
         * {@code lastDoc}), a spellcheck query and a suggest query. Any
         * exception is logged as a warning and not rethrown; the acquired
         * index node is always released.
         *
         * @return true if any document is loaded
         */
        private boolean loadDocs() {
            if (noDocs) {
                return false;
            }
            ScoreDoc lastDocToRecord = null;
            final IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = getCurrentSearcher(indexNode);
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    // Use the custom score query instead of the plain query when one is available
                    CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query);
                    if (customScoreQuery != null) {
                        query = customScoreQuery;
                    }
                    TopDocs docs;
                    // NOTE(review): the timer is started once, before the loop, so the search
                    // timing logged on later iterations also covers the earlier iterations
                    long start = PERF_LOGGER.start();
                    // Keep fetching batches until at least one row is queued or Lucene has no more hits
                    while (true) {
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                            } else {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
                            }
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.search(query, nextBatchSize);
                            } else {
                                docs = searcher.search(query, nextBatchSize, sort);
                            }
                        }
                        PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
                        // Double the batch size for the next round; long arithmetic avoids int overflow
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);
                        long f = PERF_LOGGER.start();
                        Facets facets = FacetHelper.getFacets(searcher, query, docs, plan, indexNode.getDefinition().isSecureFacets());
                        PERF_LOGGER.end(f, -1, "facets retrieved");
                        // Excerpts and score explanations are only computed when the filter
                        // carries a not-null restriction on the corresponding property
                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();
                        restriction = filter.getPropertyRestriction(QueryImpl.OAK_SCORE_EXPLANATION);
                        boolean addExplain = restriction != null && restriction.isNotNullRestriction();
                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
                        FieldInfos mergedFieldInfos = null;
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                            mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
                        }
                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(query, analyzer, searcher, doc, mergedFieldInfos);
                            }
                            String explanation = null;
                            if (addExplain) {
                                explanation = searcher.explain(query, doc.doc).toString();
                            }
                            LuceneResultRow row = convertToRow(doc, searcher, excerpt, facets, explanation);
                            if (row != null) {
                                queue.add(row);
                            }
                            // Remember every hit, including skipped ones, so paging continues after it
                            lastDocToRecord = doc;
                        }
                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            //queue is still empty but more results can be fetched
                            //from Lucene so still continue
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT : FieldNames.SPELLCHECK;
                    // Spellcheck is evaluated once; later loadDocs() calls return false immediately
                    noDocs = true;
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);
                    // ACL filter spellchecks
                    QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField, indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(aclCheckField, QueryParserBase.escape(suggestion.string));
                        query = addDescendantClauseIfRequired(query, plan);
                        // Only the top 100 hits are examined for an accessible document
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                // presumably a one-character filter path is the root "/" — TODO confirm
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                // One accessible matching document is enough to emit the suggestion
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.string));
                                    break;
                                }
                            }
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
                    // Suggestions are evaluated once; later loadDocs() calls return false immediately
                    noDocs = true;
                    List<Lookup.LookupResult> lookupResults = SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST, indexNode.getDefinition().isSuggestAnalyzed() ? indexNode.getDefinition().getAnalyzer() : SuggestHelper.getAnalyzer());
                    // ACL filter suggestions
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");
                        query = addDescendantClauseIfRequired(query, plan);
                        // Only the top 100 hits are examined for an accessible document
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                // presumably a one-character filter path is the root "/" — TODO confirm
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                // One accessible matching document is enough to emit the suggestion
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value));
                                    break;
                                }
                            }
                        }
                    }
                }
            } catch (Exception e) {
                // Failures are logged and not propagated; rows queued before the failure are still returned
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            // Persist the paging offset for the next loadDocs() call
            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }
            return !queue.isEmpty();
        }

        /**
         * Returns the searcher to use for the given index node. When the node
         * id differs from the one seen before, the searcher is replaced and
         * {@code lastDoc} is reset so the next search starts without an offset.
         */
        private IndexSearcher getCurrentSearcher(IndexNode indexNode) {
            //the searcher would be refreshed as done earlier
            if (indexNodeId != indexNode.getIndexNodeId()) {
                //if already initialized then log about change
                // NOTE(review): the initial value is -1, so a previous id of 0 would not be
                // logged as a change — confirm that index node ids start above 0
                if (indexNodeId > 0) {
                    LOG.debug("Change in index version detected. Query would be performed without offset");
                }
                //TODO Add testcase for this scenario
                indexSearcher = indexNode.getSearcher();
                indexNodeId = indexNode.getIndexNodeId();
                lastDoc = null;
            }
            return indexSearcher;
        }

        /**
         * Drops the reference to the current searcher.
         */
        private void releaseSearcher() {
            //For now nullifying it.
            indexSearcher = null;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {

        /**
         * Counts the total hits of the plan's query.
         *
         * @return the hit count, or -1 when the request is not a {@code Query}
         *         or an {@code IOException} occurred
         */
        @Override
        public long getSize() {
            IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    // The collector only counts hits; no documents are collected
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) PlanResult(org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PlanResult) Set(java.util.Set) Facets(org.apache.lucene.facet.Facets) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) NumericRangeQuery(org.apache.lucene.search.NumericRangeQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) QueryEngineSettings(org.apache.jackrabbit.oak.query.QueryEngineSettings) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) Sort(org.apache.lucene.search.Sort) Lookup(org.apache.lucene.search.suggest.Lookup) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) AbstractIterator(com.google.common.collect.AbstractIterator) PropertyRestriction(org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction) QueryScorer(org.apache.lucene.search.highlight.QueryScorer) SuggestHelper(org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper) IOException(java.io.IOException) Deque(java.util.Deque) QueryNodeException(org.apache.lucene.queryparser.flexible.core.QueryNodeException) ParseException(org.apache.lucene.queryparser.classic.ParseException) IOException(java.io.IOException) InvalidTokenOffsetsException(org.apache.lucene.search.highlight.InvalidTokenOffsetsException) FieldInfos(org.apache.lucene.index.FieldInfos) StandardQueryParser(org.apache.lucene.queryparser.flexible.standard.StandardQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) 
Filter(org.apache.jackrabbit.oak.spi.query.Filter) IndexReader(org.apache.lucene.index.IndexReader) SuggestWord(org.apache.lucene.search.spell.SuggestWord) SpellcheckHelper(org.apache.jackrabbit.oak.plugins.index.lucene.util.SpellcheckHelper)

Example 32 with Facets

use of org.apache.lucene.facet.Facets in project lucene-solr by apache.

the class TestRangeFacetCounts method testBasicLong.

/**
 * Indexes the long values 0..99 plus {@code Long.MAX_VALUE} into a numeric
 * doc-values field and verifies the counts reported for five long ranges
 * with differing inclusive/exclusive bounds.
 */
public void testBasicLong() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    Document document = new Document();
    NumericDocValuesField valueField = new NumericDocValuesField("field", 0L);
    document.add(valueField);

    // The same Document/field instance is reused; only the field value changes per add.
    long value = 0;
    while (value < 100) {
        valueField.setLongValue(value);
        writer.addDocument(document);
        value++;
    }

    // Also add Long.MAX_VALUE
    valueField.setLongValue(Long.MAX_VALUE);
    writer.addDocument(document);

    IndexReader reader = writer.getReader();
    writer.close();

    FacetsCollector collector = new FacetsCollector();
    IndexSearcher searcher = newSearcher(reader);
    searcher.search(new MatchAllDocsQuery(), collector);

    LongRange[] ranges = {
        new LongRange("less than 10", 0L, true, 10L, false),
        new LongRange("less than or equal to 10", 0L, true, 10L, true),
        new LongRange("over 90", 90L, false, 100L, false),
        new LongRange("90 or above", 90L, true, 100L, false),
        new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, true)
    };
    Facets facets = new LongRangeFacetCounts("field", collector, ranges);

    String expected = "dim=field path=[] value=22 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (1)\n";
    assertEquals(expected, facets.getTopChildren(10, "field").toString());

    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) MultiFacets(org.apache.lucene.facet.MultiFacets) Facets(org.apache.lucene.facet.Facets) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) FacetsCollector(org.apache.lucene.facet.FacetsCollector) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) FacetResult(org.apache.lucene.facet.FacetResult) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 33 with Facets

use of org.apache.lucene.facet.Facets in project lucene-solr by apache.

the class TestRangeFacetCounts method testOverlappedEndStart.

/**
 * Verifies range counts when adjacent ranges share an inclusive boundary
 * (the end of one range equals the start of the next), using the values
 * 0..99 plus {@code Long.MAX_VALUE}.
 */
public void testOverlappedEndStart() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    Document document = new Document();
    NumericDocValuesField valueField = new NumericDocValuesField("field", 0L);
    document.add(valueField);

    // The same Document/field instance is reused; only the field value changes per add.
    long value = 0;
    while (value < 100) {
        valueField.setLongValue(value);
        writer.addDocument(document);
        value++;
    }
    valueField.setLongValue(Long.MAX_VALUE);
    writer.addDocument(document);

    IndexReader reader = writer.getReader();
    writer.close();

    FacetsCollector collector = new FacetsCollector();
    IndexSearcher searcher = newSearcher(reader);
    searcher.search(new MatchAllDocsQuery(), collector);

    LongRange[] ranges = {
        new LongRange("0-10", 0L, true, 10L, true),
        new LongRange("10-20", 10L, true, 20L, true),
        new LongRange("20-30", 20L, true, 30L, true),
        new LongRange("30-40", 30L, true, 40L, true)
    };
    Facets facets = new LongRangeFacetCounts("field", collector, ranges);

    String expected = "dim=field path=[] value=41 childCount=4\n  0-10 (11)\n  10-20 (11)\n  20-30 (11)\n  30-40 (11)\n";
    assertEquals(expected, facets.getTopChildren(10, "field").toString());

    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) MultiFacets(org.apache.lucene.facet.MultiFacets) Facets(org.apache.lucene.facet.Facets) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) FacetsCollector(org.apache.lucene.facet.FacetsCollector) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) FacetResult(org.apache.lucene.facet.FacetResult) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 34 with Facets

use of org.apache.lucene.facet.Facets in project lucene-solr by apache.

the class TestRangeFacetCounts method testMissingValues.

/**
 * Verifies range counts when some documents carry no value for the faceted
 * field: every fifth document (0, 5, 10, ...) is indexed empty.
 * See LUCENE-5178.
 */
public void testMissingValues() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    Document document = new Document();
    NumericDocValuesField valueField = new NumericDocValuesField("field", 0L);
    document.add(valueField);

    for (long value = 0; value < 100; value++) {
        if (value % 5 != 0) {
            valueField.setLongValue(value);
            writer.addDocument(document);
        } else {
            // Every 5th doc is missing the value:
            writer.addDocument(new Document());
        }
    }

    IndexReader reader = writer.getReader();

    FacetsCollector collector = new FacetsCollector();
    IndexSearcher searcher = newSearcher(reader);
    searcher.search(new MatchAllDocsQuery(), collector);

    LongRange[] ranges = {
        new LongRange("less than 10", 0L, true, 10L, false),
        new LongRange("less than or equal to 10", 0L, true, 10L, true),
        new LongRange("over 90", 90L, false, 100L, false),
        new LongRange("90 or above", 90L, true, 100L, false),
        new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, false)
    };
    Facets facets = new LongRangeFacetCounts("field", collector, ranges);

    String expected = "dim=field path=[] value=16 childCount=5\n  less than 10 (8)\n  less than or equal to 10 (8)\n  over 90 (8)\n  90 or above (8)\n  over 1000 (0)\n";
    FacetResult result = facets.getTopChildren(10, "field");
    assertEquals(expected, result.toString());

    // The writer is closed only after the reader has been used, as in the original ordering.
    writer.close();
    IOUtils.close(reader, dir);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) MultiFacets(org.apache.lucene.facet.MultiFacets) Facets(org.apache.lucene.facet.Facets) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) FacetsCollector(org.apache.lucene.facet.FacetsCollector)

Example 35 with Facets

use of org.apache.lucene.facet.Facets in project lucene-solr by apache.

the class TestRangeFacetCounts method testLongMinMax.

/**
 * Verifies range counts at the extremes of the long domain: indexes
 * {@code Long.MIN_VALUE}, 0 and {@code Long.MAX_VALUE} and checks ranges
 * whose bounds are the minimum/maximum long with every combination of
 * inclusive and exclusive ends.
 */
public void testLongMinMax() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    Document document = new Document();
    NumericDocValuesField valueField = new NumericDocValuesField("field", 0L);
    document.add(valueField);

    // The same Document/field instance is reused; only the field value changes per add.
    long[] values = {Long.MIN_VALUE, 0, Long.MAX_VALUE};
    for (long value : values) {
        valueField.setLongValue(value);
        writer.addDocument(document);
    }

    IndexReader reader = writer.getReader();
    writer.close();

    FacetsCollector collector = new FacetsCollector();
    IndexSearcher searcher = newSearcher(reader);
    searcher.search(new MatchAllDocsQuery(), collector);

    LongRange[] ranges = {
        new LongRange("min", Long.MIN_VALUE, true, Long.MIN_VALUE, true),
        new LongRange("max", Long.MAX_VALUE, true, Long.MAX_VALUE, true),
        new LongRange("all0", Long.MIN_VALUE, true, Long.MAX_VALUE, true),
        new LongRange("all1", Long.MIN_VALUE, false, Long.MAX_VALUE, true),
        new LongRange("all2", Long.MIN_VALUE, true, Long.MAX_VALUE, false),
        new LongRange("all3", Long.MIN_VALUE, false, Long.MAX_VALUE, false)
    };
    Facets facets = new LongRangeFacetCounts("field", collector, ranges);

    String expected = "dim=field path=[] value=3 childCount=6\n  min (1)\n  max (1)\n  all0 (3)\n  all1 (2)\n  all2 (2)\n  all3 (1)\n";
    assertEquals(expected, facets.getTopChildren(10, "field").toString());

    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) MultiFacets(org.apache.lucene.facet.MultiFacets) Facets(org.apache.lucene.facet.Facets) IndexReader(org.apache.lucene.index.IndexReader) FacetResult(org.apache.lucene.facet.FacetResult) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) FacetsCollector(org.apache.lucene.facet.FacetsCollector)

Aggregations

Facets (org.apache.lucene.facet.Facets): 72, FacetsCollector (org.apache.lucene.facet.FacetsCollector): 60, FacetResult (org.apache.lucene.facet.FacetResult): 47, IndexSearcher (org.apache.lucene.search.IndexSearcher): 46, MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 37, DirectoryTaxonomyReader (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader): 35, Directory (org.apache.lucene.store.Directory): 34, Document (org.apache.lucene.document.Document): 32, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 32, FacetsConfig (org.apache.lucene.facet.FacetsConfig): 29, DirectoryReader (org.apache.lucene.index.DirectoryReader): 27, LabelAndValue (org.apache.lucene.facet.LabelAndValue): 23, DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter): 22, FacetField (org.apache.lucene.facet.FacetField): 19, ArrayList (java.util.ArrayList): 18, IOException (java.io.IOException): 14, DefaultSortedSetDocValuesReaderState (org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState): 14, SortedSetDocValuesFacetCounts (org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts): 14, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 13, DrillDownQuery (org.apache.lucene.facet.DrillDownQuery): 13