Search in sources :

Example 16 with FullTextExpression

use of org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression in project jackrabbit-oak by apache.

the class SelectorImpl method createFilter.

/**
 * Build the filter for this selector, for use while planning or while
 * executing the query.
 * <p>
 * The filter collects, in this order: the join condition, restrictions
 * derived from the selected columns of this selector, the query-level
 * constraint (including its full-text part), the selector-level
 * constraints, and the index name / index tag query options.
 *
 * @param preparing whether a filter for the prepare phase should be made
 * @return the filter
 */
@Override
public FilterImpl createFilter(boolean preparing) {
    FilterImpl filter = new FilterImpl(this, query.getStatement(), query.getSettings());
    filter.setPreparing(preparing);
    if (joinCondition != null) {
        joinCondition.restrict(filter);
    }
    // columns selected on this selector may request extra data from the
    // index (score explanation, excerpt, facets)
    for (ColumnImpl column : query.getColumns()) {
        if (!column.getSelector().equals(this)) {
            // column belongs to another selector
            continue;
        }
        String name = column.getColumnName();
        if (name.equals(QueryConstants.OAK_SCORE_EXPLANATION)) {
            filter.restrictProperty(name, Operator.NOT_EQUAL, null);
        } else if (name.startsWith(QueryConstants.REP_EXCERPT)) {
            // we will need the excerpt
            filter.restrictProperty(QueryConstants.REP_EXCERPT, Operator.EQUAL, PropertyValues.newString(name));
        } else if (name.startsWith(QueryConstants.REP_FACET)) {
            filter.restrictProperty(QueryConstants.REP_FACET, Operator.EQUAL, PropertyValues.newString(name));
        }
    }
    // apply the query-level constraint to this selector's filter
    // (".. is null" must be written as "not .. is not null").
    if (queryConstraint != null) {
        queryConstraint.restrict(filter);
        filter.setFullTextConstraint(queryConstraint.getFullTextConstraint(this));
    }
    for (ConstraintImpl selectorConstraint : selectorConstraints) {
        selectorConstraint.restrict(filter);
    }
    QueryOptions queryOptions = query.getQueryOptions();
    if (queryOptions == null) {
        return filter;
    }
    // expose explicitly requested index name / tag as property restrictions
    if (queryOptions.indexName != null) {
        filter.restrictProperty(IndexConstants.INDEX_NAME_OPTION, Operator.EQUAL, PropertyValues.newString(queryOptions.indexName));
    }
    if (queryOptions.indexTag != null) {
        filter.restrictProperty(IndexConstants.INDEX_TAG_OPTION, Operator.EQUAL, PropertyValues.newString(queryOptions.indexTag));
    }
    return filter;
}
Also used : FilterImpl(org.apache.jackrabbit.oak.query.index.FilterImpl) FullTextExpression(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression) QueryOptions(org.apache.jackrabbit.oak.query.QueryOptions)

Example 17 with FullTextExpression

use of org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression in project jackrabbit-oak by apache.

the class AggregateIndex method query.

/**
 * Execute the given plan.
 * <p>
 * Without a node aggregator the call is delegated unchanged to the base
 * index. A plan that is not an {@code AggregateIndexPlan} is run through an
 * aggregation cursor after its filter has been replaced by an aggregation
 * filter. An {@code AggregateIndexPlan} is flattened based on the full-text
 * constraint of its filter.
 *
 * @param plan the index plan to execute
 * @param rootState the root node state
 * @return the cursor over the result
 */
@Override
public Cursor query(IndexPlan plan, NodeState rootState) {
    if (getNodeAggregator() == null) {
        // pass-through
        return baseIndex.query(plan, rootState);
    }
    if (plan instanceof AggregateIndexPlan) {
        Filter planFilter = plan.getFilter();
        AggregateIndexPlan aggregatePlan = (AggregateIndexPlan) plan;
        FullTextExpression fullText = planFilter.getFullTextConstraint();
        return flatten(fullText, aggregatePlan, planFilter, rootState, "");
    }
    // remove the node type restriction
    plan.setFilter(newAggregationFilter(plan.getFilter(), null));
    return newAggregationCursor(plan, rootState);
}
Also used : Filter(org.apache.jackrabbit.oak.spi.query.Filter) FullTextExpression(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression)

Example 18 with FullTextExpression

use of org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression in project jackrabbit-oak by apache.

the class LuceneIndex method query.

/**
 * Execute the given plan against the Lucene index.
 * <p>
 * If the full-text constraint refers to more than one relative path, the
 * query is delegated to a {@code MultiLuceneIndex}. Otherwise a lazy
 * iterator is created that loads Lucene hits in batches of growing size
 * (or, for spellcheck / suggest requests, a single row holding the
 * suggestions) and the iterator is wrapped in a {@code LucenePathCursor}.
 *
 * @param plan the index plan; its {@code ATTR_INDEX_PATH} attribute is used
 *            to acquire the index node
 * @param rootState the root node state
 * @return the cursor over the result rows
 */
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    FullTextExpression ft = filter.getFullTextConstraint();
    final Set<String> relPaths = getRelativePaths(ft);
    if (relPaths.size() > 1) {
        return new MultiLuceneIndex(filter, rootState, relPaths).query();
    }
    final String parent = relPaths.isEmpty() ? "" : relPaths.iterator().next();
    // we only restrict non-full-text conditions if there is
    // no relative property in the full-text constraint
    final boolean nonFullTextConstraints = parent.isEmpty();
    final int parentDepth = getDepth(parent);
    QueryLimits settings = filter.getQueryLimits();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {

        // rows loaded by the last batch that were not yet handed out
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();

        // ancestor paths already returned (only used with a relative path)
        private final Set<String> seenPaths = Sets.newHashSet();

        // last hit of the previous batch; the next search continues after it
        private ScoreDoc lastDoc;

        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;

        // set once a spellcheck / suggest request was served, so that
        // no further load is attempted
        private boolean noDocs = false;

        private long lastSearchIndexerVersion;

        private int reloadCount;

        @Override
        protected LuceneResultRow computeNext() {
            if (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        /**
         * Convert a Lucene hit to a result row.
         *
         * @return the row, or null if the document has no path or, when a
         *         relative path is used, its ancestor path was already seen
         */
        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt) throws IOException {
            IndexReader reader = searcher.getIndexReader();
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (!parent.isEmpty()) {
                    // TODO OAK-828 this breaks node aggregation
                    // get the base path
                    // ensure the path ends with the given
                    // relative path
                    // if (!path.endsWith("/" + parent)) {
                    // continue;
                    // }
                    path = getAncestorPath(path, parentDepth);
                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        return null;
                    }
                    seenPaths.add(path);
                }
                return new LuceneResultRow(path, doc.score, excerpt);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {
            if (noDocs) {
                return false;
            }
            ScoreDoc lastDocToRecord = null;
            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(), nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TopDocs docs;
                    checkForIndexVersionChange(searcher);
                    while (true) {
                        // take the start time per batch, so that every
                        // pass of this loop logs its own duration
                        long start = System.currentTimeMillis();
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            docs = searcher.search(query, nextBatchSize);
                        }
                        LOG.debug("... took {} ms", System.currentTimeMillis() - start);
                        // double the batch size, capped at 100000
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);
                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryConstants.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();
                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                        }
                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(analyzer, searcher, doc);
                            }
                            LuceneResultRow row = convertToRow(doc, searcher, excerpt);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }
                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            // the batch had hits, but none produced a row:
                            // continue with the next batch
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);
                    // ACL filter spellchecks
                    Collection<String> suggestedWords = new ArrayList<String>(suggestWords.length);
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST, indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(FieldNames.SUGGEST, suggestion.string);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            // keep the word if at least one of the matching
                            // documents is accessible
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add(suggestion.string);
                                    break;
                                }
                            }
                        }
                    }
                    queue.add(new LuceneResultRow(suggestedWords));
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;
                    List<Lookup.LookupResult> lookupResults = SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);
                    // ACL filter suggestions
                    Collection<String> suggestedWords = new ArrayList<String>(lookupResults.size());
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.FULLTEXT, indexNode.getDefinition().getAnalyzer());
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.createPhraseQuery(FieldNames.FULLTEXT, suggestion.key.toString());
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            // keep the suggestion if at least one of the
                            // matching documents is accessible
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add("{term=" + suggestion.key + ",weight=" + suggestion.value + "}");
                                    break;
                                }
                            }
                        }
                    }
                    queue.add(new LuceneResultRow(suggestedWords));
                }
            } catch (IOException e) {
                // best effort: log and return whatever was loaded so far
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }
            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }
            return !queue.isEmpty();
        }

        /**
         * Detect a change of the index version between two batches. On a
         * change the offset ({@code lastDoc}) is dropped, so that the next
         * search starts from the beginning.
         *
         * @throws IllegalStateException if more than
         *             {@code MAX_RELOAD_COUNT} changes were detected
         */
        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = LucenePropertyIndex.getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                reloadCount++;
                if (reloadCount > MAX_RELOAD_COUNT) {
                    LOG.error("More than {} index version changes detected for query {}", MAX_RELOAD_COUNT, plan);
                    throw new IllegalStateException("Too many version changes");
                }
                lastDoc = null;
                // logged as "previous => current"
                LOG.debug("Change in index version detected {} => {}. Query would be performed without " + "offset; reload {}", lastSearchIndexerVersion, currentVersion, reloadCount);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {

        /**
         * Count the hits of the Lucene query.
         *
         * @return the total hit count, or -1 if the request is not a
         *         Lucene query or the search failed
         */
        @Override
        public long getSize() {
            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(), nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("Estimated size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, settings, sizeEstimator, filter);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Set(java.util.Set) HashSet(java.util.HashSet) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) QueryLimits(org.apache.jackrabbit.oak.spi.query.QueryLimits) TopDocs(org.apache.lucene.search.TopDocs) PathStoredFieldVisitor(org.apache.jackrabbit.oak.plugins.index.lucene.util.PathStoredFieldVisitor) Lookup(org.apache.lucene.search.suggest.Lookup) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) AbstractIterator(com.google.common.collect.AbstractIterator) PropertyRestriction(org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction) QueryScorer(org.apache.lucene.search.highlight.QueryScorer) SuggestHelper(org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper) IOException(java.io.IOException) Deque(java.util.Deque) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Filter(org.apache.jackrabbit.oak.spi.query.Filter) FullTextExpression(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression) IndexReader(org.apache.lucene.index.IndexReader) SuggestWord(org.apache.lucene.search.spell.SuggestWord) SpellcheckHelper(org.apache.jackrabbit.oak.plugins.index.lucene.util.SpellcheckHelper) Collection(java.util.Collection)

Example 19 with FullTextExpression

use of org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression in project jackrabbit-oak by apache.

the class LuceneIndex method getFullTextQuery.

/**
 * Convert a full-text expression into a Lucene query by visiting the
 * expression tree.
 *
 * @param ft the full-text expression
 * @param analyzer the analyzer passed on to the term conversion
 * @param reader the Lucene reader passed on to the term conversion
 * @return the query, or null if the visitor did not produce one (a term
 *         for which no query could be built)
 */
static Query getFullTextQuery(FullTextExpression ft, final Analyzer analyzer, final IndexReader reader) {
    // holder the visitor writes its outcome into, as the visit methods
    // can only return a boolean (a "non-local return")
    final AtomicReference<Query> holder = new AtomicReference<Query>();
    ft.accept(new FullTextVisitor() {

        @Override
        public boolean visit(FullTextContains contains) {
            return contains.getBase().accept(this);
        }

        @Override
        public boolean visit(FullTextOr or) {
            BooleanQuery disjunction = new BooleanQuery();
            for (FullTextExpression child : or.list) {
                disjunction.add(getFullTextQuery(child, analyzer, reader), SHOULD);
            }
            holder.set(disjunction);
            return true;
        }

        @Override
        public boolean visit(FullTextAnd and) {
            BooleanQuery conjunction = new BooleanQuery();
            for (FullTextExpression child : and.list) {
                Query sub = getFullTextQuery(child, analyzer, reader);
                BooleanClause[] clauses = null;
                if (sub instanceof BooleanQuery) {
                    clauses = ((BooleanQuery) sub).getClauses();
                }
                // only unwrap the clause if the sub-query consists of
                // exactly one MUST_NOT clause
                if (clauses != null && clauses.length == 1 && clauses[0].getOccur() == BooleanClause.Occur.MUST_NOT) {
                    conjunction.add(clauses[0]);
                } else {
                    conjunction.add(sub, MUST);
                }
            }
            holder.set(conjunction);
            return true;
        }

        @Override
        public boolean visit(FullTextTerm term) {
            String field = term.getPropertyName();
            String text = term.getText();
            String boost = term.getBoost();
            boolean negate = term.isNot();
            // for a relative property, only its name is used
            if (field != null && field.indexOf('/') >= 0) {
                field = getName(field);
            }
            Query termQuery = tokenToQuery(text, field, analyzer, reader);
            if (termQuery == null) {
                return false;
            }
            if (boost != null) {
                termQuery.setBoost(Float.parseFloat(boost));
            }
            if (!negate) {
                holder.set(termQuery);
                return true;
            }
            // a negated term is wrapped in a query with a single
            // MUST_NOT clause
            BooleanQuery negation = new BooleanQuery();
            negation.add(termQuery, MUST_NOT);
            holder.set(negation);
            return true;
        }
    });
    return holder.get();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) FullTextOr(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextOr) FullTextContains(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains) AtomicReference(java.util.concurrent.atomic.AtomicReference) FullTextVisitor(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextVisitor) FullTextTerm(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextTerm) FullTextAnd(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextAnd) FullTextExpression(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression)

Example 20 with FullTextExpression

use of org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression in project jackrabbit-oak by apache.

the class LuceneIndex method getLuceneRequest.

/**
 * Get the Lucene request for the given filter.
 * <p>
 * If the filter carries a native query restriction, its value selects the
 * kind of request by prefix: {@code mlt?} (more-like-this query),
 * {@code spellcheck?}, {@code suggest?}, or otherwise a query in Lucene
 * query parser syntax. These cases are mutually exclusive.
 *
 * @param filter the filter, including full-text constraint
 * @param reader the Lucene reader
 * @param nonFullTextConstraints whether non-full-text constraints (such as
 *            path, node type, and so on) should be added to the Lucene
 *            query; only applied if there is no native query restriction
 * @param indexDefinition nodestate that contains the index definition
 * @return the Lucene request: a spellcheck or suggest request, or else the
 *         combined Lucene query (a match-all query if there are no
 *         conditions)
 * @throws RuntimeException if a native query cannot be parsed
 */
private static LuceneRequestFacade getLuceneRequest(Filter filter, IndexReader reader, boolean nonFullTextConstraints, IndexDefinition indexDefinition) {
    List<Query> qs = new ArrayList<Query>();
    Analyzer analyzer = indexDefinition.getAnalyzer();
    FullTextExpression ft = filter.getFullTextConstraint();
    // there might be no full-text constraint
    // when using the LowCostLuceneIndexProvider
    // which is used for testing
    if (ft != null) {
        qs.add(getFullTextQuery(ft, analyzer, reader));
    }
    PropertyRestriction pr = filter.getPropertyRestriction(NATIVE_QUERY_FUNCTION);
    if (pr != null) {
        String query = String.valueOf(pr.first.getValue(pr.first.getType()));
        // Only the leading prefix is stripped (substring, not replace), so
        // that the same character sequence inside the request is kept.
        if (query.startsWith("mlt?")) {
            String mltQueryString = query.substring("mlt?".length());
            if (reader != null) {
                Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString);
                if (moreLikeThis != null) {
                    qs.add(moreLikeThis);
                }
            }
            // a more-like-this request is not in query parser syntax, so
            // it must not reach the parser branch below
        } else if (query.startsWith("spellcheck?")) {
            String spellcheckQueryString = query.substring("spellcheck?".length());
            if (reader != null) {
                return new LuceneRequestFacade<SpellcheckHelper.SpellcheckQuery>(SpellcheckHelper.getSpellcheckQuery(spellcheckQueryString, reader));
            }
        } else if (query.startsWith("suggest?")) {
            String suggestQueryString = query.substring("suggest?".length());
            if (reader != null) {
                return new LuceneRequestFacade<SuggestHelper.SuggestQuery>(SuggestHelper.getSuggestQuery(suggestQueryString));
            }
        } else {
            // plain native query: the parser is only needed here
            QueryParser queryParser = new QueryParser(VERSION, "", indexDefinition.getAnalyzer());
            try {
                qs.add(queryParser.parse(query));
            } catch (ParseException e) {
                throw new RuntimeException(e);
            }
        }
    } else if (nonFullTextConstraints) {
        addNonFullTextConstraints(qs, filter, reader, analyzer, indexDefinition);
    }
    if (qs.isEmpty()) {
        return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
    }
    return LucenePropertyIndex.performAdditionalWraps(qs);
}
Also used : PropertyRestriction(org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) FullTextExpression(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression) SpellcheckHelper(org.apache.jackrabbit.oak.plugins.index.lucene.util.SpellcheckHelper) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Aggregations

FullTextExpression (org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression)21 Filter (org.apache.jackrabbit.oak.spi.query.Filter)7 FullTextAnd (org.apache.jackrabbit.oak.spi.query.fulltext.FullTextAnd)6 FullTextContains (org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains)6 FullTextTerm (org.apache.jackrabbit.oak.spi.query.fulltext.FullTextTerm)5 BooleanQuery (org.apache.lucene.search.BooleanQuery)5 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)5 PrefixQuery (org.apache.lucene.search.PrefixQuery)5 Query (org.apache.lucene.search.Query)5 TermQuery (org.apache.lucene.search.TermQuery)5 TermRangeQuery (org.apache.lucene.search.TermRangeQuery)5 WildcardQuery (org.apache.lucene.search.WildcardQuery)5 Test (org.junit.Test)5 PropertyRestriction (org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction)4 FullTextOr (org.apache.jackrabbit.oak.spi.query.fulltext.FullTextOr)4 FullTextVisitor (org.apache.jackrabbit.oak.spi.query.fulltext.FullTextVisitor)4 ArrayList (java.util.ArrayList)3 AtomicReference (java.util.concurrent.atomic.AtomicReference)3 NodeBuilder (org.apache.jackrabbit.oak.spi.state.NodeBuilder)3 PhraseQuery (org.apache.lucene.search.PhraseQuery)3