Search in sources :

Example 26 with IndexingRule

use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.

the class IndexPlanner method canEvalAllFullText.

/**
 * Checks whether every term of the given full-text expression can be
 * evaluated with the supplied indexing rule.
 *
 * <p>The expression is walked once; each {@code FullTextContains} and
 * {@code FullTextTerm} is classified by its property name:
 * <ul>
 *   <li>no property name: recorded under the empty relative path;</li>
 *   <li>a name starting with {@code ../} or {@code ./}: flagged as a relative
 *       parent, which makes this method return {@code false};</li>
 *   <li>a name with depth greater than one: its parent path is recorded and
 *       the term is treated either as a node path (checked via
 *       {@code isAggregated}) or as a property path (checked via
 *       {@code getConfig});</li>
 *   <li>any other name: treated as a property directly on the node.</li>
 * </ul>
 *
 * <p>Side effect: {@code result.setParentPath(..)} is called whenever the
 * method gets past the relative-path checks, including some cases in which
 * it subsequently returns {@code false}.
 *
 * @param indexingRule rule against which the terms are checked
 * @param ft full-text expression to inspect; {@code null} yields {@code false}
 * @return {@code true} if all terms can be evaluated, {@code false} otherwise
 */
private boolean canEvalAllFullText(final IndexingRule indexingRule, FullTextExpression ft) {
    if (ft == null) {
        return false;
    }
    final HashSet<String> relPaths = new HashSet<String>();
    final HashSet<String> nonIndexedPaths = new HashSet<String>();
    final AtomicBoolean relativeParentsFound = new AtomicBoolean();
    final AtomicBoolean nodeScopedCondition = new AtomicBoolean();
    ft.accept(new FullTextVisitor.FullTextVisitorBase() {

        @Override
        public boolean visit(FullTextContains contains) {
            visitTerm(contains.getPropertyName());
            return true;
        }

        @Override
        public boolean visit(FullTextTerm term) {
            visitTerm(term.getPropertyName());
            return true;
        }

        /** Classifies one term and records it in the enclosing collections. */
        private void visitTerm(String propertyName) {
            String propertyPath = null;
            String nodePath = null;
            if (propertyName == null) {
                relPaths.add("");
            } else if (propertyName.startsWith("../") || propertyName.startsWith("./")) {
                relPaths.add(propertyName);
                relativeParentsFound.set(true);
            } else if (getDepth(propertyName) > 1) {
                String parent = getParentPath(propertyName);
                if (LucenePropertyIndex.isNodePath(propertyName)) {
                    nodePath = parent;
                } else {
                    propertyPath = propertyName;
                }
                relPaths.add(parent);
            } else {
                propertyPath = propertyName;
                relPaths.add("");
            }
            if (nodePath != null && !indexingRule.isAggregated(nodePath)) {
                nonIndexedPaths.add(propertyName);
            } else if (propertyPath != null) {
                PropertyDefinition pd = indexingRule.getConfig(propertyPath);
                // Not usable for full text: no definition at all, or not analyzed
                if (pd == null || !pd.analyzed) {
                    nonIndexedPaths.add(propertyName);
                }
            }
            if (nodeScopedTerm(propertyName)) {
                nodeScopedCondition.set(true);
            }
        }
    });
    if (nodeScopedCondition.get() && !indexingRule.isNodeFullTextIndexed()) {
        return false;
    }
    if (relativeParentsFound.get()) {
        log.debug("Relative parents found {} which are not supported", relPaths);
        return false;
    }
    // Terms that this rule does not index can only be evaluated when they all
    // share a single parent (e.g. jcr:content), so relPaths must hold at most
    // one entry.
    if (!nonIndexedPaths.isEmpty()) {
        if (relPaths.size() > 1) {
            log.debug("Following relative property paths are not indexed: {}", relPaths);
            return false;
        }
        result.setParentPath(Iterables.getOnlyElement(relPaths, ""));
        // Such non indexed path can possibly be evaluated via any rule on nt:base
        // which can possibly index everything
        IndexingRule rule = definition.getApplicableIndexingRule(NT_BASE);
        if (rule == null) {
            return false;
        }
        for (String p : nonIndexedPaths) {
            if (LucenePropertyIndex.isNodePath(p)) {
                // A node path needs node scope full-text indexing on the nt:base rule
                if (!rule.isNodeFullTextIndexed()) {
                    return false;
                }
            } else {
                // Index can only evaluate a property like jcr:content/type
                // if it indexes 'type' and that too analyzed
                String propertyName = PathUtils.getName(p);
                PropertyDefinition pd = rule.getConfig(propertyName);
                if (pd == null || !pd.analyzed) {
                    return false;
                }
            }
        }
    } else {
        result.setParentPath("");
    }
    return true;
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FullTextVisitor(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextVisitor) FullTextTerm(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextTerm) IndexingRule(org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule) FullTextContains(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains) HashSet(java.util.HashSet)

Example 27 with IndexingRule

use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.

the class LuceneIndex method addNonFullTextConstraints.

/**
 * Translates the non-full-text parts of a filter (node type, path restriction
 * and property restrictions) into Lucene queries and appends them to
 * {@code qs}.
 *
 * @param qs list that receives the generated queries
 * @param filter filter whose restrictions are translated
 * @param reader index reader, handed to {@code addReferenceConstraint} for
 *        restrictions on the {@code "*"} property
 * @param analyzer analyzer used to tokenize restriction values
 * @param indexDefinition definition from which the nt:base indexing rule is read
 */
private static void addNonFullTextConstraints(List<Query> qs, Filter filter, IndexReader reader, Analyzer analyzer, IndexDefinition indexDefinition) {
    if (!filter.matchesAllTypes()) {
        addNodeTypeConstraints(qs, filter);
    }
    String path = filter.getPath();
    switch(filter.getPathRestriction()) {
        case ALL_CHILDREN:
            if (USE_PATH_RESTRICTION) {
                if ("/".equals(path)) {
                    break;
                }
                if (!path.endsWith("/")) {
                    path += "/";
                }
                qs.add(new PrefixQuery(newPathTerm(path)));
            }
            break;
        case DIRECT_CHILDREN:
            if (USE_PATH_RESTRICTION) {
                if (!path.endsWith("/")) {
                    path += "/";
                }
                qs.add(new PrefixQuery(newPathTerm(path)));
            }
            break;
        case EXACT:
            qs.add(new TermQuery(newPathTerm(path)));
            break;
        case PARENT:
            if (denotesRoot(path)) {
                // there's no parent of the root node
                // we add a path that can not possibly occur because there
                // is no way to say "match no documents" in Lucene
                qs.add(new TermQuery(new Term(FieldNames.PATH, "///")));
            } else {
                qs.add(new TermQuery(newPathTerm(getParentPath(path))));
            }
            break;
        case NO_RESTRICTION:
            break;
    }
    // Fulltext index definition used by LuceneIndex only works with old format
    // which is not nodeType based. So just use the nt:base index
    IndexingRule rule = indexDefinition.getApplicableIndexingRule(JcrConstants.NT_BASE);
    for (PropertyRestriction pr : filter.getPropertyRestrictions()) {
        if (pr.first == null && pr.last == null) {
            // restriction carries no bounds at all, so there is nothing to
            // translate into a Lucene query (see OAK-1208)
            continue;
        }
        // check excluded properties and types
        if (isExcludedProperty(pr, rule)) {
            continue;
        }
        String name = pr.propertyName;
        if (QueryConstants.REP_EXCERPT.equals(name) || QueryConstants.OAK_SCORE_EXPLANATION.equals(name) || QueryConstants.REP_FACET.equals(name)) {
            continue;
        }
        if (JCR_PRIMARYTYPE.equals(name)) {
            continue;
        }
        if (QueryConstants.RESTRICTION_LOCAL_NAME.equals(name)) {
            continue;
        }
        if (skipTokenization(name)) {
            qs.add(new TermQuery(new Term(name, pr.first.getValue(STRING))));
            continue;
        }
        String first = null;
        String last = null;
        boolean isLike = pr.isLike;
        // TODO what to do with escaped tokens?
        if (pr.first != null) {
            first = pr.first.getValue(STRING);
            first = first.replace("\\", "");
        }
        if (pr.last != null) {
            last = pr.last.getValue(STRING);
            last = last.replace("\\", "");
        }
        if (isLike) {
            // map the LIKE wildcards onto the Lucene wildcard characters
            first = first.replace('%', WildcardQuery.WILDCARD_STRING);
            first = first.replace('_', WildcardQuery.WILDCARD_CHAR);
            int indexOfWS = first.indexOf(WildcardQuery.WILDCARD_STRING);
            int indexOfWC = first.indexOf(WildcardQuery.WILDCARD_CHAR);
            int len = first.length();
            // A prefix query is only equivalent when the sole wildcard is one
            // trailing multi-character wildcard after a non-empty prefix.
            // indexOf never returns len, so the comparison must be against
            // len - 1; a trailing single-character wildcard must stay a
            // wildcard query because it matches exactly one character.
            boolean trailingStarOnly = len > 1 && indexOfWS == len - 1 && indexOfWC < 0;
            if (trailingStarOnly) {
                // remove trailing "*" for prefixquery
                first = first.substring(0, len - 1);
                if (JCR_PATH.equals(name)) {
                    qs.add(new PrefixQuery(newPathTerm(first)));
                } else {
                    qs.add(new PrefixQuery(new Term(name, first)));
                }
            } else {
                if (JCR_PATH.equals(name)) {
                    qs.add(new WildcardQuery(newPathTerm(first)));
                } else {
                    qs.add(new WildcardQuery(new Term(name, first)));
                }
            }
            continue;
        }
        // equality: both bounds identical and inclusive
        if (first != null && first.equals(last) && pr.firstIncluding && pr.lastIncluding) {
            if (JCR_PATH.equals(name)) {
                qs.add(new TermQuery(newPathTerm(first)));
            } else {
                if ("*".equals(name)) {
                    addReferenceConstraint(first, qs, reader);
                } else {
                    for (String t : tokenize(first, analyzer)) {
                        qs.add(new TermQuery(new Term(name, t)));
                    }
                }
            }
            continue;
        }
        // anything else is expressed as a string range over the bounds
        first = tokenizeAndPoll(first, analyzer);
        last = tokenizeAndPoll(last, analyzer);
        qs.add(TermRangeQuery.newStringRange(name, first, last, pr.firstIncluding, pr.lastIncluding));
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) PropertyRestriction(org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction) WildcardQuery(org.apache.lucene.search.WildcardQuery) IndexingRule(org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule) PrefixQuery(org.apache.lucene.search.PrefixQuery) Term(org.apache.lucene.index.Term) FullTextTerm(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextTerm) TermFactory.newPathTerm(org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm) TermFactory.newFulltextTerm(org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newFulltextTerm)

Example 28 with IndexingRule

use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.

the class LucenePropertyIndex method tokenToQuery.

/**
 * Builds the Lucene query for a single text token on the given field.
 *
 * <p>For the full-text field, when the indexing rule exposes node-scope
 * analyzed properties, the token is additionally queried against each of
 * those properties' analyzed fields (boosted per property definition), all
 * combined as optional clauses with the unboosted full-text field query.
 * When an augmentor is supplied and yields a query for the full-text field,
 * that query is attached as a further optional clause.
 *
 * @param text token text to search for
 * @param fieldName name of the field the token is matched against
 * @param pr plan result supplying the indexing rule and index definition
 * @param analyzer analyzer passed on to the per-field query construction
 * @param augmentor optional provider of extra query terms; may be {@code null}
 * @return the resulting query
 */
private static Query tokenToQuery(String text, String fieldName, PlanResult pr, Analyzer analyzer, FulltextQueryTermsProvider augmentor) {
    final boolean onFulltextField = FieldNames.FULLTEXT.equals(fieldName);
    final IndexingRule rule = pr.indexingRule;

    Query base;
    if (onFulltextField && !rule.getNodeScopeAnalyzedProps().isEmpty()) {
        // Fan the token out over every node-scope analyzed property
        BooleanQuery expanded = new BooleanQuery();
        for (PropertyDefinition propDef : rule.getNodeScopeAnalyzedProps()) {
            String analyzedField = FieldNames.createAnalyzedFieldName(propDef.name);
            Query perProperty = tokenToQuery(text, analyzedField, analyzer);
            perProperty.setBoost(propDef.boost);
            expanded.add(perProperty, BooleanClause.Occur.SHOULD);
        }
        // The plain full-text field participates too, without any boost
        Query plain = tokenToQuery(text, fieldName, analyzer);
        expanded.add(plain, BooleanClause.Occur.SHOULD);
        base = expanded;
    } else {
        base = tokenToQuery(text, fieldName, analyzer);
    }

    // Augmentation only applies to the full-text field
    if (augmentor == null || !onFulltextField) {
        return base;
    }
    Query extra = augmentor.getQueryTerm(text, analyzer, pr.indexDefinition.getDefinitionNodeState());
    if (extra == null) {
        return base;
    }
    BooleanQuery augmented = new BooleanQuery();
    augmented.add(base, BooleanClause.Occur.SHOULD);
    augmented.add(extra, BooleanClause.Occur.SHOULD);
    return augmented;
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) IndexingRule(org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) NumericRangeQuery(org.apache.lucene.search.NumericRangeQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery)

Example 29 with IndexingRule

use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.

the class LuceneIndexer method index.

/**
 * Indexes a single node state entry.
 *
 * <p>The entry is skipped when the path filter does not include its path,
 * when no indexing rule applies to its node state, or when the document
 * maker produces no document for it.
 *
 * @param entry node state entry to index
 * @return {@code true} if a document was written to the index,
 *         {@code false} if the entry was skipped
 */
@Override
public boolean index(NodeStateEntry entry) throws IOException, CommitFailedException {
    boolean included = getFilterResult(entry.getPath()) == PathFilter.Result.INCLUDE;
    if (!included) {
        return false;
    }

    IndexingRule applicableRule = definition.getApplicableIndexingRule(entry.getNodeState());
    if (applicableRule == null) {
        return false;
    }

    Document luceneDoc = newDocumentMaker(applicableRule, entry.getPath()).makeDocument(entry.getNodeState());
    if (luceneDoc == null) {
        // nothing to index for this entry
        return false;
    }

    writeToIndex(luceneDoc, entry.getPath());
    progressReporter.indexUpdate(definition.getIndexPath());
    return true;
}
Also used : IndexingRule(org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule) LuceneDocumentMaker(org.apache.jackrabbit.oak.plugins.index.lucene.LuceneDocumentMaker) Document(org.apache.lucene.document.Document) NodeDocument(org.apache.jackrabbit.oak.plugins.document.NodeDocument)

Aggregations

IndexingRule (org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule)29 LuceneIndexHelper.newLuceneIndexDefinition (org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.newLuceneIndexDefinition)24 LuceneIndexHelper.newLucenePropertyIndexDefinition (org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.newLucenePropertyIndexDefinition)24 Test (org.junit.Test)24 NodeBuilder (org.apache.jackrabbit.oak.spi.state.NodeBuilder)13 IndexDefinitionBuilder (org.apache.jackrabbit.oak.plugins.index.lucene.util.IndexDefinitionBuilder)6 PropertyRestriction (org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction)2 FullTextTerm (org.apache.jackrabbit.oak.spi.query.fulltext.FullTextTerm)2 PrefixQuery (org.apache.lucene.search.PrefixQuery)2 TermQuery (org.apache.lucene.search.TermQuery)2 WildcardQuery (org.apache.lucene.search.WildcardQuery)2 Maps.newHashMap (com.google.common.collect.Maps.newHashMap)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 NodeDocument (org.apache.jackrabbit.oak.plugins.document.NodeDocument)1 LuceneDocumentMaker (org.apache.jackrabbit.oak.plugins.index.lucene.LuceneDocumentMaker)1 TermFactory.newFulltextTerm (org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newFulltextTerm)1 TermFactory.newPathTerm (org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm)1