Search in sources :

Example 11 with IndexingRule

use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.

the class IndexPlanner method getPlanBuilder.

/**
 * Decides whether the index described by {@code definition} can serve the
 * current {@code filter} and, if so, prepares the plan builder for it.
 *
 * <p>As a side effect {@code result} is replaced with a fresh
 * {@code PlanResult} once the basic applicability checks have passed, and
 * the matched property definitions and evaluation flags are recorded on it.
 *
 * @return a configured plan builder, or {@code null} when this index opts out
 */
private IndexPlan.Builder getPlanBuilder() {
    log.trace("Evaluating plan with index definition {}", definition);
    final FullTextExpression fullText = filter.getFullTextConstraint();
    final boolean hasFullText = fullText != null;

    if (!definition.getVersion().isAtLeast(IndexFormatVersion.V2)) {
        log.trace("Index is old format. Not supported");
        return null;
    }
    // The query has a fulltext constraint but the index has fulltext disabled
    if (hasFullText && !definition.isFullTextEnabled()) {
        return null;
    }
    final IndexingRule rule = getApplicableRule();
    // No applicable rule, or the rule itself has fulltext disabled
    if (rule == null || (hasFullText && !rule.isFulltextEnabled())) {
        return null;
    }
    if (!checkForQueryPaths()) {
        log.trace("Opting out due mismatch between path restriction {} and query paths {}", filter.getPath(), definition.getQueryPaths());
        return null;
    }

    result = new PlanResult(indexPath, definition, rule);

    final boolean functionRestricted = definition.hasFunctionDefined()
            && filter.getPropertyRestriction(definition.getFunctionName()) != null;
    if (functionRestricted) {
        return getNativeFunctionPlanBuilder(rule.getBaseNodeType());
    }

    final List<String> matchedProps = newArrayListWithCapacity(filter.getPropertyRestrictions().size());

    // First pass: restrictions that match a function-based property definition
    for (PropertyDefinition functionDefn : rule.getFunctionRestrictions()) {
        final String functionKey = functionDefn.function;
        for (PropertyRestriction restriction : filter.getPropertyRestrictions()) {
            if (restriction.propertyName.equals(functionKey)) {
                matchedProps.add(functionKey);
                result.propDefns.put(functionKey, functionDefn);
            }
        }
    }

    // Second pass: plain property restrictions, only looked at when the rule
    // has property indexing enabled at all
    final List<String> facets = new LinkedList<String>();
    if (rule.propertyIndexEnabled) {
        for (PropertyRestriction restriction : filter.getPropertyRestrictions()) {
            final String propName = restriction.propertyName;
            // local-name restrictions are skipped here; function-based
            // restrictions were already handled in the first pass
            if (QueryConstants.RESTRICTION_LOCAL_NAME.equals(propName)
                    || propName.startsWith(QueryConstants.FUNCTION_RESTRICTION_PREFIX)) {
                continue;
            }
            if (QueryImpl.REP_FACET.equals(propName)) {
                final String facetValue = restriction.first.getValue(Type.STRING);
                facets.add(FacetHelper.parseFacetField(facetValue));
            }
            final PropertyDefinition propDefn = rule.getConfig(propName);
            if (propDefn == null || !propDefn.propertyIndexEnabled()) {
                continue;
            }
            // a null restriction is only usable when null checks are enabled
            if (restriction.isNullRestriction() && !propDefn.nullCheckEnabled) {
                continue;
            }
            matchedProps.add(propName);
            result.propDefns.put(propName, propDefn);
        }
    }

    final boolean nodeTypeEvaluable = canEvalNodeTypeRestrictions(rule);
    final boolean pathEvaluable = canEvalPathRestrictions(rule);
    final boolean allFullTextEvaluable = canEvalAllFullText(rule, fullText);
    final boolean nodeNameEvaluable = canEvalNodeNameRestriction(rule);

    if (hasFullText && !allFullTextEvaluable) {
        return null;
    }

    final List<OrderEntry> ordering = createSortOrder(rule);
    final boolean sortable = canSortByProperty(ordering);

    final boolean nothingToEvaluate = matchedProps.isEmpty()
            && !sortable
            && !hasFullText
            && !pathEvaluable
            && !nodeTypeEvaluable
            && !nodeNameEvaluable;
    if (nothingToEvaluate) {
        return null;
    }

    //TODO Need a way to have better cost estimate to indicate that
    //this index can evaluate more propertyRestrictions natively (if more props are indexed)
    //For now the cost per entry is divided by the number of matched
    //properties plus sort entries (at least 1)
    final int costDivisor = Math.max(1, matchedProps.size() + ordering.size());

    final IndexPlan.Builder plan = defaultPlan();
    if (!ordering.isEmpty()) {
        plan.setSortOrder(ordering);
    }
    if (!facets.isEmpty()) {
        plan.setAttribute(FacetHelper.ATTR_FACET_FIELDS, facets);
    }
    if (!hasFullText) {
        result.enableNonFullTextConstraints();
    }
    if (nodeTypeEvaluable) {
        result.enableNodeTypeEvaluation();
    }
    if (nodeNameEvaluable) {
        result.enableNodeNameRestriction();
    }
    return plan.setCostPerEntry(definition.getCostPerEntry() / costDivisor);
}
Also used : PropertyRestriction(org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction) LinkedList(java.util.LinkedList) OrderEntry(org.apache.jackrabbit.oak.spi.query.QueryIndex.OrderEntry) IndexingRule(org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule) IndexPlan(org.apache.jackrabbit.oak.spi.query.QueryIndex.IndexPlan) FullTextExpression(org.apache.jackrabbit.oak.query.fulltext.FullTextExpression)

Example 12 with IndexingRule

use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.

the class IndexPlanner method canEvalAllFullText.

/**
 * Checks whether every term of the given fulltext expression can be
 * evaluated with the supplied indexing rule.
 *
 * <p>The expression is walked once to collect the parent paths of all terms,
 * the terms that the rule does not cover, and whether any term is node
 * scoped or uses a {@code ./} or {@code ../} prefix. As a side effect
 * {@code result.setParentPath(...)} is called once those checks have passed.
 *
 * @param indexingRule rule the terms are checked against
 * @param ft fulltext constraint of the query, may be {@code null}
 * @return {@code true} if all terms can be evaluated; {@code false} if
 *         {@code ft} is {@code null} or any term cannot be evaluated
 */
private boolean canEvalAllFullText(final IndexingRule indexingRule, FullTextExpression ft) {
    if (ft == null) {
        return false;
    }
    final HashSet<String> relPaths = new HashSet<String>();
    final HashSet<String> nonIndexedPaths = new HashSet<String>();
    final AtomicBoolean relativeParentsFound = new AtomicBoolean();
    final AtomicBoolean nodeScopedCondition = new AtomicBoolean();
    ft.accept(new FullTextVisitor.FullTextVisitorBase() {

        @Override
        public boolean visit(FullTextContains contains) {
            visitTerm(contains.getPropertyName());
            return true;
        }

        @Override
        public boolean visit(FullTextTerm term) {
            visitTerm(term.getPropertyName());
            return true;
        }

        // Classifies a single term by its property name and records the outcome
        // in the enclosing method's collections.
        private void visitTerm(String propertyName) {
            String p = propertyName;
            String propertyPath = null;
            String nodePath = null;
            if (p == null) {
                relPaths.add("");
            } else if (p.startsWith("../") || p.startsWith("./")) {
                relPaths.add(p);
                relativeParentsFound.set(true);
            } else if (getDepth(p) > 1) {
                String parent = getParentPath(p);
                if (LucenePropertyIndex.isNodePath(p)) {
                    nodePath = parent;
                } else {
                    propertyPath = p;
                }
                relPaths.add(parent);
            } else {
                propertyPath = p;
                relPaths.add("");
            }
            if (nodePath != null && !indexingRule.isAggregated(nodePath)) {
                nonIndexedPaths.add(p);
            } else if (propertyPath != null) {
                PropertyDefinition pd = indexingRule.getConfig(propertyPath);
                // not covered unless the property is configured and analyzed
                if (pd == null || !pd.analyzed) {
                    nonIndexedPaths.add(p);
                }
            }
            if (nodeScopedTerm(propertyName)) {
                nodeScopedCondition.set(true);
            }
        }
    });
    if (nodeScopedCondition.get() && !indexingRule.isNodeFullTextIndexed()) {
        return false;
    }
    if (relativeParentsFound.get()) {
        log.debug("Relative parents found {} which are not supported", relPaths);
        return false;
    }
    //Terms not covered by this rule can only be handled when all terms
    //share a single parent path. So ensure that relPaths size is 1 or 0
    if (!nonIndexedPaths.isEmpty()) {
        if (relPaths.size() > 1) {
            log.debug("Following relative property paths are not indexed: {}", relPaths);
            return false;
        }
        result.setParentPath(Iterables.getOnlyElement(relPaths, ""));
        //Such non indexed path can possibly be evaluated via any rule on nt:base
        //which can possibly index everything
        IndexingRule rule = definition.getApplicableIndexingRule(NT_BASE);
        if (rule == null) {
            return false;
        }
        for (String p : nonIndexedPaths) {
            if (LucenePropertyIndex.isNodePath(p)) {
                //A node path needs node scope fulltext indexing on the nt:base rule
                if (!rule.isNodeFullTextIndexed()) {
                    return false;
                }
            } else {
                //Index can only evaluate a property like jcr:content/type
                //if it indexes 'type' and that too analyzed
                String propertyName = PathUtils.getName(p);
                PropertyDefinition pd = rule.getConfig(propertyName);
                if (pd == null || !pd.analyzed) {
                    return false;
                }
            }
        }
    } else {
        result.setParentPath("");
    }
    return true;
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FullTextVisitor(org.apache.jackrabbit.oak.query.fulltext.FullTextVisitor) FullTextTerm(org.apache.jackrabbit.oak.query.fulltext.FullTextTerm) IndexingRule(org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule) FullTextContains(org.apache.jackrabbit.oak.query.fulltext.FullTextContains) HashSet(java.util.HashSet)

Example 13 with IndexingRule

use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.

the class LuceneIndex method addNonFullTextConstraints.

/**
 * Translates the non-fulltext parts of a filter (node type, path restriction
 * and property restrictions) into Lucene queries and appends them to
 * {@code qs}.
 *
 * @param qs list the generated queries are appended to
 * @param filter filter whose restrictions are translated
 * @param reader index reader, passed on to {@code addReferenceConstraint}
 *        for restrictions on the {@code "*"} property
 * @param analyzer analyzer used to tokenize restriction values
 * @param indexDefinition definition whose {@code nt:base} indexing rule is
 *        used to check for excluded properties
 */
private static void addNonFullTextConstraints(List<Query> qs, Filter filter, IndexReader reader, Analyzer analyzer, IndexDefinition indexDefinition) {
    if (!filter.matchesAllTypes()) {
        addNodeTypeConstraints(qs, filter);
    }
    String path = filter.getPath();
    switch(filter.getPathRestriction()) {
        case ALL_CHILDREN:
            if (USE_PATH_RESTRICTION) {
                if ("/".equals(path)) {
                    break;
                }
                if (!path.endsWith("/")) {
                    path += "/";
                }
                qs.add(new PrefixQuery(newPathTerm(path)));
            }
            break;
        case DIRECT_CHILDREN:
            if (USE_PATH_RESTRICTION) {
                if (!path.endsWith("/")) {
                    path += "/";
                }
                qs.add(new PrefixQuery(newPathTerm(path)));
            }
            break;
        case EXACT:
            qs.add(new TermQuery(newPathTerm(path)));
            break;
        case PARENT:
            if (denotesRoot(path)) {
                // there's no parent of the root node
                // we add a path that can not possibly occur because there
                // is no way to say "match no documents" in Lucene
                qs.add(new TermQuery(new Term(FieldNames.PATH, "///")));
            } else {
                qs.add(new TermQuery(newPathTerm(getParentPath(path))));
            }
            break;
        case NO_RESTRICTION:
            break;
    }
    //Fulltext index definition used by LuceneIndex only works with old format
    //which is not nodeType based. So just use the nt:base index
    IndexingRule rule = indexDefinition.getApplicableIndexingRule(JcrConstants.NT_BASE);
    for (PropertyRestriction pr : filter.getPropertyRestrictions()) {
        if (pr.first == null && pr.last == null) {
            // a restriction without any bound cannot be turned into a
            // term or range query, so it is skipped (OAK-1208)
            continue;
        }
        // check excluded properties and types
        if (isExcludedProperty(pr, rule)) {
            continue;
        }
        String name = pr.propertyName;
        if (QueryImpl.REP_EXCERPT.equals(name) || QueryImpl.OAK_SCORE_EXPLANATION.equals(name) || QueryImpl.REP_FACET.equals(name)) {
            continue;
        }
        if (JCR_PRIMARYTYPE.equals(name)) {
            continue;
        }
        if (QueryConstants.RESTRICTION_LOCAL_NAME.equals(name)) {
            continue;
        }
        if (skipTokenization(name)) {
            // NOTE(review): pr.first is dereferenced without a null check here;
            // a restriction with only an upper bound would fail - confirm whether
            // that can occur for non-tokenized properties
            qs.add(new TermQuery(new Term(name, pr.first.getValue(STRING))));
            continue;
        }
        String first = null;
        String last = null;
        boolean isLike = pr.isLike;
        // TODO what to do with escaped tokens?
        if (pr.first != null) {
            first = pr.first.getValue(STRING);
            first = first.replace("\\", "");
        }
        if (pr.last != null) {
            last = pr.last.getValue(STRING);
            last = last.replace("\\", "");
        }
        if (isLike) {
            // map the LIKE wildcards onto the Lucene wildcard characters
            first = first.replace('%', WildcardQuery.WILDCARD_STRING);
            first = first.replace('_', WildcardQuery.WILDCARD_CHAR);
            int indexOfWS = first.indexOf(WildcardQuery.WILDCARD_STRING);
            int indexOfWC = first.indexOf(WildcardQuery.WILDCARD_CHAR);
            int len = first.length();
            // A pattern whose only wildcard is one trailing "*" after a non-empty
            // prefix is a plain prefix match. indexOf() returns the FIRST match,
            // so equality with the last index means there is no earlier "*".
            // A trailing single-char wildcard is not a prefix match and must
            // stay a wildcard query.
            boolean prefixOnly = indexOfWS > 0 && indexOfWS == len - 1 && indexOfWC < 0;
            if (prefixOnly) {
                // remove trailing "*" for prefixquery
                first = first.substring(0, len - 1);
                if (JCR_PATH.equals(name)) {
                    qs.add(new PrefixQuery(newPathTerm(first)));
                } else {
                    qs.add(new PrefixQuery(new Term(name, first)));
                }
            } else {
                if (JCR_PATH.equals(name)) {
                    qs.add(new WildcardQuery(newPathTerm(first)));
                } else {
                    qs.add(new WildcardQuery(new Term(name, first)));
                }
            }
            continue;
        }
        // equality: both bounds are the same value and both are inclusive
        if (first != null && first.equals(last) && pr.firstIncluding && pr.lastIncluding) {
            if (JCR_PATH.equals(name)) {
                qs.add(new TermQuery(newPathTerm(first)));
            } else {
                if ("*".equals(name)) {
                    addReferenceConstraint(first, qs, reader);
                } else {
                    for (String t : tokenize(first, analyzer)) {
                        qs.add(new TermQuery(new Term(name, t)));
                    }
                }
            }
            continue;
        }
        // everything else is treated as a string range
        first = tokenizeAndPoll(first, analyzer);
        last = tokenizeAndPoll(last, analyzer);
        qs.add(TermRangeQuery.newStringRange(name, first, last, pr.firstIncluding, pr.lastIncluding));
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) PropertyRestriction(org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction) WildcardQuery(org.apache.lucene.search.WildcardQuery) IndexingRule(org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule) PrefixQuery(org.apache.lucene.search.PrefixQuery) Term(org.apache.lucene.index.Term) TermFactory.newPathTerm(org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm) TermFactory.newFulltextTerm(org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newFulltextTerm) FullTextTerm(org.apache.jackrabbit.oak.query.fulltext.FullTextTerm)

Example 14 with IndexingRule

use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.

the class LucenePropertyIndex method tokenToQuery.

/**
 * Builds the query for a single token on the given field.
 *
 * <p>For the fulltext field the query is additionally expanded over the
 * rule's node-scope analyzed properties (each with its own boost), and an
 * optional augmentor may contribute a further optional clause.
 *
 * @param text token text
 * @param fieldName Lucene field the token is matched against
 * @param pr plan result providing the indexing rule and index definition
 * @param analyzer analyzer used to build the per-field queries
 * @param augmentor optional provider of extra query terms, may be {@code null}
 * @return the resulting query
 */
private static Query tokenToQuery(String text, String fieldName, PlanResult pr, Analyzer analyzer, FulltextQueryTermsProvider augmentor) {
    final IndexingRule rule = pr.indexingRule;
    final boolean fulltextField = FieldNames.FULLTEXT.equals(fieldName);

    final Query base;
    if (!fulltextField || rule.getNodeScopeAnalyzedProps().isEmpty()) {
        base = tokenToQuery(text, fieldName, analyzer);
    } else {
        //Expand the query on fulltext field: one boosted clause per
        //node-scope analyzed property ...
        final BooleanQuery expanded = new BooleanQuery();
        for (PropertyDefinition analyzedProp : rule.getNodeScopeAnalyzedProps()) {
            final Query perProperty = tokenToQuery(text, FieldNames.createAnalyzedFieldName(analyzedProp.name), analyzer);
            perProperty.setBoost(analyzedProp.boost);
            expanded.add(perProperty, BooleanClause.Occur.SHOULD);
        }
        //... plus an unboosted clause on the actual fulltext field
        expanded.add(tokenToQuery(text, fieldName, analyzer), BooleanClause.Occur.SHOULD);
        base = expanded;
    }

    //Augmentation only applies to the fulltext field and needs an augmentor
    if (!fulltextField || augmentor == null) {
        return base;
    }
    final Query extraTerms = augmentor.getQueryTerm(text, analyzer, pr.indexDefinition.getDefinitionNodeState());
    if (extraTerms == null) {
        return base;
    }
    //Augmented terms are added as a 'SHOULD' clause next to the base query
    final BooleanQuery augmented = new BooleanQuery();
    augmented.add(base, BooleanClause.Occur.SHOULD);
    augmented.add(extraTerms, BooleanClause.Occur.SHOULD);
    return augmented;
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) IndexingRule(org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) NumericRangeQuery(org.apache.lucene.search.NumericRangeQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery)

Example 15 with IndexingRule

use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.

the class IndexDefinitionTest method propertyRegExAndRelativeProperty.

/**
 * Builds an old-format Lucene index definition on property {@code foo},
 * passes it through {@code IndexDefinition.updateDefinition} and verifies
 * that the resulting nt:base rule has a config for {@code foo} but none for
 * the relative property {@code foo/bar}.
 */
@Test
public void propertyRegExAndRelativeProperty() throws Exception {
    // old-format definition indexing only "foo"
    final NodeBuilder legacyBuilder = newLuceneIndexDefinition(
            builder.child(INDEX_DEFINITIONS_NAME), "lucene", of(TYPENAME_STRING), of("foo"), "async");
    assertTrue(new IndexDefinition(root, legacyBuilder.getNodeState(), "/foo").isOfOldFormat());

    // definition produced by updateDefinition from the old-format one
    final NodeBuilder updatedBuilder = IndexDefinition.updateDefinition(legacyBuilder.getNodeState().builder());
    final IndexDefinition updatedDefinition = new IndexDefinition(root, updatedBuilder.getNodeState(), "/foo");

    final IndexingRule ntBaseRule = updatedDefinition.getApplicableIndexingRule(asState(newNode("nt:base")));
    assertNotNull(ntBaseRule.getConfig("foo"));
    assertNull("Property regex used should not allow relative properties", ntBaseRule.getConfig("foo/bar"));
}
Also used : IndexingRule(org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule) LuceneIndexHelper.newLucenePropertyIndexDefinition(org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.newLucenePropertyIndexDefinition) LuceneIndexHelper.newLuceneIndexDefinition(org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.newLuceneIndexDefinition) NodeBuilder(org.apache.jackrabbit.oak.spi.state.NodeBuilder) Test(org.junit.Test)

Aggregations

IndexingRule (org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule)22 LuceneIndexHelper.newLuceneIndexDefinition (org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.newLuceneIndexDefinition)18 LuceneIndexHelper.newLucenePropertyIndexDefinition (org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.newLucenePropertyIndexDefinition)18 Test (org.junit.Test)18 NodeBuilder (org.apache.jackrabbit.oak.spi.state.NodeBuilder)13 FullTextTerm (org.apache.jackrabbit.oak.query.fulltext.FullTextTerm)2 PropertyRestriction (org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction)2 PrefixQuery (org.apache.lucene.search.PrefixQuery)2 TermQuery (org.apache.lucene.search.TermQuery)2 WildcardQuery (org.apache.lucene.search.WildcardQuery)2 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 TermFactory.newFulltextTerm (org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newFulltextTerm)1 TermFactory.newPathTerm (org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm)1 FullTextContains (org.apache.jackrabbit.oak.query.fulltext.FullTextContains)1 FullTextExpression (org.apache.jackrabbit.oak.query.fulltext.FullTextExpression)1 FullTextVisitor (org.apache.jackrabbit.oak.query.fulltext.FullTextVisitor)1 IndexPlan (org.apache.jackrabbit.oak.spi.query.QueryIndex.IndexPlan)1 OrderEntry (org.apache.jackrabbit.oak.spi.query.QueryIndex.OrderEntry)1