use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.
the class IndexPlanner method getPlanBuilder.
/**
 * Decides whether this index can serve the current filter and, if so,
 * prepares the plan builder for it.
 *
 * <p>As a side effect the {@code result} field is replaced with a fresh
 * {@code PlanResult} once the basic applicability checks have passed; the
 * later evaluation helpers are invoked after that assignment.
 *
 * @return a configured plan builder, or {@code null} when the index opts out
 */
private IndexPlan.Builder getPlanBuilder() {
    log.trace("Evaluating plan with index definition {}", definition);
    FullTextExpression fullText = filter.getFullTextConstraint();
    final boolean hasFullText = fullText != null;

    if (!definition.getVersion().isAtLeast(IndexFormatVersion.V2)) {
        log.trace("Index is old format. Not supported");
        return null;
    }

    // A fulltext query needs fulltext enabled on the index definition ...
    if (hasFullText && !definition.isFullTextEnabled()) {
        return null;
    }

    IndexingRule rule = getApplicableRule();
    if (rule == null) {
        return null;
    }

    // ... and on the applicable indexing rule as well
    if (hasFullText && !rule.isFulltextEnabled()) {
        return null;
    }

    if (!checkForQueryPaths()) {
        log.trace("Opting out due mismatch between path restriction {} and query paths {}", filter.getPath(), definition.getQueryPaths());
        return null;
    }

    result = new PlanResult(indexPath, definition, rule);

    if (definition.hasFunctionDefined() && filter.getPropertyRestriction(definition.getFunctionName()) != null) {
        return getNativeFunctionPlanBuilder(rule.getBaseNodeType());
    }

    List<String> matchedProps = newArrayListWithCapacity(filter.getPropertyRestrictions().size());

    // Function-based restrictions: record every restriction whose name equals
    // the function of one of the rule's function definitions
    for (PropertyDefinition functionDefn : rule.getFunctionRestrictions()) {
        String function = functionDefn.function;
        for (PropertyRestriction restriction : filter.getPropertyRestrictions()) {
            if (restriction.propertyName.equals(function)) {
                matchedProps.add(function);
                result.propDefns.put(function, functionDefn);
            }
        }
    }

    List<String> facets = new LinkedList<String>();

    // Optimization - look at the individual restrictions only if the rule
    // has any property configured for property index
    if (rule.propertyIndexEnabled) {
        for (PropertyRestriction restriction : filter.getPropertyRestrictions()) {
            String propName = restriction.propertyName;

            // local-name restrictions are skipped here; function-based
            // restrictions were already handled above
            if (QueryConstants.RESTRICTION_LOCAL_NAME.equals(propName)
                    || propName.startsWith(QueryConstants.FUNCTION_RESTRICTION_PREFIX)) {
                continue;
            }

            if (QueryImpl.REP_FACET.equals(propName)) {
                String facetValue = restriction.first.getValue(Type.STRING);
                facets.add(FacetHelper.parseFacetField(facetValue));
            }

            PropertyDefinition propDefn = rule.getConfig(propName);
            if (propDefn == null || !propDefn.propertyIndexEnabled()) {
                continue;
            }
            // a null restriction is only usable when null checks are enabled
            if (restriction.isNullRestriction() && !propDefn.nullCheckEnabled) {
                continue;
            }
            matchedProps.add(propName);
            result.propDefns.put(propName, propDefn);
        }
    }

    boolean nodeTypeEval = canEvalNodeTypeRestrictions(rule);
    boolean pathEval = canEvalPathRestrictions(rule);
    boolean allFullTextEval = canEvalAllFullText(rule, fullText);
    boolean nodeNameEval = canEvalNodeNameRestriction(rule);

    // Fulltext expression can also be like jcr:contains(jcr:content/metadata/@format, 'image'),
    // so every part of it must be evaluable by this index
    if (hasFullText && !allFullTextEval) {
        return null;
    }

    List<OrderEntry> ordering = createSortOrder(rule);
    boolean sortable = canSortByProperty(ordering);

    boolean usable = !matchedProps.isEmpty() || sortable || hasFullText
            || pathEval || nodeTypeEval || nodeNameEval;
    if (!usable) {
        return null;
    }

    //TODO Need a way to have better cost estimate to indicate that
    //this index can evaluate more propertyRestrictions natively (if more props are indexed)
    //For now we reduce cost per entry
    int costDivisor = Math.max(1, matchedProps.size() + ordering.size());

    IndexPlan.Builder builder = defaultPlan();
    if (!ordering.isEmpty()) {
        builder.setSortOrder(ordering);
    }
    if (!facets.isEmpty()) {
        builder.setAttribute(FacetHelper.ATTR_FACET_FIELDS, facets);
    }
    if (!hasFullText) {
        result.enableNonFullTextConstraints();
    }
    if (nodeTypeEval) {
        result.enableNodeTypeEvaluation();
    }
    if (nodeNameEval) {
        result.enableNodeNameRestriction();
    }
    return builder.setCostPerEntry(definition.getCostPerEntry() / costDivisor);
}
use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.
the class IndexPlanner method canEvalAllFullText.
/**
 * Checks whether every part of the given fulltext expression can be
 * evaluated with the supplied indexing rule.
 *
 * <p>Side effect: on the paths that reach it, the parent path of the
 * {@code result} field is set via {@code result.setParentPath(...)}.
 *
 * @param indexingRule rule consulted for aggregation and property configuration
 * @param ft fulltext constraint of the query; {@code null} yields {@code false}
 * @return {@code true} if all visited terms can be evaluated, {@code false} otherwise
 */
private boolean canEvalAllFullText(final IndexingRule indexingRule, FullTextExpression ft) {
    if (ft == null) {
        return false;
    }
    final HashSet<String> relPaths = new HashSet<String>();
    final HashSet<String> nonIndexedPaths = new HashSet<String>();
    final AtomicBoolean relativeParentsFound = new AtomicBoolean();
    final AtomicBoolean nodeScopedCondition = new AtomicBoolean();
    ft.accept(new FullTextVisitor.FullTextVisitorBase() {
        @Override
        public boolean visit(FullTextContains contains) {
            visitTerm(contains.getPropertyName());
            return true;
        }

        @Override
        public boolean visit(FullTextTerm term) {
            visitTerm(term.getPropertyName());
            return true;
        }

        // Classifies one term: records its relative parent path and, when the
        // rule does not cover it, adds it to the non-indexed paths
        private void visitTerm(String propertyName) {
            String propertyPath = null;
            String nodePath = null;
            if (propertyName == null) {
                relPaths.add("");
            } else if (propertyName.startsWith("../") || propertyName.startsWith("./")) {
                relPaths.add(propertyName);
                relativeParentsFound.set(true);
            } else if (getDepth(propertyName) > 1) {
                String parent = getParentPath(propertyName);
                if (LucenePropertyIndex.isNodePath(propertyName)) {
                    nodePath = parent;
                } else {
                    propertyPath = propertyName;
                }
                relPaths.add(parent);
            } else {
                propertyPath = propertyName;
                relPaths.add("");
            }

            if (nodePath != null && !indexingRule.isAggregated(nodePath)) {
                nonIndexedPaths.add(propertyName);
            } else if (propertyPath != null) {
                PropertyDefinition pd = indexingRule.getConfig(propertyPath);
                // not indexed at all, or indexed but not analyzed
                if (pd == null || !pd.analyzed) {
                    nonIndexedPaths.add(propertyName);
                }
            }

            if (nodeScopedTerm(propertyName)) {
                nodeScopedCondition.set(true);
            }
        }
    });

    if (nodeScopedCondition.get() && !indexingRule.isNodeFullTextIndexed()) {
        return false;
    }

    if (relativeParentsFound.get()) {
        log.debug("Relative parents found {} which are not supported", relPaths);
        return false;
    }

    // Non indexed paths can only be handled when they all share a single
    // relative parent. So ensure that relPaths size is 1 or 0
    if (!nonIndexedPaths.isEmpty()) {
        if (relPaths.size() > 1) {
            // the {} placeholder is required for relPaths to show up in the log output
            log.debug("Following relative property paths are not index {}", relPaths);
            return false;
        }
        result.setParentPath(Iterables.getOnlyElement(relPaths, ""));

        //Such non indexed path can possibly be evaluated via any rule on nt:base
        //which can possibly index everything
        IndexingRule rule = definition.getApplicableIndexingRule(NT_BASE);
        if (rule == null) {
            return false;
        }
        for (String p : nonIndexedPaths) {
            if (LucenePropertyIndex.isNodePath(p)) {
                //Index can only evaluate a node path
                //if node scope indexing is enabled
                if (!rule.isNodeFullTextIndexed()) {
                    return false;
                }
            } else {
                //Index can only evaluate a property like jcr:content/type
                //if it indexes 'type' and that too analyzed
                String propertyName = PathUtils.getName(p);
                PropertyDefinition pd = rule.getConfig(propertyName);
                if (pd == null || !pd.analyzed) {
                    return false;
                }
            }
        }
    } else {
        result.setParentPath("");
    }
    return true;
}
use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.
the class LuceneIndex method addNonFullTextConstraints.
/**
 * Translates the non-fulltext parts of the filter (node type, path
 * restriction and property restrictions) into Lucene queries and appends
 * them to {@code qs}.
 *
 * @param qs list the generated queries are appended to
 * @param filter filter whose restrictions are translated
 * @param reader index reader handed to the reference constraint lookup
 * @param analyzer analyzer used to tokenize restriction values
 * @param indexDefinition definition whose nt:base indexing rule is used for
 *        the excluded-property check
 */
private static void addNonFullTextConstraints(List<Query> qs, Filter filter, IndexReader reader, Analyzer analyzer, IndexDefinition indexDefinition) {
    if (!filter.matchesAllTypes()) {
        addNodeTypeConstraints(qs, filter);
    }

    String path = filter.getPath();
    switch (filter.getPathRestriction()) {
        case ALL_CHILDREN:
            if (USE_PATH_RESTRICTION) {
                if ("/".equals(path)) {
                    break;
                }
                if (!path.endsWith("/")) {
                    path += "/";
                }
                qs.add(new PrefixQuery(newPathTerm(path)));
            }
            break;
        case DIRECT_CHILDREN:
            if (USE_PATH_RESTRICTION) {
                if (!path.endsWith("/")) {
                    path += "/";
                }
                qs.add(new PrefixQuery(newPathTerm(path)));
            }
            break;
        case EXACT:
            qs.add(new TermQuery(newPathTerm(path)));
            break;
        case PARENT:
            if (denotesRoot(path)) {
                // there's no parent of the root node
                // we add a path that can not possibly occur because there
                // is no way to say "match no documents" in Lucene
                qs.add(new TermQuery(new Term(FieldNames.PATH, "///")));
            } else {
                qs.add(new TermQuery(newPathTerm(getParentPath(path))));
            }
            break;
        case NO_RESTRICTION:
            break;
    }

    //Fulltext index definition used by LuceneIndex only works with old format
    //which is not nodeType based. So just use the nt:base index
    IndexingRule rule = indexDefinition.getApplicableIndexingRule(JcrConstants.NT_BASE);
    for (PropertyRestriction pr : filter.getPropertyRestrictions()) {
        if (pr.first == null && pr.last == null) {
            // restriction carries no bound at all, so there is no value to
            // build an equality or range query from (OAK-1208)
            continue;
        }

        // check excluded properties and types
        if (isExcludedProperty(pr, rule)) {
            continue;
        }

        String name = pr.propertyName;
        if (QueryImpl.REP_EXCERPT.equals(name) || QueryImpl.OAK_SCORE_EXPLANATION.equals(name) || QueryImpl.REP_FACET.equals(name)) {
            continue;
        }
        if (JCR_PRIMARYTYPE.equals(name)) {
            continue;
        }
        if (QueryConstants.RESTRICTION_LOCAL_NAME.equals(name)) {
            continue;
        }

        if (skipTokenization(name)) {
            // NOTE(review): pr.first may be null here when only pr.last is set - confirm callers
            qs.add(new TermQuery(new Term(name, pr.first.getValue(STRING))));
            continue;
        }

        String first = null;
        String last = null;
        boolean isLike = pr.isLike;

        // TODO what to do with escaped tokens?
        if (pr.first != null) {
            first = pr.first.getValue(STRING);
            first = first.replace("\\", "");
        }
        if (pr.last != null) {
            last = pr.last.getValue(STRING);
            last = last.replace("\\", "");
        }

        if (isLike) {
            // translate the LIKE wildcards into their Lucene counterparts
            first = first.replace('%', WildcardQuery.WILDCARD_STRING);
            first = first.replace('_', WildcardQuery.WILDCARD_CHAR);
            // WildcardQuery also covers patterns with a trailing '*', so no
            // prefix-query special case is needed
            if (JCR_PATH.equals(name)) {
                qs.add(new WildcardQuery(newPathTerm(first)));
            } else {
                qs.add(new WildcardQuery(new Term(name, first)));
            }
            continue;
        }

        // equality: both bounds are the same value and both are inclusive
        if (first != null && first.equals(last) && pr.firstIncluding && pr.lastIncluding) {
            if (JCR_PATH.equals(name)) {
                qs.add(new TermQuery(newPathTerm(first)));
            } else if ("*".equals(name)) {
                addReferenceConstraint(first, qs, reader);
            } else {
                for (String t : tokenize(first, analyzer)) {
                    qs.add(new TermQuery(new Term(name, t)));
                }
            }
            continue;
        }

        // everything else is treated as a range over the tokenized bounds
        first = tokenizeAndPoll(first, analyzer);
        last = tokenizeAndPoll(last, analyzer);
        qs.add(TermRangeQuery.newStringRange(name, first, last, pr.firstIncluding, pr.lastIncluding));
    }
}
use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.
the class LucenePropertyIndex method tokenToQuery.
/**
 * Builds the query for a single text token against the given field.
 *
 * <p>For the fulltext field the query is expanded with one boosted clause
 * per node-scope analyzed property of the plan's indexing rule (when there
 * are any), and optionally combined with the query supplied by the augmentor.
 *
 * @param text token text to search for
 * @param fieldName name of the field the token is matched against
 * @param pr plan result providing the indexing rule and index definition
 * @param analyzer analyzer passed on to the query construction
 * @param augmentor optional provider of additional query terms, may be {@code null}
 * @return the resulting query
 */
private static Query tokenToQuery(String text, String fieldName, PlanResult pr, Analyzer analyzer, FulltextQueryTermsProvider augmentor) {
    final IndexingRule rule = pr.indexingRule;
    final boolean onFulltextField = FieldNames.FULLTEXT.equals(fieldName);

    Query tokenQuery;
    if (!onFulltextField || rule.getNodeScopeAnalyzedProps().isEmpty()) {
        tokenQuery = tokenToQuery(text, fieldName, analyzer);
    } else {
        //Expand the query on fulltext field
        BooleanQuery expanded = new BooleanQuery();
        for (PropertyDefinition propDefn : rule.getNodeScopeAnalyzedProps()) {
            String analyzedField = FieldNames.createAnalyzedFieldName(propDefn.name);
            Query boosted = tokenToQuery(text, analyzedField, analyzer);
            boosted.setBoost(propDefn.boost);
            expanded.add(boosted, BooleanClause.Occur.SHOULD);
        }
        //The query on the actual fulltext field is added too, without a boost
        expanded.add(tokenToQuery(text, fieldName, analyzer), BooleanClause.Occur.SHOULD);
        tokenQuery = expanded;
    }

    if (augmentor == null || !onFulltextField) {
        return tokenQuery;
    }

    //Augment query terms if available (as a 'SHOULD' clause)
    Query augmented = augmentor.getQueryTerm(text, analyzer, pr.indexDefinition.getDefinitionNodeState());
    if (augmented == null) {
        return tokenQuery;
    }
    BooleanQuery combined = new BooleanQuery();
    combined.add(tokenQuery, BooleanClause.Occur.SHOULD);
    combined.add(augmented, BooleanClause.Occur.SHOULD);
    return combined;
}
use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule in project jackrabbit-oak by apache.
the class IndexDefinitionTest method propertyRegExAndRelativeProperty.
/**
 * After an old-format definition is run through
 * {@code IndexDefinition.updateDefinition}, the nt:base rule must still
 * resolve "foo" but must not resolve the relative property "foo/bar".
 */
@Test
public void propertyRegExAndRelativeProperty() throws Exception {
    NodeBuilder oldFormatBuilder = newLuceneIndexDefinition(
            builder.child(INDEX_DEFINITIONS_NAME), "lucene", of(TYPENAME_STRING), of("foo"), "async");

    // sanity check: the freshly created definition is reported as old format
    IndexDefinition oldFormatDefn = new IndexDefinition(root, oldFormatBuilder.getNodeState(), "/foo");
    assertTrue(oldFormatDefn.isOfOldFormat());

    // run the definition through updateDefinition and look up the nt:base rule
    NodeBuilder upgradedBuilder = IndexDefinition.updateDefinition(oldFormatBuilder.getNodeState().builder());
    IndexDefinition upgradedDefn = new IndexDefinition(root, upgradedBuilder.getNodeState(), "/foo");
    IndexingRule ntBaseRule = upgradedDefn.getApplicableIndexingRule(asState(newNode("nt:base")));

    assertNotNull(ntBaseRule.getConfig("foo"));
    assertNull("Property regex used should not allow relative properties", ntBaseRule.getConfig("foo/bar"));
}
Aggregations