Use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PlanResult in project jackrabbit-oak by apache.
The class LucenePropertyIndex, method getLuceneRequest.
/**
* Get the Lucene query for the given filter.
*
* @param plan index plan containing filter details
* @param augmentorFactory factory providing the FulltextQueryTermsProvider used to augment the full-text query
* @param reader the Lucene reader
* @return the Lucene query
*/
private static LuceneRequestFacade getLuceneRequest(IndexPlan plan, IndexAugmentorFactory augmentorFactory, IndexReader reader) {
FulltextQueryTermsProvider augmentor = getIndexAgumentor(plan, augmentorFactory);
List<Query> qs = new ArrayList<Query>();
Filter filter = plan.getFilter();
FullTextExpression ft = filter.getFullTextConstraint();
PlanResult planResult = getPlanResult(plan);
IndexDefinition defn = planResult.indexDefinition;
Analyzer analyzer = defn.getAnalyzer();
if (ft == null) {
// there might be no full-text constraint
// when using the LowCostLuceneIndexProvider
// which is used for testing
} else {
qs.add(getFullTextQuery(plan, ft, analyzer, augmentor));
}
//Check if native function is supported
PropertyRestriction pr = null;
if (defn.hasFunctionDefined()) {
pr = filter.getPropertyRestriction(defn.getFunctionName());
}
if (pr != null) {
String query = String.valueOf(pr.first.getValue(pr.first.getType()));
QueryParser queryParser = new QueryParser(VERSION, "", analyzer);
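//Dispatch on the native query prefix: "mlt?" builds a MoreLikeThis query,
//"spellcheck?" and "suggest?" short-circuit and return their own request facades,
//anything else is parsed as a plain Lucene query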
if (query.startsWith("mlt?")) {
String mltQueryString = query.replace("mlt?", "");
if (reader != null) {
Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString);
if (moreLikeThis != null) {
qs.add(moreLikeThis);
}
}
} else if (query.startsWith("spellcheck?")) {
String spellcheckQueryString = query.replace("spellcheck?", "");
if (reader != null) {
return new LuceneRequestFacade<SpellcheckHelper.SpellcheckQuery>(SpellcheckHelper.getSpellcheckQuery(spellcheckQueryString, reader));
}
} else if (query.startsWith("suggest?")) {
String suggestQueryString = query.replace("suggest?", "");
if (reader != null) {
return new LuceneRequestFacade<SuggestHelper.SuggestQuery>(SuggestHelper.getSuggestQuery(suggestQueryString));
}
} else {
try {
qs.add(queryParser.parse(query));
} catch (ParseException e) {
throw new RuntimeException(e);
}
}
} else if (planResult.evaluateNonFullTextConstraints()) {
addNonFullTextConstraints(qs, plan, reader);
}
if (qs.size() == 0 && plan.getSortOrder() != null) {
//This case indicates that the query only had an order by and no
//property restriction defined. In this case, add a property
//existence query for each sort entry
List<OrderEntry> orders = removeNativeSort(plan.getSortOrder());
for (int i = 0; i < orders.size(); i++) {
OrderEntry oe = orders.get(i);
PropertyDefinition pd = planResult.getOrderedProperty(i);
PropertyRestriction orderRest = new PropertyRestriction();
orderRest.propertyName = oe.getPropertyName();
Query q = createQuery(orderRest, pd);
if (q != null) {
qs.add(q);
}
}
}
if (qs.size() == 0) {
if (reader == null) {
//just return match all queries
return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
}
//For purely nodeType based queries all the documents would have to
//be returned (if the index definition has a single rule)
if (planResult.evaluateNodeTypeRestriction()) {
return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
}
throw new IllegalStateException("No query created for filter " + filter);
}
return performAdditionalWraps(qs);
}
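The native-function branch above is what serves queries issued through Oak's native() function once the index declares a function name. A minimal sketch of submitting a MoreLikeThis query over the JCR API is shown below; the function name "lucene" and the MLT parameter string are assumptions about the index configuration, not values taken from this code.
import javax.jcr.Session;
import javax.jcr.query.Query;
import javax.jcr.query.QueryManager;
import javax.jcr.query.QueryResult;
import javax.jcr.query.RowIterator;

public class NativeLuceneQueryExample {

    //Submits a MoreLikeThis query through the native() function; the "mlt?" branch
    //of getLuceneRequest is what eventually handles it. The function name and the
    //parameter string are illustrative assumptions about the index configuration.
    public static void moreLikeThis(Session session) throws Exception {
        QueryManager qm = session.getWorkspace().getQueryManager();
        String stmt = "SELECT [jcr:path] FROM [nt:base] "
                + "WHERE native('lucene', 'mlt?stream.body=/content/a&mlt.fl=:path')";
        Query query = qm.createQuery(stmt, Query.JCR_SQL2);
        QueryResult result = query.execute();
        for (RowIterator rows = result.getRows(); rows.hasNext(); ) {
            System.out.println(rows.nextRow().getPath());
        }
    }
}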
Use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PlanResult in project jackrabbit-oak by apache.
The class LucenePropertyIndex, method getCustomScoreQuery.
private CustomScoreQuery getCustomScoreQuery(IndexPlan plan, Query subQuery) {
PlanResult planResult = getPlanResult(plan);
IndexDefinition idxDef = planResult.indexDefinition;
String providerName = idxDef.getScorerProviderName();
if (scorerProviderFactory != null && providerName != null) {
return scorerProviderFactory.getScorerProvider(providerName).createCustomScoreQuery(subQuery);
}
return null;
}
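getCustomScoreQuery simply returns whatever CustomScoreQuery the configured scorer provider builds for the provider name in the index definition. Purely as an illustration (this class is not part of Oak), a provider could wrap the sub-query like this, doubling the relevance score of every hit:
import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.queries.CustomScoreProvider;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.search.Query;

//Hypothetical CustomScoreQuery a scorer provider might return from
//createCustomScoreQuery(subQuery); it doubles the original sub-query score.
public class DoublingScoreQuery extends CustomScoreQuery {

    public DoublingScoreQuery(Query subQuery) {
        super(subQuery);
    }

    @Override
    protected CustomScoreProvider getCustomScoreProvider(AtomicReaderContext context) throws IOException {
        return new CustomScoreProvider(context) {
            @Override
            public float customScore(int doc, float subQueryScore, float valSrcScore) {
                //boost every document to twice its original relevance
                return 2.0f * subQueryScore;
            }
        };
    }
}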
Use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PlanResult in project jackrabbit-oak by apache.
The class LucenePropertyIndex, method getFullTextQuery.
static Query getFullTextQuery(final IndexPlan plan, FullTextExpression ft, final Analyzer analyzer, final FulltextQueryTermsProvider augmentor) {
final PlanResult pr = getPlanResult(plan);
// a reference to the query, so it can be set in the visitor
// (a "non-local return")
final AtomicReference<Query> result = new AtomicReference<Query>();
ft.accept(new FullTextVisitor() {
@Override
public boolean visit(FullTextContains contains) {
visitTerm(contains.getPropertyName(), contains.getRawText(), null, contains.isNot());
return true;
}
@Override
public boolean visit(FullTextOr or) {
BooleanQuery q = new BooleanQuery();
for (FullTextExpression e : or.list) {
Query x = getFullTextQuery(plan, e, analyzer, augmentor);
q.add(x, SHOULD);
}
result.set(q);
return true;
}
@Override
public boolean visit(FullTextAnd and) {
BooleanQuery q = new BooleanQuery();
for (FullTextExpression e : and.list) {
Query x = getFullTextQuery(plan, e, analyzer, augmentor);
/* Only unwrap the clause if MUST_NOT(x) */
boolean hasMustNot = false;
if (x instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) x;
if ((bq.getClauses().length == 1) && (bq.getClauses()[0].getOccur() == BooleanClause.Occur.MUST_NOT)) {
hasMustNot = true;
q.add(bq.getClauses()[0]);
}
}
if (!hasMustNot) {
q.add(x, MUST);
}
}
result.set(q);
return true;
}
@Override
public boolean visit(FullTextTerm term) {
return visitTerm(term.getPropertyName(), term.getText(), term.getBoost(), term.isNot());
}
private boolean visitTerm(String propertyName, String text, String boost, boolean not) {
String p = getLuceneFieldName(propertyName, pr);
Query q = tokenToQuery(text, p, pr, analyzer, augmentor);
if (q == null) {
return false;
}
if (boost != null) {
q.setBoost(Float.parseFloat(boost));
}
if (not) {
BooleanQuery bq = new BooleanQuery();
bq.add(q, MUST_NOT);
result.set(bq);
} else {
result.set(q);
}
return true;
}
});
return result.get();
}
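The visitor turns an OR expression into a BooleanQuery of SHOULD clauses and an AND into MUST clauses, unwrapping a lone MUST_NOT clause so negations stay at the top level. As a rough sketch, CONTAINS(., 'apple OR pear') ends up with a shape like the following; the field name ":fulltext" is an assumption about FieldNames.FULLTEXT:
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class FullTextQueryShapeExample {

    //Approximate query shape for CONTAINS(., 'apple OR pear'):
    //an OR becomes SHOULD clauses over the aggregated full-text field.
    public static Query appleOrPear() {
        BooleanQuery or = new BooleanQuery();
        or.add(new TermQuery(new Term(":fulltext", "apple")), BooleanClause.Occur.SHOULD);
        or.add(new TermQuery(new Term(":fulltext", "pear")), BooleanClause.Occur.SHOULD);
        return or;
    }
}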
Use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PlanResult in project jackrabbit-oak by apache.
The class LucenePropertyIndex, method addNonFullTextConstraints.
private static void addNonFullTextConstraints(List<Query> qs, IndexPlan plan, IndexReader reader) {
Filter filter = plan.getFilter();
PlanResult planResult = getPlanResult(plan);
IndexDefinition defn = planResult.indexDefinition;
if (!filter.matchesAllTypes()) {
addNodeTypeConstraints(planResult.indexingRule, qs, filter);
}
String path = getPathRestriction(plan);
switch(filter.getPathRestriction()) {
case ALL_CHILDREN:
if (defn.evaluatePathRestrictions()) {
if ("/".equals(path)) {
break;
}
qs.add(new TermQuery(newAncestorTerm(path)));
}
break;
case DIRECT_CHILDREN:
if (defn.evaluatePathRestrictions()) {
BooleanQuery bq = new BooleanQuery();
bq.add(new BooleanClause(new TermQuery(newAncestorTerm(path)), BooleanClause.Occur.MUST));
bq.add(new BooleanClause(newDepthQuery(path), BooleanClause.Occur.MUST));
qs.add(bq);
}
break;
case EXACT:
qs.add(new TermQuery(newPathTerm(path)));
break;
case PARENT:
if (denotesRoot(path)) {
// there is no parent of the root node;
// we add a path that cannot possibly occur because there
// is no way to say "match no documents" in Lucene
qs.add(new TermQuery(new Term(FieldNames.PATH, "///")));
} else {
qs.add(new TermQuery(newPathTerm(getParentPath(path))));
}
break;
case NO_RESTRICTION:
break;
}
for (PropertyRestriction pr : filter.getPropertyRestrictions()) {
String name = pr.propertyName;
if (QueryImpl.REP_EXCERPT.equals(name) || QueryImpl.OAK_SCORE_EXPLANATION.equals(name) || QueryImpl.REP_FACET.equals(name)) {
continue;
}
if (QueryConstants.RESTRICTION_LOCAL_NAME.equals(name)) {
if (planResult.evaluateNodeNameRestriction()) {
Query q = createNodeNameQuery(pr);
if (q != null) {
qs.add(q);
}
}
continue;
}
if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding && pr.lastIncluding) {
String first = pr.first.getValue(STRING);
first = first.replace("\\", "");
if (JCR_PATH.equals(name)) {
qs.add(new TermQuery(newPathTerm(first)));
continue;
} else if ("*".equals(name)) {
//TODO Revisit reference constraint. For performant impl
//references need to be indexed in a different manner
addReferenceConstraint(first, qs, reader);
continue;
}
}
PropertyDefinition pd = planResult.getPropDefn(pr);
if (pd == null) {
continue;
}
Query q = createQuery(pr, pd);
if (q != null) {
qs.add(q);
}
}
}
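The path-restriction switch above is typically driven by the standard JCR-SQL2 path constraints. A minimal sketch of statements that exercise the individual branches (assuming the index has evaluatePathRestrictions enabled) could look like this:
import javax.jcr.Session;
import javax.jcr.query.Query;
import javax.jcr.query.QueryManager;

public class PathRestrictionExamples {

    //Each statement maps to one branch of the path-restriction switch; whether the
    //index actually evaluates it depends on evaluatePathRestrictions in its definition.
    public static void run(Session session) throws Exception {
        QueryManager qm = session.getWorkspace().getQueryManager();
        //ALL_CHILDREN: ancestor term query
        qm.createQuery("SELECT * FROM [nt:base] AS a WHERE ISDESCENDANTNODE(a, '/content')",
                Query.JCR_SQL2).execute();
        //DIRECT_CHILDREN: ancestor term plus depth clause
        qm.createQuery("SELECT * FROM [nt:base] AS a WHERE ISCHILDNODE(a, '/content')",
                Query.JCR_SQL2).execute();
        //EXACT: path term query
        qm.createQuery("SELECT * FROM [nt:base] AS a WHERE ISSAMENODE(a, '/content')",
                Query.JCR_SQL2).execute();
    }
}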
Use of org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PlanResult in project jackrabbit-oak by apache.
The class LucenePropertyIndex, method query.
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
final Filter filter = plan.getFilter();
final Sort sort = getSort(plan);
final PlanResult pr = getPlanResult(plan);
QueryEngineSettings settings = filter.getQueryEngineSettings();
Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
private final Set<String> seenPaths = Sets.newHashSet();
private ScoreDoc lastDoc;
private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
private boolean noDocs = false;
private IndexSearcher indexSearcher;
private int indexNodeId = -1;
@Override
protected LuceneResultRow computeNext() {
while (!queue.isEmpty() || loadDocs()) {
return queue.remove();
}
releaseSearcher();
return endOfData();
}
private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt, Facets facets, String explanation) throws IOException {
IndexReader reader = searcher.getIndexReader();
//TODO Look into usage of field cache for retrieving the path
//instead of reading via reader if the number of docs in the index is limited
PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
reader.document(doc.doc, visitor);
String path = visitor.getPath();
if (path != null) {
if ("".equals(path)) {
path = "/";
}
if (pr.isPathTransformed()) {
String originalPath = path;
path = pr.transformPath(path);
if (path == null) {
LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
return null;
}
// avoid duplicate entries
if (seenPaths.contains(path)) {
LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
return null;
}
seenPaths.add(path);
}
LOG.trace("Matched path {}", path);
return new LuceneResultRow(path, doc.score, excerpt, facets, explanation);
}
return null;
}
/**
* Loads the Lucene documents in batches
* @return true if any document is loaded
*/
private boolean loadDocs() {
if (noDocs) {
return false;
}
ScoreDoc lastDocToRecord = null;
final IndexNode indexNode = acquireIndexNode(plan);
checkState(indexNode != null);
try {
IndexSearcher searcher = getCurrentSearcher(indexNode);
LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader());
if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
Query query = (Query) luceneRequestFacade.getLuceneRequest();
CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query);
if (customScoreQuery != null) {
query = customScoreQuery;
}
TopDocs docs;
long start = PERF_LOGGER.start();
while (true) {
if (lastDoc != null) {
LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
if (sort == null) {
docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
} else {
docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
}
} else {
LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
if (sort == null) {
docs = searcher.search(query, nextBatchSize);
} else {
docs = searcher.search(query, nextBatchSize, sort);
}
}
PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
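//double the batch size for the next round, capped at 100000 documents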
nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);
long f = PERF_LOGGER.start();
Facets facets = FacetHelper.getFacets(searcher, query, docs, plan, indexNode.getDefinition().isSecureFacets());
PERF_LOGGER.end(f, -1, "facets retrieved");
PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();
restriction = filter.getPropertyRestriction(QueryImpl.OAK_SCORE_EXPLANATION);
boolean addExplain = restriction != null && restriction.isNotNullRestriction();
Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
FieldInfos mergedFieldInfos = null;
if (addExcerpt) {
// setup highlighter
QueryScorer scorer = new QueryScorer(query);
scorer.setExpandMultiTermQuery(true);
highlighter.setFragmentScorer(scorer);
mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
}
for (ScoreDoc doc : docs.scoreDocs) {
String excerpt = null;
if (addExcerpt) {
excerpt = getExcerpt(query, analyzer, searcher, doc, mergedFieldInfos);
}
String explanation = null;
if (addExplain) {
explanation = searcher.explain(query, doc.doc).toString();
}
LuceneResultRow row = convertToRow(doc, searcher, excerpt, facets, explanation);
if (row != null) {
queue.add(row);
}
lastDocToRecord = doc;
}
if (queue.isEmpty() && docs.scoreDocs.length > 0) {
//queue is still empty but more results can be fetched
//from Lucene so still continue
lastDoc = lastDocToRecord;
} else {
break;
}
}
} else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT : FieldNames.SPELLCHECK;
noDocs = true;
SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);
// ACL filter spellchecks
QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField, indexNode.getDefinition().getAnalyzer());
for (SuggestWord suggestion : suggestWords) {
Query query = qp.createPhraseQuery(aclCheckField, QueryParserBase.escape(suggestion.string));
query = addDescendantClauseIfRequired(query, plan);
TopDocs topDocs = searcher.search(query, 100);
if (topDocs.totalHits > 0) {
for (ScoreDoc doc : topDocs.scoreDocs) {
Document retrievedDoc = searcher.doc(doc.doc);
String prefix = filter.getPath();
if (prefix.length() == 1) {
prefix = "";
}
if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
queue.add(new LuceneResultRow(suggestion.string));
break;
}
}
}
}
} else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
noDocs = true;
List<Lookup.LookupResult> lookupResults = SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);
QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST, indexNode.getDefinition().isSuggestAnalyzed() ? indexNode.getDefinition().getAnalyzer() : SuggestHelper.getAnalyzer());
// ACL filter suggestions
for (Lookup.LookupResult suggestion : lookupResults) {
Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");
query = addDescendantClauseIfRequired(query, plan);
TopDocs topDocs = searcher.search(query, 100);
if (topDocs.totalHits > 0) {
for (ScoreDoc doc : topDocs.scoreDocs) {
Document retrievedDoc = searcher.doc(doc.doc);
String prefix = filter.getPath();
if (prefix.length() == 1) {
prefix = "";
}
if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value));
break;
}
}
}
}
}
} catch (Exception e) {
LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
} finally {
indexNode.release();
}
if (lastDocToRecord != null) {
this.lastDoc = lastDocToRecord;
}
return !queue.isEmpty();
}
private IndexSearcher getCurrentSearcher(IndexNode indexNode) {
//refresh the searcher if the underlying index node has changed since the last call
if (indexNodeId != indexNode.getIndexNodeId()) {
//if already initialized then log about change
if (indexNodeId > 0) {
LOG.debug("Change in index version detected. Query would be performed without offset");
}
//TODO Add testcase for this scenario
indexSearcher = indexNode.getSearcher();
indexNodeId = indexNode.getIndexNodeId();
lastDoc = null;
}
return indexSearcher;
}
private void releaseSearcher() {
//For now just drop the reference to the searcher
indexSearcher = null;
}
};
SizeEstimator sizeEstimator = new SizeEstimator() {
@Override
public long getSize() {
IndexNode indexNode = acquireIndexNode(plan);
checkState(indexNode != null);
try {
IndexSearcher searcher = indexNode.getSearcher();
LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader());
if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
Query query = (Query) luceneRequestFacade.getLuceneRequest();
TotalHitCountCollector collector = new TotalHitCountCollector();
searcher.search(query, collector);
int totalHits = collector.getTotalHits();
LOG.debug("Estimated size for query {} is {}", query, totalHits);
return totalHits;
}
LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
} catch (IOException e) {
LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
} finally {
indexNode.release();
}
return -1;
}
};
return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}
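loadDocs() pages through the results with Lucene's searchAfter, remembering the last ScoreDoc of each batch and doubling the batch size on every round. A stand-alone sketch of that idiom is shown below; the initial batch size of 50 is an assumption about LUCENE_QUERY_BATCH_SIZE.
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;

public class SearchAfterPaginationExample {

    //Drains all hits for a query in growing batches, mirroring the idiom in loadDocs().
    public static void drain(IndexSearcher searcher, Query query) throws Exception {
        int batchSize = 50;   //assumed initial batch size
        ScoreDoc lastDoc = null;
        while (true) {
            TopDocs docs = (lastDoc == null)
                    ? searcher.search(query, batchSize)
                    : searcher.searchAfter(lastDoc, query, batchSize);
            if (docs.scoreDocs.length == 0) {
                break;
            }
            for (ScoreDoc doc : docs.scoreDocs) {
                lastDoc = doc;    //remember the last hit for the next searchAfter call
            }
            batchSize = (int) Math.min(batchSize * 2L, 100000);
        }
    }
}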