Use of org.apache.lucene.queryparser.classic.QueryParser in project jackrabbit-oak by apache.
The class LuceneIndex, method getLuceneRequest.
/**
* Get the Lucene query for the given filter.
*
* @param filter the filter, including full-text constraint
* @param reader the Lucene reader
* @param nonFullTextConstraints whether non-full-text constraints (such as
* path, node type, and so on) should be added to the Lucene
* query
* @param indexDefinition nodestate that contains the index definition
* @return the Lucene query
*/
private static LuceneRequestFacade getLuceneRequest(Filter filter, IndexReader reader,
        boolean nonFullTextConstraints, IndexDefinition indexDefinition) {
    List<Query> qs = new ArrayList<Query>();
    Analyzer analyzer = indexDefinition.getAnalyzer();
    FullTextExpression ft = filter.getFullTextConstraint();
    if (ft == null) {
        // there might be no full-text constraint
        // when using the LowCostLuceneIndexProvider
        // which is used for testing
    } else {
        qs.add(getFullTextQuery(ft, analyzer, reader));
    }
    PropertyRestriction pr = filter.getPropertyRestriction(NATIVE_QUERY_FUNCTION);
    if (pr != null) {
        String query = String.valueOf(pr.first.getValue(pr.first.getType()));
        QueryParser queryParser = new QueryParser(VERSION, "", indexDefinition.getAnalyzer());
        if (query.startsWith("mlt?")) {
            String mltQueryString = query.replace("mlt?", "");
            if (reader != null) {
                Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString);
                if (moreLikeThis != null) {
                    qs.add(moreLikeThis);
                }
            }
        }
        if (query.startsWith("spellcheck?")) {
            String spellcheckQueryString = query.replace("spellcheck?", "");
            if (reader != null) {
                return new LuceneRequestFacade<SpellcheckHelper.SpellcheckQuery>(
                        SpellcheckHelper.getSpellcheckQuery(spellcheckQueryString, reader));
            }
        } else if (query.startsWith("suggest?")) {
            String suggestQueryString = query.replace("suggest?", "");
            if (reader != null) {
                return new LuceneRequestFacade<SuggestHelper.SuggestQuery>(
                        SuggestHelper.getSuggestQuery(suggestQueryString));
            }
        } else {
            try {
                qs.add(queryParser.parse(query));
            } catch (ParseException e) {
                throw new RuntimeException(e);
            }
        }
    } else if (nonFullTextConstraints) {
        addNonFullTextConstraints(qs, filter, reader, analyzer, indexDefinition);
    }
    if (qs.size() == 0) {
        return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
    }
    return LucenePropertyIndex.performAdditionalWraps(qs);
}
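The method dispatches on the prefix of the native query string ("mlt?", "spellcheck?", "suggest?") and hands everything else to the classic QueryParser with an empty default field, so every term must name its field. A minimal standalone sketch of that fallback parse (not Oak code: the class name, field names, and query string are invented; the Version-taking constructors match the Lucene 4.x line this class builds against):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;

public class ClassicParserSketch {
    public static void main(String[] args) throws ParseException {
        // empty default field, as in getLuceneRequest: terms must be field-qualified
        QueryParser parser = new QueryParser(Version.LUCENE_47, "", new StandardAnalyzer(Version.LUCENE_47));
        // parses into a BooleanQuery with two required TermQuery clauses on the named fields
        Query q = parser.parse("title:oak AND text:lucene");
        System.out.println(q);
    }
}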
Use of org.apache.lucene.queryparser.classic.QueryParser in project jackrabbit-oak by apache.
The class LucenePropertyIndex, method query.
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    final Sort sort = getSort(plan);
    final PlanResult pr = getPlanResult(plan);
    QueryLimits settings = filter.getQueryLimits();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
        private final Set<String> seenPaths = Sets.newHashSet();
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        private boolean noDocs = false;
        private IndexSearcher indexSearcher;
        private int indexNodeId = -1;

        @Override
        protected LuceneResultRow computeNext() {
            while (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            releaseSearcher();
            return endOfData();
        }
        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, Map<String, String> excerpts,
                Facets facets, String explanation) throws IOException {
            IndexReader reader = searcher.getIndexReader();
            // TODO Look into usage of field cache for retrieving the path
            // instead of reading via reader if no of docs in index are limited
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (pr.isPathTransformed()) {
                    String originalPath = path;
                    path = pr.transformPath(path);
                    if (path == null) {
                        LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
                        return null;
                    }
                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
                        return null;
                    }
                    seenPaths.add(path);
                }
                boolean shouldIncludeForHierarchy = shouldInclude(path, plan);
                LOG.trace("Matched path {}; shouldIncludeForHierarchy: {}", path, shouldIncludeForHierarchy);
                return shouldIncludeForHierarchy
                        ? new LuceneResultRow(path, doc.score, excerpts, facets, explanation) : null;
            }
            return null;
        }
        /**
         * Loads the Lucene documents in batches.
         * @return true if any document is loaded
         */
        private boolean loadDocs() {
            if (noDocs) {
                return false;
            }
            ScoreDoc lastDocToRecord = null;
            final IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = getCurrentSearcher(indexNode);
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query);
                    if (customScoreQuery != null) {
                        query = customScoreQuery;
                    }
                    TopDocs docs;
                    long start = PERF_LOGGER.start();
                    while (true) {
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                            } else {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
                            }
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.search(query, nextBatchSize);
                            } else {
                                docs = searcher.search(query, nextBatchSize, sort);
                            }
                        }
                        PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);
                        long f = PERF_LOGGER.start();
                        Facets facets = FacetHelper.getFacets(searcher, query, docs, plan, indexNode.getDefinition().isSecureFacets());
                        PERF_LOGGER.end(f, -1, "facets retrieved");
                        Set<String> excerptFields = Sets.newHashSet();
                        for (PropertyRestriction pr : filter.getPropertyRestrictions()) {
                            if (QueryConstants.REP_EXCERPT.equals(pr.propertyName)) {
                                String value = pr.first.getValue(Type.STRING);
                                excerptFields.add(value);
                            }
                        }
                        boolean addExcerpt = excerptFields.size() > 0;
                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryConstants.OAK_SCORE_EXPLANATION);
                        boolean addExplain = restriction != null && restriction.isNotNullRestriction();
                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
                        FieldInfos mergedFieldInfos = null;
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                            mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
                        }
                        for (ScoreDoc doc : docs.scoreDocs) {
                            Map<String, String> excerpts = null;
                            if (addExcerpt) {
                                excerpts = getExcerpt(query, excerptFields, analyzer, searcher, doc, mergedFieldInfos);
                            }
                            String explanation = null;
                            if (addExplain) {
                                explanation = searcher.explain(query, doc.doc).toString();
                            }
                            LuceneResultRow row = convertToRow(doc, searcher, excerpts, facets, explanation);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }
                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            // queue is still empty but more results can be fetched
                            // from Lucene so still continue
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    String aclCheckField = indexNode.getDefinition().isFullTextEnabled()
                            ? FieldNames.FULLTEXT : FieldNames.SPELLCHECK;
                    noDocs = true;
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery =
                            (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);
                    // ACL filter spellchecks
                    QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField, indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(aclCheckField, QueryParserBase.escape(suggestion.string));
                        query = addDescendantClauseIfRequired(query, plan);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.string));
                                    break;
                                }
                            }
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery =
                            (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;
                    List<Lookup.LookupResult> lookupResults = SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().isSuggestAnalyzed()
                                    ? indexNode.getDefinition().getAnalyzer() : SuggestHelper.getAnalyzer());
                    // ACL filter suggestions
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");
                        query = addDescendantClauseIfRequired(query, plan);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value));
                                    break;
                                }
                            }
                        }
                    }
                }
            } catch (Exception e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }
            return !queue.isEmpty();
        }
        private IndexSearcher getCurrentSearcher(IndexNode indexNode) {
            // the searcher would be refreshed as done earlier
            if (indexNodeId != indexNode.getIndexNodeId()) {
                // if already initialized then log about change
                if (indexNodeId > 0) {
                    LOG.debug("Change in index version detected. Query would be performed without offset");
                }
                indexSearcher = indexNode.getSearcher();
                indexNodeId = indexNode.getIndexNodeId();
                lastDoc = null;
            }
            return indexSearcher;
        }

        private void releaseSearcher() {
            // For now nullifying it.
            indexSearcher = null;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {

        @Override
        public long getSize() {
            IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    if (pr.hasPropertyIndexResult() || pr.evaluateSyncNodeTypeRestriction()) {
        itr = mergePropertyIndexResult(plan, rootState, itr);
    }
    return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}
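loadDocs pages through results with IndexSearcher.searchAfter, doubling the batch size on each round so that small queries stay cheap while deep result sets need fewer round trips. Stripped of the Oak plumbing, the pattern reduces to roughly the following sketch (plain Lucene; the class and method names, the starting batch size of 50, and the processing placeholder are assumptions, not Oak code):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;

class BatchedSearch {
    // page through all hits, doubling the batch size each round, as loadDocs does
    static void drain(IndexSearcher searcher, Query query) throws IOException {
        ScoreDoc lastDoc = null;
        int batchSize = 50; // stands in for LUCENE_QUERY_BATCH_SIZE
        while (true) {
            TopDocs docs = (lastDoc == null)
                    ? searcher.search(query, batchSize)
                    : searcher.searchAfter(lastDoc, query, batchSize);
            if (docs.scoreDocs.length == 0) {
                break; // no more hits
            }
            for (ScoreDoc sd : docs.scoreDocs) {
                lastDoc = sd; // cursor position for the next searchAfter call
                // a real caller would convert searcher.doc(sd.doc) into a result row here
            }
            batchSize = (int) Math.min(batchSize * 2L, 100000);
        }
    }
}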
Use of org.apache.lucene.queryparser.classic.QueryParser in project carbondata by apache.
The class LuceneCoarseGrainDataMap, method prune.
/**
 * Prune the datamap with the filter expression. It returns the list of
 * blocklets where these filters can exist.
 */
@Override
public List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties,
        List<PartitionSpec> partitions) throws IOException {
    // convert filter expr into lucene list query
    List<String> fields = new ArrayList<String>();
    // only for test, query all data
    String strQuery = "*:*";
    String[] sFields = new String[fields.size()];
    fields.toArray(sFields);
    // get analyzer
    if (analyzer == null) {
        analyzer = new StandardAnalyzer();
    }
    // use MultiFieldQueryParser to parse the query
    QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
    Query query;
    try {
        query = queryParser.parse(strQuery);
    } catch (ParseException e) {
        String errorMessage = String.format("failed to filter block with query %s, detail is %s", strQuery, e.getMessage());
        LOGGER.error(errorMessage);
        return null;
    }
    // execute index search
    TopDocs result;
    try {
        result = indexSearcher.search(query, MAX_RESULT_NUMBER);
    } catch (IOException e) {
        String errorMessage = String.format("failed to search lucene data, detail is %s", e.getMessage());
        LOGGER.error(errorMessage);
        throw new IOException(errorMessage);
    }
    // temporary data, delete duplicated data
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    Map<String, Set<Number>> mapBlocks = new HashMap<String, Set<Number>>();
    for (ScoreDoc scoreDoc : result.scoreDocs) {
        // get a document
        Document doc = indexSearcher.doc(scoreDoc.doc);
        // get all fields
        List<IndexableField> fieldsInDoc = doc.getFields();
        // get this block id Map<BlockId, Set<BlockletId>>
        String blockId = fieldsInDoc.get(BLOCKID_ID).stringValue();
        Set<Number> setBlocklets = mapBlocks.get(blockId);
        if (setBlocklets == null) {
            setBlocklets = new HashSet<Number>();
            mapBlocks.put(blockId, setBlocklets);
        }
        // get the blocklet id Set<BlockletId>
        Number blockletId = fieldsInDoc.get(BLOCKLETID_ID).numericValue();
        if (!setBlocklets.contains(blockletId.intValue())) {
            setBlocklets.add(blockletId.intValue());
        }
    }
    // result blocklets
    List<Blocklet> blocklets = new ArrayList<Blocklet>();
    // transform all blocks into result type blocklets Map<BlockId, Set<BlockletId>>
    for (Map.Entry<String, Set<Number>> mapBlock : mapBlocks.entrySet()) {
        String blockId = mapBlock.getKey();
        Set<Number> setBlocklets = mapBlock.getValue();
        // for blocklets in this block Set<BlockletId>
        for (Number blockletId : setBlocklets) {
            // add a CoarseGrainBlocklet
            blocklets.add(new Blocklet(blockId, blockletId.toString()));
        }
    }
    return blocklets;
}
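MultiFieldQueryParser expands an unqualified term over all the default fields it is given; here the field array is empty and the query is the match-all "*:*", so no expansion actually happens. For reference, a minimal sketch of what the parser does with real fields (the class and field names and the query are invented; the no-Version constructors match the Lucene 6.x line used by this project):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.Query;

public class MultiFieldExample {
    public static void main(String[] args) throws ParseException {
        String[] fields = { "name", "city" }; // hypothetical indexed fields
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
        // an unqualified term is expanded across the fields: (name:smith city:smith)
        Query query = parser.parse("smith");
        System.out.println(query);
    }
}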
Use of org.apache.lucene.queryparser.classic.QueryParser in project jeesuite-libs by vakinge.
The class IndexingExample, method main.
public static void main(String[] args) throws IOException, ParseException {
    // indexing
    IndexingExample app = new IndexingExample();
    IndexWriter writer = app.createWriter();
    writer.deleteAll();
    List<Document> docs = app.createDocs();
    writer.addDocuments(docs);
    writer.deleteDocuments(new Term("isbn", "978-0321321367"));
    writer.commit();
    writer.close();
    // search
    IndexSearcher searcher = app.createSearcher();
    QueryParser qp = new QueryParser("title", new StandardAnalyzer());
    Query q1 = qp.parse("java");
    TopDocs hits = searcher.search(q1, 10);
    System.out.println(hits.totalHits + " docs found for the query \"" + q1.toString() + "\"");
    int num = 0;
    for (ScoreDoc sd : hits.scoreDocs) {
        Document d = searcher.doc(sd.doc);
        System.out.println(String.format("#%d: %s (rating=%s)", ++num, d.get("title"), d.get("rating_display")));
    }
    System.out.println("");
    Query q2 = qp.parse("java AND program*");
    hits = searcher.search(q2, 10);
    System.out.println(hits.totalHits + " docs found for the query \"" + q2.toString() + "\"");
    num = 0;
    for (ScoreDoc sd : hits.scoreDocs) {
        Document d = searcher.doc(sd.doc);
        System.out.println(String.format("#%d: %s (rating=%s)", ++num, d.get("title"), d.get("rating_display")));
    }
    System.out.println("");
    Query q3 = qp.parse("java OR scala");
    hits = searcher.search(q3, 10);
    System.out.println(hits.totalHits + " docs found for the query \"" + q3.toString() + "\"");
    num = 0;
    for (ScoreDoc sd : hits.scoreDocs) {
        Document d = searcher.doc(sd.doc);
        System.out.println(String.format("#%d: %s (rating=%s)", ++num, d.get("title"), d.get("rating_display")));
    }
    System.out.println("");
    // range search over PointValues
    TermQuery tq = new TermQuery(new Term("title", "java"));
    Query prq = IntPoint.newRangeQuery("rating", 3, Integer.MAX_VALUE);
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    // a clause added with Occur.FILTER must match but does not affect scoring
    Query q4 = builder.add(tq, BooleanClause.Occur.MUST)
            .add(prq, BooleanClause.Occur.FILTER)
            .build();
    hits = searcher.search(q4, 10);
    System.out.println(hits.totalHits + " docs found for the query \"" + q4.toString() + "\"");
    num = 0;
    for (ScoreDoc sd : hits.scoreDocs) {
        Document d = searcher.doc(sd.doc);
        System.out.println(String.format("#%d: %s (rating=%s)", ++num, d.get("title"), d.get("rating_display")));
    }
}
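createDocs is not shown here; for the queries above to work, each document presumably indexes rating as an IntPoint (for the range query) alongside a stored rating_display copy (for output) and an exact-match isbn key (used by deleteDocuments). A sketch of what such a document might look like, with the caveat that everything beyond the field names used above is an assumption:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

class BookDoc {
    // one book record shaped like the fields the queries above rely on
    static Document create(String isbn, String title, int rating) {
        Document doc = new Document();
        doc.add(new StringField("isbn", isbn, Field.Store.YES)); // exact-match key for deleteDocuments
        doc.add(new TextField("title", title, Field.Store.YES)); // analyzed text searched by the QueryParser
        doc.add(new IntPoint("rating", rating)); // indexed for range queries, not stored
        doc.add(new StoredField("rating_display", String.valueOf(rating))); // stored copy for display
        return doc;
    }
}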
Use of org.apache.lucene.queryparser.classic.QueryParser in project jena by apache.
The class textindexdump, method dump.
private static void dump(TextIndexLucene textIndex) {
    try {
        Directory directory = textIndex.getDirectory();
        Analyzer analyzer = textIndex.getQueryAnalyzer();
        IndexReader indexReader = DirectoryReader.open(directory);
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        QueryParser queryParser = new QueryParser(textIndex.getDocDef().getPrimaryField(), analyzer);
        Query query = queryParser.parse("*:*");
        ScoreDoc[] sDocs = indexSearcher.search(query, 1000).scoreDocs;
        for (ScoreDoc sd : sDocs) {
            System.out.println("Doc: " + sd.doc);
            Document doc = indexSearcher.doc(sd.doc);
            // Don't forget that many fields aren't stored, just indexed.
            for (IndexableField f : doc) {
                // System.out.println(" "+f) ;
                System.out.println(" " + f.name() + " = " + f.stringValue());
            }
        }
    } catch (Exception ex) {
        throw new TextIndexException(ex);
    }
}
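Since the parser here only serves to produce a match-all query, the same dump can be written without QueryParser by constructing MatchAllDocsQuery directly. A minimal sketch (not jena code: it takes a raw Directory rather than a TextIndexLucene, and as the comment above says, only stored fields appear in the output):

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;

class DumpAllDocs {
    static void dump(Directory directory) throws Exception {
        try (IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // MatchAllDocsQuery is equivalent to parsing "*:*" with the classic parser
            ScoreDoc[] hits = searcher.search(new MatchAllDocsQuery(), 1000).scoreDocs;
            for (ScoreDoc sd : hits) {
                Document doc = searcher.doc(sd.doc);
                for (IndexableField f : doc) { // iterates stored fields only
                    System.out.println("  " + f.name() + " = " + f.stringValue());
                }
            }
        }
    }
}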