Use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.
From the class TestUnifiedHighlighterMTQ, method testWithMaxLenAndMultipleWildcardMatches:
public void testWithMaxLenAndMultipleWildcardMatches() throws IOException {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  // tests interleaving of multiple wildcard matches with the CompositePostingsEnum
  // In this case the CompositePostingsEnum will have an underlying PostingsEnum that jumps from pos 1 to 9 for bravo
  // and a second with position 2 for Bravado
  body.setStringValue("Alpha Bravo Bravado foo foo foo. Foo foo Alpha Bravo");
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  // a little past first sentence
  highlighter.setMaxLength(32);
  BooleanQuery query = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("body", "alpha")), BooleanClause.Occur.MUST)
      .add(new PrefixQuery(new Term("body", "bra")), BooleanClause.Occur.MUST)
      .build();
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  // ask for 2 but we'll only get 1
  String[] snippets = highlighter.highlight("body", query, topDocs, 2);
  assertArrayEquals(new String[] { "<b>Alpha</b> <b>Bravo</b> <b>Bravado</b> foo foo foo." }, snippets);
  ir.close();
}
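The TermQuery-plus-PrefixQuery pattern at the heart of this test can be reproduced outside the test harness. The following is a minimal sketch, not code from lucene-solr: RAMDirectory, StandardAnalyzer, and the field contents are illustrative choices, and the type of TopDocs.totalHits varies across Lucene versions (a long in 6.x/7.x).

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class PrefixQueryDemo {
  public static void main(String[] args) throws IOException {
    Directory dir = new RAMDirectory();
    try (IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      // "bra" should match both "bravo" and "bravado" once the analyzer lowercases them
      doc.add(new TextField("body", "Alpha Bravo Bravado foo", Field.Store.NO));
      iw.addDocument(doc);
    }
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      BooleanQuery query = new BooleanQuery.Builder()
          .add(new TermQuery(new Term("body", "alpha")), BooleanClause.Occur.MUST)
          .add(new PrefixQuery(new Term("body", "bra")), BooleanClause.Occur.MUST)
          .build();
      System.out.println("hits=" + searcher.search(query, 10).totalHits); // expect 1
    }
  }
}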
Use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.
From the class AnalyzingInfixSuggester, method lookup:
/**
 * This is an advanced method providing the capability to send down to the suggester any
 * arbitrary Lucene query to be used to filter the result of the suggester
 *
 * @param key the keyword being looked for
 * @param contextQuery an arbitrary Lucene query to be used to filter the result of the suggester. {@link #addContextToQuery} could be used to build this contextQuery.
 * @param num number of items to return
 * @param allTermsRequired whether all searched terms must match
 * @param doHighlight if true, the matching term will be highlighted in the search result
 * @return the result of the suggester
 * @throws IOException if there is an IO exception while reading data from the index
 */
public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
  if (searcherMgr == null) {
    throw new IllegalStateException("suggester was not built");
  }
  final BooleanClause.Occur occur;
  if (allTermsRequired) {
    occur = BooleanClause.Occur.MUST;
  } else {
    occur = BooleanClause.Occur.SHOULD;
  }
  BooleanQuery.Builder query;
  Set<String> matchedTokens;
  String prefixToken = null;
  try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
    //long t0 = System.currentTimeMillis();
    ts.reset();
    final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    String lastToken = null;
    query = new BooleanQuery.Builder();
    int maxEndOffset = -1;
    matchedTokens = new HashSet<>();
    while (ts.incrementToken()) {
      if (lastToken != null) {
        matchedTokens.add(lastToken);
        query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
      }
      lastToken = termAtt.toString();
      if (lastToken != null) {
        maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
      }
    }
    ts.end();
    if (lastToken != null) {
      Query lastQuery;
      if (maxEndOffset == offsetAtt.endOffset()) {
        // Use PrefixQuery (or the ngram equivalent) when
        // there were no trailing discarded chars in the
        // string (e.g. whitespace), so that if the query does
        // not end with a space we show prefix matches for
        // that token:
        lastQuery = getLastTokenQuery(lastToken);
        prefixToken = lastToken;
      } else {
        // Use TermQuery for an exact match if there were
        // trailing discarded chars (e.g. whitespace), so
        // that if the query ends with a space we only show
        // exact matches for that term:
        matchedTokens.add(lastToken);
        lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
      }
      if (lastQuery != null) {
        query.add(lastQuery, occur);
      }
    }
    if (contextQuery != null) {
      boolean allMustNot = true;
      for (BooleanClause clause : contextQuery.clauses()) {
        if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) {
          allMustNot = false;
          break;
        }
      }
      if (allMustNot) {
        // All are MUST_NOT: add the contextQuery to the main query instead (not as sub-query)
        for (BooleanClause clause : contextQuery.clauses()) {
          query.add(clause);
        }
      } else if (allTermsRequired == false) {
        // We must carefully upgrade the query clauses to MUST:
        BooleanQuery.Builder newQuery = new BooleanQuery.Builder();
        newQuery.add(query.build(), BooleanClause.Occur.MUST);
        newQuery.add(contextQuery, BooleanClause.Occur.MUST);
        query = newQuery;
      } else {
        // Add contextQuery as sub-query
        query.add(contextQuery, BooleanClause.Occur.MUST);
      }
    }
  }
  // TODO: we could allow blended sort here, combining
  // weight w/ score. Now we ignore score and sort only
  // by weight:
  Query finalQuery = finishQuery(query, allTermsRequired);
  //System.out.println("finalQuery=" + finalQuery);
  // Sort by weight, descending:
  TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false);
  // We sorted postings by weight during indexing, so we
  // only retrieve the first num hits now:
  Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
  List<LookupResult> results = null;
  SearcherManager mgr;
  IndexSearcher searcher;
  synchronized (searcherMgrLock) {
    // acquire & release on same SearcherManager, via local reference
    mgr = searcherMgr;
    searcher = mgr.acquire();
  }
  try {
    //System.out.println("got searcher=" + searcher);
    searcher.search(finalQuery, c2);
    TopFieldDocs hits = c.topDocs();
    // Slower way if postings are not pre-sorted by weight:
    // hits = searcher.search(query, null, num, SORT);
    results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
  } finally {
    mgr.release(searcher);
  }
  return results;
}
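A caller might use this overload roughly as follows. This is a hedged sketch, not code from lucene-solr: it assumes the suggester was already built via build(), and the context values "en" and "draft" are purely illustrative. As the javadoc notes, addContextToQuery is a convenient way to assemble the context filter.

import java.io.IOException;
import java.util.List;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.util.BytesRef;

public class SuggestWithContexts {
  // Assumes 'suggester' was already built; context values are illustrative.
  static List<Lookup.LookupResult> suggest(AnalyzingInfixSuggester suggester, String key) throws IOException {
    BooleanQuery.Builder contexts = new BooleanQuery.Builder();
    // keep only suggestions tagged with the "en" context...
    suggester.addContextToQuery(contexts, new BytesRef("en"), BooleanClause.Occur.MUST);
    // ...and drop any tagged "draft":
    suggester.addContextToQuery(contexts, new BytesRef("draft"), BooleanClause.Occur.MUST_NOT);
    return suggester.lookup(key, contexts.build(), 5, /*allTermsRequired=*/ true, /*doHighlight=*/ true);
  }
}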
Use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.
From the class TestQPHelper, method testWildcard:
public void testWildcard() throws Exception {
  assertQueryEquals("term*", null, "term*");
  assertQueryEquals("term*^2", null, "(term*)^2.0");
  assertQueryEquals("term~", null, "term~2");
  assertQueryEquals("term~0.7", null, "term~1");
  assertQueryEquals("term~^3", null, "(term~2)^3.0");
  assertQueryEquals("term^3~", null, "(term~2)^3.0");
  assertQueryEquals("term*germ", null, "term*germ");
  assertQueryEquals("term*germ^3", null, "(term*germ)^3.0");
  assertTrue(getQuery("term*", null) instanceof PrefixQuery);
  assertTrue(getQuery("term*^2", null) instanceof BoostQuery);
  assertTrue(((BoostQuery) getQuery("term*^2", null)).getQuery() instanceof PrefixQuery);
  assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
  assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
  FuzzyQuery fq = (FuzzyQuery) getQuery("term~0.7", null);
  assertEquals(1, fq.getMaxEdits());
  assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
  fq = (FuzzyQuery) getQuery("term~", null);
  assertEquals(2, fq.getMaxEdits());
  assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
  // value > 1, throws exception
  assertQueryNodeException("term~1.1");
  assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);
  /*
   * Tests to see that wildcard terms are (or are not) properly lower-cased
   * with proper parser configuration
   */
  // First prefix queries:
  // by default, convert to lowercase:
  assertWildcardQueryEquals("Term*", "term*");
  // explicitly set lowercase:
  assertWildcardQueryEquals("term*", "term*");
  assertWildcardQueryEquals("Term*", "term*");
  assertWildcardQueryEquals("TERM*", "term*");
  // Then 'full' wildcard queries:
  // by default, convert to lowercase:
  assertWildcardQueryEquals("Te?m", "te?m");
  // explicitly set lowercase:
  assertWildcardQueryEquals("te?m", "te?m");
  assertWildcardQueryEquals("Te?m", "te?m");
  assertWildcardQueryEquals("TE?M", "te?m");
  assertWildcardQueryEquals("Te?m*gerM", "te?m*germ");
  // Fuzzy queries:
  assertWildcardQueryEquals("Term~", "term~2");
  // Range queries:
  // TODO: implement this on QueryParser
  // Q0002E_INVALID_SYNTAX_CANNOT_PARSE: Syntax Error, cannot parse '[A TO
  // C]': Lexical error at line 1, column 1. Encountered: "[" (91), after
  // : ""
  assertWildcardQueryEquals("[A TO C]", "[a TO c]");
  // Test suffix queries: first disallow
  expectThrows(QueryNodeException.class, () -> {
    assertWildcardQueryEquals("*Term", "*term");
  });
  expectThrows(QueryNodeException.class, () -> {
    assertWildcardQueryEquals("?Term", "?term");
  });
  // Test suffix queries: then allow
  assertWildcardQueryEquals("*Term", "*term", true);
  assertWildcardQueryEquals("?Term", "?term", true);
}
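The last two assertions rely on the parser being configured to allow leading wildcards, which are rejected by default. A minimal sketch of that configuration with StandardQueryParser, assuming a StandardAnalyzer and an illustrative field name:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.search.Query;

public class LeadingWildcardDemo {
  public static void main(String[] args) throws QueryNodeException {
    StandardQueryParser qp = new StandardQueryParser(new StandardAnalyzer());
    // Leading wildcards force a scan of the whole term dictionary,
    // so they must be enabled explicitly:
    qp.setAllowLeadingWildcard(true);
    Query q = qp.parse("*Term", "field");
    System.out.println(q); // a WildcardQuery, typically field:*term after normalization
  }
}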
Use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.
From the class TestQPHelper, method testConstantScoreAutoRewrite:
public void testConstantScoreAutoRewrite() throws Exception {
  StandardQueryParser qp = new StandardQueryParser(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
  Query q = qp.parse("foo*bar", "field");
  assertTrue(q instanceof WildcardQuery);
  assertEquals(MultiTermQuery.CONSTANT_SCORE_REWRITE, ((MultiTermQuery) q).getRewriteMethod());
  q = qp.parse("foo*", "field");
  assertTrue(q instanceof PrefixQuery);
  assertEquals(MultiTermQuery.CONSTANT_SCORE_REWRITE, ((MultiTermQuery) q).getRewriteMethod());
  q = qp.parse("[a TO z]", "field");
  assertTrue(q instanceof TermRangeQuery);
  assertEquals(MultiTermQuery.CONSTANT_SCORE_REWRITE, ((MultiTermQuery) q).getRewriteMethod());
}
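CONSTANT_SCORE_REWRITE is only the parser's default; callers can swap in another rewrite method. A minimal sketch, assuming a StandardAnalyzer; note the constant's name has varied across Lucene versions (older releases call it SCORING_BOOLEAN_QUERY_REWRITE):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;

public class RewriteMethodDemo {
  public static void main(String[] args) throws QueryNodeException {
    StandardQueryParser qp = new StandardQueryParser(new StandardAnalyzer());
    // Expand the prefix into a scoring BooleanQuery so matches get real
    // scores (bounded by the BooleanQuery clause limit), instead of the
    // default constant-score rewrite:
    qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
    Query q = qp.parse("foo*", "field");
    System.out.println(((MultiTermQuery) q).getRewriteMethod());
  }
}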
Use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.
From the class TestSimpleQueryParser, method testPrefix:
/** test a simple prefix */
public void testPrefix() throws Exception {
  PrefixQuery expected = new PrefixQuery(new Term("field", "foobar"));
  assertEquals(expected, parse("foobar*"));
}
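For context, the parse call above goes through SimpleQueryParser, whose prefix operator (a trailing "*") is enabled by default. A minimal standalone sketch; the analyzer and field name are illustrative:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.simple.SimpleQueryParser;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;

public class SimplePrefixDemo {
  public static void main(String[] args) {
    // SimpleQueryParser never throws on bad syntax, making it safe
    // for end-user input:
    SimpleQueryParser parser = new SimpleQueryParser(new StandardAnalyzer(), "field");
    Query q = parser.parse("foobar*");
    System.out.println(q.equals(new PrefixQuery(new Term("field", "foobar")))); // true
  }
}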