use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.
the class TestSpanMultiTermQueryWrapper method testNoSuchMultiTermsInSpanFirst.
public void testNoSuchMultiTermsInSpanFirst() throws Exception {
//this hasn't been a problem
FuzzyQuery fuzzyNoSuch = new FuzzyQuery(new Term("field", "noSuch"), 1, 0, 1, false);
SpanQuery spanNoSuch = new SpanMultiTermQueryWrapper<>(fuzzyNoSuch);
SpanQuery spanFirst = new SpanFirstQuery(spanNoSuch, 10);
assertEquals(0, searcher.search(spanFirst, 10).totalHits);
WildcardQuery wcNoSuch = new WildcardQuery(new Term("field", "noSuch*"));
SpanQuery spanWCNoSuch = new SpanMultiTermQueryWrapper<>(wcNoSuch);
spanFirst = new SpanFirstQuery(spanWCNoSuch, 10);
assertEquals(0, searcher.search(spanFirst, 10).totalHits);
RegexpQuery rgxNoSuch = new RegexpQuery(new Term("field", "noSuch"));
SpanQuery spanRgxNoSuch = new SpanMultiTermQueryWrapper<>(rgxNoSuch);
spanFirst = new SpanFirstQuery(spanRgxNoSuch, 10);
assertEquals(0, searcher.search(spanFirst, 10).totalHits);
PrefixQuery prfxNoSuch = new PrefixQuery(new Term("field", "noSuch"));
SpanQuery spanPrfxNoSuch = new SpanMultiTermQueryWrapper<>(prfxNoSuch);
spanFirst = new SpanFirstQuery(spanPrfxNoSuch, 10);
assertEquals(0, searcher.search(spanFirst, 10).totalHits);
}
use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.
the class TestSpanMultiTermQueryWrapper method testFuzzy.
public void testFuzzy() throws Exception {
FuzzyQuery fq = new FuzzyQuery(new Term("field", "broan"));
SpanQuery sfq = new SpanMultiTermQueryWrapper<>(fq);
// will not match quick brown fox
SpanPositionRangeQuery sprq = new SpanPositionRangeQuery(sfq, 3, 6);
assertEquals(2, searcher.search(sprq, 10).totalHits);
}
use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterMTQ method testWhichMTQMatched.
/**
* Runs a query with two MTQs and confirms the formatter
* can tell which query matched which hit.
*/
public void testWhichMTQMatched() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
// use a variety of common MTQ types
BooleanQuery query = new BooleanQuery.Builder().add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD).add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD).add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD).build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighter.highlight("body", query, topDocs);
assertEquals(1, snippets.length);
// Default formatter just bolds each hit:
assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);
// Now use our own formatter, that also stuffs the
// matching term's text into the result:
highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected PassageFormatter getFormatter(String field) {
return new PassageFormatter() {
@Override
public Object format(Passage[] passages, String content) {
// Copied from DefaultPassageFormatter, but
// tweaked to include the matched term:
StringBuilder sb = new StringBuilder();
int pos = 0;
for (Passage passage : passages) {
// don't add ellipsis if its the first one, or if its connected.
if (passage.getStartOffset() > pos && pos > 0) {
sb.append("... ");
}
pos = passage.getStartOffset();
for (int i = 0; i < passage.getNumMatches(); i++) {
int start = passage.getMatchStarts()[i];
int end = passage.getMatchEnds()[i];
// its possible to have overlapping terms
if (start > pos) {
sb.append(content, pos, start);
}
if (end > pos) {
sb.append("<b>");
sb.append(content, Math.max(pos, start), end);
sb.append('(');
sb.append(passage.getMatchTerms()[i].utf8ToString());
sb.append(')');
sb.append("</b>");
pos = end;
}
}
// its possible a "term" from the analyzer could span a sentence boundary.
sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
pos = passage.getEndOffset();
}
return sb.toString();
}
};
}
};
assertEquals(1, topDocs.totalHits);
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(1, snippets.length);
assertEquals("<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.", snippets[0]);
ir.close();
}
use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterMTQ method testOneFuzzy.
public void testOneFuzzy() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
Query query = new FuzzyQuery(new Term("body", "tets"), 1);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
String[] snippets = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// with prefix
query = new FuzzyQuery(new Term("body", "tets"), 1, 2);
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// wrong field
BooleanQuery bq = new BooleanQuery.Builder().add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD).add(new FuzzyQuery(new Term("bogus", "tets"), 1), BooleanClause.Occur.SHOULD).build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
snippets = highlighter.highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
ir.close();
}
use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.
the class TestQPHelper method testWildcard.
public void testWildcard() throws Exception {
assertQueryEquals("term*", null, "term*");
assertQueryEquals("term*^2", null, "(term*)^2.0");
assertQueryEquals("term~", null, "term~2");
assertQueryEquals("term~0.7", null, "term~1");
assertQueryEquals("term~^3", null, "(term~2)^3.0");
assertQueryEquals("term^3~", null, "(term~2)^3.0");
assertQueryEquals("term*germ", null, "term*germ");
assertQueryEquals("term*germ^3", null, "(term*germ)^3.0");
assertTrue(getQuery("term*", null) instanceof PrefixQuery);
assertTrue(getQuery("term*^2", null) instanceof BoostQuery);
assertTrue(((BoostQuery) getQuery("term*^2", null)).getQuery() instanceof PrefixQuery);
assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
FuzzyQuery fq = (FuzzyQuery) getQuery("term~0.7", null);
assertEquals(1, fq.getMaxEdits());
assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
fq = (FuzzyQuery) getQuery("term~", null);
assertEquals(2, fq.getMaxEdits());
assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
// value > 1, throws exception
assertQueryNodeException("term~1.1");
assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);
/*
* Tests to see that wild card terms are (or are not) properly lower-cased
* with propery parser configuration
*/
// First prefix queries:
// by default, convert to lowercase:
assertWildcardQueryEquals("Term*", "term*");
// explicitly set lowercase:
assertWildcardQueryEquals("term*", "term*");
assertWildcardQueryEquals("Term*", "term*");
assertWildcardQueryEquals("TERM*", "term*");
// Then 'full' wildcard queries:
// by default, convert to lowercase:
assertWildcardQueryEquals("Te?m", "te?m");
// explicitly set lowercase:
assertWildcardQueryEquals("te?m", "te?m");
assertWildcardQueryEquals("Te?m", "te?m");
assertWildcardQueryEquals("TE?M", "te?m");
assertWildcardQueryEquals("Te?m*gerM", "te?m*germ");
// Fuzzy queries:
assertWildcardQueryEquals("Term~", "term~2");
// Range queries:
// TODO: implement this on QueryParser
// Q0002E_INVALID_SYNTAX_CANNOT_PARSE: Syntax Error, cannot parse '[A TO
// C]': Lexical error at line 1, column 1. Encountered: "[" (91), after
// : ""
assertWildcardQueryEquals("[A TO C]", "[a TO c]");
// Test suffix queries: first disallow
expectThrows(QueryNodeException.class, () -> {
assertWildcardQueryEquals("*Term", "*term");
});
expectThrows(QueryNodeException.class, () -> {
assertWildcardQueryEquals("?Term", "?term");
});
// Test suffix queries: then allow
assertWildcardQueryEquals("*Term", "*term", true);
assertWildcardQueryEquals("?Term", "?term", true);
}
Aggregations