Use of org.apache.lucene.search.FuzzyQuery in project elasticsearch by elastic.
Class KeywordFieldTypeTests, method testFuzzyQuery.
/**
 * Checks that the default field type builds the expected Lucene {@link FuzzyQuery}
 * while the field is indexed, and that the same request fails with an
 * {@link IllegalArgumentException} once the index options are set to NONE.
 */
public void testFuzzyQuery() {
    final MappedFieldType fieldType = createDefaultFieldType();
    fieldType.setName("field");

    // Indexed field: the produced query must equal a hand-built FuzzyQuery.
    fieldType.setIndexOptions(IndexOptions.DOCS);
    FuzzyQuery expected = new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true);
    assertEquals(expected, fieldType.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true));

    // Non-indexed field: building the query must be rejected with a descriptive message.
    fieldType.setIndexOptions(IndexOptions.NONE);
    IllegalArgumentException failure = expectThrows(
        IllegalArgumentException.class,
        () -> fieldType.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true));
    assertEquals("Cannot search on field [field] since it is not indexed.", failure.getMessage());
}
Use of org.apache.lucene.search.FuzzyQuery in project languagetool by languagetool-org.
Class SimilarWordFinder, method findSimilarWordsTo.
/**
 * Runs a fuzzy search for {@code word} on the "word" field (top 10 hits), and for every
 * similar word of the same length prints {@code distance; word; candidate} to standard
 * output, where the distance comes from {@code keyDistance} for the characters at the
 * position returned by {@code getDiffPos} on the lower-cased words.
 *
 * @param reader   index reader handed to {@code findSimilarWordsFor}
 * @param searcher searcher used to execute the fuzzy query
 * @param word     the word to find similar words for
 * @throws IOException declared for the index access performed by the search and lookup
 */
private void findSimilarWordsTo(DirectoryReader reader, IndexSearcher searcher, String word) throws IOException {
    // a missing char counts as a distance of 2
    TopDocs hits = searcher.search(new FuzzyQuery(new Term("word", word), 2), 10);

    for (SimWord candidate : findSimilarWordsFor(reader, word, hits)) {
        if (word.length() != candidate.word.length()) {
            // TODO: candidates of a different length need to be handled, too
            continue;
        }
        int diffPos = getDiffPos(candidate.word.toLowerCase(), word.toLowerCase());
        try {
            float distance = keyDistance.getDistance(word.charAt(diffPos), candidate.word.charAt(diffPos));
            System.out.println(distance + "; " + word + "; " + candidate);
        } catch (Exception e) {
            // Best effort: report the failing pair and carry on with the next candidate.
            System.err.println("Could not get distance between '" + word + "' and '" + candidate + "':");
            e.printStackTrace();
        }
    }
}
Use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.
Class TestPrecedenceQueryParser, method testWildcard.
/**
 * Exercises prefix, wildcard, fuzzy and range syntax: the string form of the parsed
 * queries, the concrete query classes produced, the fuzzy edit distance / prefix length,
 * and lower-casing of the query terms.
 */
public void testWildcard() throws Exception {
    // Each row: { query text, expected string form of the parsed query }.
    String[][] renderedForms = {
        {"term*", "term*"},
        {"term*^2", "(term*)^2.0"},
        {"term~", "term~2"},
        {"term~0.7", "term~1"},
        {"term~^3", "(term~2)^3.0"},
        {"term^3~", "(term~2)^3.0"},
        {"term*germ", "term*germ"},
        {"term*germ^3", "(term*germ)^3.0"},
    };
    for (String[] row : renderedForms) {
        assertQueryEquals(row[0], null, row[1]);
    }

    // Concrete query classes produced by the parser.
    assertTrue(getQuery("term*", null) instanceof PrefixQuery);
    assertTrue(getQuery("term*^2", null) instanceof BoostQuery);
    BoostQuery boostedPrefix = (BoostQuery) getQuery("term*^2", null);
    assertTrue(boostedPrefix.getQuery() instanceof PrefixQuery);
    assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
    assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);

    // "term~0.7" is expected to yield a single edit.
    FuzzyQuery explicitFuzzy = (FuzzyQuery) getQuery("term~0.7", null);
    assertEquals(1, explicitFuzzy.getMaxEdits());
    assertEquals(FuzzyQuery.defaultPrefixLength, explicitFuzzy.getPrefixLength());

    // A bare "term~" is expected to yield two edits.
    FuzzyQuery defaultFuzzy = (FuzzyQuery) getQuery("term~", null);
    assertEquals(2, defaultFuzzy.getMaxEdits());
    assertEquals(FuzzyQuery.defaultPrefixLength, defaultFuzzy.getPrefixLength());

    // value > 1, throws exception
    expectThrows(ParseException.class, () -> getQuery("term~1.1", null));

    assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);

    /*
     * Tests to see that wild card terms are (or are not) properly lower-cased
     * with proper parser configuration.
     * Each row: { query text, expected lower-cased string form }.
     */
    String[][] lowerCasedForms = {
        // prefix queries
        {"Term*", "term*"},
        {"term*", "term*"},
        {"Term*", "term*"},
        {"TERM*", "term*"},
        // 'full' wildcard queries
        {"Te?m", "te?m"},
        {"te?m", "te?m"},
        {"Te?m", "te?m"},
        {"TE?M", "te?m"},
        {"Te?m*gerM", "te?m*germ"},
        // fuzzy queries
        {"Term~", "term~2"},
        // range queries
        {"[A TO C]", "[a TO c]"},
    };
    for (String[] row : lowerCasedForms) {
        assertWildcardQueryEquals(row[0], row[1]);
    }
}
Use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.
Class QueryParsing, method toString.
/**
 * Appends a textual rendering of {@code query} to {@code out}.
 * <p>
 * Term, term-range, legacy numeric-range, boolean, prefix, wrapped and boost queries are
 * written piece by piece (recursing into sub-queries where present). Wildcard, fuzzy and
 * constant-score queries are written using their own {@code toString()}. Any other query
 * is written as {@code SimpleClassName(query.toString())}.
 *
 * @param query  the query to render
 * @param schema schema passed through to {@code writeFieldName}
 * @param out    destination the rendering is appended to
 * @param flags  rendering flags; {@code FLAG_BOOSTED} and {@code FLAG_IS_CLAUSE} are
 *               cleared before recursing and re-set by the boost / boolean branches
 * @throws IOException if appending to {@code out} fails
 * @see #toString(Query,IndexSchema)
 */
public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException {
    // clear the boosted / is clause flags for recursion
    final int childFlags = flags & ~(FLAG_BOOSTED | FLAG_IS_CLAUSE);

    if (query instanceof TermQuery) {
        Term term = ((TermQuery) query).getTerm();
        FieldType fieldType = writeFieldName(term.field(), schema, out, flags);
        writeFieldVal(term.bytes(), fieldType, out, flags);

    } else if (query instanceof TermRangeQuery) {
        TermRangeQuery range = (TermRangeQuery) query;
        FieldType fieldType = writeFieldName(range.getField(), schema, out, flags);
        out.append(range.includesLower() ? '[' : '{');
        BytesRef lower = range.getLowerTerm();
        BytesRef upper = range.getUpperTerm();
        // An open bound is rendered as '*'.
        if (lower != null) {
            writeFieldVal(lower, fieldType, out, flags);
        } else {
            out.append('*');
        }
        out.append(" TO ");
        if (upper != null) {
            writeFieldVal(upper, fieldType, out, flags);
        } else {
            out.append('*');
        }
        out.append(range.includesUpper() ? ']' : '}');

    } else if (query instanceof LegacyNumericRangeQuery) {
        LegacyNumericRangeQuery range = (LegacyNumericRangeQuery) query;
        // Only the field name is written here; the bounds are appended via Number.toString().
        writeFieldName(range.getField(), schema, out, flags);
        out.append(range.includesMin() ? '[' : '{');
        Number min = range.getMin();
        Number max = range.getMax();
        if (min != null) {
            out.append(min.toString());
        } else {
            out.append('*');
        }
        out.append(" TO ");
        if (max != null) {
            out.append(max.toString());
        } else {
            out.append('*');
        }
        out.append(range.includesMax() ? ']' : '}');

    } else if (query instanceof BooleanQuery) {
        BooleanQuery bool = (BooleanQuery) query;
        final int minShouldMatch = bool.getMinimumNumberShouldMatch();
        // Parenthesize when a min-should-match suffix follows, or when this query is itself
        // a clause / boosted.
        final boolean parenthesize =
            minShouldMatch != 0 || (flags & (FLAG_IS_CLAUSE | FLAG_BOOSTED)) != 0;

        if (parenthesize) {
            out.append('(');
        }
        boolean needSeparator = false;
        for (BooleanClause clause : bool.clauses()) {
            if (needSeparator) {
                out.append(' ');
            }
            needSeparator = true;

            if (clause.isProhibited()) {
                out.append('-');
            } else if (clause.isRequired()) {
                out.append('+');
            }
            toString(clause.getQuery(), schema, out, childFlags | FLAG_IS_CLAUSE);
        }
        if (parenthesize) {
            out.append(')');
        }
        if (minShouldMatch > 0) {
            out.append('~');
            out.append(Integer.toString(minShouldMatch));
        }

    } else if (query instanceof PrefixQuery) {
        Term prefix = ((PrefixQuery) query).getPrefix();
        // The returned field type is not needed: the prefix text is written verbatim.
        writeFieldName(prefix.field(), schema, out, flags);
        out.append(prefix.text());
        out.append('*');

    } else if (query instanceof WildcardQuery
        || query instanceof FuzzyQuery
        || query instanceof ConstantScoreQuery) {
        // These query types are rendered with their own string form.
        out.append(query.toString());

    } else if (query instanceof WrappedQuery) {
        WrappedQuery wrapped = (WrappedQuery) query;
        out.append(wrapped.getOptions());
        toString(wrapped.getWrappedQuery(), schema, out, childFlags);

    } else if (query instanceof BoostQuery) {
        BoostQuery boosted = (BoostQuery) query;
        toString(boosted.getQuery(), schema, out, childFlags | FLAG_BOOSTED);
        out.append("^");
        out.append(Float.toString(boosted.getBoost()));

    } else {
        // Fallback for query types without dedicated handling.
        out.append(query.getClass().getSimpleName() + '(' + query.toString() + ')');
    }
}
Use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.
Class TestSpanMultiTermQueryWrapper, method testNoSuchMultiTermsInNotNear.
/**
 * Makes sure that span-wrapped multi-term queries on the non-existent term "noSuch"
 * can be combined in a {@link SpanNotQuery} without throwing non-matching field
 * exceptions, and that the expected hit counts are returned.
 */
public void testNoSuchMultiTermsInNotNear() throws Exception {
    SpanQuery brown = new SpanTermQuery(new Term("field", "brown"));

    // Fuzzy: as the excluded span, as the included span, and on both sides.
    FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "noSuch"), 1, 0, 1, false);
    SpanQuery fuzzySpan = new SpanMultiTermQueryWrapper<>(fuzzy);
    assertEquals(1, searcher.search(new SpanNotQuery(brown, fuzzySpan, 0, 0), 10).totalHits);
    assertEquals(0, searcher.search(new SpanNotQuery(fuzzySpan, brown, 0, 0), 10).totalHits);
    assertEquals(0, searcher.search(new SpanNotQuery(fuzzySpan, fuzzySpan, 0, 0), 10).totalHits);

    // Wildcard as the excluded span.
    WildcardQuery wildcard = new WildcardQuery(new Term("field", "noSuch*"));
    SpanQuery wildcardSpan = new SpanMultiTermQueryWrapper<>(wildcard);
    assertEquals(1, searcher.search(new SpanNotQuery(brown, wildcardSpan, 0, 0), 10).totalHits);

    // Regexp as the excluded span.
    RegexpQuery regexp = new RegexpQuery(new Term("field", "noSuch"));
    SpanQuery regexpSpan = new SpanMultiTermQueryWrapper<>(regexp);
    assertEquals(1, searcher.search(new SpanNotQuery(brown, regexpSpan, 1, 1), 10).totalHits);

    // Prefix as the excluded span.
    PrefixQuery prefix = new PrefixQuery(new Term("field", "noSuch"));
    SpanQuery prefixSpan = new SpanMultiTermQueryWrapper<>(prefix);
    assertEquals(1, searcher.search(new SpanNotQuery(brown, prefixSpan, 1, 1), 10).totalHits);
}
Aggregations