Search in sources :

Example 21 with FuzzyQuery

Use of org.apache.lucene.search.FuzzyQuery in the project elasticsearch by elastic.

Method testFuzzyQuery of the class KeywordFieldTypeTests.

/**
 * Checks {@code fuzzyQuery} on the default field type: with index options {@code DOCS} it must
 * equal the corresponding Lucene {@link FuzzyQuery}; with index options {@code NONE} it must be
 * rejected with an {@link IllegalArgumentException} carrying the expected message.
 */
public void testFuzzyQuery() {
    MappedFieldType fieldType = createDefaultFieldType();
    fieldType.setName("field");

    // Index options DOCS: the field type's query is compared against a hand-built FuzzyQuery.
    fieldType.setIndexOptions(IndexOptions.DOCS);
    FuzzyQuery expectedQuery = new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true);
    assertEquals(expectedQuery, fieldType.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true));

    // Index options NONE: the same request must fail with a descriptive message.
    fieldType.setIndexOptions(IndexOptions.NONE);
    IllegalArgumentException failure = expectThrows(
        IllegalArgumentException.class,
        () -> fieldType.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true));
    assertEquals("Cannot search on field [field] since it is not indexed.", failure.getMessage());
}
Also used : FuzzyQuery(org.apache.lucene.search.FuzzyQuery) Term(org.apache.lucene.index.Term)

Example 22 with FuzzyQuery

Use of org.apache.lucene.search.FuzzyQuery in the project languagetool by languagetool-org.

Method findSimilarWordsTo of the class SimilarWordFinder.

/**
 * Runs a fuzzy search (max. 2 edits, top 10 hits) for {@code word} on the "word" field and, for
 * every similar word of the same length, prints the key distance between the two characters at
 * the position reported by {@code getDiffPos}.
 *
 * <p>Similar words of a different length are currently skipped.
 *
 * @param reader   index reader passed on to {@code findSimilarWordsFor}
 * @param searcher searcher used to execute the fuzzy query
 * @param word     the word to find similar words for
 * @throws IOException declared by this method; presumably raised by the search or the index lookup
 */
private void findSimilarWordsTo(DirectoryReader reader, IndexSearcher searcher, String word) throws IOException {
    // a missing char counts as a distance of 2
    Term wordTerm = new Term("word", word);
    TopDocs hits = searcher.search(new FuzzyQuery(wordTerm, 2), 10);
    List<SimWord> candidates = findSimilarWordsFor(reader, word, hits);
    for (SimWord candidate : candidates) {
        if (word.length() != candidate.word.length()) {
            // TODO: these need to be handled, too
            continue;
        }
        int diffPos = getDiffPos(candidate.word.toLowerCase(), word.toLowerCase());
        try {
            char typed = word.charAt(diffPos);
            char similar = candidate.word.charAt(diffPos);
            float dist = keyDistance.getDistance(typed, similar);
            System.out.println(dist + "; " + word + "; " + candidate);
        } catch (Exception e) {
            // Best effort: report the failing pair and carry on with the remaining candidates.
            System.err.println("Could not get distance between '" + word + "' and '" + candidate + "':");
            e.printStackTrace();
        }
    }
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) IOException(java.io.IOException)

Example 23 with FuzzyQuery

Use of org.apache.lucene.search.FuzzyQuery in the project lucene-solr by apache.

Method testWildcard of the class TestPrecedenceQueryParser.

/**
 * Checks how the parser handles prefix ({@code term*}), wildcard ({@code term*germ}) and fuzzy
 * ({@code term~}) syntax: the printed form of the parsed query, the concrete query classes
 * produced, the fuzzy edit distance / prefix length, and lower-casing of mixed-case input.
 */
public void testWildcard() throws Exception {
    // Printed form of the parsed queries.
    assertQueryEquals("term*", null, "term*");
    assertQueryEquals("term*^2", null, "(term*)^2.0");
    assertQueryEquals("term~", null, "term~2");
    assertQueryEquals("term~0.7", null, "term~1");
    assertQueryEquals("term~^3", null, "(term~2)^3.0");
    assertQueryEquals("term^3~", null, "(term~2)^3.0");
    assertQueryEquals("term*germ", null, "term*germ");
    assertQueryEquals("term*germ^3", null, "(term*germ)^3.0");

    // Concrete query classes for prefix and boosted prefix input.
    assertTrue(getQuery("term*", null) instanceof PrefixQuery);
    assertTrue(getQuery("term*^2", null) instanceof BoostQuery);
    BoostQuery boostedPrefix = (BoostQuery) getQuery("term*^2", null);
    assertTrue(boostedPrefix.getQuery() instanceof PrefixQuery);

    // Fuzzy input: query class, edit distance and prefix length.
    assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
    assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
    FuzzyQuery fuzzyWithSimilarity = (FuzzyQuery) getQuery("term~0.7", null);
    assertEquals(1, fuzzyWithSimilarity.getMaxEdits());
    assertEquals(FuzzyQuery.defaultPrefixLength, fuzzyWithSimilarity.getPrefixLength());
    FuzzyQuery fuzzyDefault = (FuzzyQuery) getQuery("term~", null);
    assertEquals(2, fuzzyDefault.getMaxEdits());
    assertEquals(FuzzyQuery.defaultPrefixLength, fuzzyDefault.getPrefixLength());

    // A fuzzy value greater than 1 must be rejected by the parser.
    expectThrows(ParseException.class, () -> getQuery("term~1.1", null));

    assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);

    /*
     * The remaining assertions check that mixed-case input comes back lower-cased.
     */
    // Prefix queries:
    assertWildcardQueryEquals("Term*", "term*");
    assertWildcardQueryEquals("term*", "term*");
    assertWildcardQueryEquals("Term*", "term*");
    assertWildcardQueryEquals("TERM*", "term*");
    // 'Full' wildcard queries:
    assertWildcardQueryEquals("Te?m", "te?m");
    assertWildcardQueryEquals("te?m", "te?m");
    assertWildcardQueryEquals("Te?m", "te?m");
    assertWildcardQueryEquals("TE?M", "te?m");
    assertWildcardQueryEquals("Te?m*gerM", "te?m*germ");
    // Fuzzy queries:
    assertWildcardQueryEquals("Term~", "term~2");
    // Range queries:
    assertWildcardQueryEquals("[A TO C]", "[a TO c]");
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) BoostQuery(org.apache.lucene.search.BoostQuery)

Example 24 with FuzzyQuery

Use of org.apache.lucene.search.FuzzyQuery in the project lucene-solr by apache.

Method toString of the class QueryParsing.

/**
 * Appends a textual form of {@code query} to {@code out}, choosing the format by the concrete
 * query class.
 *
 * <p>Term, term-range, legacy numeric-range, boolean, prefix, wrapped and boost queries are
 * written piece by piece; wildcard, fuzzy and constant-score queries are written via their own
 * {@code toString()}; any other query is written as {@code SimpleClassName(query.toString())}.
 * Open range ends are written as {@code *}.
 *
 * @param query  the query to render
 * @param schema schema handed to {@code writeFieldName} when a field name is written
 * @param out    destination the text is appended to
 * @param flags  rendering flags; {@code FLAG_BOOSTED} and {@code FLAG_IS_CLAUSE} are cleared
 *               before recursing into nested queries and, when set, force parentheses around
 *               a boolean query
 * @throws IOException if appending to {@code out} fails
 * @see #toString(Query,IndexSchema)
 */
public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException {
    // Nested queries start without the boosted / is-clause markers of their parent.
    final int nestedFlags = flags & ~(FLAG_BOOSTED | FLAG_IS_CLAUSE);

    if (query instanceof TermQuery) {
        Term term = ((TermQuery) query).getTerm();
        FieldType fieldType = writeFieldName(term.field(), schema, out, flags);
        writeFieldVal(term.bytes(), fieldType, out, flags);
        return;
    }

    if (query instanceof TermRangeQuery) {
        TermRangeQuery range = (TermRangeQuery) query;
        FieldType fieldType = writeFieldName(range.getField(), schema, out, flags);
        out.append(range.includesLower() ? '[' : '{');
        BytesRef lower = range.getLowerTerm();
        BytesRef upper = range.getUpperTerm();
        if (lower != null) {
            writeFieldVal(lower, fieldType, out, flags);
        } else {
            out.append('*');
        }
        out.append(" TO ");
        if (upper != null) {
            writeFieldVal(upper, fieldType, out, flags);
        } else {
            out.append('*');
        }
        out.append(range.includesUpper() ? ']' : '}');
        return;
    }

    if (query instanceof LegacyNumericRangeQuery) {
        LegacyNumericRangeQuery range = (LegacyNumericRangeQuery) query;
        // Only the field name is needed here; bounds are written with Number.toString().
        writeFieldName(range.getField(), schema, out, flags);
        out.append(range.includesMin() ? '[' : '{');
        Number min = range.getMin();
        Number max = range.getMax();
        if (min != null) {
            out.append(min.toString());
        } else {
            out.append('*');
        }
        out.append(" TO ");
        if (max != null) {
            out.append(max.toString());
        } else {
            out.append('*');
        }
        out.append(range.includesMax() ? ']' : '}');
        return;
    }

    if (query instanceof BooleanQuery) {
        BooleanQuery bool = (BooleanQuery) query;
        // Parenthesize when a min-should-match is set, or when this query is itself a clause
        // or the target of a boost.
        final boolean parenthesize = bool.getMinimumNumberShouldMatch() != 0
            || (flags & (FLAG_IS_CLAUSE | FLAG_BOOSTED)) != 0;
        if (parenthesize) {
            out.append('(');
        }
        boolean needSeparator = false;
        for (BooleanClause clause : bool.clauses()) {
            if (needSeparator) {
                out.append(' ');
            }
            needSeparator = true;
            if (clause.isProhibited()) {
                out.append('-');
            } else if (clause.isRequired()) {
                out.append('+');
            }
            toString(clause.getQuery(), schema, out, nestedFlags | FLAG_IS_CLAUSE);
        }
        if (parenthesize) {
            out.append(')');
        }
        if (bool.getMinimumNumberShouldMatch() > 0) {
            out.append('~');
            out.append(Integer.toString(bool.getMinimumNumberShouldMatch()));
        }
        return;
    }

    if (query instanceof PrefixQuery) {
        Term prefixTerm = ((PrefixQuery) query).getPrefix();
        writeFieldName(prefixTerm.field(), schema, out, flags);
        out.append(prefixTerm.text());
        out.append('*');
        return;
    }

    // These query types are rendered with their own toString().
    if (query instanceof WildcardQuery || query instanceof FuzzyQuery || query instanceof ConstantScoreQuery) {
        out.append(query.toString());
        return;
    }

    if (query instanceof WrappedQuery) {
        WrappedQuery wrapped = (WrappedQuery) query;
        out.append(wrapped.getOptions());
        toString(wrapped.getWrappedQuery(), schema, out, nestedFlags);
        return;
    }

    if (query instanceof BoostQuery) {
        BoostQuery boosted = (BoostQuery) query;
        toString(boosted.getQuery(), schema, out, nestedFlags | FLAG_BOOSTED);
        out.append("^");
        out.append(Float.toString(boosted.getBoost()));
        return;
    }

    // Fallback for every other query type: SimpleClassName(toString()).
    out.append(query.getClass().getSimpleName() + '(' + query.toString() + ')');
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) LegacyNumericRangeQuery(org.apache.solr.legacy.LegacyNumericRangeQuery) Query(org.apache.lucene.search.Query) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) LegacyNumericRangeQuery(org.apache.solr.legacy.LegacyNumericRangeQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) Term(org.apache.lucene.index.Term) BoostQuery(org.apache.lucene.search.BoostQuery) FieldType(org.apache.solr.schema.FieldType) BooleanClause(org.apache.lucene.search.BooleanClause) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) BytesRef(org.apache.lucene.util.BytesRef)

Example 25 with FuzzyQuery

Use of org.apache.lucene.search.FuzzyQuery in the project lucene-solr by apache.

Method testNoSuchMultiTermsInNotNear of the class TestSpanMultiTermQueryWrapper.

/**
 * Makes sure that multi-term queries (fuzzy, wildcard, regexp, prefix) built on the term text
 * "noSuch" can be used inside a {@code SpanNotQuery} without throwing non-matching field
 * exceptions, and that the expected hit counts come back.
 */
public void testNoSuchMultiTermsInNotNear() throws Exception {
    SpanQuery brown = new SpanTermQuery(new Term("field", "brown"));

    // Fuzzy "noSuch" as the excluded span, as the included span, and on both sides.
    FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "noSuch"), 1, 0, 1, false);
    SpanQuery fuzzySpan = new SpanMultiTermQueryWrapper<>(fuzzy);
    assertEquals(1, searcher.search(new SpanNotQuery(brown, fuzzySpan, 0, 0), 10).totalHits);
    assertEquals(0, searcher.search(new SpanNotQuery(fuzzySpan, brown, 0, 0), 10).totalHits);
    assertEquals(0, searcher.search(new SpanNotQuery(fuzzySpan, fuzzySpan, 0, 0), 10).totalHits);

    // Wildcard "noSuch*" as the excluded span.
    WildcardQuery wildcard = new WildcardQuery(new Term("field", "noSuch*"));
    SpanQuery wildcardSpan = new SpanMultiTermQueryWrapper<>(wildcard);
    assertEquals(1, searcher.search(new SpanNotQuery(brown, wildcardSpan, 0, 0), 10).totalHits);

    // Regexp "noSuch" as the excluded span.
    RegexpQuery regexp = new RegexpQuery(new Term("field", "noSuch"));
    SpanQuery regexpSpan = new SpanMultiTermQueryWrapper<>(regexp);
    assertEquals(1, searcher.search(new SpanNotQuery(brown, regexpSpan, 1, 1), 10).totalHits);

    // Prefix "noSuch" as the excluded span.
    PrefixQuery prefix = new PrefixQuery(new Term("field", "noSuch"));
    SpanQuery prefixSpan = new SpanMultiTermQueryWrapper<>(prefix);
    assertEquals(1, searcher.search(new SpanNotQuery(brown, prefixSpan, 1, 1), 10).totalHits);
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) Term(org.apache.lucene.index.Term) RegexpQuery(org.apache.lucene.search.RegexpQuery)

Aggregations

FuzzyQuery (org.apache.lucene.search.FuzzyQuery) 34, Term (org.apache.lucene.index.Term) 26, PrefixQuery (org.apache.lucene.search.PrefixQuery) 20, BooleanQuery (org.apache.lucene.search.BooleanQuery) 17, BoostQuery (org.apache.lucene.search.BoostQuery) 16, Query (org.apache.lucene.search.Query) 16, TermQuery (org.apache.lucene.search.TermQuery) 12, WildcardQuery (org.apache.lucene.search.WildcardQuery) 12, MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery) 8, PhraseQuery (org.apache.lucene.search.PhraseQuery) 8, RegexpQuery (org.apache.lucene.search.RegexpQuery) 7, MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery) 6, TermRangeQuery (org.apache.lucene.search.TermRangeQuery) 6, BooleanClause (org.apache.lucene.search.BooleanClause) 5, ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery) 5, DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery) 5, TopDocs (org.apache.lucene.search.TopDocs) 5, SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery) 5, SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery) 5, Map (java.util.Map) 4