Search in sources :

Example 11 with FuzzyQuery

use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.

the class TestSpanMultiTermQueryWrapper method testNoSuchMultiTermsInSpanFirst.

/**
 * Checks that a {@link SpanFirstQuery} built over a wrapped multi-term query for the
 * text "noSuch" produces zero hits, for each of the fuzzy, wildcard, regexp and
 * prefix query types.
 */
public void testNoSuchMultiTermsInSpanFirst() throws Exception {
    // Fuzzy variant; the original note says this one has not been a problem.
    FuzzyQuery missingFuzzy = new FuzzyQuery(new Term("field", "noSuch"), 1, 0, 1, false);
    SpanQuery wrappedFuzzy = new SpanMultiTermQueryWrapper<>(missingFuzzy);
    SpanQuery firstOverFuzzy = new SpanFirstQuery(wrappedFuzzy, 10);
    assertEquals(0, searcher.search(firstOverFuzzy, 10).totalHits);

    // Wildcard variant.
    WildcardQuery missingWildcard = new WildcardQuery(new Term("field", "noSuch*"));
    SpanQuery wrappedWildcard = new SpanMultiTermQueryWrapper<>(missingWildcard);
    SpanQuery firstOverWildcard = new SpanFirstQuery(wrappedWildcard, 10);
    assertEquals(0, searcher.search(firstOverWildcard, 10).totalHits);

    // Regular-expression variant.
    RegexpQuery missingRegexp = new RegexpQuery(new Term("field", "noSuch"));
    SpanQuery wrappedRegexp = new SpanMultiTermQueryWrapper<>(missingRegexp);
    SpanQuery firstOverRegexp = new SpanFirstQuery(wrappedRegexp, 10);
    assertEquals(0, searcher.search(firstOverRegexp, 10).totalHits);

    // Prefix variant.
    PrefixQuery missingPrefix = new PrefixQuery(new Term("field", "noSuch"));
    SpanQuery wrappedPrefix = new SpanMultiTermQueryWrapper<>(missingPrefix);
    SpanQuery firstOverPrefix = new SpanFirstQuery(wrappedPrefix, 10);
    assertEquals(0, searcher.search(firstOverPrefix, 10).totalHits);
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) Term(org.apache.lucene.index.Term) RegexpQuery(org.apache.lucene.search.RegexpQuery)

Example 12 with FuzzyQuery

use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.

the class TestSpanMultiTermQueryWrapper method testFuzzy.

/**
 * Wraps a fuzzy query for "broan" as a span query, restricts it to the position
 * range 3..6 and expects exactly two hits from the shared searcher.
 */
public void testFuzzy() throws Exception {
    SpanQuery fuzzySpan = new SpanMultiTermQueryWrapper<>(new FuzzyQuery(new Term("field", "broan")));
    // With this position range, "quick brown fox" will not match.
    SpanPositionRangeQuery positioned = new SpanPositionRangeQuery(fuzzySpan, 3, 6);
    assertEquals(2, searcher.search(positioned, 10).totalHits);
}
Also used : FuzzyQuery(org.apache.lucene.search.FuzzyQuery) Term(org.apache.lucene.index.Term)

Example 13 with FuzzyQuery

use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testWhichMTQMatched.

/**
 * Searches a single-document index with a boolean query made of three multi-term
 * queries (prefix, wildcard and fuzzy) and confirms that a custom passage formatter
 * can tell which query matched which highlighted hit.
 */
public void testWhichMTQMatched() throws Exception {
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field bodyField = new Field("body", "", fieldType);
    Document document = new Document();
    document.add(bodyField);
    bodyField.setStringValue("Test a one sentence document.");
    writer.addDocument(document);
    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher indexSearcher = newSearcher(reader);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(indexSearcher, indexAnalyzer);

    // Combine a variety of common MTQ types as optional clauses.
    BooleanQuery.Builder clauses = new BooleanQuery.Builder();
    clauses.add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD);
    clauses.add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD);
    clauses.add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD);
    BooleanQuery query = clauses.build();

    TopDocs topDocs = indexSearcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);
    // The default formatter simply wraps each hit in bold tags.
    assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);

    // Switch to a highlighter whose formatter also writes the matching term's text
    // into the output, right after the highlighted content.
    highlighter = new UnifiedHighlighter(indexSearcher, indexAnalyzer) {

        @Override
        protected PassageFormatter getFormatter(String field) {
            return new PassageFormatter() {

                @Override
                public Object format(Passage[] passages, String content) {
                    // Adapted from DefaultPassageFormatter, with the matched term appended.
                    StringBuilder out = new StringBuilder();
                    int cursor = 0;
                    for (Passage passage : passages) {
                        // No ellipsis before the first passage, nor when this passage is
                        // connected to the previous one.
                        if (cursor > 0 && passage.getStartOffset() > cursor) {
                            out.append("... ");
                        }
                        cursor = passage.getStartOffset();
                        final int matchCount = passage.getNumMatches();
                        for (int m = 0; m < matchCount; m++) {
                            int matchStart = passage.getMatchStarts()[m];
                            int matchEnd = passage.getMatchEnds()[m];
                            // Matches may overlap, so only emit text that lies past the cursor.
                            if (matchStart > cursor) {
                                out.append(content, cursor, matchStart);
                            }
                            if (matchEnd > cursor) {
                                out.append("<b>")
                                    .append(content, Math.max(cursor, matchStart), matchEnd)
                                    .append('(')
                                    .append(passage.getMatchTerms()[m].utf8ToString())
                                    .append(')')
                                    .append("</b>");
                                cursor = matchEnd;
                            }
                        }
                        // A "term" from the analyzer may span a sentence boundary, so the
                        // cursor can already be past the passage end.
                        out.append(content, cursor, Math.max(cursor, passage.getEndOffset()));
                        cursor = passage.getEndOffset();
                    }
                    return out.toString();
                }
            };
        }
    };

    assertEquals(1, topDocs.totalHits);
    snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);
    assertEquals("<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.", snippets[0]);
    reader.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 14 with FuzzyQuery

use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testOneFuzzy.

/**
 * Indexes two documents and highlights the "body" field for a fuzzy query on "tets":
 * first with one allowed edit, then with an additional prefix length of 2, and finally
 * with the fuzzy clause pointed at a different field, in which case the snippets are
 * expected to come back without any highlighting.
 */
public void testOneFuzzy() throws Exception {
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field bodyField = new Field("body", "", fieldType);
    Document document = new Document();
    document.add(bodyField);

    // The same Document/Field pair is reused for both indexed documents.
    bodyField.setStringValue("This is a test.");
    writer.addDocument(document);
    bodyField.setStringValue("Test a one sentence document.");
    writer.addDocument(document);

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher indexSearcher = newSearcher(reader);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(indexSearcher, indexAnalyzer);

    // Plain fuzzy query.
    Query plainFuzzy = new FuzzyQuery(new Term("body", "tets"), 1);
    TopDocs plainDocs = indexSearcher.search(plainFuzzy, 10, Sort.INDEXORDER);
    assertEquals(2, plainDocs.totalHits);
    String[] plainSnippets = highlighter.highlight("body", plainFuzzy, plainDocs);
    assertEquals(2, plainSnippets.length);
    assertEquals("This is a <b>test</b>.", plainSnippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", plainSnippets[1]);

    // Fuzzy query with a prefix.
    Query prefixedFuzzy = new FuzzyQuery(new Term("body", "tets"), 1, 2);
    TopDocs prefixedDocs = indexSearcher.search(prefixedFuzzy, 10, Sort.INDEXORDER);
    assertEquals(2, prefixedDocs.totalHits);
    String[] prefixedSnippets = highlighter.highlight("body", prefixedFuzzy, prefixedDocs);
    assertEquals(2, prefixedSnippets.length);
    assertEquals("This is a <b>test</b>.", prefixedSnippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", prefixedSnippets[1]);

    // Fuzzy clause on the wrong field: both documents still match through the
    // match-all clause, but nothing in "body" is highlighted.
    BooleanQuery.Builder clauses = new BooleanQuery.Builder();
    clauses.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
    clauses.add(new FuzzyQuery(new Term("bogus", "tets"), 1), BooleanClause.Occur.SHOULD);
    BooleanQuery wrongField = clauses.build();
    TopDocs wrongFieldDocs = indexSearcher.search(wrongField, 10, Sort.INDEXORDER);
    assertEquals(2, wrongFieldDocs.totalHits);
    String[] wrongFieldSnippets = highlighter.highlight("body", wrongField, wrongFieldDocs);
    assertEquals(2, wrongFieldSnippets.length);
    assertEquals("This is a test.", wrongFieldSnippets[0]);
    assertEquals("Test a one sentence document.", wrongFieldSnippets[1]);

    reader.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 15 with FuzzyQuery

use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.

the class TestQPHelper method testWildcard.

/**
 * Exercises parsing of prefix, wildcard and fuzzy syntax: the string form of the
 * parsed query, the concrete query class produced, the fuzzy edit distance and prefix
 * length, lower-casing of wildcard terms, and the handling of leading wildcards.
 */
public void testWildcard() throws Exception {
    // String form of the parsed queries.
    assertQueryEquals("term*", null, "term*");
    assertQueryEquals("term*^2", null, "(term*)^2.0");
    assertQueryEquals("term~", null, "term~2");
    assertQueryEquals("term~0.7", null, "term~1");
    assertQueryEquals("term~^3", null, "(term~2)^3.0");
    assertQueryEquals("term^3~", null, "(term~2)^3.0");
    assertQueryEquals("term*germ", null, "term*germ");
    assertQueryEquals("term*germ^3", null, "(term*germ)^3.0");

    // Concrete query classes.
    assertTrue(getQuery("term*", null) instanceof PrefixQuery);
    assertTrue(getQuery("term*^2", null) instanceof BoostQuery);
    BoostQuery boostedPrefix = (BoostQuery) getQuery("term*^2", null);
    assertTrue(boostedPrefix.getQuery() instanceof PrefixQuery);
    assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
    assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);

    // Fuzzy parameters: an explicit 0.7 yields one edit, the bare "~" yields two.
    FuzzyQuery explicitFuzzy = (FuzzyQuery) getQuery("term~0.7", null);
    assertEquals(1, explicitFuzzy.getMaxEdits());
    assertEquals(FuzzyQuery.defaultPrefixLength, explicitFuzzy.getPrefixLength());
    FuzzyQuery defaultFuzzy = (FuzzyQuery) getQuery("term~", null);
    assertEquals(2, defaultFuzzy.getMaxEdits());
    assertEquals(FuzzyQuery.defaultPrefixLength, defaultFuzzy.getPrefixLength());

    // A fuzzy value greater than 1 is rejected with an exception.
    assertQueryNodeException("term~1.1");
    assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);

    /*
     * Check that wildcard terms are (or are not) properly lower-cased
     * with the proper parser configuration.
     */
    // Prefix queries: mixed-case input is converted to lowercase ...
    assertWildcardQueryEquals("Term*", "term*");
    // ... and lower, mixed and upper case all produce the same lowercase form.
    assertWildcardQueryEquals("term*", "term*");
    assertWildcardQueryEquals("Term*", "term*");
    assertWildcardQueryEquals("TERM*", "term*");

    // 'Full' wildcard queries: mixed-case input is converted to lowercase ...
    assertWildcardQueryEquals("Te?m", "te?m");
    // ... and lower, mixed and upper case all produce the same lowercase form.
    assertWildcardQueryEquals("te?m", "te?m");
    assertWildcardQueryEquals("Te?m", "te?m");
    assertWildcardQueryEquals("TE?M", "te?m");
    assertWildcardQueryEquals("Te?m*gerM", "te?m*germ");

    // Fuzzy queries.
    assertWildcardQueryEquals("Term~", "term~2");

    // Range queries.
    // TODO: implement this on QueryParser. Error recorded in the original note:
    // Q0002E_INVALID_SYNTAX_CANNOT_PARSE: Syntax Error, cannot parse '[A TO
    // C]': Lexical error at line 1, column 1. Encountered: "[" (91), after
    // : ""
    assertWildcardQueryEquals("[A TO C]", "[a TO c]");

    // Suffix (leading-wildcard) queries are first expected to be disallowed ...
    expectThrows(QueryNodeException.class, () -> assertWildcardQueryEquals("*Term", "*term"));
    expectThrows(QueryNodeException.class, () -> assertWildcardQueryEquals("?Term", "?term"));
    // ... and then to parse once explicitly allowed.
    assertWildcardQueryEquals("*Term", "*term", true);
    assertWildcardQueryEquals("?Term", "?term", true);
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) BoostQuery(org.apache.lucene.search.BoostQuery)

Aggregations

FuzzyQuery (org.apache.lucene.search.FuzzyQuery): 34, Term (org.apache.lucene.index.Term): 26, PrefixQuery (org.apache.lucene.search.PrefixQuery): 20, BooleanQuery (org.apache.lucene.search.BooleanQuery): 17, BoostQuery (org.apache.lucene.search.BoostQuery): 16, Query (org.apache.lucene.search.Query): 16, TermQuery (org.apache.lucene.search.TermQuery): 12, WildcardQuery (org.apache.lucene.search.WildcardQuery): 12, MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 8, PhraseQuery (org.apache.lucene.search.PhraseQuery): 8, RegexpQuery (org.apache.lucene.search.RegexpQuery): 7, MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery): 6, TermRangeQuery (org.apache.lucene.search.TermRangeQuery): 6, BooleanClause (org.apache.lucene.search.BooleanClause): 5, ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery): 5, DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery): 5, TopDocs (org.apache.lucene.search.TopDocs): 5, SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery): 5, SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery): 5, Map (java.util.Map): 4