Search in sources :

Example 41 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestSpanMultiTermQueryWrapper method testNoSuchMultiTermsInOr.

public void testNoSuchMultiTermsInOr() throws Exception {
    //test to make sure non existent multiterms aren't throwing null pointer exceptions  
    FuzzyQuery fuzzyNoSuch = new FuzzyQuery(new Term("field", "noSuch"), 1, 0, 1, false);
    SpanQuery spanNoSuch = new SpanMultiTermQueryWrapper<>(fuzzyNoSuch);
    SpanQuery term = new SpanTermQuery(new Term("field", "brown"));
    SpanOrQuery near = new SpanOrQuery(new SpanQuery[] { term, spanNoSuch });
    assertEquals(1, searcher.search(near, 10).totalHits);
    //flip
    near = new SpanOrQuery(new SpanQuery[] { spanNoSuch, term });
    assertEquals(1, searcher.search(near, 10).totalHits);
    WildcardQuery wcNoSuch = new WildcardQuery(new Term("field", "noSuch*"));
    SpanQuery spanWCNoSuch = new SpanMultiTermQueryWrapper<>(wcNoSuch);
    near = new SpanOrQuery(new SpanQuery[] { term, spanWCNoSuch });
    assertEquals(1, searcher.search(near, 10).totalHits);
    RegexpQuery rgxNoSuch = new RegexpQuery(new Term("field", "noSuch"));
    SpanQuery spanRgxNoSuch = new SpanMultiTermQueryWrapper<>(rgxNoSuch);
    near = new SpanOrQuery(new SpanQuery[] { term, spanRgxNoSuch });
    assertEquals(1, searcher.search(near, 10).totalHits);
    PrefixQuery prfxNoSuch = new PrefixQuery(new Term("field", "noSuch"));
    SpanQuery spanPrfxNoSuch = new SpanMultiTermQueryWrapper<>(prfxNoSuch);
    near = new SpanOrQuery(new SpanQuery[] { term, spanPrfxNoSuch });
    assertEquals(1, searcher.search(near, 10).totalHits);
    near = new SpanOrQuery(new SpanQuery[] { spanPrfxNoSuch });
    assertEquals(0, searcher.search(near, 10).totalHits);
    near = new SpanOrQuery(new SpanQuery[] { spanPrfxNoSuch, spanPrfxNoSuch });
    assertEquals(0, searcher.search(near, 10).totalHits);
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) Term(org.apache.lucene.index.Term) RegexpQuery(org.apache.lucene.search.RegexpQuery)

Example 42 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestSpanMultiTermQueryWrapper method testNoSuchMultiTermsInNear.

public void testNoSuchMultiTermsInNear() throws Exception {
    //test to make sure non existent multiterms aren't throwing null pointer exceptions  
    FuzzyQuery fuzzyNoSuch = new FuzzyQuery(new Term("field", "noSuch"), 1, 0, 1, false);
    SpanQuery spanNoSuch = new SpanMultiTermQueryWrapper<>(fuzzyNoSuch);
    SpanQuery term = new SpanTermQuery(new Term("field", "brown"));
    SpanQuery near = new SpanNearQuery(new SpanQuery[] { term, spanNoSuch }, 1, true);
    assertEquals(0, searcher.search(near, 10).totalHits);
    //flip order
    near = new SpanNearQuery(new SpanQuery[] { spanNoSuch, term }, 1, true);
    assertEquals(0, searcher.search(near, 10).totalHits);
    WildcardQuery wcNoSuch = new WildcardQuery(new Term("field", "noSuch*"));
    SpanQuery spanWCNoSuch = new SpanMultiTermQueryWrapper<>(wcNoSuch);
    near = new SpanNearQuery(new SpanQuery[] { term, spanWCNoSuch }, 1, true);
    assertEquals(0, searcher.search(near, 10).totalHits);
    RegexpQuery rgxNoSuch = new RegexpQuery(new Term("field", "noSuch"));
    SpanQuery spanRgxNoSuch = new SpanMultiTermQueryWrapper<>(rgxNoSuch);
    near = new SpanNearQuery(new SpanQuery[] { term, spanRgxNoSuch }, 1, true);
    assertEquals(0, searcher.search(near, 10).totalHits);
    PrefixQuery prfxNoSuch = new PrefixQuery(new Term("field", "noSuch"));
    SpanQuery spanPrfxNoSuch = new SpanMultiTermQueryWrapper<>(prfxNoSuch);
    near = new SpanNearQuery(new SpanQuery[] { term, spanPrfxNoSuch }, 1, true);
    assertEquals(0, searcher.search(near, 10).totalHits);
    //test single noSuch
    near = new SpanNearQuery(new SpanQuery[] { spanPrfxNoSuch }, 1, true);
    assertEquals(0, searcher.search(near, 10).totalHits);
    //test double noSuch
    near = new SpanNearQuery(new SpanQuery[] { spanPrfxNoSuch, spanPrfxNoSuch }, 1, true);
    assertEquals(0, searcher.search(near, 10).totalHits);
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) Term(org.apache.lucene.index.Term) RegexpQuery(org.apache.lucene.search.RegexpQuery)

Example 43 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class MultiTermHighlighting method extractAutomata.

/**
   * Extracts MultiTermQueries that match the provided field predicate.
   * Returns equivalent automata that will match terms.
   */
public static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
    // TODO Lucene needs a Query visitor API!  LUCENE-3041
    List<CharacterRunAutomaton> list = new ArrayList<>();
    Collection<Query> customSubQueries = preRewriteFunc.apply(query);
    if (customSubQueries != null) {
        for (Query sub : customSubQueries) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
            }
        }
    } else if (query instanceof ConstantScoreQuery) {
        list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof BoostQuery) {
        list.addAll(Arrays.asList(extractAutomata(((BoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanOrQuery) {
        for (Query sub : ((SpanOrQuery) query).getClauses()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanNearQuery) {
        for (Query sub : ((SpanNearQuery) query).getClauses()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanNotQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanBoostQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanBoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
        list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof PrefixQuery) {
        final PrefixQuery pq = (PrefixQuery) query;
        Term prefix = pq.getPrefix();
        if (fieldMatcher.test(prefix.field())) {
            list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()), Automata.makeAnyString())) {

                @Override
                public String toString() {
                    return pq.toString();
                }
            });
        }
    } else if (query instanceof FuzzyQuery) {
        final FuzzyQuery fq = (FuzzyQuery) query;
        if (fieldMatcher.test(fq.getField())) {
            String utf16 = fq.getTerm().text();
            int[] termText = new int[utf16.codePointCount(0, utf16.length())];
            for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
                termText[j++] = cp = utf16.codePointAt(i);
            }
            int termLength = termText.length;
            int prefixLength = Math.min(fq.getPrefixLength(), termLength);
            String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
            LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
            String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
            Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
            list.add(new CharacterRunAutomaton(automaton) {

                @Override
                public String toString() {
                    return fq.toString();
                }
            });
        }
    } else if (query instanceof TermRangeQuery) {
        final TermRangeQuery tq = (TermRangeQuery) query;
        if (fieldMatcher.test(tq.getField())) {
            final CharsRef lowerBound;
            if (tq.getLowerTerm() == null) {
                lowerBound = null;
            } else {
                lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
            }
            final CharsRef upperBound;
            if (tq.getUpperTerm() == null) {
                upperBound = null;
            } else {
                upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
            }
            final boolean includeLower = tq.includesLower();
            final boolean includeUpper = tq.includesUpper();
            final CharsRef scratch = new CharsRef();
            @SuppressWarnings("deprecation") final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
            // this is *not* an automaton, but its very simple
            list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {

                @Override
                public boolean run(char[] s, int offset, int length) {
                    scratch.chars = s;
                    scratch.offset = offset;
                    scratch.length = length;
                    if (lowerBound != null) {
                        int cmp = comparator.compare(scratch, lowerBound);
                        if (cmp < 0 || (!includeLower && cmp == 0)) {
                            return false;
                        }
                    }
                    if (upperBound != null) {
                        int cmp = comparator.compare(scratch, upperBound);
                        if (cmp > 0 || (!includeUpper && cmp == 0)) {
                            return false;
                        }
                    }
                    return true;
                }

                @Override
                public String toString() {
                    return tq.toString();
                }
            });
        }
    } else if (query instanceof AutomatonQuery) {
        final AutomatonQuery aq = (AutomatonQuery) query;
        if (fieldMatcher.test(aq.getField())) {
            list.add(new CharacterRunAutomaton(aq.getAutomaton()) {

                @Override
                public String toString() {
                    return aq.toString();
                }
            });
        }
    }
    return list.toArray(new CharacterRunAutomaton[list.size()]);
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) ArrayList(java.util.ArrayList) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Comparator(java.util.Comparator) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) Automaton(org.apache.lucene.util.automaton.Automaton) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) LevenshteinAutomata(org.apache.lucene.util.automaton.LevenshteinAutomata) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharsRef(org.apache.lucene.util.CharsRef) BooleanClause(org.apache.lucene.search.BooleanClause) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 44 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testWithMaxLen.

//
//  All tests below were *not* ported from the PostingsHighlighter; they are new to the U.H.
//
public void testWithMaxLen() throws IOException {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    //44 char long, 2 sentences
    body.setStringValue("Alpha Bravo foo foo foo. Foo foo Alpha Bravo");
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    //a little past first sentence
    highlighter.setMaxLength(25);
    BooleanQuery query = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "alpha")), BooleanClause.Occur.MUST).add(new PrefixQuery(new Term("body", "bra")), BooleanClause.Occur.MUST).build();
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    //ask for 2 but we'll only get 1
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    assertArrayEquals(new String[] { "<b>Alpha</b> <b>Bravo</b> foo foo foo. " }, snippets);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) PrefixQuery(org.apache.lucene.search.PrefixQuery) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 45 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testNothingAnalyzes.

/**
   * Not empty but nothing analyzes. Ensures we address null term-vectors.
   */
public void testNothingAnalyzes() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Document doc = new Document();
    // just a space! (thus not empty)
    doc.add(new Field("body", " ", fieldType));
    doc.add(newTextField("id", "id", Field.Store.YES));
    iw.addDocument(doc);
    doc = new Document();
    doc.add(new Field("body", "something", fieldType));
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
    Query query = new PrefixQuery(new Term("body", "nonexistent"));
    int[] docIDs = new int[1];
    docIDs[0] = docID;
    String[] snippets = highlighter.highlightFields(new String[] { "body" }, query, docIDs, new int[] { 2 }).get("body");
    assertEquals(1, snippets.length);
    assertEquals(" ", snippets[0]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Field(org.apache.lucene.document.Field) PrefixQuery(org.apache.lucene.search.PrefixQuery) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Aggregations

PrefixQuery (org.apache.lucene.search.PrefixQuery)68 Term (org.apache.lucene.index.Term)62 BooleanQuery (org.apache.lucene.search.BooleanQuery)34 Query (org.apache.lucene.search.Query)30 TermQuery (org.apache.lucene.search.TermQuery)29 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)27 WildcardQuery (org.apache.lucene.search.WildcardQuery)23 BoostQuery (org.apache.lucene.search.BoostQuery)20 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)19 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)15 Document (org.apache.lucene.document.Document)14 IndexSearcher (org.apache.lucene.search.IndexSearcher)14 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)14 PhraseQuery (org.apache.lucene.search.PhraseQuery)14 RegexpQuery (org.apache.lucene.search.RegexpQuery)13 TopDocs (org.apache.lucene.search.TopDocs)13 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)12 IndexReader (org.apache.lucene.index.IndexReader)11 TermRangeQuery (org.apache.lucene.search.TermRangeQuery)11 Field (org.apache.lucene.document.Field)10