Search in sources :

Example 26 with TermRangeQuery

use of org.apache.lucene.search.TermRangeQuery in project lucene-solr by apache.

the class HighlighterTest method testGetRangeFragments.

/**
 * Searches with an inclusive term range query ("kannedy" through "kznnedy"),
 * runs the standard highlighting over the hits, and checks that exactly five
 * highlights were counted.
 */
public void testGetRangeFragments() throws Exception {
    final TestHighlightRunner runner = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            // Start counting highlights from zero for this run.
            numHighlights = 0;

            // Both ends of the range are inclusive (the two trailing "true" flags).
            final BytesRef lowerTerm = new BytesRef("kannedy");
            final BytesRef upperTerm = new BytesRef("kznnedy");
            final TermRangeQuery kennedyRange = new TermRangeQuery(FIELD_NAME, lowerTerm, upperTerm, true, true);

            // The range query is explicitly switched to the scoring boolean rewrite
            // before it is searched and highlighted.
            kennedyRange.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
            query = kennedyRange;

            doSearching(query);
            doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);

            final boolean foundExpectedCount = numHighlights == 5;
            assertTrue("Failed to find correct number of highlights " + numHighlights + " found", foundExpectedCount);
        }
    };
    runner.start();
}
Also used : TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) BytesRef(org.apache.lucene.util.BytesRef)

Example 27 with TermRangeQuery

use of org.apache.lucene.search.TermRangeQuery in project lucene-solr by apache.

the class SynonymTokenizer method testNotRewriteMultiTermQuery.

/**
 * Extracts weighted span terms for field "foo" from a multi-term query that
 * targets field "bar", asserting inside the extractor that each query passed
 * to {@code extract} is equal to the original multi-term query.
 */
public void testNotRewriteMultiTermQuery() throws IOException {
    // The range query is on "bar", which is NOT the field requested from the extractor below.
    final BytesRef lowerTerm = new BytesRef("aa");
    final BytesRef upperTerm = new BytesRef("zz");
    final MultiTermQuery barRange = new TermRangeQuery("bar", lowerTerm, upperTerm, true, true);

    final WeightedSpanTermExtractor checkingExtractor = new WeightedSpanTermExtractor() {

        @Override
        protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
            // Verify what the extractor hands us before delegating to the real implementation.
            assertEquals(barRange, query);
            super.extract(query, boost, terms);
        }
    };
    checkingExtractor.setExpandMultiTermQuery(true);
    checkingExtractor.setMaxDocCharsToAnalyze(51200);

    // Two canned tokens, extracted against field "foo".
    final CannedTokenStream fooTokens = new CannedTokenStream(new Token("aa", 0, 2), new Token("bb", 2, 4));
    checkingExtractor.getWeightedSpanTerms(barRange, 3, fooTokens, "foo");
}
Also used : MultiTermQuery(org.apache.lucene.search.MultiTermQuery) Query(org.apache.lucene.search.Query) SpanPayloadCheckQuery(org.apache.lucene.queries.payloads.SpanPayloadCheckQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) Token(org.apache.lucene.analysis.Token) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Map(java.util.Map) HashMap(java.util.HashMap) BytesRef(org.apache.lucene.util.BytesRef)

Example 28 with TermRangeQuery

use of org.apache.lucene.search.TermRangeQuery in project lucene-solr by apache.

the class MultiTermHighlighting method extractAutomata.

/**
   * Walks {@code query} and collects one {@link CharacterRunAutomaton} for each
   * prefix, fuzzy, term-range or automaton query whose field is accepted by
   * {@code fieldMatcher}.
   *
   * <p>If {@code preRewriteFunc} returns a non-null collection for a query, only
   * those sub-queries are examined for it. Otherwise boolean (non-prohibited
   * clauses only), constant-score, boost and disjunction-max queries are descended
   * into, and span queries are descended into only when {@code lookInSpan} is true.
   * Any other query type contributes nothing.
   *
   * @param query          the query to inspect
   * @param fieldMatcher   tested against the field of each multi-term query found
   * @param lookInSpan     whether span queries should be descended into
   * @param preRewriteFunc applied to every visited query; a non-null result replaces
   *                       the built-in handling for that query
   * @return the collected automata, possibly an empty array
   */
public static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
    // TODO Lucene needs a Query visitor API!  LUCENE-3041
    final List<CharacterRunAutomaton> automata = new ArrayList<>();
    final Collection<Query> preRewritten = preRewriteFunc.apply(query);
    if (preRewritten != null) {
        // The caller-supplied function took over: recurse into exactly what it returned.
        for (Query child : preRewritten) {
            automata.addAll(Arrays.asList(extractAutomata(child, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (clause.isProhibited()) {
                // Prohibited clauses are not descended into.
                continue;
            }
            automata.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (query instanceof ConstantScoreQuery) {
        final Query wrapped = ((ConstantScoreQuery) query).getQuery();
        automata.addAll(Arrays.asList(extractAutomata(wrapped, fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof BoostQuery) {
        final Query boosted = ((BoostQuery) query).getQuery();
        automata.addAll(Arrays.asList(extractAutomata(boosted, fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query disjunct : ((DisjunctionMaxQuery) query).getDisjuncts()) {
            automata.addAll(Arrays.asList(extractAutomata(disjunct, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanOrQuery) {
        for (Query spanClause : ((SpanOrQuery) query).getClauses()) {
            automata.addAll(Arrays.asList(extractAutomata(spanClause, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanNearQuery) {
        for (Query spanClause : ((SpanNearQuery) query).getClauses()) {
            automata.addAll(Arrays.asList(extractAutomata(spanClause, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanNotQuery) {
        // Only the "include" side of a span-not query is examined.
        final Query included = ((SpanNotQuery) query).getInclude();
        automata.addAll(Arrays.asList(extractAutomata(included, fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
        final Query matched = ((SpanPositionCheckQuery) query).getMatch();
        automata.addAll(Arrays.asList(extractAutomata(matched, fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanBoostQuery) {
        final Query boostedSpan = ((SpanBoostQuery) query).getQuery();
        automata.addAll(Arrays.asList(extractAutomata(boostedSpan, fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
        final Query unwrapped = ((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery();
        automata.addAll(Arrays.asList(extractAutomata(unwrapped, fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof PrefixQuery) {
        final PrefixQuery prefixQuery = (PrefixQuery) query;
        final Term prefixTerm = prefixQuery.getPrefix();
        if (fieldMatcher.test(prefixTerm.field())) {
            // The prefix text followed by any string.
            final Automaton prefixThenAnything = Operations.concatenate(Automata.makeString(prefixTerm.text()), Automata.makeAnyString());
            automata.add(new CharacterRunAutomaton(prefixThenAnything) {

                @Override
                public String toString() {
                    return prefixQuery.toString();
                }
            });
        }
    } else if (query instanceof FuzzyQuery) {
        final FuzzyQuery fuzzyQuery = (FuzzyQuery) query;
        if (fieldMatcher.test(fuzzyQuery.getField())) {
            // Decode the term into code points so the prefix length is counted in
            // code points rather than UTF-16 chars.
            final String termString = fuzzyQuery.getTerm().text();
            final int[] codePoints = new int[termString.codePointCount(0, termString.length())];
            int charIndex = 0;
            int codePointIndex = 0;
            while (charIndex < termString.length()) {
                final int codePoint = termString.codePointAt(charIndex);
                codePoints[codePointIndex++] = codePoint;
                charIndex += Character.charCount(codePoint);
            }
            // The prefix length is capped at the length of the term.
            final int prefixLength = Math.min(fuzzyQuery.getPrefixLength(), codePoints.length);
            final String prefixText = UnicodeUtil.newString(codePoints, 0, prefixLength);
            final String suffixText = UnicodeUtil.newString(codePoints, prefixLength, codePoints.length - prefixLength);
            final LevenshteinAutomata levenshtein = new LevenshteinAutomata(suffixText, fuzzyQuery.getTranspositions());
            final Automaton fuzzyAutomaton = levenshtein.toAutomaton(fuzzyQuery.getMaxEdits(), prefixText);
            automata.add(new CharacterRunAutomaton(fuzzyAutomaton) {

                @Override
                public String toString() {
                    return fuzzyQuery.toString();
                }
            });
        }
    } else if (query instanceof TermRangeQuery) {
        final TermRangeQuery rangeQuery = (TermRangeQuery) query;
        if (fieldMatcher.test(rangeQuery.getField())) {
            // A null bound means that end of the range is open.
            final CharsRef lowerBound = rangeQuery.getLowerTerm() == null ? null : new CharsRef(rangeQuery.getLowerTerm().utf8ToString());
            final CharsRef upperBound = rangeQuery.getUpperTerm() == null ? null : new CharsRef(rangeQuery.getUpperTerm().utf8ToString());
            final boolean includeLower = rangeQuery.includesLower();
            final boolean includeUpper = rangeQuery.includesUpper();
            // Reused on every run() call as a view over the caller's char array.
            final CharsRef scratch = new CharsRef();
            @SuppressWarnings("deprecation") final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
            // Not a real automaton: the empty automaton handed to the superclass is a
            // placeholder, and run() is overridden to do a plain range comparison.
            automata.add(new CharacterRunAutomaton(Automata.makeEmpty()) {

                @Override
                public boolean run(char[] s, int offset, int length) {
                    scratch.chars = s;
                    scratch.offset = offset;
                    scratch.length = length;
                    if (lowerBound != null) {
                        final int vsLower = comparator.compare(scratch, lowerBound);
                        final boolean lowerSatisfied = vsLower > 0 || (vsLower == 0 && includeLower);
                        if (!lowerSatisfied) {
                            return false;
                        }
                    }
                    if (upperBound == null) {
                        return true;
                    }
                    final int vsUpper = comparator.compare(scratch, upperBound);
                    return vsUpper < 0 || (vsUpper == 0 && includeUpper);
                }

                @Override
                public String toString() {
                    return rangeQuery.toString();
                }
            });
        }
    } else if (query instanceof AutomatonQuery) {
        final AutomatonQuery automatonQuery = (AutomatonQuery) query;
        if (fieldMatcher.test(automatonQuery.getField())) {
            automata.add(new CharacterRunAutomaton(automatonQuery.getAutomaton()) {

                @Override
                public String toString() {
                    return automatonQuery.toString();
                }
            });
        }
    }
    return automata.toArray(new CharacterRunAutomaton[automata.size()]);
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) ArrayList(java.util.ArrayList) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Comparator(java.util.Comparator) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) Automaton(org.apache.lucene.util.automaton.Automaton) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) LevenshteinAutomata(org.apache.lucene.util.automaton.LevenshteinAutomata) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharsRef(org.apache.lucene.util.CharsRef) BooleanClause(org.apache.lucene.search.BooleanClause) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) 
PrefixQuery(org.apache.lucene.search.PrefixQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 29 with TermRangeQuery

use of org.apache.lucene.search.TermRangeQuery in project tika by apache.

the class RecentFiles method generateRSS.

/**
 * Builds an RSS document from index entries whose {@code Metadata.DATE} term lies
 * between five minutes ago and now (both ends inclusive).
 *
 * <p>The output is the result of {@code getRSSHeaders()}, followed by one
 * {@code getRSSItem(doc)} entry per hit, followed by {@code getRSSFooters()}.
 *
 * @param indexFile location of the index, opened through a {@code SimpleFSDirectory}
 * @return the assembled RSS text
 * @throws CorruptIndexException propagated from opening or searching the index
 * @throws IOException           propagated from opening, searching or closing the index
 */
public String generateRSS(File indexFile) throws CorruptIndexException, IOException {
    // The buffer never leaves this method, so the unsynchronized builder is sufficient.
    StringBuilder output = new StringBuilder();
    output.append(getRSSHeaders());
    IndexSearcher searcher = null;
    try {
        // NOTE(review): `reader` is not declared in this method (presumably an instance
        // field of the enclosing class); it is assigned here and closed below.
        reader = IndexReader.open(new SimpleFSDirectory(indexFile));
        searcher = new IndexSearcher(reader);

        // Format "now" first, then move the same calendar back five minutes for the lower bound.
        GregorianCalendar gc = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
        gc.setTime(new Date());
        String nowDateTime = ISO8601.format(gc);
        gc.add(GregorianCalendar.MINUTE, -5);
        String fiveMinsAgo = ISO8601.format(gc);

        TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(), fiveMinsAgo, nowDateTime, true, true);
        // The collector is created with 20 as its hit-count argument.
        TopScoreDocCollector collector = TopScoreDocCollector.create(20, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            Document doc = searcher.doc(hit.doc);
            output.append(getRSSItem(doc));
        }
    } finally {
        // Nested try/finally so the searcher is still closed when closing the
        // reader throws; previously such a failure skipped searcher.close().
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            if (searcher != null) {
                searcher.close();
            }
        }
    }
    output.append(getRSSFooters());
    return output.toString();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) GregorianCalendar(java.util.GregorianCalendar) Document(org.apache.lucene.document.Document) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) Date(java.util.Date) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Aggregations

TermRangeQuery (org.apache.lucene.search.TermRangeQuery)29 BooleanQuery (org.apache.lucene.search.BooleanQuery)13 TermQuery (org.apache.lucene.search.TermQuery)11 IndexSearcher (org.apache.lucene.search.IndexSearcher)10 Query (org.apache.lucene.search.Query)10 BytesRef (org.apache.lucene.util.BytesRef)10 Term (org.apache.lucene.index.Term)8 Document (org.apache.lucene.document.Document)7 IndexReader (org.apache.lucene.index.IndexReader)5 BoostQuery (org.apache.lucene.search.BoostQuery)5 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)5 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)5 MultiTermQuery (org.apache.lucene.search.MultiTermQuery)5 PrefixQuery (org.apache.lucene.search.PrefixQuery)5 ScoreDoc (org.apache.lucene.search.ScoreDoc)5 TextField (org.apache.lucene.document.TextField)4 IndexWriter (org.apache.lucene.index.IndexWriter)3 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)3 PhraseQuery (org.apache.lucene.search.PhraseQuery)3 WildcardQuery (org.apache.lucene.search.WildcardQuery)3