Search in sources :

Example 1 with TestHighlightRunner

use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.

The class HighlighterTest, method testSpanHighlighting.

/**
 * Searches with a disjunction of two span-near queries ("wordx" with "wordy",
 * and "wordy" with "wordc"; both built with slop 1 and inOrder=false), runs the
 * standard highlights in QUERY mode, and expects exactly 7 highlights.
 */
public void testSpanHighlighting() throws Exception {
    SpanQuery[] xWithY = { new SpanTermQuery(new Term(FIELD_NAME, "wordx")), new SpanTermQuery(new Term(FIELD_NAME, "wordy")) };
    SpanQuery[] yWithC = { new SpanTermQuery(new Term(FIELD_NAME, "wordy")), new SpanTermQuery(new Term(FIELD_NAME, "wordc")) };

    // Either span-near clause may match (both are SHOULD).
    BooleanQuery.Builder eitherSpan = new BooleanQuery.Builder();
    eitherSpan.add(new SpanNearQuery(xWithY, 1, false), Occur.SHOULD);
    eitherSpan.add(new SpanNearQuery(yWithC, 1, false), Occur.SHOULD);
    doSearching(eitherSpan.build());

    TestHighlightRunner runner = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            mode = QUERY;
            doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
        }
    };
    // NOTE(review): run() is invoked directly here, whereas the sibling tests call
    // start(); presumably intentional because the mode is fixed inside run() - confirm.
    runner.run();

    final int expectedHighlights = 7;
    assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == expectedHighlights);
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) Query(org.apache.lucene.search.Query) SpanPayloadCheckQuery(org.apache.lucene.queries.payloads.SpanPayloadCheckQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Builder(org.apache.lucene.search.PhraseQuery.Builder) DocumentBuilder(javax.xml.parsers.DocumentBuilder) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 2 with TestHighlightRunner

use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.

The class HighlighterTest, method testGetBestSingleFragmentWithWeights.

/**
 * Checks that the best fragment returned for a set of weighted terms changes
 * with the weights: "hello" (10 vs. 1) is expected first, then "kennedy" after
 * its weight is raised to 50.
 */
public void testGetBestSingleFragmentWithWeights() throws Exception {
    TestHighlightRunner runner = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            // "hello" starts out as the heavier term, with a span at position 0.
            WeightedSpanTerm helloTerm = new WeightedSpanTerm(10f, "hello");
            List<PositionSpan> helloSpans = new ArrayList<>();
            helloSpans.add(new PositionSpan(0, 0));
            helloTerm.addPositionSpans(helloSpans);

            // "kennedy" starts out lighter, with a span at position 14.
            WeightedSpanTerm kennedyTerm = new WeightedSpanTerm(1f, "kennedy");
            List<PositionSpan> kennedySpans = new ArrayList<>();
            kennedySpans.add(new PositionSpan(14, 14));
            kennedyTerm.addPositionSpans(kennedySpans);

            WeightedSpanTerm[] weightedTerms = { helloTerm, kennedyTerm };
            String sourceText = texts[0];

            Highlighter highlighter = getHighlighter(weightedTerms, HighlighterTest.this);
            TokenStream tokens = analyzer.tokenStream(FIELD_NAME, sourceText);
            highlighter.setTextFragmenter(new SimpleFragmenter(2));
            String best = highlighter.getBestFragment(tokens, sourceText).trim();
            assertTrue("Failed to find best section using weighted terms. Found: [" + best + "]", "<B>Hello</B>".equals(best));

            // Raise the weight of "kennedy" above "hello" and highlight again.
            kennedyTerm.setWeight(50f);
            tokens = analyzer.tokenStream(FIELD_NAME, sourceText);
            highlighter = getHighlighter(weightedTerms, HighlighterTest.this);
            highlighter.setTextFragmenter(new SimpleFragmenter(2));
            best = highlighter.getBestFragment(tokens, sourceText).trim();
            assertTrue("Failed to find best section using weighted terms. Found: " + best, "<B>kennedy</B>".equals(best));
        }
    };
    runner.start();
}
Also used : TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) ArrayList(java.util.ArrayList)

Example 3 with TestHighlightRunner

use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.

The class HighlighterTest, method testOverlapAnalyzer2.

/**
 * Highlights "Hi-Speed10 foo" for several queries against the token stream from
 * getTS2(), then repeats the same checks against getTS2a() (the same tests with
 * the bigger overlapping token put first).
 */
public void testOverlapAnalyzer2() throws Exception {
    TestHighlightRunner runner = new TestHighlightRunner() {

        /** Text handed to the highlighter in every check. */
        private final String sampleText = "Hi-Speed10 foo";

        @Override
        public void run() throws Exception {
            // hi OR speed; built once and re-used (via build()) in both passes.
            BooleanQuery.Builder hiOrSpeed = new BooleanQuery.Builder();
            hiOrSpeed.add(new TermQuery(new Term("text", "hi")), Occur.SHOULD);
            hiOrSpeed.add(new TermQuery(new Term("text", "speed")), Occur.SHOULD);

            // Pass 1 reads getTS2(); pass 2 reads getTS2a().
            for (boolean biggerTokenFirst : new boolean[] { false, true }) {
                expectBestFragments(new TermQuery(new Term("text", "foo")), biggerTokenFirst, "Hi-Speed10 <B>foo</B>");
                expectBestFragments(new TermQuery(new Term("text", "10")), biggerTokenFirst, "Hi-Speed<B>10</B> foo");
                expectBestFragments(new TermQuery(new Term("text", "hi")), biggerTokenFirst, "<B>Hi</B>-Speed10 foo");
                expectBestFragments(new TermQuery(new Term("text", "speed")), biggerTokenFirst, "Hi-<B>Speed</B>10 foo");
                expectBestFragments(new TermQuery(new Term("text", "hispeed")), biggerTokenFirst, "<B>Hi-Speed</B>10 foo");
                expectBestFragments(hiOrSpeed.build(), biggerTokenFirst, "<B>Hi-Speed</B>10 foo");
            }
        }

        /**
         * Highlights the sample text for one query and compares the result.
         * The highlighter is created before the token stream is obtained.
         */
        private void expectBestFragments(Query toHighlight, boolean biggerTokenFirst, String expected) throws Exception {
            Highlighter highlighter = getHighlighter(toHighlight, "text", HighlighterTest.this);
            String actual = highlighter.getBestFragments(biggerTokenFirst ? getTS2a() : getTS2(), sampleText, 3, "...");
            assertEquals(expected, actual);
        }
    };
    runner.start();
}
Also used : MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) Query(org.apache.lucene.search.Query) SpanPayloadCheckQuery(org.apache.lucene.queries.payloads.SpanPayloadCheckQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) Builder(org.apache.lucene.search.PhraseQuery.Builder) DocumentBuilder(javax.xml.parsers.DocumentBuilder) Term(org.apache.lucene.index.Term)

Example 4 with TestHighlightRunner

use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.

The class HighlighterTest, method testMaxSizeEndHighlight.

/**
 * Checks that, with a NullFragmenter and the analyzed-character limit set to 36
 * (the length of the sample text), the highlighted output still ends with the
 * trailing "in it" - both of which are stop words for the analyzer used here.
 */
public void testMaxSizeEndHighlight() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            // The regexp i[nt] matches exactly "in" and "it".
            CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
            TermQuery query = new TermQuery(new Term("text", "searchterm"));
            String text = "this is a text with searchterm in it";
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
            Highlighter highlighter = getHighlighter(query, "text", formatter);
            highlighter.setTextFragmenter(new NullFragmenter());
            highlighter.setMaxDocCharsToAnalyze(36);
            // The analyzer is closeable; close it once highlighting is done instead of leaking it.
            try (MockAnalyzer stopWordAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords)) {
                String match = highlighter.getBestFragment(stopWordAnalyzer, "text", text);
                assertTrue("Matched text should contain remainder of text after highlighted query ", match.endsWith("in it"));
            }
        }
    };
    helper.start();
}
Also used : MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) RegExp(org.apache.lucene.util.automaton.RegExp) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) Term(org.apache.lucene.index.Term)

Example 5 with TestHighlightRunner

use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.

The class HighlighterTest, method testGetSimpleHighlight.

/**
 * Searches for the term "kennedy", highlights the best fragment of every
 * retrieved document, and expects 4 highlights in total.
 */
public void testGetSimpleHighlight() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            // Reset the counter checked at the end of this method.
            // NOTE(review): numHighlights is presumably incremented by the formatter
            // passed as HighlighterTest.this; that code is not visible here - confirm.
            numHighlights = 0;
            doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
            // Bound the loop by the retrieved documents: totalHits counts every match
            // and may exceed scoreDocs.length, which would index past the array.
            for (int i = 0; i < hits.scoreDocs.length; i++) {
                String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
                TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
                Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this);
                String result = highlighter.getBestFragment(tokenStream, text);
                if (VERBOSE) {
                    System.out.println("\t" + result);
                }
            }
            assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
        }
    };
    helper.start();
}
Also used : MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Term(org.apache.lucene.index.Term) IntPoint(org.apache.lucene.document.IntPoint)

Aggregations

TestHighlightRunner (org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner): 28; Term (org.apache.lucene.index.Term): 24; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 20; MultiTermQuery (org.apache.lucene.search.MultiTermQuery): 16; TermQuery (org.apache.lucene.search.TermQuery): 16; DocumentBuilder (javax.xml.parsers.DocumentBuilder): 10; BooleanQuery (org.apache.lucene.search.BooleanQuery): 10; Builder (org.apache.lucene.search.PhraseQuery.Builder): 10; CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 8; TokenStream (org.apache.lucene.analysis.TokenStream): 8; SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery): 6; IntPoint (org.apache.lucene.document.IntPoint): 5; WildcardQuery (org.apache.lucene.search.WildcardQuery): 5; SpanQuery (org.apache.lucene.search.spans.SpanQuery): 5; MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery): 4; PhraseQuery (org.apache.lucene.search.PhraseQuery): 4; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 3; Document (org.apache.lucene.document.Document): 3; ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery): 3; FuzzyQuery (org.apache.lucene.search.FuzzyQuery): 3