Search in sources :

Example 6 with TestHighlightRunner

use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.

the class SynonymTokenizer method testFieldSpecificHighlighting.

/**
 * Checks field-aware highlighting for a two-clause query: "fred" on {@code FIELD_NAME} and
 * "people" on the "category" field.
 *
 * <p>A scorer built for a specific field is expected to highlight only "fred"; a scorer built
 * without a field is expected to highlight both "fred" and "people".
 *
 * @throws Exception if the runner or the highlighter fails
 */
public void testFieldSpecificHighlighting() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            String docMainText = "fred is one of the people";
            BooleanQuery.Builder query = new BooleanQuery.Builder();
            query.add(new TermQuery(new Term(FIELD_NAME, "fred")), Occur.SHOULD);
            query.add(new TermQuery(new Term("category", "people")), Occur.SHOULD);

            // Highlighting respects the field names used in the query.
            // The scorer stays null if mode matches neither constant.
            Scorer fieldSpecificScorer = null;
            if (mode == TestHighlightRunner.QUERY) {
                fieldSpecificScorer = new QueryScorer(query.build(), FIELD_NAME);
            } else if (mode == TestHighlightRunner.QUERY_TERM) {
                // NOTE(review): this branch passes the literal "contents" instead of FIELD_NAME;
                // presumably they hold the same value — confirm.
                fieldSpecificScorer = new QueryTermScorer(query.build(), "contents");
            }
            Highlighter fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldSpecificScorer);
            fieldSpecificHighlighter.setTextFragmenter(new NullFragmenter());
            String result = fieldSpecificHighlighter.getBestFragment(analyzer, FIELD_NAME, docMainText);
            // Expected value goes first so a failure reports expected/actual the right way round.
            assertEquals("Should match", "<B>fred</B> is one of the people", result);

            // Highlighting does not respect the field names used in the query.
            Scorer fieldInSpecificScorer = null;
            if (mode == TestHighlightRunner.QUERY) {
                fieldInSpecificScorer = new QueryScorer(query.build(), null);
            } else if (mode == TestHighlightRunner.QUERY_TERM) {
                fieldInSpecificScorer = new QueryTermScorer(query.build());
            }
            Highlighter fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldInSpecificScorer);
            fieldInSpecificHighlighter.setTextFragmenter(new NullFragmenter());
            result = fieldInSpecificHighlighter.getBestFragment(analyzer, FIELD_NAME, docMainText);
            assertEquals("Should match", "<B>fred</B> is one of the <B>people</B>", result);

            // NOTE(review): reader is declared outside this block; if start() invokes run() more
            // than once this close is repeated — confirm that is safe.
            reader.close();
        }
    };
    helper.start();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) Builder(org.apache.lucene.search.PhraseQuery.Builder) DocumentBuilder(javax.xml.parsers.DocumentBuilder) Term(org.apache.lucene.index.Term)

Example 7 with TestHighlightRunner

use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.

In the class HighlighterTest, the method testGetTextFragments.

/**
 * Searches for the term "kennedy" and, for each hit, compares the fragments returned by
 * {@code getBestFragments} (as strings) with those returned by {@code getBestTextFragments}:
 * both must have the same count and the same text, position by position.
 *
 * @throws Exception if searching or highlighting fails
 */
public void testGetTextFragments() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
            for (int hit = 0; hit < hits.totalHits; hit++) {
                final int docId = hits.scoreDocs[hit].doc;
                final Document storedDoc = searcher.doc(docId);
                String fieldText = storedDoc.get(FIELD_NAME);

                // First pass: fragments as plain strings, using the document's own token stream.
                TokenStream docStream = getAnyTokenStream(FIELD_NAME, docId);
                Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this);
                highlighter.setTextFragmenter(new SimpleFragmenter(20));
                String[] asStrings = highlighter.getBestFragments(docStream, fieldText, 10);

                // Second pass: fragments as TextFragment objects, using a freshly analyzed stream.
                TokenStream analyzedStream = analyzer.tokenStream(FIELD_NAME, fieldText);
                TextFragment[] asFragments = highlighter.getBestTextFragments(analyzedStream, fieldText, true, 10);

                boolean sameCount = asFragments.length == asStrings.length;
                assertTrue("Failed to find correct number of text Fragments: " + asFragments.length + " vs " + asStrings.length, sameCount);

                for (int pos = 0; pos < asStrings.length; pos++) {
                    if (VERBOSE) {
                        System.out.println(asFragments[pos]);
                    }
                    boolean sameText = asFragments[pos].toString().equals(asStrings[pos]);
                    assertTrue("Failed to find same text Fragments: " + asFragments[pos] + " found", sameText);
                }
            }
        }
    };
    helper.start();
}
Also used : MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint)

Example 8 with TestHighlightRunner

use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.

In the class HighlighterTest, the method testUnRewrittenQuery.

/**
 * Runs the highlighter with a boolean query of two wildcard clauses ("jf?" and "kenned*")
 * that is deliberately NOT rewritten, and asserts that {@code numHighlights} is still zero
 * after every hit has been processed.
 *
 * @throws Exception if searching or highlighting fails
 */
public void testUnRewrittenQuery() throws Exception {
    final TestHighlightRunner helper = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            numHighlights = 0;
            // test to show how rewritten query can still be used
            searcher = newSearcher(reader);
            BooleanQuery.Builder query = new BooleanQuery.Builder();
            query.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
            query.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);
            if (VERBOSE) {
                System.out.println("Searching with primitive query");
            }
            // Build the query once and reuse it for the search and for every highlighter,
            // instead of rebuilding an identical query on each loop iteration.
            // The rewrite step (query.rewrite(reader)) is intentionally skipped here.
            final BooleanQuery unrewritten = query.build();
            TopDocs hits = searcher.search(unrewritten, 1000);
            int maxNumFragmentsRequired = 3;
            for (int i = 0; i < hits.totalHits; i++) {
                final int docId = hits.scoreDocs[i].doc;
                final Document doc = searcher.doc(docId);
                String text = doc.get(FIELD_NAME);
                TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
                Highlighter highlighter = getHighlighter(unrewritten, FIELD_NAME, HighlighterTest.this, false);
                highlighter.setTextFragmenter(new SimpleFragmenter(40));
                String highlightedText = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
                if (VERBOSE) {
                    System.out.println(highlightedText);
                }
            }
            // We expect to have zero highlights if the query is multi-terms and is
            // not rewritten!
            assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 0);
        }
    };
    helper.start();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Builder(org.apache.lucene.search.PhraseQuery.Builder) DocumentBuilder(javax.xml.parsers.DocumentBuilder) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) TopDocs(org.apache.lucene.search.TopDocs)

Example 9 with TestHighlightRunner

use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.

In the class HighlighterTest, the method testNoFragments.

/**
 * Searches for a term that is not expected to occur anywhere, then highlights every entry of
 * {@code texts} and asserts that the best fragment is {@code null} each time.
 *
 * @throws Exception if searching or highlighting fails
 */
public void testNoFragments() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            Term unmatchedTerm = new Term(FIELD_NAME, "aninvalidquerywhichshouldyieldnoresults");
            doSearching(new TermQuery(unmatchedTerm));
            // NOTE(review): "query" is not declared in this block; presumably doSearching
            // stores the searched query there — confirm.
            for (String sample : texts) {
                TokenStream sampleStream = analyzer.tokenStream(FIELD_NAME, sample);
                Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this);
                String bestFragment = highlighter.getBestFragment(sampleStream, sample);
                assertNull("The highlight result should be null for text with no query terms", bestFragment);
            }
        }
    };
    helper.start();
}
Also used : MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Term(org.apache.lucene.index.Term)

Example 10 with TestHighlightRunner

use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.

the class SynonymTokenizer method testOffByOne.

/**
 * Highlights the term "help" in the text "help me [54-65]" with a null fragmenter and checks
 * that only "help" is wrapped in {@code <B>} tags while the rest of the text is unchanged.
 *
 * @throws Exception if highlighting fails
 */
public void testOffByOne() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            final String field = "data";
            final String input = "help me [54-65]";
            QueryTermScorer scorer = new QueryTermScorer(new TermQuery(new Term(field, "help")));
            Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer);
            // NullFragmenter: the whole input is treated as a single fragment.
            highlighter.setTextFragmenter(new NullFragmenter());
            String highlighted = highlighter.getBestFragment(analyzer, field, input);
            assertEquals("<B>help</B> me [54-65]", highlighted);
        }
    };
    helper.start();
}
Also used : MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) Term(org.apache.lucene.index.Term)

Aggregations

TestHighlightRunner (org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner)28 Term (org.apache.lucene.index.Term)24 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)20 MultiTermQuery (org.apache.lucene.search.MultiTermQuery)16 TermQuery (org.apache.lucene.search.TermQuery)16 DocumentBuilder (javax.xml.parsers.DocumentBuilder)10 BooleanQuery (org.apache.lucene.search.BooleanQuery)10 Builder (org.apache.lucene.search.PhraseQuery.Builder)10 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)8 TokenStream (org.apache.lucene.analysis.TokenStream)8 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)6 IntPoint (org.apache.lucene.document.IntPoint)5 WildcardQuery (org.apache.lucene.search.WildcardQuery)5 SpanQuery (org.apache.lucene.search.spans.SpanQuery)5 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)4 PhraseQuery (org.apache.lucene.search.PhraseQuery)4 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)3 Document (org.apache.lucene.document.Document)3 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)3 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)3