Search in sources :

Example 6 with SimpleHTMLEncoder

use of org.apache.lucene.search.highlight.SimpleHTMLEncoder in project openolat by klemens.

In the class SearchResultsImpl, the method doHighlight:

/**
 * Highlights the query words in a result document and stores the highlighted
 * snippets on {@code resultDocument}.
 * <p>
 * The content field is highlighted first; if that produces no fragment (or the
 * document has no content value), the description field is used instead and
 * {@code resultDocument} is flagged as highlighting the description. The title
 * is highlighted separately. A field value that is {@code null} is treated as
 * "nothing to highlight" instead of failing with a NullPointerException.
 *
 * @param query          the search query whose terms are highlighted
 * @param analyzer       the analyzer used to tokenize the field values
 * @param doc            the Lucene document the field values are read from
 * @param resultDocument the result object that receives the highlighted strings
 * @throws IOException if tokenizing a field value fails
 */
private void doHighlight(Query query, Analyzer analyzer, Document doc, ResultDocument resultDocument) throws IOException {
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new SimpleHTMLEncoder(), new QueryScorer(query));
    try {
        // Highlight content: up to 3 best fragments, separated by HIGHLIGHT_SEPARATOR.
        String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        String highlightResult = bestFragmentsOrEmpty(highlighter, analyzer, AbstractOlatDocument.CONTENT_FIELD_NAME, content, HIGHLIGHT_SEPARATOR);
        // Nothing highlighted in the content => look in the description.
        if (highlightResult.length() == 0) {
            String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            highlightResult = bestFragmentsOrEmpty(highlighter, analyzer, AbstractOlatDocument.DESCRIPTION_FIELD_NAME, description, HIGHLIGHT_SEPARATOR);
            resultDocument.setHighlightingDescription(true);
        }
        resultDocument.setHighlightResult(highlightResult);

        // Highlight title; overly long titles are filtered and cut to 128 characters first.
        String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        if (title != null) {
            title = title.trim();
            if (title.length() > 128) {
                title = FilterFactory.getHtmlTagAndDescapingFilter().filter(title);
                title = Formatter.truncate(title, 128);
            }
        }
        String highlightTitle = bestFragmentsOrEmpty(highlighter, analyzer, AbstractOlatDocument.TITLE_FIELD_NAME, title, " ");
        resultDocument.setHighlightTitle(highlightTitle);
    } catch (InvalidTokenOffsetsException e) {
        // Best effort: a document that cannot be highlighted is still returned, just without highlights.
        log.warn("Could not highlight search result", e);
    }
}

/**
 * Returns the (up to 3) best highlighted fragments of {@code text}, joined with
 * {@code separator}, or an empty string when {@code text} is {@code null}.
 */
private String bestFragmentsOrEmpty(Highlighter highlighter, Analyzer analyzer, String fieldName, String text, String separator) throws IOException, InvalidTokenOffsetsException {
    if (text == null) {
        // A missing field value would make StringReader throw a NullPointerException.
        return "";
    }
    TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
    return highlighter.getBestFragments(tokenStream, text, 3, separator);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) SimpleHTMLEncoder(org.apache.lucene.search.highlight.SimpleHTMLEncoder) QueryScorer(org.apache.lucene.search.highlight.QueryScorer) InvalidTokenOffsetsException(org.apache.lucene.search.highlight.InvalidTokenOffsetsException) StringReader(java.io.StringReader) SimpleHTMLFormatter(org.apache.lucene.search.highlight.SimpleHTMLFormatter) Highlighter(org.apache.lucene.search.highlight.Highlighter)

Aggregations

SimpleHTMLEncoder (org.apache.lucene.search.highlight.SimpleHTMLEncoder)6 StringReader (java.io.StringReader)3 TokenStream (org.apache.lucene.analysis.TokenStream)3 Highlighter (org.apache.lucene.search.highlight.Highlighter)3 InvalidTokenOffsetsException (org.apache.lucene.search.highlight.InvalidTokenOffsetsException)3 QueryScorer (org.apache.lucene.search.highlight.QueryScorer)3 SimpleHTMLFormatter (org.apache.lucene.search.highlight.SimpleHTMLFormatter)3 TermQuery (org.apache.lucene.search.TermQuery)2 Snippet (org.apache.lucene.search.highlight.Snippet)2 BytesRef (org.apache.lucene.util.BytesRef)2 File (java.io.File)1 IOException (java.io.IOException)1 Document (org.apache.lucene.document.Document)1 CorruptIndexException (org.apache.lucene.index.CorruptIndexException)1 IndexReader (org.apache.lucene.index.IndexReader)1 Term (org.apache.lucene.index.Term)1 MultiFieldQueryParser (org.apache.lucene.queryparser.classic.MultiFieldQueryParser)1 ParseException (org.apache.lucene.queryparser.classic.ParseException)1 QueryParser (org.apache.lucene.queryparser.classic.QueryParser)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1