Example 1 with SimpleHTMLEncoder

public void testHtmlEncodeFormat() {
    String content = "<b>This is a really cool highlighter.</b> Unified highlighter gives nice snippets back.";
    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new SimpleHTMLEncoder());
    Passage[] passages = new Passage[2];
    String match = "highlighter";
    BytesRef matchBytesRef = new BytesRef(match);
    Passage passage1 = new Passage();
    int start = content.indexOf(match);
    int end = start + match.length();
    //lets include the whitespace at the end to make sure we trim it
    passage1.setEndOffset(end + 6);
    passage1.addMatch(start, end, matchBytesRef);
    passages[0] = passage1;
    Passage passage2 = new Passage();
    start = content.lastIndexOf(match);
    end = start + match.length();
    passage2.addMatch(start, end, matchBytesRef);
    passages[1] = passage2;
    Snippet[] fragments = passageFormatter.format(passages, content);
    assertThat(fragments, notNullValue());
    assertThat(fragments.length, equalTo(2));
    assertThat(fragments[0].getText(), equalTo("&lt;b&gt;This is a really cool <em>highlighter</em>.&lt;&#x2F;b&gt;"));
    assertThat(fragments[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
Example 2 with SimpleHTMLEncoder

public void testHtmlEncodeFormat() {
    String content = "<b>This is a really cool highlighter.</b> Postings highlighter gives nice snippets back.";
    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new SimpleHTMLEncoder());
    Passage[] passages = new Passage[2];
    String match = "highlighter";
    BytesRef matchBytesRef = new BytesRef(match);
    Passage passage1 = new Passage();
    int start = content.indexOf(match);
    int end = start + match.length();
    passage1.startOffset = 0;
    //lets include the whitespace at the end to make sure we trim it
    passage1.endOffset = end + 6;
    passage1.addMatch(start, end, matchBytesRef);
    passages[0] = passage1;
    Passage passage2 = new Passage();
    start = content.lastIndexOf(match);
    end = start + match.length();
    passage2.startOffset = passage1.endOffset;
    passage2.endOffset = content.length();
    passage2.addMatch(start, end, matchBytesRef);
    passages[1] = passage2;
    Snippet[] fragments = passageFormatter.format(passages, content);
    assertThat(fragments, notNullValue());
    assertThat(fragments.length, equalTo(2));
    assertThat(fragments[0].getText(), equalTo("&lt;b&gt;This is a really cool <em>highlighter</em>.&lt;&#x2F;b&gt;"));
    assertThat(fragments[1].getText(), equalTo("Postings <em>highlighter</em> gives nice snippets back."));
Example 3 with SimpleHTMLEncoder

public void testTagsAndEncoder() throws Exception {
    FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "<h1> a </h1>");
    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
    String[] preTags = { "[" };
    String[] postTags = { "]" };
    assertEquals("&lt;h1&gt; [a] &lt;&#x2F;h1&gt;", sfb.createFragment(reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder()));
Example 4 with SimpleHTMLEncoder

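/**
 * Highlight (bold,color) query words in result-document. Set HighlightResult for content or description.
 *
 * @param query the query whose terms are highlighted
 * @param analyzer the analyzer used to tokenize the content, description and title text
 * @param doc the Lucene document the content, description and title fields are read from
 * @param resultDocument the result-document the highlighting is produced for
 * @throws IOException if tokenizing or fragment extraction fails
 */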
private void doHighlight(Query query, Analyzer analyzer, Document doc, ResultDocument resultDocument) throws IOException {
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new SimpleHTMLEncoder(), new QueryScorer(query));
    // Get 3 best fragments of content and seperate with a "..."
    try {
        // highlight content
        String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        TokenStream tokenStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
        String highlightResult = highlighter.getBestFragments(tokenStream, content, 3, HIGHLIGHT_SEPARATOR);
        // if no highlightResult is in content => look in description
        if (highlightResult.length() == 0) {
            String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            tokenStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
            highlightResult = highlighter.getBestFragments(tokenStream, description, 3, HIGHLIGHT_SEPARATOR);
        // highlight title
        String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        title = title.trim();
        if (title.length() > 128) {
            title = FilterFactory.getHtmlTagAndDescapingFilter().filter(title);
            title = Formatter.truncate(title, 128);
        tokenStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
        String highlightTitle = highlighter.getBestFragments(tokenStream, title, 3, " ");
    } catch (InvalidTokenOffsetsException e) {
        log.warn("", e);
Example 5 with SimpleHTMLEncoder

/**
 *  Searches pages using a particular combination of flags.
 *
 *  @param query The query to perform in Lucene query language
 *  @param flags A set of flags
 *  @param wikiContext The context whose wiki session is used for the page view-permission check
 *  @return A Collection of SearchResult instances
 *  @throws ProviderException if there is a problem with the backend
 */
// Parses the query against queryfields, optionally prepares a Highlighter, and builds a
// SearchResultImpl for every hit whose page the caller's wiki session is allowed to view.
// NOTE(review): this listing looks damaged by text extraction — several expressions, call
// targets and closing braces are missing (see the inline notes), so it does not compile as shown.
public Collection findPages(String query, int flags, WikiContext wikiContext) throws ProviderException {
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;
    try {
        // Parse the user query across all of the fields listed in queryfields.
        QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_47, queryfields, getLuceneAnalyzer());
        // QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);
        // A highlighter is only created when the caller asked for search contexts (FLAG_CONTEXTS);
        // it stays null otherwise, which disables fragment extraction further down.
        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"), new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
        try {
            File dir = new File(m_luceneDirectory);
            Directory luceneDir = new SimpleFSDirectory(dir, null);
            // NOTE(review): the right-hand side is missing in this listing — presumably it opens
            // an index reader on luceneDir; confirm against the upstream source.
            IndexReader reader =;
            searcher = new IndexSearcher(reader);
        } catch (Exception ex) {
            // NOTE(review): the call receiving these arguments is missing — presumably a logger call; confirm.
  "Lucene not yet ready; indexing not started", ex);
            // Any failure to open the index makes the method return null rather than an empty collection.
            return null;
        // NOTE(review): the start of this expression is missing — presumably a search on
        // `searcher` for luceneQuery, capped at MAX_SEARCH_HITS; confirm.
        ScoreDoc[] hits =, MAX_SEARCH_HITS).scoreDocs;
        AuthorizationManager mgr = m_engine.getAuthorizationManager();
        list = new ArrayList<SearchResult>(hits.length);
        for (int curr = 0; curr < hits.length; curr++) {
            int docID = hits[curr].doc;
            Document doc = searcher.doc(docID);
            // The stored LUCENE_ID field carries the page name used to load the page.
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);
            if (page != null) {
                // This branch has no statements in the visible code; only the comments below remain.
                if (page instanceof Attachment) {
                // Currently attachments don't look nice on the search-results page
                // When the search-results are cleaned up this can be enabled again.
                PagePermission pp = new PagePermission(page, PagePermission.VIEW_ACTION);
                // Only hits the session is permitted to view are turned into results.
                if (mgr.checkPermission(wikiContext.getWikiSession(), pp)) {
                    // Scale the Lucene score by 100 and truncate it to an int.
                    int score = (int) (hits[curr].score * 100);
                    // Get highlighted search contexts
                    String text = doc.get(LUCENE_PAGE_CONTENTS);
                    // Stays empty unless both stored page text and a highlighter are available.
                    String[] fragments = new String[0];
                    if (text != null && highlighter != null) {
                        TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text));
                        fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
                    // NOTE(review): `result` is never added to `list` in the visible code — a
                    // statement appears to be missing after this line; confirm.
                    SearchResult result = new SearchResultImpl(page, score, fragments);
            } else {
                // The index references a page that can no longer be loaded: log it and
                // hand a placeholder WikiPage to pageRemoved.
                log.error("Lucene found a result page '" + pageName + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
    } catch (IOException e) {
        // Logged only; execution continues to the return statement below.
        log.error("Failed during lucene search", e);
    // NOTE(review): a call (presumably logging) appears to be missing before the string literal on the next line.
    } catch (ParseException e) {"Broken query; cannot parse query ", e);
        // An unparsable query is reported to the caller as a ProviderException carrying the parser message.
        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
        log.error("Tokens are incompatible with provided text ", e);
    } finally {
        if (searcher != null) {
            // NOTE(review): the try body is empty in this listing — presumably it released the
            // searcher's underlying reader; confirm.
            try {
            } catch (IOException e) {
    // `list` is still null here if an exception was caught before it was allocated.
    return list;
Also used : IndexSearcher( TokenStream(org.apache.lucene.analysis.TokenStream) Query( TermQuery( ProviderException( WikiPage( Attachment( Document(org.apache.lucene.document.Document) ScoreDoc( InvalidTokenOffsetsException( StringReader( Highlighter( Directory( SimpleFSDirectory( SimpleHTMLEncoder( MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) QueryScorer( IOException( SimpleFSDirectory( CorruptIndexException(org.apache.lucene.index.CorruptIndexException) NoRequiredPropertyException( InternalWikiException( ParseException(org.apache.lucene.queryparser.classic.ParseException) LockObtainFailedException( InvalidTokenOffsetsException( IOException( ProviderException( MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader) AuthorizationManager( ParseException(org.apache.lucene.queryparser.classic.ParseException) SimpleHTMLFormatter( File( PagePermission(


