Search in sources :

Example 11 with Passage

use of org.apache.lucene.search.uhighlight.Passage in project opengrok by oracle.

the class PassageConverterTest method testLinesSpanningMatch.

/**
 * Converts a single passage whose one match covers all of {@code DOC2}
 * and checks that three consecutive lines come back, each carrying
 * exactly one markup and no elision on either side.
 */
@Test
public void testLinesSpanningMatch() {
    // One passage spanning the whole document, with one match of the same extent.
    Passage passage = new Passage();
    passage.setStartOffset(0);
    passage.setEndOffset(DOC2.length());
    passage.addMatch(0, passage.getEndOffset(), new BytesRef(DOC2), 1);
    assertEquals(1, passage.getNumMatches(), "getNumMatches()");

    PassageConverter converter = getConverter((short) 0);
    SortedMap<Integer, LineHighlight> byLine = converter.convert(new Passage[] { passage }, splitter2);

    assertEquals(3, byLine.size(), "linemap size()");
    int firstLine = byLine.firstKey();
    assertEquals(0, firstLine, "first lineno");
    assertTrue(byLine.containsKey(1), "linemap[1] exists");
    assertTrue(byLine.containsKey(2), "linemap[2] exists");

    // First line: the markup starts at column 0 and is open-ended.
    LineHighlight head = byLine.get(firstLine);
    assertNotNull(head, "get LineHighlight");
    assertEquals(0, head.getLelide(), "getLelide()");
    assertEquals(0, head.getRelide(), "getRelide()");
    assertEquals(1, head.countMarkups(), "countMarkups()");
    PhraseHighlight headMarkup = head.getMarkup(0);
    assertNotNull(headMarkup, "get PhraseHighlight");
    assertEquals(0, headMarkup.getLineStart(), "getLineStart()");
    assertEquals(Integer.MAX_VALUE, headMarkup.getLineEnd(), "getLineEnd()");

    // Middle line: the markup is reported with start -1 and end Integer.MAX_VALUE.
    LineHighlight middle = byLine.get(firstLine + 1);
    assertNotNull(middle, "get LineHighlight");
    assertEquals(0, middle.getLelide(), "getLelide()");
    assertEquals(0, middle.getRelide(), "getRelide()");
    assertEquals(1, middle.countMarkups(), "countMarkups()");
    PhraseHighlight middleMarkup = middle.getMarkup(0);
    assertNotNull(middleMarkup, "get PhraseHighlight");
    assertEquals(-1, middleMarkup.getLineStart(), "getLineStart()");
    assertEquals(Integer.MAX_VALUE, middleMarkup.getLineEnd(), "getLineEnd()");

    // Last line: the markup is reported with start -1 and ends at column 3.
    LineHighlight tail = byLine.get(firstLine + 2);
    assertNotNull(tail, "get LineHighlight");
    assertEquals(0, tail.getLelide(), "getLelide()");
    assertEquals(0, tail.getRelide(), "getRelide()");
    assertEquals(1, tail.countMarkups(), "countMarkups()");
    PhraseHighlight tailMarkup = tail.getMarkup(0);
    assertNotNull(tailMarkup, "get PhraseHighlight");
    assertEquals(-1, tailMarkup.getLineStart(), "getLineStart()");
    assertEquals(3, tailMarkup.getLineEnd(), "getLineEnd()");
}
Also used : Passage(org.apache.lucene.search.uhighlight.Passage) BytesRef(org.apache.lucene.util.BytesRef) Test(org.junit.jupiter.api.Test)

Example 12 with Passage

use of org.apache.lucene.search.uhighlight.Passage in project opengrok by oracle.

the class PassageConverter method convert.

/**
 * Translates the specified passages into {@link LineHighlight} instances,
 * returned in a sorted map keyed by the line index that
 * {@code splitter.findLineIndex} reports for each offset.
 * <p>
 * Passages that are empty, or whose first or last character cannot be
 * mapped to a line, are skipped.
 * @param passages a defined instance
 * @param splitter a defined instance
 * @return a defined instance
 */
public SortedMap<Integer, LineHighlight> convert(Passage[] passages, SourceSplitter splitter) {
    SortedMap<Integer, LineHighlight> highlights = new TreeMap<>();

    for (Passage passage : passages) {
        int passageStart = passage.getStartOffset();
        int passageEnd = passage.getEndOffset();
        if (passageStart >= passageEnd) {
            continue;
        }

        int firstLine = splitter.findLineIndex(passageStart);
        if (firstLine < 0) {
            continue;
        }
        int lastLine = splitter.findLineIndex(passageEnd - 1);
        if (lastLine < 0) {
            continue;
        }

        // Widen the line range by the configured surrounding context,
        // clamped to the lines the splitter knows about.
        firstLine = Math.max(0, firstLine - args.getContextSurround());
        lastLine = Math.min(splitter.count() - 1, lastLine + args.getContextSurround());

        // Every line of the (widened) passage gets an entry, matched or not.
        for (int line = firstLine; line <= lastLine; ++line) {
            highlights.computeIfAbsent(line, idx -> new LineHighlight(idx));
        }

        // Attach a markup to the affected line(s) for each passage match.
        int matchCount = passage.getNumMatches();
        int[] matchStarts = passage.getMatchStarts();
        int[] matchEnds = passage.getMatchEnds();
        for (int k = 0; k < matchCount; ++k) {
            int matchStart = matchStarts[k];
            int startLine = splitter.findLineIndex(matchStart);
            int matchEnd = matchEnds[k];
            int endLine = splitter.findLineIndex(matchEnd - 1);

            // Ignore empty matches and matches reaching outside the line range.
            if (matchStart >= matchEnd || startLine < firstLine || startLine > lastLine
                    || endLine < firstLine || endLine > lastLine) {
                continue;
            }

            int startLineOffset = splitter.getOffset(startLine);
            LineHighlight startHighlight = highlights.get(startLine);
            if (startLine == endLine) {
                // The match sits within a single line.
                startHighlight.addMarkup(PhraseHighlight.create(matchStart - startLineOffset,
                        matchEnd - startLineOffset));
                continue;
            }

            // The match spans lines: a starter on the first line, an ender
            // on the last, and every line in between wholly highlighted.
            startHighlight.addMarkup(PhraseHighlight.createStarter(matchStart - startLineOffset));

            int endLineOffset = splitter.getOffset(endLine);
            LineHighlight endHighlight = highlights.get(endLine);
            endHighlight.addMarkup(PhraseHighlight.createEnder(matchEnd - endLineOffset));

            for (int between = startLine + 1; between < endLine; ++between) {
                highlights.get(between).addMarkup(PhraseHighlight.createEntire());
            }
        }
    }

    /*
     * Condense the markups of each line, then elide the line (with any
     * end-of-line sequence cut off) as necessary to the reportable length.
     */
    for (LineHighlight highlight : highlights.values()) {
        highlight.condenseMarkups();
        String text = splitter.getLine(highlight.getLineno());
        Matcher eol = StringUtils.STANDARD_EOL.matcher(text);
        String withoutEol = eol.find() ? text.substring(0, eol.start()) : text;
        elideLine(highlight, withoutEol);
    }
    return highlights;
}
Also used : Matcher(java.util.regex.Matcher) TreeMap(java.util.TreeMap) Passage(org.apache.lucene.search.uhighlight.Passage)

Example 13 with Passage

use of org.apache.lucene.search.uhighlight.Passage in project alix by oeuvres.

the class HiliteFormatter method format.

/**
 * Renders the given passages into one string: text between matches is
 * appended as-is through {@code append}, each match (merged with any
 * matches that start before it ends) is wrapped in {@code preTag} and
 * {@code postTag}, and {@code ellipsis} is inserted before a passage
 * that starts beyond the end of the previous one.
 *
 * @param passages the passages to render, visited in array order
 * @param content the text handed to {@code append} together with the
 *                passage and match offsets
 * @return the accumulated output for all passages
 */
@Override
public String format(Passage[] passages, String content) {
    StringBuilder out = new StringBuilder();
    int cursor = 0;
    for (Passage passage : passages) {
        int passageStart = passage.getStartOffset();
        int passageEnd = passage.getEndOffset();

        // No ellipsis ahead of the very first output, nor between passages that touch.
        if (cursor > 0 && passageStart > cursor) {
            out.append(ellipsis);
        }
        cursor = passageStart;

        int matchCount = passage.getNumMatches();
        int[] starts = passage.getMatchStarts();
        int[] ends = passage.getMatchEnds();
        int idx = 0;
        while (idx < matchCount) {
            int hitStart = starts[idx];
            assert hitStart >= cursor && hitStart < passageEnd;
            // Plain text leading up to this match.
            append(out, content, cursor, hitStart);

            int hitEnd = ends[idx];
            assert hitEnd > hitStart;
            // Swallow every following match that begins before the current end.
            // NOTE(review): the end is overwritten, not maximised, so a match
            // nested inside the current one would shrink it; confirm whether
            // matches can nest.
            while (idx + 1 < matchCount && starts[idx + 1] < hitEnd) {
                idx++;
                hitEnd = ends[idx];
            }
            // A match may straddle past the passage; clip it to the passage end.
            hitEnd = Math.min(hitEnd, passageEnd);

            out.append(preTag);
            append(out, content, hitStart, hitEnd);
            out.append(postTag);

            cursor = hitEnd;
            idx++;
        }

        // Remainder of the passage; the max() keeps the range non-negative
        // when the cursor already sits at or beyond the passage end.
        append(out, content, cursor, Math.max(cursor, passageEnd));
        cursor = passageEnd;
    }
    return out.toString();
}
Also used : Passage(org.apache.lucene.search.uhighlight.Passage)

Example 14 with Passage

use of org.apache.lucene.search.uhighlight.Passage in project OpenSearch by opensearch-project.

the class CustomPassageFormatterTests method testHtmlEncodeFormat.

/**
 * Formats two adjacent passages over content containing HTML markup using
 * a {@link SimpleHTMLEncoder}, and expects the markup to be entity-encoded
 * while each match is wrapped in {@code <em>} tags.
 */
public void testHtmlEncodeFormat() {
    final String content = "<b>This is a really cool highlighter.</b> Unified highlighter gives nice snippets back.";
    final String term = "highlighter";
    final BytesRef termBytes = new BytesRef(term);

    // First passage: from the beginning through ".</b> " after the first match.
    // The six extra characters include the trailing space, which the expected
    // snippet text below no longer contains.
    int firstStart = content.indexOf(term);
    int firstEnd = firstStart + term.length();
    Passage first = new Passage();
    first.setStartOffset(0);
    first.setEndOffset(firstEnd + 6);
    first.addMatch(firstStart, firstEnd, termBytes, 1);

    // Second passage: picks up where the first one stops and runs to the end.
    int secondStart = content.lastIndexOf(term);
    int secondEnd = secondStart + term.length();
    Passage second = new Passage();
    second.setStartOffset(first.getEndOffset());
    second.setEndOffset(content.length());
    second.addMatch(secondStart, secondEnd, termBytes, 1);

    Passage[] passages = { first, second };
    CustomPassageFormatter formatter = new CustomPassageFormatter("<em>", "</em>", new SimpleHTMLEncoder());
    Snippet[] snippets = formatter.format(passages, content);

    assertThat(snippets, notNullValue());
    assertThat(snippets.length, equalTo(2));
    assertThat(snippets[0].getText(), equalTo("&lt;b&gt;This is a really cool <em>highlighter</em>.&lt;&#x2F;b&gt;"));
    assertThat(snippets[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
}
Also used : SimpleHTMLEncoder(org.apache.lucene.search.highlight.SimpleHTMLEncoder) CustomPassageFormatter(org.apache.lucene.search.uhighlight.CustomPassageFormatter) Snippet(org.apache.lucene.search.uhighlight.Snippet) Passage(org.apache.lucene.search.uhighlight.Passage) BytesRef(org.apache.lucene.util.BytesRef)

Example 15 with Passage

use of org.apache.lucene.search.uhighlight.Passage in project OpenSearch by opensearch-project.

the class CustomPassageFormatterTests method testSimpleFormat.

/**
 * Formats three adjacent passages with a {@link DefaultEncoder}: two that
 * each contain one match and a final one without any. Expects one snippet
 * per passage, with only the first two reported as highlighted.
 */
public void testSimpleFormat() {
    final String content = "This is a really cool highlighter. Unified highlighter gives nice snippets back. No matches here.";
    final String term = "highlighter";
    final BytesRef termBytes = new BytesRef(term);

    // First passage: from the beginning through ". " after the first match.
    // The two extra characters include the trailing space, which the expected
    // snippet text below no longer contains.
    int firstStart = content.indexOf(term);
    int firstEnd = firstStart + term.length();
    Passage first = new Passage();
    first.setStartOffset(0);
    first.setEndOffset(firstEnd + 2);
    first.addMatch(firstStart, firstEnd, termBytes, 1);

    // Second passage: starts where the first stops and extends 26 characters
    // beyond the last match.
    int secondStart = content.lastIndexOf(term);
    int secondEnd = secondStart + term.length();
    Passage second = new Passage();
    second.setStartOffset(first.getEndOffset());
    second.setEndOffset(secondEnd + 26);
    second.addMatch(secondStart, secondEnd, termBytes, 1);

    // Third passage: the rest of the content, with no match added.
    Passage third = new Passage();
    third.setStartOffset(second.getEndOffset());
    third.setEndOffset(content.length());

    Passage[] passages = { first, second, third };
    CustomPassageFormatter formatter = new CustomPassageFormatter("<em>", "</em>", new DefaultEncoder());
    Snippet[] snippets = formatter.format(passages, content);

    assertThat(snippets, notNullValue());
    assertThat(snippets.length, equalTo(3));
    assertThat(snippets[0].getText(), equalTo("This is a really cool <em>highlighter</em>."));
    assertThat(snippets[0].isHighlighted(), equalTo(true));
    assertThat(snippets[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
    assertThat(snippets[1].isHighlighted(), equalTo(true));
    assertThat(snippets[2].getText(), equalTo("No matches here."));
    assertThat(snippets[2].isHighlighted(), equalTo(false));
}
Also used : DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) CustomPassageFormatter(org.apache.lucene.search.uhighlight.CustomPassageFormatter) Snippet(org.apache.lucene.search.uhighlight.Snippet) Passage(org.apache.lucene.search.uhighlight.Passage) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

Passage (org.apache.lucene.search.uhighlight.Passage)30 BytesRef (org.apache.lucene.util.BytesRef)24 Test (org.junit.jupiter.api.Test)22 Snippet (org.apache.lucene.search.uhighlight.Snippet)3 TreeMap (java.util.TreeMap)2 Matcher (java.util.regex.Matcher)2 CustomPassageFormatter (org.apache.lucene.search.uhighlight.CustomPassageFormatter)2 Test (org.junit.Test)2 List (java.util.List)1 DefaultEncoder (org.apache.lucene.search.highlight.DefaultEncoder)1 SimpleHTMLEncoder (org.apache.lucene.search.highlight.SimpleHTMLEncoder)1 FieldHighlighter (org.apache.lucene.search.uhighlight.FieldHighlighter)1 PassageFormatter (org.apache.lucene.search.uhighlight.PassageFormatter)1 AnnotationToken (org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken)1