use of org.apache.lucene.search.uhighlight.Passage in project opengrok by oracle.
the class PassageConverterTest method testLinesSpanningMatch.
/**
 * Checks the conversion of a single passage whose only match spans the whole
 * of {@code DOC2}: the result must hold one entry for each of lines 0, 1 and
 * 2, each carrying exactly one markup and no elision.
 */
@Test
public void testLinesSpanningMatch() {
    // One passage, with one match, both stretching over the entire document.
    Passage wholeDoc = new Passage();
    wholeDoc.setStartOffset(0);
    wholeDoc.setEndOffset(DOC2.length());
    wholeDoc.addMatch(0, wholeDoc.getEndOffset(), new BytesRef(DOC2), 1);
    assertEquals(1, wholeDoc.getNumMatches(), "getNumMatches()");

    PassageConverter converter = getConverter((short) 0);
    SortedMap<Integer, LineHighlight> byLine = converter.convert(new Passage[] { wholeDoc }, splitter2);

    assertEquals(3, byLine.size(), "linemap size()");
    int firstLine = byLine.firstKey();
    assertEquals(0, firstLine, "first lineno");
    assertTrue(byLine.containsKey(1), "linemap[1] exists");
    assertTrue(byLine.containsKey(2), "linemap[2] exists");

    // Expected {lineStart, lineEnd} of the sole markup on each successive line:
    // the first line is open at its end, the middle line at both ends, and
    // the last line at its start.
    int[][] expectedBounds = {
        { 0, Integer.MAX_VALUE },
        { -1, Integer.MAX_VALUE },
        { -1, 3 },
    };
    for (int k = 0; k < expectedBounds.length; k++) {
        LineHighlight lineHighlight = byLine.get(firstLine + k);
        assertNotNull(lineHighlight, "get LineHighlight");
        assertEquals(0, lineHighlight.getLelide(), "getLelide()");
        assertEquals(0, lineHighlight.getRelide(), "getRelide()");
        assertEquals(1, lineHighlight.countMarkups(), "countMarkups()");

        PhraseHighlight markup = lineHighlight.getMarkup(0);
        assertNotNull(markup, "get PhraseHighlight");
        assertEquals(expectedBounds[k][0], markup.getLineStart(), "getLineStart()");
        assertEquals(expectedBounds[k][1], markup.getLineEnd(), "getLineEnd()");
    }
}
use of org.apache.lucene.search.uhighlight.Passage in project opengrok by oracle.
the class PassageConverter method convert.
/**
 * Builds a sorted map of {@link LineHighlight} instances, keyed by the line
 * index reported by {@code splitter}, from the specified passages.
 * <p>
 * Every line touched by a non-empty passage (widened on both sides by
 * {@code args.getContextSurround()} lines and clamped to the splitter's line
 * range) receives an entry. Each passage match that lies entirely within
 * that line range is then recorded as markup on the line or lines it covers.
 * Finally each entry's markups are condensed and the line, with its
 * end-of-line sequence removed, is handed to {@code elideLine}.
 * @param passages a defined instance
 * @param splitter a defined instance
 * @return a defined instance
 */
public SortedMap<Integer, LineHighlight> convert(Passage[] passages, SourceSplitter splitter) {
    SortedMap<Integer, LineHighlight> highlights = new TreeMap<>();

    for (Passage passage : passages) {
        int passageStart = passage.getStartOffset();
        int passageEnd = passage.getEndOffset();
        if (passageStart >= passageEnd) {
            // Empty passage: nothing to report.
            continue;
        }

        int firstLine = splitter.findLineIndex(passageStart);
        if (firstLine < 0) {
            continue;
        }
        int lastLine = splitter.findLineIndex(passageEnd - 1);
        if (lastLine < 0) {
            continue;
        }

        // Widen by the configured number of context lines, staying in range.
        firstLine = Math.max(0, firstLine - args.getContextSurround());
        lastLine = Math.min(splitter.count() - 1, lastLine + args.getContextSurround());

        // Guarantee an entry for every line of the (widened) passage.
        for (int line = firstLine; line <= lastLine; ++line) {
            highlights.computeIfAbsent(line, idx -> new LineHighlight(idx));
        }

        // Record markup for each match of the passage.
        for (int k = 0; k < passage.getNumMatches(); ++k) {
            int matchStart = passage.getMatchStarts()[k];
            int matchFirstLine = splitter.findLineIndex(matchStart);
            int matchEnd = passage.getMatchEnds()[k];
            int matchLastLine = splitter.findLineIndex(matchEnd - 1);

            // Skip empty matches and matches reaching outside the line range.
            if (matchStart >= matchEnd
                    || matchFirstLine < firstLine || matchFirstLine > lastLine
                    || matchLastLine < firstLine || matchLastLine > lastLine) {
                continue;
            }

            int firstLineOffset = splitter.getOffset(matchFirstLine);
            if (matchFirstLine == matchLastLine) {
                // Match confined to one line: a bounded highlight.
                highlights.get(matchFirstLine).addMarkup(
                        PhraseHighlight.create(matchStart - firstLineOffset, matchEnd - firstLineOffset));
                continue;
            }

            // Match spanning lines: open-ended on the first line ...
            highlights.get(matchFirstLine).addMarkup(
                    PhraseHighlight.createStarter(matchStart - firstLineOffset));
            // ... open-started on the last line ...
            int lastLineOffset = splitter.getOffset(matchLastLine);
            highlights.get(matchLastLine).addMarkup(
                    PhraseHighlight.createEnder(matchEnd - lastLineOffset));
            // ... and any lines in between are highlighted in their entirety.
            for (int between = matchFirstLine + 1; between < matchLastLine; ++between) {
                highlights.get(between).addMarkup(PhraseHighlight.createEntire());
            }
        }
    }

    /*
     * Condense the markups of each line, then elide against the line's text
     * with any end-of-line sequence cut off.
     */
    for (LineHighlight entry : highlights.values()) {
        entry.condenseMarkups();
        String text = splitter.getLine(entry.getLineno());
        Matcher eol = StringUtils.STANDARD_EOL.matcher(text);
        String withoutEol = eol.find() ? text.substring(0, eol.start()) : text;
        elideLine(entry, withoutEol);
    }
    return highlights;
}
use of org.apache.lucene.search.uhighlight.Passage in project alix by oeuvres.
the class HiliteFormatter method format.
/**
 * Renders the given passages of {@code content} into one string, wrapping
 * every match in {@code preTag} / {@code postTag}.
 * <p>
 * {@code ellipsis} is inserted before a passage that neither is the first
 * piece of output nor starts where the previous passage ended. Matches that
 * overlap each other are merged into a single tagged span.
 *
 * @param passages the passages to render, expected in increasing offset
 *        order (the ellipsis logic relies on this)
 * @param content the full text the passage and match offsets refer to
 * @return the concatenated, tagged text of all passages
 */
@Override
public String format(Passage[] passages, String content) {
    StringBuilder sb = new StringBuilder();
    int pos = 0;
    for (Passage passage : passages) {
        // Don't add an ellipsis if it's the first one, or if it's connected.
        if (passage.getStartOffset() > pos && pos > 0) {
            sb.append(ellipsis);
        }
        pos = passage.getStartOffset();

        final int numMatches = passage.getNumMatches();
        final int[] matchStarts = passage.getMatchStarts();
        final int[] matchEnds = passage.getMatchEnds();
        final int passageEnd = passage.getEndOffset();

        for (int i = 0; i < numMatches; i++) {
            int start = matchStarts[i];
            assert start >= pos && start < passageEnd;
            // Append content before this start.
            append(sb, content, pos, start);

            int end = matchEnds[i];
            assert end > start;
            // Look ahead to expand 'end' past all overlapping matches. A
            // following match may be nested inside the current span and end
            // earlier, so only ever grow 'end'; never let it shrink.
            while (i + 1 < numMatches && matchStarts[i + 1] < end) {
                end = Math.max(end, matchEnds[++i]);
            }
            // In case the match straddles past the passage.
            end = Math.min(end, passageEnd);

            sb.append(preTag);
            append(sb, content, start, end);
            sb.append(postTag);
            pos = end;
        }
        // It's possible a "term" from the analyzer could span a sentence
        // boundary, leaving pos beyond the passage end; never go backwards.
        append(sb, content, pos, Math.max(pos, passageEnd));
        pos = passageEnd;
    }
    return sb.toString();
}
use of org.apache.lucene.search.uhighlight.Passage in project OpenSearch by opensearch-project.
the class CustomPassageFormatterTests method testHtmlEncodeFormat.
/**
 * Formats two adjacent passages with a {@link SimpleHTMLEncoder} and checks
 * that markup already present in the content is HTML-escaped while the
 * highlight tags supplied to the formatter are emitted verbatim.
 */
public void testHtmlEncodeFormat() {
    String content = "<b>This is a really cool highlighter.</b> Unified highlighter gives nice snippets back.";
    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new SimpleHTMLEncoder());
    Passage[] passages = new Passage[2];
    String match = "highlighter";
    BytesRef matchBytesRef = new BytesRef(match);

    // First passage: from the start of the content through ".</b> " that
    // follows the first match (6 characters), so that it ends in whitespace
    // and the expectation below verifies the whitespace is trimmed.
    Passage passage1 = new Passage();
    int start = content.indexOf(match);
    int end = start + match.length();
    passage1.setStartOffset(0);
    passage1.setEndOffset(end + 6);
    passage1.addMatch(start, end, matchBytesRef, 1);
    passages[0] = passage1;

    // Second passage: the remainder of the content, with the last match.
    Passage passage2 = new Passage();
    start = content.lastIndexOf(match);
    end = start + match.length();
    passage2.setStartOffset(passage1.getEndOffset());
    passage2.setEndOffset(content.length());
    passage2.addMatch(start, end, matchBytesRef, 1);
    passages[1] = passage2;

    Snippet[] fragments = passageFormatter.format(passages, content);
    assertThat(fragments, notNullValue());
    assertThat(fragments.length, equalTo(2));
    // The encoder escapes '<', '>' and '/' of the original "<b>" / "</b>";
    // only the formatter's own <em> tags stay unescaped.
    assertThat(fragments[0].getText(), equalTo("&lt;b&gt;This is a really cool <em>highlighter</em>.&lt;&#x2F;b&gt;"));
    assertThat(fragments[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
}
use of org.apache.lucene.search.uhighlight.Passage in project OpenSearch by opensearch-project.
the class CustomPassageFormatterTests method testSimpleFormat.
/**
 * Formats three contiguous passages (two holding one match each, the last
 * holding none) with a {@link DefaultEncoder} and checks the text and the
 * highlighted flag of every resulting snippet.
 */
public void testSimpleFormat() {
    final String content = "This is a really cool highlighter. Unified highlighter gives nice snippets back. No matches here.";
    final CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new DefaultEncoder());
    final String match = "highlighter";
    final BytesRef matchBytesRef = new BytesRef(match);

    // First sentence; the end offset also takes in the ". " after the match,
    // so the passage ends in whitespace that is expected to be trimmed.
    final int firstMatchStart = content.indexOf(match);
    final int firstMatchEnd = firstMatchStart + match.length();
    final Passage first = new Passage();
    first.setStartOffset(0);
    first.setEndOffset(firstMatchEnd + 2);
    first.addMatch(firstMatchStart, firstMatchEnd, matchBytesRef, 1);

    // Second sentence, beginning right where the first passage stopped.
    final int lastMatchStart = content.lastIndexOf(match);
    final int lastMatchEnd = lastMatchStart + match.length();
    final Passage second = new Passage();
    second.setStartOffset(first.getEndOffset());
    second.setEndOffset(lastMatchEnd + 26);
    second.addMatch(lastMatchStart, lastMatchEnd, matchBytesRef, 1);

    // Remainder of the content, deliberately without any match.
    final Passage third = new Passage();
    third.setStartOffset(second.getEndOffset());
    third.setEndOffset(content.length());

    final Snippet[] fragments = passageFormatter.format(new Passage[] { first, second, third }, content);

    assertThat(fragments, notNullValue());
    assertThat(fragments.length, equalTo(3));

    assertThat(fragments[0].getText(), equalTo("This is a really cool <em>highlighter</em>."));
    assertThat(fragments[0].isHighlighted(), equalTo(true));

    assertThat(fragments[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
    assertThat(fragments[1].isHighlighted(), equalTo(true));

    assertThat(fragments[2].getText(), equalTo("No matches here."));
    assertThat(fragments[2].isHighlighted(), equalTo(false));
}
Aggregations