use of org.apache.lucene.search.uhighlight.Snippet in project OpenSearch by opensearch-project.
the class AnnotatedTextHighlighterTests method assertHighlightOneDoc.
private void assertHighlightOneDoc(String fieldName, String[] markedUpInputs, Query query, Locale locale, BreakIterator breakIterator, int noMatchSize, String[] expectedPassages) throws Exception {
// Annotated fields wrap the usual analyzer with one that injects extra tokens
Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
iwc.setMergePolicy(newTieredMergePolicy(random()));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
FieldType ft = new FieldType(TextField.TYPE_STORED);
if (randomBoolean()) {
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
} else {
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
}
ft.freeze();
Document doc = new Document();
for (String input : markedUpInputs) {
Field field = new Field(fieldName, "", ft);
field.setStringValue(input);
doc.add(field);
}
iw.addDocument(doc);
DirectoryReader reader = iw.getReader();
IndexSearcher searcher = newSearcher(reader);
iw.close();
AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
for (int i = 0; i < markedUpInputs.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
}
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
hiliteAnalyzer.setAnnotations(annotations);
AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(new DefaultEncoder());
passageFormatter.setAnnotations(annotations);
ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
for (int i = 0; i < annotations.length; i++) {
plainTextForHighlighter.add(annotations[i].textMinusMarkup);
}
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
assertThat(topDocs.totalHits.value, equalTo(1L));
String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null, passageFormatter, locale, breakIterator, "index", "text", query, noMatchSize, expectedPassages.length, name -> "text".equals(name), Integer.MAX_VALUE, Integer.MAX_VALUE);
highlighter.setFieldMatcher((name) -> "text".equals(name));
final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
assertEquals(expectedPassages.length, snippets.length);
for (int i = 0; i < snippets.length; i++) {
assertEquals(expectedPassages[i], snippets[i].getText());
}
reader.close();
dir.close();
}
use of org.apache.lucene.search.uhighlight.Snippet in project OpenSearch by opensearch-project.
the class AnnotatedPassageFormatter method format.
@Override
public Snippet[] format(Passage[] passages, String content) {
Snippet[] snippets = new Snippet[passages.length];
int pos;
int j = 0;
for (Passage passage : passages) {
AnnotationToken[] annotations = getIntersectingAnnotations(passage.getStartOffset(), passage.getEndOffset());
MarkupPassage mergedMarkup = mergeAnnotations(annotations, passage);
StringBuilder sb = new StringBuilder();
pos = passage.getStartOffset();
for (Markup markup : mergedMarkup.markups) {
int start = markup.start;
int end = markup.end;
// its possible to have overlapping terms
if (start > pos) {
append(sb, content, pos, start);
}
if (end > pos) {
sb.append("[");
append(sb, content, Math.max(pos, start), end);
sb.append("](");
sb.append(markup.metadata);
sb.append(")");
pos = end;
}
}
// its possible a "term" from the analyzer could span a sentence boundary.
append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
// we remove the paragraph separator if present at the end of the snippet (we used it as separator between values)
if (sb.charAt(sb.length() - 1) == HighlightUtils.PARAGRAPH_SEPARATOR) {
sb.deleteCharAt(sb.length() - 1);
} else if (sb.charAt(sb.length() - 1) == HighlightUtils.NULL_SEPARATOR) {
sb.deleteCharAt(sb.length() - 1);
}
// and we trim the snippets too
snippets[j++] = new Snippet(sb.toString().trim(), passage.getScore(), passage.getNumMatches() > 0);
}
return snippets;
}
use of org.apache.lucene.search.uhighlight.Snippet in project OpenSearch by opensearch-project.
the class UnifiedHighlighter method highlight.
@Override
public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
@SuppressWarnings("unchecked") Map<String, CustomUnifiedHighlighter> cache = (Map<String, CustomUnifiedHighlighter>) fieldContext.cache.computeIfAbsent(UnifiedHighlighter.class.getName(), k -> new HashMap<>());
if (cache.containsKey(fieldContext.fieldName) == false) {
cache.put(fieldContext.fieldName, buildHighlighter(fieldContext));
}
CustomUnifiedHighlighter highlighter = cache.get(fieldContext.fieldName);
MappedFieldType fieldType = fieldContext.fieldType;
SearchHighlightContext.Field field = fieldContext.field;
FetchSubPhase.HitContext hitContext = fieldContext.hitContext;
CheckedSupplier<String, IOException> loadFieldValues = () -> {
List<Object> fieldValues = loadFieldValues(highlighter, fieldContext.context.getQueryShardContext(), fieldType, hitContext, fieldContext.forceSource);
if (fieldValues.size() == 0) {
return null;
}
return mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
};
Snippet[] fieldSnippets = highlighter.highlightField(hitContext.reader(), hitContext.docId(), loadFieldValues);
if (fieldSnippets == null || fieldSnippets.length == 0) {
return null;
}
List<Snippet> snippets = new ArrayList<>(fieldSnippets.length);
for (Snippet fieldSnippet : fieldSnippets) {
if (Strings.hasText(fieldSnippet.getText())) {
snippets.add(fieldSnippet);
}
}
if (snippets.isEmpty()) {
return null;
}
if (field.fieldOptions().scoreOrdered()) {
// let's sort the snippets by score if needed
CollectionUtil.introSort(snippets, (o1, o2) -> Double.compare(o2.getScore(), o1.getScore()));
}
String[] fragments = new String[snippets.size()];
for (int i = 0; i < fragments.length; i++) {
fragments[i] = snippets.get(i).getText();
}
return new HighlightField(fieldContext.fieldName, Text.convertFromStringArray(fragments));
}
Aggregations