Use of org.apache.lucene.search.uhighlight.UnifiedHighlighter.OffsetSource in the OpenSearch project by opensearch-project.
Class UnifiedHighlighter, method buildHighlighter.
/**
 * Builds the {@link CustomUnifiedHighlighter} used to highlight the field described by
 * {@code fieldContext}.
 *
 * <p>The encoder, fragment count, no-match size and boundary-scanner locale are read from the
 * field's highlight options; the maximum analyzed offset comes from the index settings. For
 * keyword fields the mapper's {@code ignoreAbove()} value is passed along, otherwise
 * {@link Integer#MAX_VALUE} is used.
 *
 * @param fieldContext highlighting context for the field (field options, field type, query,
 *                     search context and hit context)
 * @return a highlighter configured for this field
 * @throws IOException declared by this method; propagated from the helpers it calls
 */
CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) throws IOException {
    // Only the literal "html" selects the HTML encoder; anything else uses the default one.
    final Encoder encoder;
    if (fieldContext.field.fieldOptions().encoder().equals("html")) {
        encoder = HighlightUtils.Encoders.HTML;
    } else {
        encoder = HighlightUtils.Encoders.DEFAULT;
    }

    final int maxAnalyzedOffset = fieldContext.context.getIndexSettings().getHighlightMaxAnalyzedOffset();

    // Keyword fields carry an ignore_above limit on their mapper; other field types are unbounded.
    int keywordIgnoreAbove = Integer.MAX_VALUE;
    if (fieldContext.fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
        KeywordFieldMapper keywordMapper = (KeywordFieldMapper) fieldContext.context.mapperService()
            .documentMapper()
            .mappers()
            .getMapper(fieldContext.fieldName);
        keywordIgnoreAbove = keywordMapper.ignoreAbove();
    }

    final int requestedFragments = fieldContext.field.fieldOptions().numberOfFragments();
    final Analyzer analyzer = getAnalyzer(fieldContext.context.mapperService().documentMapper());
    final PassageFormatter passageFormatter = getPassageFormatter(fieldContext.hitContext, fieldContext.field, encoder);
    final IndexSearcher searcher = fieldContext.context.searcher();
    final OffsetSource offsetSource = getOffsetSource(fieldContext.fieldType);

    // Either "0 fragments" was requested, or the field is not tokenized (in which case the
    // configured boundary scanner type is ignored and no regular break iterator is used).
    final boolean splitOnValueSeparatorOnly = requestedFragments == 0
        || fieldContext.fieldType.getTextSearchInfo().isTokenized() == false;

    final BreakIterator breakIterator;
    int effectiveFragments = requestedFragments;
    if (splitOnValueSeparatorOnly) {
        /*
         * Values are separated by a control char, and that char is the only place the
         * custom break iterator splits the text. This keeps the individual values of a
         * field distinct, so one snippet is returned per value.
         */
        breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
        if (requestedFragments == 0) {
            effectiveFragments = Integer.MAX_VALUE - 1;
        }
    } else {
        // Tokenized field with an explicit fragment count: use the break iterator selected for this field.
        breakIterator = getBreakIterator(fieldContext.field);
    }

    return new CustomUnifiedHighlighter(
        searcher,
        analyzer,
        offsetSource,
        passageFormatter,
        fieldContext.field.fieldOptions().boundaryScannerLocale(),
        breakIterator,
        fieldContext.context.getIndexName(),
        fieldContext.fieldName,
        fieldContext.query,
        fieldContext.field.fieldOptions().noMatchSize(),
        effectiveFragments,
        fieldMatcher(fieldContext),
        keywordIgnoreAbove,
        maxAnalyzedOffset
    );
}
Aggregations