Search in sources :

Example 1 with CustomPassageFormatter

use of org.apache.lucene.search.uhighlight.CustomPassageFormatter in project elasticsearch by elastic.

the class UnifiedHighlighter method highlight.

@Override
public HighlightField highlight(HighlighterContext highlighterContext) {
    FieldMapper fieldMapper = highlighterContext.mapper;
    SearchContextHighlight.Field field = highlighterContext.field;
    SearchContext context = highlighterContext.context;
    FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
    if (!hitContext.cache().containsKey(CACHE_KEY)) {
        hitContext.cache().put(CACHE_KEY, new HighlighterEntry());
    }
    HighlighterEntry highlighterEntry = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);
    MapperHighlighterEntry mapperHighlighterEntry = highlighterEntry.mappers.get(fieldMapper);
    if (mapperHighlighterEntry == null) {
        Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
        CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0], encoder);
        mapperHighlighterEntry = new MapperHighlighterEntry(passageFormatter);
    }
    List<Snippet> snippets = new ArrayList<>();
    int numberOfFragments;
    try {
        Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
        List<Object> fieldValues = HighlightUtils.loadFieldValues(field, fieldMapper, context, hitContext);
        fieldValues = fieldValues.stream().map(obj -> {
            if (obj instanceof BytesRef) {
                return fieldMapper.fieldType().valueForDisplay(obj).toString();
            } else {
                return obj;
            }
        }).collect(Collectors.toList());
        IndexSearcher searcher = new IndexSearcher(hitContext.reader());
        CustomUnifiedHighlighter highlighter;
        if (field.fieldOptions().numberOfFragments() == 0) {
            // we use a control char to separate values, which is the only char that the custom break iterator
            // breaks the text on, so we don't lose the distinction between the different values of a field and we
            // get back a snippet per value
            String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
            org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator breakIterator = new org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
            highlighter = new CustomUnifiedHighlighter(searcher, analyzer, mapperHighlighterEntry.passageFormatter, field.fieldOptions().boundaryScannerLocale(), breakIterator, fieldValue, field.fieldOptions().noMatchSize());
            // we are highlighting the whole content, one snippet per value
            numberOfFragments = fieldValues.size();
        } else {
            //using paragraph separator we make sure that each field value holds a discrete passage for highlighting
            String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
            BreakIterator bi = getBreakIterator(field);
            highlighter = new CustomUnifiedHighlighter(searcher, analyzer, mapperHighlighterEntry.passageFormatter, field.fieldOptions().boundaryScannerLocale(), bi, fieldValue, field.fieldOptions().noMatchSize());
            numberOfFragments = field.fieldOptions().numberOfFragments();
        }
        if (field.fieldOptions().requireFieldMatch()) {
            final String fieldName = highlighterContext.fieldName;
            highlighter.setFieldMatcher((name) -> fieldName.equals(name));
        } else {
            highlighter.setFieldMatcher((name) -> true);
        }
        Snippet[] fieldSnippets = highlighter.highlightField(highlighterContext.fieldName, highlighterContext.query, hitContext.docId(), numberOfFragments);
        for (Snippet fieldSnippet : fieldSnippets) {
            if (Strings.hasText(fieldSnippet.getText())) {
                snippets.add(fieldSnippet);
            }
        }
    } catch (IOException e) {
        throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
    }
    snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments());
    if (field.fieldOptions().scoreOrdered()) {
        //let's sort the snippets by score if needed
        CollectionUtil.introSort(snippets, (o1, o2) -> Double.compare(o2.getScore(), o1.getScore()));
    }
    String[] fragments = new String[snippets.size()];
    for (int i = 0; i < fragments.length; i++) {
        fragments[i] = snippets.get(i).getText();
    }
    if (fragments.length > 0) {
        return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
    }
    return null;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) ArrayList(java.util.ArrayList) SearchContext(org.elasticsearch.search.internal.SearchContext) Analyzer(org.apache.lucene.analysis.Analyzer) BreakIterator(java.text.BreakIterator) Encoder(org.apache.lucene.search.highlight.Encoder) FetchSubPhase(org.elasticsearch.search.fetch.FetchSubPhase) CustomPassageFormatter(org.apache.lucene.search.uhighlight.CustomPassageFormatter) BytesRef(org.apache.lucene.util.BytesRef) CustomUnifiedHighlighter(org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter) Snippet(org.apache.lucene.search.highlight.Snippet) IOException(java.io.IOException) FetchPhaseExecutionException(org.elasticsearch.search.fetch.FetchPhaseExecutionException) FieldMapper(org.elasticsearch.index.mapper.FieldMapper)

Aggregations

IOException (java.io.IOException)1 BreakIterator (java.text.BreakIterator)1 ArrayList (java.util.ArrayList)1 Analyzer (org.apache.lucene.analysis.Analyzer)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 Encoder (org.apache.lucene.search.highlight.Encoder)1 Snippet (org.apache.lucene.search.highlight.Snippet)1 CustomPassageFormatter (org.apache.lucene.search.uhighlight.CustomPassageFormatter)1 CustomUnifiedHighlighter (org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter)1 BytesRef (org.apache.lucene.util.BytesRef)1 FieldMapper (org.elasticsearch.index.mapper.FieldMapper)1 FetchPhaseExecutionException (org.elasticsearch.search.fetch.FetchPhaseExecutionException)1 FetchSubPhase (org.elasticsearch.search.fetch.FetchSubPhase)1 SearchContext (org.elasticsearch.search.internal.SearchContext)1