Search in sources :

Example 1 with CustomSeparatorBreakIterator

Use of org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator in the elasticsearch project by elastic.

The example is the highlight method of the PostingsHighlighter class.

/**
 * Highlights one field of the current hit with a {@link CustomPostingsHighlighter}.
 *
 * <p>The field values are loaded, merged into a single string and handed to the
 * highlighter. Snippets without text are dropped, the remainder is passed through
 * {@code filterSnippets} and, when the field options request score ordering,
 * sorted by descending score.
 *
 * @param highlighterContext supplies the field mapper, the highlight field options,
 *                           the search context, the hit context and the query
 * @return a {@link HighlightField} holding the snippet texts, or {@code null} when
 *         no snippet is left after filtering
 * @throws IllegalArgumentException     if {@code canHighlight} rejects the field mapper
 * @throws FetchPhaseExecutionException if an {@link IOException} is raised while loading
 *                                      the field values or highlighting
 */
@Override
public HighlightField highlight(HighlighterContext highlighterContext) {
    FieldMapper fieldMapper = highlighterContext.mapper;
    SearchContextHighlight.Field field = highlighterContext.field;
    if (canHighlight(fieldMapper) == false) {
        throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with positions and offsets in the postings list to be used with postings highlighter");
    }
    SearchContext context = highlighterContext.context;
    FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
    //lazily create the highlighter entry kept in the hit context cache under CACHE_KEY
    if (!hitContext.cache().containsKey(CACHE_KEY)) {
        hitContext.cache().put(CACHE_KEY, new HighlighterEntry());
    }
    HighlighterEntry highlighterEntry = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);
    MapperHighlighterEntry mapperHighlighterEntry = highlighterEntry.mappers.get(fieldMapper);
    if (mapperHighlighterEntry == null) {
        //NOTE(review): the entry built here is never stored back into highlighterEntry.mappers by this
        //method, so unless the map is populated elsewhere this lookup always misses - confirm whether
        //caching per field mapper is intended before adding a put
        Encoder encoder = field.fieldOptions().encoder().equals("html") ? Encoders.HTML : Encoders.DEFAULT;
        //only the first pre/post tag is handed to the passage formatter
        CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0], encoder);
        mapperHighlighterEntry = new MapperHighlighterEntry(passageFormatter);
    }
    List<Snippet> snippets = new ArrayList<>();
    int numberOfFragments;
    try {
        Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
        List<Object> fieldValues = HighlightUtils.loadFieldValues(field, fieldMapper, context, hitContext);
        CustomPostingsHighlighter highlighter;
        if (field.fieldOptions().numberOfFragments() == 0) {
            //we use a control char to separate values, which is the only char that the custom break iterator breaks the text on,
            //so we don't lose the distinction between the different values of a field and we get back a snippet per value
            String fieldValue = mergeFieldValues(fieldValues, HighlightUtils.NULL_SEPARATOR);
            CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator(HighlightUtils.NULL_SEPARATOR);
            highlighter = new CustomPostingsHighlighter(analyzer, mapperHighlighterEntry.passageFormatter, breakIterator, fieldValue, field.fieldOptions().noMatchSize() > 0);
            //we are highlighting the whole content, one snippet per value
            numberOfFragments = fieldValues.size();
        } else {
            //using paragraph separator we make sure that each field value holds a discrete passage for highlighting
            String fieldValue = mergeFieldValues(fieldValues, HighlightUtils.PARAGRAPH_SEPARATOR);
            highlighter = new CustomPostingsHighlighter(analyzer, mapperHighlighterEntry.passageFormatter, fieldValue, field.fieldOptions().noMatchSize() > 0);
            numberOfFragments = field.fieldOptions().numberOfFragments();
        }
        IndexSearcher searcher = new IndexSearcher(hitContext.reader());
        Snippet[] fieldSnippets = highlighter.highlightField(fieldMapper.fieldType().name(), highlighterContext.query, searcher, hitContext.docId(), numberOfFragments);
        //keep only the snippets that actually carry text
        for (Snippet fieldSnippet : fieldSnippets) {
            if (Strings.hasText(fieldSnippet.getText())) {
                snippets.add(fieldSnippet);
            }
        }
    } catch (IOException e) {
        throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
    }
    snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments());
    if (field.fieldOptions().scoreOrdered()) {
        //let's sort the snippets by score if needed, highest score first
        CollectionUtil.introSort(snippets, new Comparator<Snippet>() {

            @Override
            public int compare(Snippet o1, Snippet o2) {
                //Double.compare keeps the comparator a consistent total order even for NaN scores,
                //which the previous signum-of-difference form treated as equal to every other score
                return Double.compare(o2.getScore(), o1.getScore());
            }
        });
    }
    String[] fragments = new String[snippets.size()];
    for (int i = 0; i < fragments.length; i++) {
        fragments[i] = snippets.get(i).getText();
    }
    if (fragments.length > 0) {
        return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
    }
    //no snippet survived: callers get null rather than an empty highlight field
    return null;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) ArrayList(java.util.ArrayList) SearchContext(org.elasticsearch.search.internal.SearchContext) Analyzer(org.apache.lucene.analysis.Analyzer) CustomSeparatorBreakIterator(org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator) CustomPostingsHighlighter(org.apache.lucene.search.postingshighlight.CustomPostingsHighlighter) Encoder(org.apache.lucene.search.highlight.Encoder) FetchSubPhase(org.elasticsearch.search.fetch.FetchSubPhase) CustomPassageFormatter(org.apache.lucene.search.postingshighlight.CustomPassageFormatter) Snippet(org.apache.lucene.search.highlight.Snippet) IOException(java.io.IOException) FetchPhaseExecutionException(org.elasticsearch.search.fetch.FetchPhaseExecutionException) FieldMapper(org.elasticsearch.index.mapper.FieldMapper)

Aggregations

IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Analyzer (org.apache.lucene.analysis.Analyzer)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 Encoder (org.apache.lucene.search.highlight.Encoder)1 Snippet (org.apache.lucene.search.highlight.Snippet)1 CustomPassageFormatter (org.apache.lucene.search.postingshighlight.CustomPassageFormatter)1 CustomPostingsHighlighter (org.apache.lucene.search.postingshighlight.CustomPostingsHighlighter)1 CustomSeparatorBreakIterator (org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator)1 FieldMapper (org.elasticsearch.index.mapper.FieldMapper)1 FetchPhaseExecutionException (org.elasticsearch.search.fetch.FetchPhaseExecutionException)1 FetchSubPhase (org.elasticsearch.search.fetch.FetchSubPhase)1 SearchContext (org.elasticsearch.search.internal.SearchContext)1