Search in sources :

Example 1 with FragmentsBuilder

Use of org.apache.lucene.search.vectorhighlight.FragmentsBuilder in the project OpenSearch by opensearch-project.

From the class FastVectorHighlighter, method highlight.

/**
 * Highlights a single field of a single hit with Lucene's fast vector highlighter.
 *
 * <p>Per-request state is kept in {@code fieldContext.cache} under {@code CACHE_KEY}: one
 * {@code HighlighterEntry} holding the shared Lucene highlighter plus one
 * {@code FieldHighlightEntry} per field type (rewritten query, frag list builder and
 * fragments-builder factory). Entries are created on first use and reused afterwards.
 *
 * @param fieldContext the field, hit, query and cache to highlight with
 * @return the highlighted fragments; if the query produced none and {@code noMatchSize} is
 *         positive, a fragment built from offset 0 to {@code noMatchSize}; otherwise {@code null}
 * @throws IllegalArgumentException if {@code canHighlight} rejects the field type
 * @throws IOException declared by the signature; presumably raised by the underlying
 *         reader/highlighter calls
 */
@Override
public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
    final SearchHighlightContext.Field field = fieldContext.field;
    final FetchSubPhase.HitContext hitContext = fieldContext.hitContext;
    final MappedFieldType fieldType = fieldContext.fieldType;
    final boolean forceSource = fieldContext.forceSource;

    if (!canHighlight(fieldType)) {
        throw new IllegalArgumentException("the field [" + fieldContext.fieldName + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
    }

    final SearchHighlightContext.FieldOptions options = field.fieldOptions();
    final Encoder encoder;
    if (options.encoder().equals("html")) {
        encoder = HighlightUtils.Encoders.HTML;
    } else {
        encoder = HighlightUtils.Encoders.DEFAULT;
    }

    // Lazily create the highlighter-wide cache entry shared by every field of this request.
    if (fieldContext.cache.containsKey(CACHE_KEY) == false) {
        fieldContext.cache.put(CACHE_KEY, new HighlighterEntry());
    }
    final HighlighterEntry highlighterCache = (HighlighterEntry) fieldContext.cache.get(CACHE_KEY);

    FieldHighlightEntry fieldEntry = highlighterCache.fields.get(fieldType);
    if (fieldEntry == null) {
        // First time this field type is seen: build and remember everything that can be reused.
        final FragListBuilder fragListBuilder;
        if (options.numberOfFragments() == 0) {
            fragListBuilder = new SingleFragListBuilder();
        } else if (options.fragmentOffset() == -1) {
            fragListBuilder = new SimpleFragListBuilder();
        } else {
            fragListBuilder = new SimpleFragListBuilder(options.fragmentOffset());
        }
        final Function<SourceLookup, FragmentsBuilder> builderFactory = fragmentsBuilderSupplier(field, fieldType, forceSource);

        fieldEntry = new FieldHighlightEntry();
        final boolean requireFieldMatch = options.requireFieldMatch();
        /*
         * We use the top level reader to rewrite the query against all readers,
         * and we cache it across hits (and across readers...).
         */
        final CustomFieldQuery rewrittenQuery = new CustomFieldQuery(fieldContext.query, hitContext.topLevelReader(), true, requireFieldMatch);
        if (requireFieldMatch) {
            fieldEntry.fieldMatchFieldQuery = rewrittenQuery;
        } else {
            fieldEntry.noFieldMatchFieldQuery = rewrittenQuery;
        }
        fieldEntry.fragListBuilder = fragListBuilder;
        fieldEntry.fragmentsBuilderSupplier = builderFactory;

        if (highlighterCache.fvh == null) {
            // Parameters to FVH are not required since:
            // the first two booleans (phrase and fieldMatch) are not relevant because they are set
            // on the CustomFieldQuery, and fragment builders are used explicitly.
            highlighterCache.fvh = new org.apache.lucene.search.vectorhighlight.FastVectorHighlighter();
        }
        CustomFieldQuery.highlightFilters.set(options.highlightFilter());
        highlighterCache.fields.put(fieldType, fieldEntry);
    }

    final FieldQuery fieldQuery = options.requireFieldMatch() ? fieldEntry.fieldMatchFieldQuery : fieldEntry.noFieldMatchFieldQuery;
    highlighterCache.fvh.setPhraseLimit(options.phraseLimit());

    final FragmentsBuilder fragmentsBuilder = fieldEntry.fragmentsBuilderSupplier.apply(hitContext.sourceLookup());

    // A HACK to make the highlighter do highlighting even though it is using the single frag list
    // builder: with zero requested fragments both limits are lifted to Integer.MAX_VALUE.
    final boolean zeroFragmentsRequested = options.numberOfFragments() == 0;
    final int numberOfFragments = zeroFragmentsRequested ? Integer.MAX_VALUE : options.numberOfFragments();
    final int fragmentCharSize = zeroFragmentsRequested ? Integer.MAX_VALUE : options.fragmentCharSize();

    String[] fragments;
    // Only send matched fields if they were requested, to save time.
    if (options.matchedFields() == null || options.matchedFields().isEmpty()) {
        fragments = highlighterCache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), fieldType.name(), fragmentCharSize, numberOfFragments, fieldEntry.fragListBuilder, fragmentsBuilder, options.preTags(), options.postTags(), encoder);
    } else {
        fragments = highlighterCache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), fieldType.name(), options.matchedFields(), fragmentCharSize, numberOfFragments, fieldEntry.fragListBuilder, fragmentsBuilder, options.preTags(), options.postTags(), encoder);
    }
    if (!CollectionUtils.isEmpty(fragments)) {
        return new HighlightField(fieldContext.fieldName, Text.convertFromStringArray(fragments));
    }

    final int noMatchSize = options.noMatchSize();
    if (noMatchSize <= 0) {
        return null;
    }

    // Nothing matched: request a single fragment spanning 0..noMatchSize from the normal
    // fragmentsBuilder.
    final FieldFragList noMatchFragList = new SimpleFieldFragList(-1);
    noMatchFragList.add(0, noMatchSize, Collections.emptyList());
    fragments = fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), fieldType.name(), noMatchFragList, 1, options.preTags(), options.postTags(), encoder);
    if (CollectionUtils.isEmpty(fragments)) {
        return null;
    }
    return new HighlightField(fieldContext.fieldName, Text.convertFromStringArray(fragments));
}
Also used : SimpleFieldFragList(org.apache.lucene.search.vectorhighlight.SimpleFieldFragList) SingleFragListBuilder(org.apache.lucene.search.vectorhighlight.SingleFragListBuilder) SimpleFragListBuilder(org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder) FragListBuilder(org.apache.lucene.search.vectorhighlight.FragListBuilder) CustomFieldQuery(org.apache.lucene.search.vectorhighlight.CustomFieldQuery) Encoder(org.apache.lucene.search.highlight.Encoder) MappedFieldType(org.opensearch.index.mapper.MappedFieldType) FetchSubPhase(org.opensearch.search.fetch.FetchSubPhase) CustomFieldQuery(org.apache.lucene.search.vectorhighlight.CustomFieldQuery) FieldQuery(org.apache.lucene.search.vectorhighlight.FieldQuery) SourceLookup(org.opensearch.search.lookup.SourceLookup) SimpleFragListBuilder(org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder) Field(org.opensearch.search.fetch.subphase.highlight.SearchHighlightContext.Field) FragmentsBuilder(org.apache.lucene.search.vectorhighlight.FragmentsBuilder) ScoreOrderFragmentsBuilder(org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder) BaseFragmentsBuilder(org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder) FieldFragList(org.apache.lucene.search.vectorhighlight.FieldFragList) SimpleFieldFragList(org.apache.lucene.search.vectorhighlight.SimpleFieldFragList) SingleFragListBuilder(org.apache.lucene.search.vectorhighlight.SingleFragListBuilder)

Example 2 with FragmentsBuilder

Use of org.apache.lucene.search.vectorhighlight.FragmentsBuilder in the project OpenSearch by opensearch-project.

From the class FastVectorHighlighter, method fragmentsBuilderSupplier.

/**
 * Builds the factory that produces a {@link FragmentsBuilder} for each hit.
 *
 * <p>Two decisions select the concrete builder:
 * <ul>
 *   <li>stored field vs. source: the non-source builders are used only when
 *       {@code forceSource} is false and {@code fieldType.isStored()} is true; otherwise the
 *       {@code Source*} variants are used and receive the hit's {@link SourceLookup};</li>
 *   <li>score ordering: the score-ordered builders are used when a non-zero number of
 *       fragments is requested and {@code scoreOrdered()} is set.</li>
 * </ul>
 * Every builder produced has {@code setDiscreteMultiValueHighlighting(termVectorMultiValue)}
 * applied before it is returned.
 *
 * @param field       the highlight field whose options and boundary scanner are used
 * @param fieldType   the mapped type of the field being highlighted
 * @param forceSource when true, the source-based builders are always used
 * @return a function from a hit's source lookup to a configured fragments builder
 */
private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(SearchHighlightContext.Field field, MappedFieldType fieldType, boolean forceSource) {
    final BoundaryScanner boundaryScanner = getBoundaryScanner(field);
    final FieldOptions options = field.fieldOptions();

    final boolean readStoredField = !forceSource && fieldType.isStored();
    final boolean scoreOrdered = options.numberOfFragments() != 0 && options.scoreOrdered();

    final Function<SourceLookup, BaseFragmentsBuilder> rawFactory;
    if (readStoredField && scoreOrdered) {
        rawFactory = unused -> new ScoreOrderFragmentsBuilder(options.preTags(), options.postTags(), boundaryScanner);
    } else if (readStoredField) {
        rawFactory = unused -> new SimpleFragmentsBuilder(fieldType, options.preTags(), options.postTags(), boundaryScanner);
    } else if (scoreOrdered) {
        rawFactory = sourceLookup -> new SourceScoreOrderFragmentsBuilder(fieldType, sourceLookup, options.preTags(), options.postTags(), boundaryScanner);
    } else {
        rawFactory = sourceLookup -> new SourceSimpleFragmentsBuilder(fieldType, sourceLookup, options.preTags(), options.postTags(), boundaryScanner);
    }

    // Wrap the raw factory so every builder gets the multi-value setting applied on creation.
    return sourceLookup -> {
        final BaseFragmentsBuilder configured = rawFactory.apply(sourceLookup);
        configured.setDiscreteMultiValueHighlighting(termVectorMultiValue);
        return configured;
    };
}
Also used : HashMap(java.util.HashMap) CustomFieldQuery(org.apache.lucene.search.vectorhighlight.CustomFieldQuery) Function(java.util.function.Function) FieldFragList(org.apache.lucene.search.vectorhighlight.FieldFragList) SourceLookup(org.opensearch.search.lookup.SourceLookup) SingleFragListBuilder(org.apache.lucene.search.vectorhighlight.SingleFragListBuilder) SimpleFragListBuilder(org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder) Locale(java.util.Locale) Map(java.util.Map) BoundaryScanner(org.apache.lucene.search.vectorhighlight.BoundaryScanner) BreakIteratorBoundaryScanner(org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner) SimpleBoundaryScanner(org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner) FragmentsBuilder(org.apache.lucene.search.vectorhighlight.FragmentsBuilder) FetchSubPhase(org.opensearch.search.fetch.FetchSubPhase) Setting(org.opensearch.common.settings.Setting) CollectionUtils(org.opensearch.common.util.CollectionUtils) ScoreOrderFragmentsBuilder(org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder) MappedFieldType(org.opensearch.index.mapper.MappedFieldType) TextSearchInfo(org.opensearch.index.mapper.TextSearchInfo) FieldOptions(org.opensearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions) Settings(org.opensearch.common.settings.Settings) IOException(java.io.IOException) BaseFragmentsBuilder(org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder) FieldQuery(org.apache.lucene.search.vectorhighlight.FieldQuery) Encoder(org.apache.lucene.search.highlight.Encoder) BreakIterator(java.text.BreakIterator) Field(org.opensearch.search.fetch.subphase.highlight.SearchHighlightContext.Field) FragListBuilder(org.apache.lucene.search.vectorhighlight.FragListBuilder) Collections(java.util.Collections) SimpleFieldFragList(org.apache.lucene.search.vectorhighlight.SimpleFieldFragList) Text(org.opensearch.common.text.Text) SourceLookup(org.opensearch.search.lookup.SourceLookup) 
BoundaryScanner(org.apache.lucene.search.vectorhighlight.BoundaryScanner) BreakIteratorBoundaryScanner(org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner) SimpleBoundaryScanner(org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner) FieldOptions(org.opensearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions) BaseFragmentsBuilder(org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder) ScoreOrderFragmentsBuilder(org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder)

Aggregations

Encoder (org.apache.lucene.search.highlight.Encoder)2 BaseFragmentsBuilder (org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder)2 CustomFieldQuery (org.apache.lucene.search.vectorhighlight.CustomFieldQuery)2 FieldFragList (org.apache.lucene.search.vectorhighlight.FieldFragList)2 FieldQuery (org.apache.lucene.search.vectorhighlight.FieldQuery)2 FragListBuilder (org.apache.lucene.search.vectorhighlight.FragListBuilder)2 FragmentsBuilder (org.apache.lucene.search.vectorhighlight.FragmentsBuilder)2 ScoreOrderFragmentsBuilder (org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder)2 SimpleFieldFragList (org.apache.lucene.search.vectorhighlight.SimpleFieldFragList)2 SimpleFragListBuilder (org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder)2 SingleFragListBuilder (org.apache.lucene.search.vectorhighlight.SingleFragListBuilder)2 MappedFieldType (org.opensearch.index.mapper.MappedFieldType)2 FetchSubPhase (org.opensearch.search.fetch.FetchSubPhase)2 Field (org.opensearch.search.fetch.subphase.highlight.SearchHighlightContext.Field)2 SourceLookup (org.opensearch.search.lookup.SourceLookup)2 IOException (java.io.IOException)1 BreakIterator (java.text.BreakIterator)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 Locale (java.util.Locale)1