Use of org.apache.lucene.search.vectorhighlight.FragmentsBuilder in the OpenSearch project by opensearch-project.
Class FastVectorHighlighter, method highlight.
/**
 * Highlights a single field of the current hit using Lucene's fast vector highlighter.
 * <p>
 * Per-field state (the rewritten field query, the fragment list builder and the fragments builder
 * supplier) is created the first time a field type is seen and stored in {@code fieldContext.cache}
 * under {@code CACHE_KEY}, so later calls with the same cache reuse it.
 *
 * @param fieldContext carries the field to highlight, the hit, the query and the shared cache
 * @return the highlighted fragments if any were produced; otherwise, when {@code noMatchSize} is
 *         positive, a fragment requested for offsets 0 to {@code noMatchSize}; otherwise {@code null}
 * @throws IllegalArgumentException if {@code canHighlight} rejects the field type
 * @throws IOException if one of the highlighting calls made here fails with it
 */
@Override
public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
    final SearchHighlightContext.Field field = fieldContext.field;
    final FetchSubPhase.HitContext hitContext = fieldContext.hitContext;
    final MappedFieldType fieldType = fieldContext.fieldType;
    final boolean forceSource = fieldContext.forceSource;

    if (canHighlight(fieldType) == false) {
        throw new IllegalArgumentException(
            "the field ["
                + fieldContext.fieldName
                + "] should be indexed with term vector with position offsets to be used with fast vector highlighter"
        );
    }

    final Encoder encoder;
    if (field.fieldOptions().encoder().equals("html")) {
        encoder = HighlightUtils.Encoders.HTML;
    } else {
        encoder = HighlightUtils.Encoders.DEFAULT;
    }

    // Create the shared highlighter entry on first use.
    if (fieldContext.cache.containsKey(CACHE_KEY) == false) {
        fieldContext.cache.put(CACHE_KEY, new HighlighterEntry());
    }
    final HighlighterEntry highlighterEntry = (HighlighterEntry) fieldContext.cache.get(CACHE_KEY);

    FieldHighlightEntry fieldEntry = highlighterEntry.fields.get(fieldType);
    if (fieldEntry == null) {
        // Zero requested fragments means "highlight the whole field" via the single frag list builder.
        final FragListBuilder listBuilder;
        if (field.fieldOptions().numberOfFragments() == 0) {
            listBuilder = new SingleFragListBuilder();
        } else if (field.fieldOptions().fragmentOffset() == -1) {
            listBuilder = new SimpleFragListBuilder();
        } else {
            listBuilder = new SimpleFragListBuilder(field.fieldOptions().fragmentOffset());
        }
        final Function<SourceLookup, FragmentsBuilder> builderSupplier = fragmentsBuilderSupplier(field, fieldType, forceSource);

        fieldEntry = new FieldHighlightEntry();

        // The top level reader is used to rewrite the query against all readers,
        // which lets us cache it across hits (and across readers...).
        final boolean requireFieldMatch = field.fieldOptions().requireFieldMatch();
        final CustomFieldQuery rewrittenQuery = new CustomFieldQuery(
            fieldContext.query,
            hitContext.topLevelReader(),
            true,
            requireFieldMatch
        );
        if (requireFieldMatch) {
            fieldEntry.fieldMatchFieldQuery = rewrittenQuery;
        } else {
            fieldEntry.noFieldMatchFieldQuery = rewrittenQuery;
        }

        fieldEntry.fragListBuilder = listBuilder;
        fieldEntry.fragmentsBuilderSupplier = builderSupplier;

        if (highlighterEntry.fvh == null) {
            // Constructor parameters for the FVH are not required:
            // the first two booleans (phrase and fieldMatch) are set on the CustomFieldQuery,
            // and the fragment builders are passed explicitly on every call.
            highlighterEntry.fvh = new org.apache.lucene.search.vectorhighlight.FastVectorHighlighter();
        }
        CustomFieldQuery.highlightFilters.set(field.fieldOptions().highlightFilter());
        highlighterEntry.fields.put(fieldType, fieldEntry);
    }

    final FieldQuery fieldQuery = field.fieldOptions().requireFieldMatch()
        ? fieldEntry.fieldMatchFieldQuery
        : fieldEntry.noFieldMatchFieldQuery;
    highlighterEntry.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());

    final FragmentsBuilder fragmentsBuilder = fieldEntry.fragmentsBuilderSupplier.apply(hitContext.sourceLookup());

    // A HACK to make the highlighter do highlighting even though it is using the single frag list builder:
    // with zero requested fragments both limits are lifted to Integer.MAX_VALUE.
    final int numberOfFragments;
    final int fragmentCharSize;
    if (field.fieldOptions().numberOfFragments() == 0) {
        numberOfFragments = Integer.MAX_VALUE;
        fragmentCharSize = Integer.MAX_VALUE;
    } else {
        numberOfFragments = field.fieldOptions().numberOfFragments();
        fragmentCharSize = field.fieldOptions().fragmentCharSize();
    }

    // Only send matched fields if they were requested, to save time.
    final boolean matchedFieldsRequested = field.fieldOptions().matchedFields() != null
        && field.fieldOptions().matchedFields().isEmpty() == false;
    final String[] fragments;
    if (matchedFieldsRequested) {
        fragments = highlighterEntry.fvh.getBestFragments(
            fieldQuery,
            hitContext.reader(),
            hitContext.docId(),
            fieldType.name(),
            field.fieldOptions().matchedFields(),
            fragmentCharSize,
            numberOfFragments,
            fieldEntry.fragListBuilder,
            fragmentsBuilder,
            field.fieldOptions().preTags(),
            field.fieldOptions().postTags(),
            encoder
        );
    } else {
        fragments = highlighterEntry.fvh.getBestFragments(
            fieldQuery,
            hitContext.reader(),
            hitContext.docId(),
            fieldType.name(),
            fragmentCharSize,
            numberOfFragments,
            fieldEntry.fragListBuilder,
            fragmentsBuilder,
            field.fieldOptions().preTags(),
            field.fieldOptions().postTags(),
            encoder
        );
    }
    if (CollectionUtils.isEmpty(fragments) == false) {
        return new HighlightField(fieldContext.fieldName, Text.convertFromStringArray(fragments));
    }

    final int noMatchSize = field.fieldOptions().noMatchSize();
    if (noMatchSize <= 0) {
        return null;
    }

    // Nothing matched: request a single fragment spanning 0..noMatchSize from the normal fragmentsBuilder.
    final FieldFragList noMatchFragList = new SimpleFieldFragList(-1);
    noMatchFragList.add(0, noMatchSize, Collections.emptyList());
    final String[] noMatchFragments = fragmentsBuilder.createFragments(
        hitContext.reader(),
        hitContext.docId(),
        fieldType.name(),
        noMatchFragList,
        1,
        field.fieldOptions().preTags(),
        field.fieldOptions().postTags(),
        encoder
    );
    return CollectionUtils.isEmpty(noMatchFragments)
        ? null
        : new HighlightField(fieldContext.fieldName, Text.convertFromStringArray(noMatchFragments));
}
Use of org.apache.lucene.search.vectorhighlight.FragmentsBuilder in the OpenSearch project by opensearch-project.
Class FastVectorHighlighter, method fragmentsBuilderSupplier.
/**
 * Creates a factory that produces a new fragments builder for each {@link SourceLookup} it is given.
 * <p>
 * The builder variant is chosen once, when this method runs: the field is read as stored (the lookup
 * is ignored) when source is not forced and the field type reports itself as stored; otherwise a
 * source-based builder receives the lookup. The score-ordered variants are used when the number of
 * fragments is non-zero and score ordering is requested. Every builder produced has its discrete
 * multi-value highlighting flag set from {@code termVectorMultiValue}.
 *
 * @param field       the highlight field whose options and boundary scanner configure the builders
 * @param fieldType   the mapped type of the field being highlighted
 * @param forceSource whether the source-based builders must be used even for a stored field
 * @return a function mapping a source lookup to a configured fragments builder
 */
private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(
    SearchHighlightContext.Field field,
    MappedFieldType fieldType,
    boolean forceSource
) {
    final BoundaryScanner scanner = getBoundaryScanner(field);
    final FieldOptions opts = field.fieldOptions();

    final boolean useStoredField = forceSource == false && fieldType.isStored();
    final boolean orderByScore = opts.numberOfFragments() != 0 && opts.scoreOrdered();

    final Function<SourceLookup, BaseFragmentsBuilder> factory;
    if (useStoredField && orderByScore) {
        factory = unusedLookup -> new ScoreOrderFragmentsBuilder(opts.preTags(), opts.postTags(), scanner);
    } else if (useStoredField) {
        factory = unusedLookup -> new SimpleFragmentsBuilder(fieldType, opts.preTags(), opts.postTags(), scanner);
    } else if (orderByScore) {
        factory = sourceLookup -> new SourceScoreOrderFragmentsBuilder(
            fieldType,
            sourceLookup,
            opts.preTags(),
            opts.postTags(),
            scanner
        );
    } else {
        factory = sourceLookup -> new SourceSimpleFragmentsBuilder(
            fieldType,
            sourceLookup,
            opts.preTags(),
            opts.postTags(),
            scanner
        );
    }

    // Wrap the chosen factory so each builder is configured before it is handed out.
    return sourceLookup -> {
        final BaseFragmentsBuilder fragmentsBuilder = factory.apply(sourceLookup);
        fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
        return fragmentsBuilder;
    };
}
Aggregations