Use of org.opensearch.search.fetch.FetchContext in project OpenSearch by opensearch-project.
From the class PercolatorHighlightSubFetchPhaseTests, method testHitsExecutionNeeded:
public void testHitsExecutionNeeded() {
    PercolateQuery percolateQuery = new PercolateQuery(
        "_name",
        ctx -> null,
        Collections.singletonList(new BytesArray("{}")),
        new MatchAllDocsQuery(),
        Mockito.mock(IndexSearcher.class),
        null,
        new MatchAllDocsQuery()
    );
    PercolatorHighlightSubFetchPhase subFetchPhase = new PercolatorHighlightSubFetchPhase(emptyMap());
    FetchContext fetchContext = mock(FetchContext.class);
    Mockito.when(fetchContext.highlight()).thenReturn(new SearchHighlightContext(Collections.emptyList()));
    Mockito.when(fetchContext.query()).thenReturn(new MatchAllDocsQuery());
    assertNull(subFetchPhase.getProcessor(fetchContext));
    Mockito.when(fetchContext.query()).thenReturn(percolateQuery);
    assertNotNull(subFetchPhase.getProcessor(fetchContext));
}
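The phase only returns a processor when the context's query tree contains a PercolateQuery. A minimal sketch of how the same stubbed context could exercise a nested query (an illustration, assuming the phase locates percolate queries inside compound queries; BooleanQuery and BooleanClause are standard Lucene classes, and this assertion is not part of the test above):

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(percolateQuery, BooleanClause.Occur.FILTER);
    Mockito.when(fetchContext.query()).thenReturn(builder.build());
    // If percolate queries are discovered by walking the query tree, the
    // wrapped query should still yield a highlight processor:
    assertNotNull(subFetchPhase.getProcessor(fetchContext));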
Use of org.opensearch.search.fetch.FetchContext in project OpenSearch by opensearch-project.
From the class PlainHighlighter, method highlight:
@Override
public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
    SearchHighlightContext.Field field = fieldContext.field;
    FetchContext context = fieldContext.context;
    FetchSubPhase.HitContext hitContext = fieldContext.hitContext;
    MappedFieldType fieldType = fieldContext.fieldType;
    Encoder encoder = field.fieldOptions().encoder().equals("html")
        ? HighlightUtils.Encoders.HTML
        : HighlightUtils.Encoders.DEFAULT;
    if (!fieldContext.cache.containsKey(CACHE_KEY)) {
        fieldContext.cache.put(CACHE_KEY, new HashMap<>());
    }
    @SuppressWarnings("unchecked")
    Map<MappedFieldType, org.apache.lucene.search.highlight.Highlighter> cache =
        (Map<MappedFieldType, org.apache.lucene.search.highlight.Highlighter>) fieldContext.cache.get(CACHE_KEY);
    org.apache.lucene.search.highlight.Highlighter entry = cache.get(fieldType);
    if (entry == null) {
        QueryScorer queryScorer = new CustomQueryScorer(
            fieldContext.query,
            field.fieldOptions().requireFieldMatch() ? fieldType.name() : null
        );
        queryScorer.setExpandMultiTermQuery(true);
        Fragmenter fragmenter;
        if (field.fieldOptions().numberOfFragments() == 0) {
            fragmenter = new NullFragmenter();
        } else if (field.fieldOptions().fragmenter() == null) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else if ("simple".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize());
        } else if ("span".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else {
            throw new IllegalArgumentException(
                "unknown fragmenter option [" + field.fieldOptions().fragmenter() + "] for the field [" + fieldContext.fieldName + "]"
            );
        }
        Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0]);
        entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer);
        entry.setTextFragmenter(fragmenter);
        // always highlight across all data
        entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
        cache.put(fieldType, entry);
    }
    // a HACK to make the highlighter do highlighting, even though it's using the single frag list builder
    int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments();
    ArrayList<TextFragment> fragsList = new ArrayList<>();
    List<Object> textsToHighlight;
    Analyzer analyzer = context.mapperService().documentMapper().mappers().indexAnalyzer();
    final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
    textsToHighlight = HighlightUtils.loadFieldValues(fieldType, context.getQueryShardContext(), hitContext, fieldContext.forceSource);
    for (Object textToHighlight : textsToHighlight) {
        String text = convertFieldValue(fieldType, textToHighlight);
        int textLength = text.length();
        if (textLength > maxAnalyzedOffset) {
            throw new IllegalArgumentException(
                "The length of [" + fieldContext.fieldName + "] field of [" + hitContext.hit().getId() + "] doc of ["
                    + context.getIndexName() + "] index has exceeded [" + maxAnalyzedOffset
                    + "] - maximum allowed to be analyzed for highlighting. This maximum can be set by changing the ["
                    + IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey() + "] index level setting. "
                    + "For large texts, indexing with offsets or term vectors, and highlighting "
                    + "with unified or fvh highlighter is recommended!"
            );
        }
        try (TokenStream tokenStream = analyzer.tokenStream(fieldType.name(), text)) {
            if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
                // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
                continue;
            }
            TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments);
            for (TextFragment bestTextFragment : bestTextFragments) {
                if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
                    fragsList.add(bestTextFragment);
                }
            }
        } catch (BytesRefHash.MaxBytesLengthExceededException e) {
            // this can happen if, for example, a field is not_analyzed and the ignore_above option is set.
            // the field will be ignored when indexing but the huge term is still in the source, and
            // the plain highlighter will parse the source and try to analyze it.
            // ignore and continue to the next value
        } catch (InvalidTokenOffsetsException e) {
            throw new IllegalArgumentException(e);
        }
    }
    if (field.fieldOptions().scoreOrdered()) {
        CollectionUtil.introSort(fragsList, (o1, o2) -> Math.round(o2.getScore() - o1.getScore()));
    }
    String[] fragments;
    // number_of_fragments is set to 0 but we have a multi-valued field
    if (field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) {
        fragments = new String[fragsList.size()];
        for (int i = 0; i < fragsList.size(); i++) {
            fragments[i] = fragsList.get(i).toString();
        }
    } else {
        // refine numberOfFragments if needed
        numberOfFragments = Math.min(fragsList.size(), numberOfFragments);
        fragments = new String[numberOfFragments];
        for (int i = 0; i < fragments.length; i++) {
            fragments[i] = fragsList.get(i).toString();
        }
    }
    if (fragments.length > 0) {
        return new HighlightField(fieldContext.fieldName, Text.convertFromStringArray(fragments));
    }
    int noMatchSize = fieldContext.field.fieldOptions().noMatchSize();
    if (noMatchSize > 0 && textsToHighlight.size() > 0) {
        // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
        String fieldContents = textsToHighlight.get(0).toString();
        int end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer, fieldType.name(), fieldContents);
        if (end > 0) {
            return new HighlightField(fieldContext.fieldName, new Text[] { new Text(fieldContents.substring(0, end)) });
        }
    }
    return null;
}
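Stripped of the OpenSearch plumbing, the cached entry above is a plain Lucene highlighter. A minimal standalone sketch of the same setup (the field name "body", the fragment size, and the fragment count are hypothetical placeholders; all classes are from org.apache.lucene.search.highlight):

    static String[] highlightPlain(Query query, Analyzer analyzer, String text) throws Exception {
        QueryScorer scorer = new QueryScorer(query, "body");
        scorer.setExpandMultiTermQuery(true);
        org.apache.lucene.search.highlight.Highlighter highlighter =
            new org.apache.lucene.search.highlight.Highlighter(
                new SimpleHTMLFormatter("<em>", "</em>"), new DefaultEncoder(), scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 100));
        highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
        try (TokenStream tokenStream = analyzer.tokenStream("body", text)) {
            // false: do not merge contiguous fragments; 5: maximum number of fragments
            TextFragment[] frags = highlighter.getBestTextFragments(tokenStream, text, false, 5);
            return java.util.Arrays.stream(frags)
                .filter(f -> f != null && f.getScore() > 0)
                .map(TextFragment::toString)
                .toArray(String[]::new);
        }
    }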
Use of org.opensearch.search.fetch.FetchContext in project OpenSearch by opensearch-project.
From the class PercolatorMatchedSlotSubFetchPhaseTests, method testHitsExecute:
public void testHitsExecute() throws Exception {
    try (Directory directory = newDirectory()) {
        // Need a one doc index:
        try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
            Document document = new Document();
            indexWriter.addDocument(document);
        }
        PercolatorMatchedSlotSubFetchPhase phase = new PercolatorMatchedSlotSubFetchPhase();
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            LeafReaderContext context = reader.leaves().get(0);
            // A match:
            {
                HitContext hit = new HitContext(new SearchHit(0), context, 0, new SourceLookup());
                PercolateQuery.QueryStore queryStore = ctx -> docId -> new TermQuery(new Term("field", "value"));
                MemoryIndex memoryIndex = new MemoryIndex();
                memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
                memoryIndex.addField(new NumericDocValuesField(SeqNoFieldMapper.PRIMARY_TERM_NAME, 0), null);
                PercolateQuery percolateQuery = new PercolateQuery(
                    "_name",
                    queryStore,
                    Collections.emptyList(),
                    new MatchAllDocsQuery(),
                    memoryIndex.createSearcher(),
                    null,
                    new MatchNoDocsQuery()
                );
                FetchContext sc = mock(FetchContext.class);
                when(sc.query()).thenReturn(percolateQuery);
                FetchSubPhaseProcessor processor = phase.getProcessor(sc);
                assertNotNull(processor);
                processor.process(hit);
                assertNotNull(hit.hit().field(PercolatorMatchedSlotSubFetchPhase.FIELD_NAME_PREFIX));
                assertEquals(0, (int) hit.hit().field(PercolatorMatchedSlotSubFetchPhase.FIELD_NAME_PREFIX).getValue());
            }
            // No match:
            {
                HitContext hit = new HitContext(new SearchHit(0), context, 0, new SourceLookup());
                PercolateQuery.QueryStore queryStore = ctx -> docId -> new TermQuery(new Term("field", "value"));
                MemoryIndex memoryIndex = new MemoryIndex();
                memoryIndex.addField("field", "value1", new WhitespaceAnalyzer());
                memoryIndex.addField(new NumericDocValuesField(SeqNoFieldMapper.PRIMARY_TERM_NAME, 0), null);
                PercolateQuery percolateQuery = new PercolateQuery(
                    "_name",
                    queryStore,
                    Collections.emptyList(),
                    new MatchAllDocsQuery(),
                    memoryIndex.createSearcher(),
                    null,
                    new MatchNoDocsQuery()
                );
                FetchContext sc = mock(FetchContext.class);
                when(sc.query()).thenReturn(percolateQuery);
                FetchSubPhaseProcessor processor = phase.getProcessor(sc);
                assertNotNull(processor);
                processor.process(hit);
                assertNull(hit.hit().field(PercolatorMatchedSlotSubFetchPhase.FIELD_NAME_PREFIX));
            }
            // No query:
            {
                HitContext hit = new HitContext(new SearchHit(0), context, 0, new SourceLookup());
                PercolateQuery.QueryStore queryStore = ctx -> docId -> null;
                MemoryIndex memoryIndex = new MemoryIndex();
                memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
                memoryIndex.addField(new NumericDocValuesField(SeqNoFieldMapper.PRIMARY_TERM_NAME, 0), null);
                PercolateQuery percolateQuery = new PercolateQuery(
                    "_name",
                    queryStore,
                    Collections.emptyList(),
                    new MatchAllDocsQuery(),
                    memoryIndex.createSearcher(),
                    null,
                    new MatchNoDocsQuery()
                );
                FetchContext sc = mock(FetchContext.class);
                when(sc.query()).thenReturn(percolateQuery);
                FetchSubPhaseProcessor processor = phase.getProcessor(sc);
                assertNotNull(processor);
                processor.process(hit);
                assertNull(hit.hit().field(PercolatorMatchedSlotSubFetchPhase.FIELD_NAME_PREFIX));
            }
        }
    }
}
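The three blocks differ only in what the in-memory document contains versus what the stored query looks for. A standalone sketch of that core check, outside the percolator machinery (IndexSearcher.count is a standard Lucene API; the assertions are illustrative, not part of the test above):

    MemoryIndex memoryIndex = new MemoryIndex();
    memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
    IndexSearcher searcher = memoryIndex.createSearcher();
    // The stored query's term is present in the in-memory document ("A match"):
    assertEquals(1, searcher.count(new TermQuery(new Term("field", "value"))));
    // A term the document does not contain finds nothing, which is why the
    // second block (indexed as "value1", queried as "value") records no slot:
    assertEquals(0, searcher.count(new TermQuery(new Term("field", "value1"))));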
Use of org.opensearch.search.fetch.FetchContext in project OpenSearch by opensearch-project.
From the class FetchDocValuesPhase, method getProcessor:
@Override
public FetchSubPhaseProcessor getProcessor(FetchContext context) {
    FetchDocValuesContext dvContext = context.docValuesContext();
    if (dvContext == null) {
        return null;
    }
    if (context.docValuesContext().fields().stream().map(f -> f.format).anyMatch(USE_DEFAULT_FORMAT::equals)) {
        DEPRECATION_LOGGER.deprecate(
            "explicit_default_format",
            "[" + USE_DEFAULT_FORMAT + "] is a special format that was only used to ease the transition to 7.x. "
                + "It has become the default and shouldn't be set explicitly anymore."
        );
    }
    /*
     * It's tempting to swap this to a `Map` but that'd break backwards
     * compatibility because we support fetching the same field multiple
     * times with different configuration. That isn't possible with a `Map`.
     */
    List<DocValueField> fields = new ArrayList<>();
    for (FieldAndFormat fieldAndFormat : context.docValuesContext().fields()) {
        MappedFieldType ft = context.mapperService().fieldType(fieldAndFormat.field);
        if (ft == null) {
            continue;
        }
        String format = USE_DEFAULT_FORMAT.equals(fieldAndFormat.format) ? null : fieldAndFormat.format;
        ValueFetcher fetcher = new DocValueFetcher(ft.docValueFormat(format, null), context.searchLookup().doc().getForField(ft));
        fields.add(new DocValueField(fieldAndFormat.field, fetcher));
    }
    return new FetchSubPhaseProcessor() {
        @Override
        public void setNextReader(LeafReaderContext readerContext) {
            for (DocValueField f : fields) {
                f.fetcher.setNextReader(readerContext);
            }
        }

        @Override
        public void process(HitContext hit) throws IOException {
            for (DocValueField f : fields) {
                DocumentField hitField = hit.hit().field(f.field);
                if (hitField == null) {
                    hitField = new DocumentField(f.field, new ArrayList<>(2));
                    // even if we request doc values of a meta-field (e.g. _routing),
                    // docValues fields will still be document fields, put under the "fields" section of a hit.
                    hit.hit().setDocumentField(f.field, hitField);
                }
                hitField.getValues().addAll(f.fetcher.fetchValues(hit.sourceLookup()));
            }
        }
    };
}
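The returned processor is segment-aware: the fetch phase calls setNextReader once per leaf and then process once per hit in that leaf. A simplified sketch of that contract (the driving loop is an assumption about how FetchPhase uses the processor, and hitsInLeaf is a hypothetical helper, not an OpenSearch API):

    FetchSubPhaseProcessor processor = phase.getProcessor(fetchContext);
    if (processor != null) {
        for (LeafReaderContext leaf : reader.leaves()) {
            processor.setNextReader(leaf);             // point the doc-value fetchers at this segment
            for (HitContext hit : hitsInLeaf(leaf)) {  // hitsInLeaf is hypothetical
                processor.process(hit);                // appends fetched values to the hit's fields
            }
        }
    }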
Use of org.opensearch.search.fetch.FetchContext in project OpenSearch by opensearch-project.
From the class FetchSourcePhaseTests, method hitExecuteMultiple:
private HitContext hitExecuteMultiple(
    XContentBuilder source,
    boolean fetchSource,
    String[] includes,
    String[] excludes,
    SearchHit.NestedIdentity nestedIdentity
) throws IOException {
    FetchSourceContext fetchSourceContext = new FetchSourceContext(fetchSource, includes, excludes);
    FetchContext fetchContext = mock(FetchContext.class);
    when(fetchContext.fetchSourceContext()).thenReturn(fetchSourceContext);
    when(fetchContext.getIndexName()).thenReturn("index");
    final SearchHit searchHit = new SearchHit(1, null, nestedIdentity, null, null);
    // We don't need a real index, just a LeafReaderContext, which cannot be mocked.
    MemoryIndex index = new MemoryIndex();
    LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);
    HitContext hitContext = new HitContext(searchHit, leafReaderContext, 1, new SourceLookup());
    hitContext.sourceLookup().setSource(source == null ? null : BytesReference.bytes(source));
    FetchSourcePhase phase = new FetchSourcePhase();
    FetchSubPhaseProcessor processor = phase.getProcessor(fetchContext);
    if (fetchSource == false) {
        assertNull(processor);
    } else {
        assertNotNull(processor);
        processor.process(hitContext);
    }
    return hitContext;
}
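A call to this helper could look like the following (a hypothetical usage sketch, not taken from the test class; the field names are illustrative, and the assertion assumes FetchSourcePhase writes the filtered source back onto the hit):

    XContentBuilder source = JsonXContent.contentBuilder()
        .startObject()
        .field("field1", "value")
        .field("field2", "value2")
        .endObject();
    HitContext hitContext = hitExecuteMultiple(source, true, new String[] { "field1" }, null, null);
    // Only the included field should survive source filtering:
    assertEquals(Collections.singletonMap("field1", "value"), hitContext.hit().getSourceAsMap());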