Search in sources:

Example 1 with FetchContext

Use of org.opensearch.search.fetch.FetchContext in the OpenSearch project by opensearch-project.

From the class PercolatorHighlightSubFetchPhaseTests, the method testHitsExecutionNeeded:

public void testHitsExecutionNeeded() {
    // The highlight sub fetch phase should only produce a processor when the
    // search query is a percolate query; otherwise there is nothing to highlight.
    PercolatorHighlightSubFetchPhase phase = new PercolatorHighlightSubFetchPhase(emptyMap());
    FetchContext context = mock(FetchContext.class);
    Mockito.when(context.highlight()).thenReturn(new SearchHighlightContext(Collections.emptyList()));
    // Plain match-all query: no percolator highlighting is needed.
    Mockito.when(context.query()).thenReturn(new MatchAllDocsQuery());
    assertNull(phase.getProcessor(context));
    // Percolate query: a processor must be returned.
    PercolateQuery percolateQuery = new PercolateQuery("_name", ctx -> null, Collections.singletonList(new BytesArray("{}")), new MatchAllDocsQuery(), Mockito.mock(IndexSearcher.class), null, new MatchAllDocsQuery());
    Mockito.when(context.query()).thenReturn(percolateQuery);
    assertNotNull(phase.getProcessor(context));
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FetchContext(org.opensearch.search.fetch.FetchContext) BytesArray(org.opensearch.common.bytes.BytesArray) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) SearchHighlightContext(org.opensearch.search.fetch.subphase.highlight.SearchHighlightContext)

Example 2 with FetchContext

Use of org.opensearch.search.fetch.FetchContext in the OpenSearch project by opensearch-project.

From the class PlainHighlighter, the method highlight:

/**
 * Highlights a single field using Lucene's "plain" highlighter, which re-analyzes the
 * field's source text on the fly and scores candidate fragments against the query.
 *
 * @param fieldContext carries the field options, fetch context, hit being processed and a per-hit cache
 * @return the best-scoring fragments wrapped in a {@link HighlightField}, a leading
 *         "no match" excerpt when one was requested, or {@code null} when there is nothing to return
 * @throws IOException if loading the field values or reading the token stream fails
 */
@Override
public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
    SearchHighlightContext.Field field = fieldContext.field;
    FetchContext context = fieldContext.context;
    FetchSubPhase.HitContext hitContext = fieldContext.hitContext;
    MappedFieldType fieldType = fieldContext.fieldType;
    Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
    // Lazily create a per-hit cache of Lucene Highlighter instances keyed by field type,
    // so multiple values / repeated fields in the same hit reuse the same setup.
    if (!fieldContext.cache.containsKey(CACHE_KEY)) {
        fieldContext.cache.put(CACHE_KEY, new HashMap<>());
    }
    @SuppressWarnings("unchecked") Map<MappedFieldType, org.apache.lucene.search.highlight.Highlighter> cache = (Map<MappedFieldType, org.apache.lucene.search.highlight.Highlighter>) fieldContext.cache.get(CACHE_KEY);
    org.apache.lucene.search.highlight.Highlighter entry = cache.get(fieldType);
    if (entry == null) {
        QueryScorer queryScorer = new CustomQueryScorer(fieldContext.query, field.fieldOptions().requireFieldMatch() ? fieldType.name() : null);
        queryScorer.setExpandMultiTermQuery(true);
        // Choose the fragmenter from the request options: 0 fragments means "highlight the
        // whole value" (NullFragmenter); otherwise "simple" or "span" (the default when no
        // fragmenter was named) decide how fragment boundaries are picked.
        Fragmenter fragmenter;
        if (field.fieldOptions().numberOfFragments() == 0) {
            fragmenter = new NullFragmenter();
        } else if (field.fieldOptions().fragmenter() == null) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else if ("simple".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize());
        } else if ("span".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else {
            throw new IllegalArgumentException("unknown fragmenter option [" + field.fieldOptions().fragmenter() + "] for the field [" + fieldContext.fieldName + "]");
        }
        Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0]);
        entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer);
        entry.setTextFragmenter(fragmenter);
        // always highlight across all data
        entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
        cache.put(fieldType, entry);
    }
    // a HACK to make highlighter do highlighting, even though its using the single frag list builder
    int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments();
    ArrayList<TextFragment> fragsList = new ArrayList<>();
    List<Object> textsToHighlight;
    Analyzer analyzer = context.mapperService().documentMapper().mappers().indexAnalyzer();
    final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
    textsToHighlight = HighlightUtils.loadFieldValues(fieldType, context.getQueryShardContext(), hitContext, fieldContext.forceSource);
    // Highlight each value of the (possibly multi-valued) field independently.
    for (Object textToHighlight : textsToHighlight) {
        String text = convertFieldValue(fieldType, textToHighlight);
        int textLength = text.length();
        // Guard against re-analyzing huge values: the plain highlighter's cost grows with text length.
        if (textLength > maxAnalyzedOffset) {
            throw new IllegalArgumentException("The length of [" + fieldContext.fieldName + "] field of [" + hitContext.hit().getId() + "] doc of [" + context.getIndexName() + "] index " + "has exceeded [" + maxAnalyzedOffset + "] - maximum allowed to be analyzed for highlighting. " + "This maximum can be set by changing the [" + IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey() + "] index level setting. " + "For large texts, indexing with offsets or term vectors, and highlighting " + "with unified or fvh highlighter is recommended!");
        }
        try (TokenStream tokenStream = analyzer.tokenStream(fieldType.name(), text)) {
            if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
                // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
                continue;
            }
            TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments);
            for (TextFragment bestTextFragment : bestTextFragments) {
                // Keep only fragments that actually matched the query (positive score).
                if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
                    fragsList.add(bestTextFragment);
                }
            }
        } catch (BytesRefHash.MaxBytesLengthExceededException e) {
        // this can happen if for example a field is not_analyzed and ignore_above option is set.
        // the field will be ignored when indexing but the huge term is still in the source and
        // the plain highlighter will parse the source and try to analyze it.
        // ignore and continue to the next value
        } catch (InvalidTokenOffsetsException e) {
            throw new IllegalArgumentException(e);
        }
    }
    if (field.fieldOptions().scoreOrdered()) {
        // Best-scoring fragments first when score ordering was requested.
        CollectionUtil.introSort(fragsList, (o1, o2) -> Math.round(o2.getScore() - o1.getScore()));
    }
    String[] fragments;
    // number_of_fragments is set to 0 but we have a multivalued field
    if (field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) {
        fragments = new String[fragsList.size()];
        for (int i = 0; i < fragsList.size(); i++) {
            fragments[i] = fragsList.get(i).toString();
        }
    } else {
        // refine numberOfFragments if needed
        numberOfFragments = Math.min(fragsList.size(), numberOfFragments);
        fragments = new String[numberOfFragments];
        for (int i = 0; i < fragments.length; i++) {
            fragments[i] = fragsList.get(i).toString();
        }
    }
    if (fragments.length > 0) {
        return new HighlightField(fieldContext.fieldName, Text.convertFromStringArray(fragments));
    }
    // Nothing matched: optionally return a leading excerpt of the field (no_match_size option).
    int noMatchSize = fieldContext.field.fieldOptions().noMatchSize();
    if (noMatchSize > 0 && textsToHighlight.size() > 0) {
        // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
        String fieldContents = textsToHighlight.get(0).toString();
        int end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer, fieldType.name(), fieldContents);
        if (end > 0) {
            return new HighlightField(fieldContext.fieldName, new Text[] { new Text(fieldContents.substring(0, end)) });
        }
    }
    return null;
}
Also used : FetchContext(org.opensearch.search.fetch.FetchContext) TokenStream(org.apache.lucene.analysis.TokenStream) Formatter(org.apache.lucene.search.highlight.Formatter) SimpleHTMLFormatter(org.apache.lucene.search.highlight.SimpleHTMLFormatter) ArrayList(java.util.ArrayList) TextFragment(org.apache.lucene.search.highlight.TextFragment) Analyzer(org.apache.lucene.analysis.Analyzer) SimpleFragmenter(org.apache.lucene.search.highlight.SimpleFragmenter) Encoder(org.apache.lucene.search.highlight.Encoder) InvalidTokenOffsetsException(org.apache.lucene.search.highlight.InvalidTokenOffsetsException) MappedFieldType(org.opensearch.index.mapper.MappedFieldType) SimpleFragmenter(org.apache.lucene.search.highlight.SimpleFragmenter) Fragmenter(org.apache.lucene.search.highlight.Fragmenter) SimpleSpanFragmenter(org.apache.lucene.search.highlight.SimpleSpanFragmenter) NullFragmenter(org.apache.lucene.search.highlight.NullFragmenter) FetchSubPhase(org.opensearch.search.fetch.FetchSubPhase) BytesRefHash(org.apache.lucene.util.BytesRefHash) SimpleSpanFragmenter(org.apache.lucene.search.highlight.SimpleSpanFragmenter) QueryScorer(org.apache.lucene.search.highlight.QueryScorer) Text(org.opensearch.common.text.Text) NullFragmenter(org.apache.lucene.search.highlight.NullFragmenter) SimpleHTMLFormatter(org.apache.lucene.search.highlight.SimpleHTMLFormatter) HashMap(java.util.HashMap) Map(java.util.Map)

Example 3 with FetchContext

Use of org.opensearch.search.fetch.FetchContext in the OpenSearch project by opensearch-project.

From the class PercolatorMatchedSlotSubFetchPhaseTests, the method testHitsExecute:

/**
 * Verifies that the matched-slot sub fetch phase records a slot field on the hit only
 * when the percolated in-memory document actually matches the stored query.
 */
public void testHitsExecute() throws Exception {
    try (Directory directory = newDirectory()) {
        // Need a one doc index:
        try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
            indexWriter.addDocument(new Document());
        }
        PercolatorMatchedSlotSubFetchPhase phase = new PercolatorMatchedSlotSubFetchPhase();
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            LeafReaderContext context = reader.leaves().get(0);
            PercolateQuery.QueryStore termQueryStore = ctx -> docId -> new TermQuery(new Term("field", "value"));
            // A match: the in-memory document contains the queried term, so slot 0 is recorded.
            assertSlotField(phase, context, "value", termQueryStore, 0);
            // No match: the in-memory document holds a different value.
            assertSlotField(phase, context, "value1", termQueryStore, null);
            // No query: the query store yields null for the document.
            assertSlotField(phase, context, "value", ctx -> docId -> null, null);
        }
    }
}

/**
 * Runs the sub fetch phase against a single-field in-memory document and asserts on the
 * matched-slot field of the resulting hit: present with value {@code expectedSlot} when
 * non-null, absent when {@code expectedSlot} is null.
 */
private void assertSlotField(PercolatorMatchedSlotSubFetchPhase phase, LeafReaderContext context, String fieldValue, PercolateQuery.QueryStore queryStore, Integer expectedSlot) throws Exception {
    HitContext hit = new HitContext(new SearchHit(0), context, 0, new SourceLookup());
    MemoryIndex memoryIndex = new MemoryIndex();
    memoryIndex.addField("field", fieldValue, new WhitespaceAnalyzer());
    memoryIndex.addField(new NumericDocValuesField(SeqNoFieldMapper.PRIMARY_TERM_NAME, 0), null);
    PercolateQuery percolateQuery = new PercolateQuery("_name", queryStore, Collections.emptyList(), new MatchAllDocsQuery(), memoryIndex.createSearcher(), null, new MatchNoDocsQuery());
    FetchContext sc = mock(FetchContext.class);
    when(sc.query()).thenReturn(percolateQuery);
    FetchSubPhaseProcessor processor = phase.getProcessor(sc);
    assertNotNull(processor);
    processor.process(hit);
    if (expectedSlot == null) {
        assertNull(hit.hit().field(PercolatorMatchedSlotSubFetchPhase.FIELD_NAME_PREFIX));
    } else {
        assertNotNull(hit.hit().field(PercolatorMatchedSlotSubFetchPhase.FIELD_NAME_PREFIX));
        assertEquals((int) expectedSlot, (int) hit.hit().field(PercolatorMatchedSlotSubFetchPhase.FIELD_NAME_PREFIX).getValue());
    }
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) FetchContext(org.opensearch.search.fetch.FetchContext) TermQuery(org.apache.lucene.search.TermQuery) SourceLookup(org.opensearch.search.lookup.SourceLookup) SearchHit(org.opensearch.search.SearchHit) DirectoryReader(org.apache.lucene.index.DirectoryReader) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) HitContext(org.opensearch.search.fetch.FetchSubPhase.HitContext) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) FetchSubPhaseProcessor(org.opensearch.search.fetch.FetchSubPhaseProcessor) Directory(org.apache.lucene.store.Directory)

Example 4 with FetchContext

Use of org.opensearch.search.fetch.FetchContext in the OpenSearch project by opensearch-project.

From the class FetchDocValuesPhase, the method getProcessor:

/**
 * Builds a processor that loads doc values for every requested {@code docvalue_fields}
 * entry and attaches them to the hit's "fields" section.
 *
 * @param context the fetch context for the current search request
 * @return a {@link FetchSubPhaseProcessor}, or {@code null} when no doc value fields were requested
 */
@Override
public FetchSubPhaseProcessor getProcessor(FetchContext context) {
    // Fetch the doc values context once and reuse it below instead of re-reading
    // context.docValuesContext() after the null check.
    FetchDocValuesContext dvContext = context.docValuesContext();
    if (dvContext == null) {
        return null;
    }
    if (dvContext.fields().stream().map(f -> f.format).anyMatch(USE_DEFAULT_FORMAT::equals)) {
        DEPRECATION_LOGGER.deprecate("explicit_default_format", "[" + USE_DEFAULT_FORMAT + "] is a special format that was only used to " + "ease the transition to 7.x. It has become the default and shouldn't be set explicitly anymore.");
    }
    /*
         * Its tempting to swap this to a `Map` but that'd break backwards
         * compatibility because we support fetching the same field multiple
         * times with different configuration. That isn't possible with a `Map`.
         */
    List<DocValueField> fields = new ArrayList<>();
    for (FieldAndFormat fieldAndFormat : dvContext.fields()) {
        MappedFieldType ft = context.mapperService().fieldType(fieldAndFormat.field);
        if (ft == null) {
            // Unknown fields are silently skipped rather than failing the whole fetch.
            continue;
        }
        String format = USE_DEFAULT_FORMAT.equals(fieldAndFormat.format) ? null : fieldAndFormat.format;
        ValueFetcher fetcher = new DocValueFetcher(ft.docValueFormat(format, null), context.searchLookup().doc().getForField(ft));
        fields.add(new DocValueField(fieldAndFormat.field, fetcher));
    }
    return new FetchSubPhaseProcessor() {

        @Override
        public void setNextReader(LeafReaderContext readerContext) {
            // Point every fetcher at the new segment before processing its hits.
            for (DocValueField f : fields) {
                f.fetcher.setNextReader(readerContext);
            }
        }

        @Override
        public void process(HitContext hit) throws IOException {
            for (DocValueField f : fields) {
                DocumentField hitField = hit.hit().field(f.field);
                if (hitField == null) {
                    hitField = new DocumentField(f.field, new ArrayList<>(2));
                    // even if we request a doc values of a meta-field (e.g. _routing),
                    // docValues fields will still be document fields, and put under "fields" section of a hit.
                    hit.hit().setDocumentField(f.field, hitField);
                }
                hitField.getValues().addAll(f.fetcher.fetchValues(hit.sourceLookup()));
            }
        }
    };
}
Also used : DeprecationLogger(org.opensearch.common.logging.DeprecationLogger) FetchSubPhase(org.opensearch.search.fetch.FetchSubPhase) List(java.util.List) MappedFieldType(org.opensearch.index.mapper.MappedFieldType) ValueFetcher(org.opensearch.index.mapper.ValueFetcher) FetchContext(org.opensearch.search.fetch.FetchContext) DocValueFetcher(org.opensearch.index.mapper.DocValueFetcher) IOException(java.io.IOException) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocumentField(org.opensearch.common.document.DocumentField) FetchSubPhaseProcessor(org.opensearch.search.fetch.FetchSubPhaseProcessor) ArrayList(java.util.ArrayList) DocumentField(org.opensearch.common.document.DocumentField) DocValueFetcher(org.opensearch.index.mapper.DocValueFetcher) ArrayList(java.util.ArrayList) MappedFieldType(org.opensearch.index.mapper.MappedFieldType) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) ValueFetcher(org.opensearch.index.mapper.ValueFetcher) DocValueFetcher(org.opensearch.index.mapper.DocValueFetcher) FetchSubPhaseProcessor(org.opensearch.search.fetch.FetchSubPhaseProcessor)

Example 5 with FetchContext

Use of org.opensearch.search.fetch.FetchContext in the OpenSearch project by opensearch-project.

From the class FetchSourcePhaseTests, the method hitExecuteMultiple:

/**
 * Runs the fetch-source phase over a single synthetic hit and returns the resulting
 * HitContext. When {@code fetchSource} is false the phase must produce no processor.
 */
private HitContext hitExecuteMultiple(XContentBuilder source, boolean fetchSource, String[] includes, String[] excludes, SearchHit.NestedIdentity nestedIdentity) throws IOException {
    // Mock only the pieces of the fetch context that the source phase reads.
    FetchContext fetchContext = mock(FetchContext.class);
    when(fetchContext.fetchSourceContext()).thenReturn(new FetchSourceContext(fetchSource, includes, excludes));
    when(fetchContext.getIndexName()).thenReturn("index");
    // We don't need a real index, just a LeafReaderContext which cannot be mocked.
    LeafReaderContext segment = new MemoryIndex().createSearcher().getIndexReader().leaves().get(0);
    SearchHit searchHit = new SearchHit(1, null, nestedIdentity, null, null);
    HitContext hitContext = new HitContext(searchHit, segment, 1, new SourceLookup());
    hitContext.sourceLookup().setSource(source != null ? BytesReference.bytes(source) : null);
    FetchSubPhaseProcessor processor = new FetchSourcePhase().getProcessor(fetchContext);
    if (fetchSource) {
        assertNotNull(processor);
        processor.process(hitContext);
    } else {
        assertNull(processor);
    }
    return hitContext;
}
Also used : FetchContext(org.opensearch.search.fetch.FetchContext) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) SourceLookup(org.opensearch.search.lookup.SourceLookup) SearchHit(org.opensearch.search.SearchHit) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) HitContext(org.opensearch.search.fetch.FetchSubPhase.HitContext) FetchSubPhaseProcessor(org.opensearch.search.fetch.FetchSubPhaseProcessor)

Aggregations

FetchContext (org.opensearch.search.fetch.FetchContext)5 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)3 FetchSubPhaseProcessor (org.opensearch.search.fetch.FetchSubPhaseProcessor)3 ArrayList (java.util.ArrayList)2 MemoryIndex (org.apache.lucene.index.memory.MemoryIndex)2 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)2 MappedFieldType (org.opensearch.index.mapper.MappedFieldType)2 SearchHit (org.opensearch.search.SearchHit)2 FetchSubPhase (org.opensearch.search.fetch.FetchSubPhase)2 HitContext (org.opensearch.search.fetch.FetchSubPhase.HitContext)2 SourceLookup (org.opensearch.search.lookup.SourceLookup)2 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Analyzer (org.apache.lucene.analysis.Analyzer)1 TokenStream (org.apache.lucene.analysis.TokenStream)1 WhitespaceAnalyzer (org.apache.lucene.analysis.core.WhitespaceAnalyzer)1 Document (org.apache.lucene.document.Document)1 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)1