Search in sources :

Example 1 with AnnotatedHighlighterAnalyzer

Use of org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer in the OpenSearch project by opensearch-project.

The method assertHighlightOneDoc of the class AnnotatedTextHighlighterTests.

/**
 * Indexes one document whose {@code fieldName} field carries every entry of {@code markedUpInputs} as a
 * separate value, highlights that document for {@code query} and asserts that the resulting snippets
 * equal {@code expectedPassages} in count, order and text.
 *
 * <p>The index field is created under {@code fieldName}, but the highlighter is constructed for the
 * literal field name {@code "text"} with a field matcher that accepts only {@code "text"}.
 * NOTE(review): callers presumably always pass {@code "text"} as {@code fieldName} - confirm.
 *
 * <p>The writer, the reader and the directory are closed even when indexing, highlighting or an
 * assertion fails, so a failing expectation does not additionally leave index resources open.
 *
 * @param fieldName        name of the indexed field that receives the marked-up values
 * @param markedUpInputs   field values including annotation markup; each one is parsed with
 *                         {@code AnnotatedText.parse} and indexed as its own field value
 * @param query            query handed to the highlighter
 * @param locale           locale handed to the highlighter
 * @param breakIterator    break iterator handed to the highlighter
 * @param noMatchSize      no-match size handed to the highlighter
 * @param expectedPassages expected snippet texts, in order; the array length is also handed to the
 *                         highlighter constructor
 * @throws Exception if indexing, searching or highlighting fails
 */
private void assertHighlightOneDoc(String fieldName, String[] markedUpInputs, Query query, Locale locale, BreakIterator breakIterator, int noMatchSize, String[] expectedPassages) throws Exception {
    // Annotated fields wrap the usual analyzer with one that injects extra tokens
    Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
        iwc.setMergePolicy(newTieredMergePolicy(random()));
        // Declared outside the writer scope: the reader must outlive the writer and is closed in the
        // finally block below, whatever happens in between.
        DirectoryReader reader = null;
        try {
            IndexSearcher searcher;
            // The writer is closed as soon as the reader and searcher exist (same point as before),
            // and now also when building or indexing the document throws.
            try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc)) {
                FieldType ft = new FieldType(TextField.TYPE_STORED);
                // Randomly index with or without positions and offsets in the postings.
                if (randomBoolean()) {
                    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                } else {
                    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
                }
                ft.freeze();
                Document doc = new Document();
                for (String input : markedUpInputs) {
                    Field field = new Field(fieldName, "", ft);
                    field.setStringValue(input);
                    doc.add(field);
                }
                iw.addDocument(doc);
                reader = iw.getReader();
                searcher = newSearcher(reader);
            }

            // Parse the markup once; the analyzer and the formatter both receive the same annotations.
            AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
            for (int i = 0; i < markedUpInputs.length; i++) {
                annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
            }
            AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
            hiliteAnalyzer.setAnnotations(annotations);
            AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(new DefaultEncoder());
            passageFormatter.setAnnotations(annotations);

            // The highlighter is fed the text with the markup stripped, values joined by MULTIVAL_SEP_CHAR.
            ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
            for (AnnotatedText annotation : annotations) {
                plainTextForHighlighter.add(annotation.textMinusMarkup);
            }
            TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
            assertThat(topDocs.totalHits.value, equalTo(1L));
            String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
            CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null, passageFormatter, locale, breakIterator, "index", "text", query, noMatchSize, expectedPassages.length, name -> "text".equals(name), Integer.MAX_VALUE, Integer.MAX_VALUE);
            highlighter.setFieldMatcher((name) -> "text".equals(name));
            final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
            assertEquals(expectedPassages.length, snippets.length);
            for (int i = 0; i < snippets.length; i++) {
                assertEquals(expectedPassages[i], snippets[i].getText());
            }
        } finally {
            // Close the reader before the enclosing try-with-resources closes the directory.
            if (reader != null) {
                reader.close();
            }
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) AnnotatedHighlighterAnalyzer(org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer) ArrayList(java.util.ArrayList) AnnotatedHighlighterAnalyzer(org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) Directory(org.apache.lucene.store.Directory) DirectoryReader(org.apache.lucene.index.DirectoryReader) AnnotatedText(org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText) CustomUnifiedHighlighter(org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter) Snippet(org.apache.lucene.search.uhighlight.Snippet) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) FieldType(org.apache.lucene.document.FieldType) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) AnnotationAnalyzerWrapper(org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

ArrayList (java.util.ArrayList)1 Analyzer (org.apache.lucene.analysis.Analyzer)1 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)1 Document (org.apache.lucene.document.Document)1 Field (org.apache.lucene.document.Field)1 FieldType (org.apache.lucene.document.FieldType)1 TextField (org.apache.lucene.document.TextField)1 DirectoryReader (org.apache.lucene.index.DirectoryReader)1 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)1 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)1 TopDocs (org.apache.lucene.search.TopDocs)1 DefaultEncoder (org.apache.lucene.search.highlight.DefaultEncoder)1 CustomUnifiedHighlighter (org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter)1 Snippet (org.apache.lucene.search.uhighlight.Snippet)1 Directory (org.apache.lucene.store.Directory)1 AnnotatedHighlighterAnalyzer (org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer)1 AnnotatedText (org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText)1 AnnotationAnalyzerWrapper (org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper)1