Search in sources :

Example 1 with AnnotatedText

use of org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText in project OpenSearch by opensearch-project.

In class AnnotatedTextHighlighterTests, method assertHighlightOneDoc:

/**
 * Indexes a single document built from the given marked-up inputs, highlights it with a
 * {@link CustomUnifiedHighlighter} wired to the annotation-aware analyzer and passage
 * formatter, and asserts that the produced snippets equal {@code expectedPassages}.
 *
 * <p>The directory and reader are scoped with try-with-resources so they are released
 * even when an assertion fails or the highlighter throws; otherwise the leaked
 * resources would surface as additional failures that hide the real one.
 *
 * @param fieldName        name of the field each marked-up input is indexed under
 *                         (NOTE(review): the highlighter below is built for the
 *                         hard-coded field "text" - confirm callers pass that name)
 * @param markedUpInputs   one annotated-markup string per field value
 * @param query            query whose matches are highlighted
 * @param locale           locale handed to the highlighter
 * @param breakIterator    break iterator handed to the highlighter
 * @param noMatchSize      no-match size handed to the highlighter
 * @param expectedPassages expected snippet texts, in order; its length is also passed
 *                         to the highlighter constructor
 * @throws Exception if indexing, searching or highlighting fails
 */
private void assertHighlightOneDoc(String fieldName, String[] markedUpInputs, Query query, Locale locale, BreakIterator breakIterator, int noMatchSize, String[] expectedPassages) throws Exception {
    // Annotated fields wrap the usual analyzer with one that injects extra tokens
    Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
        iwc.setMergePolicy(newTieredMergePolicy(random()));
        RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
        // Randomly index with or without offsets in the postings
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        if (randomBoolean()) {
            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        } else {
            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
        }
        ft.freeze();
        // One field instance per input, all under the same field name (multi-valued field)
        Document doc = new Document();
        for (String input : markedUpInputs) {
            Field field = new Field(fieldName, "", ft);
            field.setStringValue(input);
            doc.add(field);
        }
        iw.addDocument(doc);
        // Closed in reverse order of acquisition: reader first, then dir
        try (DirectoryReader reader = iw.getReader()) {
            IndexSearcher searcher = newSearcher(reader);
            iw.close();
            // Parse the markup once; the same annotations feed both the analyzer and the formatter
            AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
            for (int i = 0; i < markedUpInputs.length; i++) {
                annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
            }
            AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
            hiliteAnalyzer.setAnnotations(annotations);
            AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(new DefaultEncoder());
            passageFormatter.setAnnotations(annotations);
            // The highlighter is given the markup-free text of every value, joined by the multi-value separator
            ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
            for (int i = 0; i < annotations.length; i++) {
                plainTextForHighlighter.add(annotations[i].textMinusMarkup);
            }
            TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
            assertThat(topDocs.totalHits.value, equalTo(1L));
            String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
            CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null, passageFormatter, locale, breakIterator, "index", "text", query, noMatchSize, expectedPassages.length, name -> "text".equals(name), Integer.MAX_VALUE, Integer.MAX_VALUE);
            highlighter.setFieldMatcher((name) -> "text".equals(name));
            final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
            assertEquals(expectedPassages.length, snippets.length);
            for (int i = 0; i < snippets.length; i++) {
                assertEquals(expectedPassages[i], snippets[i].getText());
            }
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) AnnotatedHighlighterAnalyzer(org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer) ArrayList(java.util.ArrayList) AnnotatedHighlighterAnalyzer(org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) Directory(org.apache.lucene.store.Directory) DirectoryReader(org.apache.lucene.index.DirectoryReader) AnnotatedText(org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText) CustomUnifiedHighlighter(org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter) Snippet(org.apache.lucene.search.uhighlight.Snippet) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) FieldType(org.apache.lucene.document.FieldType) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) AnnotationAnalyzerWrapper(org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 2 with AnnotatedText

use of org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText in project OpenSearch by opensearch-project.

In class AnnotatedPassageFormatter, method getIntersectingAnnotations:

/**
 * Collects the annotations that intersect the range {@code [start, end)} expressed in
 * the coordinates of the concatenated field values.
 *
 * <p>Each field value's annotations carry offsets relative to that value alone, so the
 * requested range is shifted back by the running offset before testing for intersection,
 * and every matching token is re-created with its offsets shifted forward by the same
 * amount. The running offset advances by the value's markup-free text length plus one
 * for the separator character between values.
 *
 * @param start start offset of the range in the concatenated text
 * @param end   end offset of the range in the concatenated text
 * @return the intersecting annotations with offsets relative to the concatenated text
 */
public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
    List<AnnotationToken> matches = new ArrayList<>();
    int shift = 0;
    for (AnnotatedText valueAnnotations : this.annotations) {
        // Translate the requested range into this value's local coordinates
        final int localStart = start - shift;
        final int localEnd = end - shift;
        final int count = valueAnnotations.numAnnotations();
        for (int idx = 0; idx < count; idx++) {
            AnnotationToken candidate = valueAnnotations.getAnnotation(idx);
            if (!candidate.intersects(localStart, localEnd)) {
                continue;
            }
            // Re-express the token in the coordinates of the concatenated text
            matches.add(new AnnotationToken(candidate.offset + shift, candidate.endOffset + shift, candidate.value));
        }
        // Skip past this value's text and the single separator character that follows it
        shift += valueAnnotations.textMinusMarkup.length() + 1;
    }
    AnnotationToken[] result = new AnnotationToken[matches.size()];
    return matches.toArray(result);
}
Also used : AnnotatedText(org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText) ArrayList(java.util.ArrayList) AnnotationToken(org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken)

Example 3 with AnnotatedText

use of org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText in project OpenSearch by opensearch-project.

In class AnnotatedTextParsingTests, method checkParsing:

/**
 * Parses {@code markup} and asserts that both the markup-free text and the extracted
 * annotation tokens match the expectations.
 *
 * @param markup            annotated-markup input handed to {@code AnnotatedText.parse}
 * @param expectedPlainText expected value of the parsed text with the markup removed
 * @param expectedTokens    expected annotation tokens, in order
 */
private void checkParsing(String markup, String expectedPlainText, AnnotationToken... expectedTokens) {
    final AnnotatedText parsed = AnnotatedText.parse(markup);
    assertEquals(expectedPlainText, parsed.textMinusMarkup);

    final List<AnnotationToken> parsedTokens = parsed.annotations;
    // Check the count first so a size mismatch is reported before any element comparison
    assertEquals(expectedTokens.length, parsedTokens.size());

    int position = 0;
    for (AnnotationToken expected : expectedTokens) {
        assertEquals(expected, parsedTokens.get(position));
        position++;
    }
}
Also used : AnnotatedText(org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText) AnnotationToken(org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken)

Aggregations

AnnotatedText (org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText)3 ArrayList (java.util.ArrayList)2 AnnotationToken (org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken)2 Analyzer (org.apache.lucene.analysis.Analyzer)1 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)1 Document (org.apache.lucene.document.Document)1 Field (org.apache.lucene.document.Field)1 FieldType (org.apache.lucene.document.FieldType)1 TextField (org.apache.lucene.document.TextField)1 DirectoryReader (org.apache.lucene.index.DirectoryReader)1 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)1 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)1 TopDocs (org.apache.lucene.search.TopDocs)1 DefaultEncoder (org.apache.lucene.search.highlight.DefaultEncoder)1 CustomUnifiedHighlighter (org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter)1 Snippet (org.apache.lucene.search.uhighlight.Snippet)1 Directory (org.apache.lucene.store.Directory)1 AnnotatedHighlighterAnalyzer (org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer)1