Example use of org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText in the OpenSearch project (opensearch-project).
From the class AnnotatedTextHighlighterTests, method assertHighlightOneDoc.
/**
 * Indexes the given marked-up values as one multi-valued document, highlights it with the
 * annotated-text highlighter and asserts that the resulting snippets equal
 * {@code expectedPassages}, in order.
 *
 * <p>The directory, index writer and reader are managed with try-with-resources so they are
 * released even when an assertion fails or highlighting throws.
 *
 * @param fieldName        name of the field the values are indexed under
 * @param markedUpInputs   field values, one per entry, each parsed with {@code AnnotatedText.parse}
 * @param query            query handed to the highlighter
 * @param locale           locale handed to the highlighter
 * @param breakIterator    break iterator handed to the highlighter
 * @param noMatchSize      no-match size handed to the highlighter
 * @param expectedPassages expected snippet texts; the array length is also passed to the
 *                         highlighter constructor (presumably as the maximum passage count)
 * @throws Exception if indexing, searching or highlighting fails
 */
private void assertHighlightOneDoc(String fieldName, String[] markedUpInputs, Query query, Locale locale, BreakIterator breakIterator, int noMatchSize, String[] expectedPassages) throws Exception {
    // Annotated fields wrap the usual analyzer with one that injects extra tokens
    Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
        iwc.setMergePolicy(newTieredMergePolicy(random()));
        try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc)) {
            // Randomly index with or without offsets in the postings
            FieldType ft = new FieldType(TextField.TYPE_STORED);
            if (randomBoolean()) {
                ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            } else {
                ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
            }
            ft.freeze();

            // One document carrying every input as a separate value of the same field
            Document doc = new Document();
            for (String input : markedUpInputs) {
                Field field = new Field(fieldName, "", ft);
                field.setStringValue(input);
                doc.add(field);
            }
            iw.addDocument(doc);

            // Resources close in reverse order: reader first, then the writer, then the directory
            try (DirectoryReader reader = iw.getReader()) {
                IndexSearcher searcher = newSearcher(reader);

                // Parse the markup once; analyzer and formatter share the same parsed annotations
                AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
                for (int i = 0; i < markedUpInputs.length; i++) {
                    annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
                }
                AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
                hiliteAnalyzer.setAnnotations(annotations);
                AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(new DefaultEncoder());
                passageFormatter.setAnnotations(annotations);

                // The highlighter is fed the markup-free text of every value, joined by the separator char
                ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
                for (int i = 0; i < annotations.length; i++) {
                    plainTextForHighlighter.add(annotations[i].textMinusMarkup);
                }
                TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
                assertThat(topDocs.totalHits.value, equalTo(1L));
                String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));

                // NOTE(review): the highlighter is given the literal field name "text" rather than
                // fieldName - presumably callers always pass "text"; confirm against the call sites.
                CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null, passageFormatter, locale, breakIterator, "index", "text", query, noMatchSize, expectedPassages.length, name -> "text".equals(name), Integer.MAX_VALUE, Integer.MAX_VALUE);
                highlighter.setFieldMatcher((name) -> "text".equals(name));
                final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
                assertEquals(expectedPassages.length, snippets.length);
                for (int i = 0; i < snippets.length; i++) {
                    assertEquals(expectedPassages[i], snippets[i].getText());
                }
            }
        }
    }
}
Example use of org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText in the OpenSearch project (opensearch-project).
From the class AnnotatedPassageFormatter, method getIntersectingAnnotations.
/**
 * Collects the annotations, across all field values held by this formatter, that intersect
 * the range {@code [start, end]} as judged by {@code AnnotationToken.intersects}.
 *
 * <p>{@code start} and {@code end} are treated as offsets into the field values laid end to
 * end with one separator character between consecutive values. Each returned token is a copy
 * whose offsets have been shifted into that same combined coordinate space.
 *
 * @param start start offset of the range of interest
 * @param end   end offset of the range of interest
 * @return the intersecting annotations, in field-value order; empty if there are none
 */
public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
    final List<AnnotationToken> matches = new ArrayList<>();
    // Where the current value begins: the lengths of the previous values plus one separator each
    int valueBase = 0;
    for (AnnotatedText valueAnnotations : this.annotations) {
        // Translate the requested range into offsets local to this value
        final int localStart = start - valueBase;
        final int localEnd = end - valueBase;
        int idx = 0;
        while (idx < valueAnnotations.numAnnotations()) {
            final AnnotationToken candidate = valueAnnotations.getAnnotation(idx);
            idx++;
            if (!candidate.intersects(localStart, localEnd)) {
                continue;
            }
            // Re-express the hit in combined-text offsets
            final int shiftedStart = candidate.offset + valueBase;
            final int shiftedEnd = candidate.endOffset + valueBase;
            matches.add(new AnnotationToken(shiftedStart, shiftedEnd, candidate.value));
        }
        // add 1 for the fieldvalue separator character
        valueBase += valueAnnotations.textMinusMarkup.length() + 1;
    }
    return matches.toArray(new AnnotationToken[0]);
}
Example use of org.opensearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText in the OpenSearch project (opensearch-project).
From the class AnnotatedTextParsingTests, method checkParsing.
/**
 * Parses {@code markup} and asserts that the markup-free text and the extracted annotation
 * tokens match the expectations.
 *
 * @param markup            annotated text to parse
 * @param expectedPlainText text expected once the markup has been removed
 * @param expectedTokens    annotation tokens expected, in order
 */
private void checkParsing(String markup, String expectedPlainText, AnnotationToken... expectedTokens) {
    final AnnotatedText parsed = AnnotatedText.parse(markup);
    assertEquals(expectedPlainText, parsed.textMinusMarkup);

    final List<AnnotationToken> parsedTokens = parsed.annotations;
    // Check the count first so the element-wise comparison below cannot run past the end
    assertEquals(expectedTokens.length, parsedTokens.size());

    int position = 0;
    for (AnnotationToken expected : expectedTokens) {
        assertEquals(expected, parsedTokens.get(position));
        position++;
    }
}
Aggregations