Use of org.opensearch.index.query.QueryShardContext in project OpenSearch by opensearch-project.
In class PhraseSuggester, method innerExecute.
/*
 * More ideas:
 * - add the ability to find whitespace problems -> we can build a poor man's decompounder with our index, based on an automaton?
 * - add the ability to build different error models, maybe based on a confusion matrix?
 * - try to combine a token with its subsequent token to find / detect word splits (optional)
 * - for this to work we need some way to define the position length of a candidate
 * - phonetic filters could be interesting here too for candidate selection
 */
/**
 * Builds the phrase suggestion response for a single suggestion context.
 * <p>
 * One {@link DirectCandidateGenerator} is created per configured generator whose field has terms in the
 * reader. When no generator could be created, or the suggest field itself has no terms, the response
 * contains a single entry built with a cutoff score of {@link Double#MIN_VALUE} and no options.
 * Otherwise the corrections returned by the {@link NoisyChannelSpellChecker} are turned into options.
 * <p>
 * When a collate query script is configured, it is rendered and executed for every correction. A
 * correction whose collate query matches no document is dropped, unless collate pruning is enabled,
 * in which case it is kept and the option carries the match flag.
 *
 * @param name       name used for the returned suggestion
 * @param suggestion context holding the suggestion settings (generators, analyzer, text, collate script, ...)
 * @param searcher   searcher supplying the index reader and used to run the collate query
 * @param spare      scratch buffer that is overwritten while joining corrections
 * @return the suggestion holding exactly one entry
 * @throws IOException propagated from the index, analysis and collate-query calls made here
 */
@Override
public Suggestion<? extends Entry<? extends Option>> innerExecute(
    String name,
    PhraseSuggestionContext suggestion,
    IndexSearcher searcher,
    CharsRefBuilder spare
) throws IOException {
    final double errorLikelihood = suggestion.realworldErrorLikelihood();
    final PhraseSuggestion response = new PhraseSuggestion(name, suggestion.getSize());
    final IndexReader reader = searcher.getIndexReader();

    // Turn every configured generator into a runnable one; fields without terms are skipped.
    final List<PhraseSuggestionContext.DirectCandidateGenerator> configured = suggestion.generators();
    final List<CandidateGenerator> candidateGenerators = new ArrayList<>(configured.size());
    for (PhraseSuggestionContext.DirectCandidateGenerator config : configured) {
        final DirectSpellChecker directChecker = config.createDirectSpellChecker();
        final Terms fieldTerms = MultiTerms.getTerms(reader, config.field());
        if (fieldTerms == null) {
            continue;
        }
        candidateGenerators.add(
            new DirectCandidateGenerator(
                directChecker,
                config.field(),
                config.suggestMode(),
                reader,
                errorLikelihood,
                config.size(),
                config.preFilter(),
                config.postFilter(),
                fieldTerms
            )
        );
    }

    final String suggestField = suggestion.getField();
    final Terms suggestTerms = MultiTerms.getTerms(reader, suggestField);
    if (candidateGenerators.isEmpty() || suggestTerms == null) {
        // Nothing to generate candidates from: answer with an entry that has no options.
        response.addTerm(buildResultEntry(suggestion, spare, Double.MIN_VALUE));
        return response;
    }

    final NoisyChannelSpellChecker noisyChannel = new NoisyChannelSpellChecker(
        errorLikelihood,
        suggestion.getRequireUnigram(),
        suggestion.getTokenLimit()
    );
    final BytesRef separator = suggestion.separator();
    final WordScorer scorer = suggestion.model().newScorer(reader, suggestTerms, suggestField, errorLikelihood, separator);

    Result result;
    try (TokenStream tokens = tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField())) {
        final MultiCandidateGeneratorWrapper combined = new MultiCandidateGeneratorWrapper(
            suggestion.getShardSize(),
            candidateGenerators.toArray(new CandidateGenerator[candidateGenerators.size()])
        );
        result = noisyChannel.getCorrections(
            tokens,
            combined,
            suggestion.maxErrors(),
            suggestion.getShardSize(),
            scorer,
            suggestion.confidence(),
            suggestion.gramSize()
        );
    }

    final PhraseSuggestion.Entry entry = buildResultEntry(suggestion, spare, result.cutoffScore);
    response.addTerm(entry);

    final BytesRefBuilder joinBuffer = new BytesRefBuilder();
    final TemplateScript.Factory collateScript = suggestion.getCollateQueryScript();
    final boolean pruneEnabled = collateScript != null && suggestion.collatePrune();

    for (Correction correction : result.corrections) {
        spare.copyUTF8Bytes(correction.join(SEPARATOR, joinBuffer, null, null));

        boolean matches = true;
        if (collateScript != null) {
            // Render the collate template for this correction and check whether the
            // resulting query finds at least one document in the index.
            final Map<String, Object> templateVars = suggestion.getCollateScriptParams();
            templateVars.put(SUGGESTION_TEMPLATE_VAR_NAME, spare.toString());
            final QueryShardContext shardContext = suggestion.getShardContext();
            final String renderedQuery = collateScript.newInstance(templateVars).execute();
            try (
                XContentParser parser = XContentFactory.xContent(renderedQuery)
                    .createParser(shardContext.getXContentRegistry(), LoggingDeprecationHandler.INSTANCE, renderedQuery)
            ) {
                final QueryBuilder collateQuery = AbstractQueryBuilder.parseInnerQueryBuilder(parser);
                final ParsedQuery parsed = shardContext.toQuery(collateQuery);
                matches = Lucene.exists(searcher, parsed.query());
            }
        }

        // Non-matching corrections survive only when pruning is on (they are then flagged below).
        if (!(matches || pruneEnabled)) {
            continue;
        }

        // Capture the plain phrase before the spare buffer is reused for the highlighted form.
        final Text phrase = new Text(spare.toString());
        Text highlighted = null;
        if (suggestion.getPreTag() != null) {
            spare.copyUTF8Bytes(correction.join(SEPARATOR, joinBuffer, suggestion.getPreTag(), suggestion.getPostTag()));
            highlighted = new Text(spare.toString());
        }

        final float score = (float) correction.score;
        final PhraseSuggestion.Entry.Option option = pruneEnabled
            ? new PhraseSuggestion.Entry.Option(phrase, highlighted, score, matches)
            : new PhraseSuggestion.Entry.Option(phrase, highlighted, score);
        entry.addOption(option);
    }
    return response;
}
Use of org.opensearch.index.query.QueryShardContext in project OpenSearch by opensearch-project.
In class HighlightBuilderTests, method testBuildSearchContextHighlight.
/**
 * Tests that {@code build()} produces a {@link SearchHighlightContext} whose parameters match
 * those of the random {@link HighlightBuilder} it was built from.
 */
public void testBuildSearchContextHighlight() throws IOException {
    Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
    Index index = new Index(randomAlphaOfLengthBetween(1, 10), "_na_");
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings(index, indexSettings);
    // shard context will only need indicesQueriesRegistry for building Query objects nested in highlighter
    QueryShardContext mockShardContext = new QueryShardContext(
        0,
        idxSettings,
        BigArrays.NON_RECYCLING_INSTANCE,
        null,
        null,
        null,
        null,
        null,
        xContentRegistry(),
        namedWriteableRegistry,
        null,
        null,
        System::currentTimeMillis,
        null,
        null,
        () -> true,
        null
    ) {
        // Every requested field name resolves to a freshly built text field type.
        @Override
        public MappedFieldType fieldMapper(String name) {
            TextFieldMapper.Builder textBuilder = new TextFieldMapper.Builder(name, createDefaultIndexAnalyzers());
            Mapper.BuilderContext builderContext = new Mapper.BuilderContext(idxSettings.getSettings(), new ContentPath(1));
            return textBuilder.build(builderContext).fieldType();
        }
    };
    mockShardContext.setMapUnmappedFieldAsString(true);

    for (int run = 0; run < NUMBER_OF_TESTBUILDERS; run++) {
        HighlightBuilder highlightBuilder = randomHighlighterBuilder();
        highlightBuilder = Rewriteable.rewrite(highlightBuilder, mockShardContext);
        SearchHighlightContext builtContext = highlightBuilder.build(mockShardContext);

        for (SearchHighlightContext.Field builtField : builtContext.fields()) {
            // The encoder falls back to the default when the builder did not set one.
            String expectedEncoder = highlightBuilder.encoder();
            if (expectedEncoder == null) {
                expectedEncoder = HighlightBuilder.DEFAULT_ENCODER;
            }
            assertEquals(expectedEncoder, builtField.fieldOptions().encoder());

            final Field fieldBuilder = getFieldBuilderByName(highlightBuilder, builtField.field());
            assertNotNull("expected a highlight builder for field " + builtField.field(), fieldBuilder);
            FieldOptions fieldOptions = builtField.fieldOptions();

            // Compare each builder-side setting with its counterpart in the built options.
            BiConsumer<Function<AbstractHighlighterBuilder<?>, Object>, Function<FieldOptions, Object>> checkSame = mergeBeforeChek(
                highlightBuilder,
                fieldBuilder,
                fieldOptions
            );
            checkSame.accept(AbstractHighlighterBuilder::boundaryChars, FieldOptions::boundaryChars);
            checkSame.accept(AbstractHighlighterBuilder::boundaryScannerType, FieldOptions::boundaryScannerType);
            checkSame.accept(AbstractHighlighterBuilder::boundaryMaxScan, FieldOptions::boundaryMaxScan);
            checkSame.accept(AbstractHighlighterBuilder::fragmentSize, FieldOptions::fragmentCharSize);
            checkSame.accept(AbstractHighlighterBuilder::fragmenter, FieldOptions::fragmenter);
            checkSame.accept(AbstractHighlighterBuilder::requireFieldMatch, FieldOptions::requireFieldMatch);
            checkSame.accept(AbstractHighlighterBuilder::noMatchSize, FieldOptions::noMatchSize);
            checkSame.accept(AbstractHighlighterBuilder::numOfFragments, FieldOptions::numberOfFragments);
            checkSame.accept(AbstractHighlighterBuilder::phraseLimit, FieldOptions::phraseLimit);
            checkSame.accept(AbstractHighlighterBuilder::highlighterType, FieldOptions::highlighterType);
            checkSame.accept(AbstractHighlighterBuilder::highlightFilter, FieldOptions::highlightFilter);
            checkSame.accept(AbstractHighlighterBuilder::preTags, FieldOptions::preTags);
            checkSame.accept(AbstractHighlighterBuilder::postTags, FieldOptions::postTags);
            checkSame.accept(AbstractHighlighterBuilder::options, FieldOptions::options);
            checkSame.accept(AbstractHighlighterBuilder::order, op -> op.scoreOrdered() ? Order.SCORE : Order.NONE);
            assertEquals(fieldBuilder.fragmentOffset, fieldOptions.fragmentOffset());

            if (fieldBuilder.matchedFields == null) {
                assertNull(fieldOptions.matchedFields());
            } else {
                // Sort a copy of the builder's array so it can be compared with the sorted view of the built set.
                String[] sortedExpected = Arrays.copyOf(fieldBuilder.matchedFields, fieldBuilder.matchedFields.length);
                Arrays.sort(sortedExpected);
                assertArrayEquals(
                    sortedExpected,
                    new TreeSet<>(fieldOptions.matchedFields()).toArray(new String[fieldOptions.matchedFields().size()])
                );
            }

            // A field-level highlight query takes precedence over the builder-level one.
            Query expectedQuery;
            if (fieldBuilder.highlightQuery != null) {
                expectedQuery = Rewriteable.rewrite(fieldBuilder.highlightQuery, mockShardContext).toQuery(mockShardContext);
            } else if (highlightBuilder.highlightQuery != null) {
                expectedQuery = Rewriteable.rewrite(highlightBuilder.highlightQuery, mockShardContext).toQuery(mockShardContext);
            } else {
                expectedQuery = null;
            }
            assertEquals(expectedQuery, fieldOptions.highlightQuery());
        }
    }
}
Use of org.opensearch.index.query.QueryShardContext in project OpenSearch by opensearch-project.
In class ValuesSourceConfigTests, method testUnmappedKeyword.
/**
 * Resolves a bytes values source for the field "bytes" on an index created without field mappings.
 * With a {@code null} fifth argument the config reports no values; with "abc" as the fifth
 * argument (presumably the "missing" value; confirm against {@code ValuesSourceConfig.resolve}),
 * document 0 yields exactly one value, "abc".
 */
public void testUnmappedKeyword() throws Exception {
    final IndexService service = createIndex("index", Settings.EMPTY, "type");
    // Index one document with an empty source so that a leaf with doc 0 exists.
    client().prepareIndex("index")
        .setId("1")
        .setSource()
        .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
        .get();

    try (Engine.Searcher searcher = service.getShard(0).acquireSearcher("test")) {
        final QueryShardContext shardContext = service.newQueryShardContext(0, searcher, () -> 42L, null);

        final ValuesSourceConfig withoutDefault = ValuesSourceConfig.resolve(
            shardContext,
            ValueType.STRING,
            "bytes",
            null,
            null,
            null,
            null,
            CoreValuesSourceType.BYTES
        );
        final ValuesSource.Bytes emptySource = (ValuesSource.Bytes) withoutDefault.getValuesSource();
        assertNotNull(emptySource);
        assertFalse(withoutDefault.hasValues());

        final ValuesSourceConfig withDefault = ValuesSourceConfig.resolve(
            shardContext,
            ValueType.STRING,
            "bytes",
            null,
            "abc",
            null,
            null,
            CoreValuesSourceType.BYTES
        );
        final ValuesSource.Bytes defaultedSource = (ValuesSource.Bytes) withDefault.getValuesSource();
        final LeafReaderContext firstLeaf = searcher.getIndexReader().leaves().get(0);
        final SortedBinaryDocValues docValues = defaultedSource.bytesValues(firstLeaf);
        assertTrue(docValues.advanceExact(0));
        assertEquals(1, docValues.docValueCount());
        assertEquals(new BytesRef("abc"), docValues.nextValue());
    }
}
Use of org.opensearch.index.query.QueryShardContext in project OpenSearch by opensearch-project.
In class ValuesSourceConfigTests, method testFieldAlias.
/**
 * Resolves a bytes values source through the field alias "alias" (declared with path "field")
 * and checks that document 0 exposes the single value "value" indexed under "field".
 */
public void testFieldAlias() throws Exception {
    final IndexService service = createIndex(
        "index",
        Settings.EMPTY,
        "type",
        "field",
        "type=keyword",
        "alias",
        "type=alias,path=field"
    );
    client().prepareIndex("index")
        .setId("1")
        .setSource("field", "value")
        .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
        .get();

    try (Engine.Searcher searcher = service.getShard(0).acquireSearcher("test")) {
        final QueryShardContext shardContext = service.newQueryShardContext(0, searcher, () -> 42L, null);
        // Resolve by the alias name rather than by the concrete field name.
        final ValuesSourceConfig aliasConfig = ValuesSourceConfig.resolve(
            shardContext,
            ValueType.STRING,
            "alias",
            null,
            null,
            null,
            null,
            CoreValuesSourceType.BYTES
        );
        final ValuesSource.Bytes aliasSource = (ValuesSource.Bytes) aliasConfig.getValuesSource();

        final LeafReaderContext firstLeaf = searcher.getIndexReader().leaves().get(0);
        final SortedBinaryDocValues docValues = aliasSource.bytesValues(firstLeaf);
        assertTrue(docValues.advanceExact(0));
        assertEquals(1, docValues.docValueCount());
        assertEquals(new BytesRef("value"), docValues.nextValue());
    }
}
Use of org.opensearch.index.query.QueryShardContext in project OpenSearch by opensearch-project.
In class ValuesSourceConfigTests, method testUnmappedBoolean.
/**
 * Resolves a values source for the field "bool" with {@code ValueType.BOOLEAN} on an index
 * created without field mappings. With a {@code null} fifth argument the config reports no
 * values; with {@code true} as the fifth argument (presumably the "missing" value; confirm
 * against {@code ValuesSourceConfig.resolve}), document 0 yields exactly one long value, 1.
 */
public void testUnmappedBoolean() throws Exception {
    final IndexService service = createIndex("index", Settings.EMPTY, "type");
    // Index one document with an empty source so that a leaf with doc 0 exists.
    client().prepareIndex("index")
        .setId("1")
        .setSource()
        .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
        .get();

    try (Engine.Searcher searcher = service.getShard(0).acquireSearcher("test")) {
        final QueryShardContext shardContext = service.newQueryShardContext(0, searcher, () -> 42L, null);

        final ValuesSourceConfig withoutDefault = ValuesSourceConfig.resolve(
            shardContext,
            ValueType.BOOLEAN,
            "bool",
            null,
            null,
            null,
            null,
            CoreValuesSourceType.BYTES
        );
        // The resolved source is cast to a numeric one even though the last argument is BYTES.
        final ValuesSource.Numeric emptySource = (ValuesSource.Numeric) withoutDefault.getValuesSource();
        assertNotNull(emptySource);
        assertFalse(withoutDefault.hasValues());

        final ValuesSourceConfig withDefault = ValuesSourceConfig.resolve(
            shardContext,
            ValueType.BOOLEAN,
            "bool",
            null,
            true,
            null,
            null,
            CoreValuesSourceType.BYTES
        );
        final ValuesSource.Numeric defaultedSource = (ValuesSource.Numeric) withDefault.getValuesSource();
        final LeafReaderContext firstLeaf = searcher.getIndexReader().leaves().get(0);
        final SortedNumericDocValues docValues = defaultedSource.longValues(firstLeaf);
        assertTrue(docValues.advanceExact(0));
        assertEquals(1, docValues.docValueCount());
        assertEquals(1, docValues.nextValue());
    }
}
Aggregations