Search in sources :

Example 86 with QueryShardContext

use of org.opensearch.index.query.QueryShardContext in project OpenSearch by opensearch-project.

the class PhraseSuggester method innerExecute.

/*
     * More Ideas:
     *   - add the ability to find whitespace problems -> we could build a poor man's decompounder with our index, based on an automaton?
     *   - add the ability to build different error models, maybe based on a confusion matrix?
     *   - try to combine a token with its subsequent token to find / detect word splits (optional)
     *      - for this to work we need some way to define the position length of a candidate
     *   - phonetic filters could be interesting here too for candidate selection
     */
/**
 * Computes the phrase suggestion for the text held by {@code suggestion}.
 * <p>
 * One candidate generator is created per configured generator whose field has terms in the
 * searcher's index reader. If no generator could be created, or the suggest field itself has
 * no terms, the response contains a single entry without options (built with a cutoff score
 * of {@link Double#MIN_VALUE}). Otherwise the corrections returned by the
 * {@link NoisyChannelSpellChecker} are turned into options, optionally filtered or flagged
 * through the collate query script.
 *
 * @param name       name given to the returned suggestion
 * @param suggestion context carrying the text, analyzer, generators and collate settings
 * @param searcher   searcher whose index reader supplies the terms and runs collate queries
 * @param spare      scratch buffer reused for building the text of the entry and its options
 * @return the phrase suggestion, always containing exactly one entry
 * @throws IOException if reading terms, analyzing the text or running a collate query fails
 */
@Override
public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    final double errorLikelihood = suggestion.realworldErrorLikelihood();
    final PhraseSuggestion result = new PhraseSuggestion(name, suggestion.getSize());
    final IndexReader reader = searcher.getIndexReader();

    // Build one candidate generator per configured generator whose field actually has terms.
    final List<PhraseSuggestionContext.DirectCandidateGenerator> generatorConfigs = suggestion.generators();
    final List<CandidateGenerator> candidateGenerators = new ArrayList<>(generatorConfigs.size());
    for (PhraseSuggestionContext.DirectCandidateGenerator config : generatorConfigs) {
        final DirectSpellChecker spellChecker = config.createDirectSpellChecker();
        final Terms fieldTerms = MultiTerms.getTerms(reader, config.field());
        if (fieldTerms == null) {
            continue;
        }
        candidateGenerators.add(new DirectCandidateGenerator(spellChecker, config.field(), config.suggestMode(), reader, errorLikelihood, config.size(), config.preFilter(), config.postFilter(), fieldTerms));
    }

    final String suggestField = suggestion.getField();
    final Terms suggestTerms = MultiTerms.getTerms(reader, suggestField);

    // Nothing to correct against: answer with a single entry that carries no options.
    if (candidateGenerators.isEmpty() || suggestTerms == null) {
        result.addTerm(buildResultEntry(suggestion, spare, Double.MIN_VALUE));
        return result;
    }

    final NoisyChannelSpellChecker noisyChannel = new NoisyChannelSpellChecker(errorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
    final BytesRef separator = suggestion.separator();
    final WordScorer scorer = suggestion.model().newScorer(reader, suggestTerms, suggestField, errorLikelihood, separator);

    final Result checked;
    try (TokenStream tokens = tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField())) {
        final MultiCandidateGeneratorWrapper combined = new MultiCandidateGeneratorWrapper(suggestion.getShardSize(), candidateGenerators.toArray(new CandidateGenerator[candidateGenerators.size()]));
        checked = noisyChannel.getCorrections(tokens, combined, suggestion.maxErrors(), suggestion.getShardSize(), scorer, suggestion.confidence(), suggestion.gramSize());
    }

    final PhraseSuggestion.Entry entry = buildResultEntry(suggestion, spare, checked.cutoffScore);
    result.addTerm(entry);

    final BytesRefBuilder byteSpare = new BytesRefBuilder();
    final TemplateScript.Factory collateScript = suggestion.getCollateQueryScript();
    final boolean hasCollateScript = collateScript != null;
    final boolean prune = hasCollateScript && suggestion.collatePrune();

    for (Correction correction : checked.corrections) {
        spare.copyUTF8Bytes(correction.join(SEPARATOR, byteSpare, null, null));

        boolean matches = true;
        if (hasCollateScript) {
            // Render the collate template for this correction and check whether the
            // resulting query yields any document from the index.
            final Map<String, Object> scriptParams = suggestion.getCollateScriptParams();
            scriptParams.put(SUGGESTION_TEMPLATE_VAR_NAME, spare.toString());
            final QueryShardContext shardContext = suggestion.getShardContext();
            final String renderedQuery = collateScript.newInstance(scriptParams).execute();
            try (XContentParser parser = XContentFactory.xContent(renderedQuery).createParser(shardContext.getXContentRegistry(), LoggingDeprecationHandler.INSTANCE, renderedQuery)) {
                final QueryBuilder collateQuery = AbstractQueryBuilder.parseInnerQueryBuilder(parser);
                final ParsedQuery parsed = shardContext.toQuery(collateQuery);
                matches = Lucene.exists(searcher, parsed.query());
            }
        }

        // A non-matching correction is only kept when pruning is on, in which case the
        // option itself records whether it matched.
        if (matches || prune) {
            final Text phrase = new Text(spare.toString());
            Text highlighted = null;
            if (suggestion.getPreTag() != null) {
                spare.copyUTF8Bytes(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()));
                highlighted = new Text(spare.toString());
            }
            final float score = (float) (correction.score);
            final PhraseSuggestion.Entry.Option option = prune
                ? new PhraseSuggestion.Entry.Option(phrase, highlighted, score, matches)
                : new PhraseSuggestion.Entry.Option(phrase, highlighted, score);
            entry.addOption(option);
        }
    }
    return result;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) ParsedQuery(org.opensearch.index.query.ParsedQuery) ArrayList(java.util.ArrayList) QueryBuilder(org.opensearch.index.query.QueryBuilder) AbstractQueryBuilder(org.opensearch.index.query.AbstractQueryBuilder) Result(org.opensearch.search.suggest.phrase.NoisyChannelSpellChecker.Result) Entry(org.opensearch.search.suggest.Suggest.Suggestion.Entry) QueryShardContext(org.opensearch.index.query.QueryShardContext) DirectSpellChecker(org.apache.lucene.search.spell.DirectSpellChecker) BytesRef(org.apache.lucene.util.BytesRef) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) Terms(org.apache.lucene.index.Terms) MultiTerms(org.apache.lucene.index.MultiTerms) Text(org.opensearch.common.text.Text) IndexReader(org.apache.lucene.index.IndexReader) TemplateScript(org.opensearch.script.TemplateScript) XContentParser(org.opensearch.common.xcontent.XContentParser)

Example 87 with QueryShardContext

use of org.opensearch.index.query.QueryShardContext in project OpenSearch by opensearch-project.

the class HighlightBuilderTests method testBuildSearchContextHighlight.

/**
 * Tests that build() produces a {@link SearchHighlightContext} whose parameters match those
 * of the random {@link HighlightBuilder} it was built from.
 */
public void testBuildSearchContextHighlight() throws IOException {
    final Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
    final Index index = new Index(randomAlphaOfLengthBetween(1, 10), "_na_");
    final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, settings);
    // shard context will only need indicesQueriesRegistry for building Query objects nested in highlighter
    final QueryShardContext mockShardContext = new QueryShardContext(0, indexSettings, BigArrays.NON_RECYCLING_INSTANCE, null, null, null, null, null, xContentRegistry(), namedWriteableRegistry, null, null, System::currentTimeMillis, null, null, () -> true, null) {

        // Every requested field name resolves to a freshly built text field type.
        @Override
        public MappedFieldType fieldMapper(String name) {
            final Mapper.BuilderContext builderContext = new Mapper.BuilderContext(indexSettings.getSettings(), new ContentPath(1));
            return new TextFieldMapper.Builder(name, createDefaultIndexAnalyzers()).build(builderContext).fieldType();
        }
    };
    mockShardContext.setMapUnmappedFieldAsString(true);

    for (int run = 0; run < NUMBER_OF_TESTBUILDERS; run++) {
        final HighlightBuilder randomBuilder = randomHighlighterBuilder();
        final HighlightBuilder highlightBuilder = Rewriteable.rewrite(randomBuilder, mockShardContext);
        final SearchHighlightContext highlight = highlightBuilder.build(mockShardContext);

        for (SearchHighlightContext.Field field : highlight.fields()) {
            // The encoder is a builder-level setting; fall back to the default when unset.
            String expectedEncoder = HighlightBuilder.DEFAULT_ENCODER;
            if (highlightBuilder.encoder() != null) {
                expectedEncoder = highlightBuilder.encoder();
            }
            assertEquals(expectedEncoder, field.fieldOptions().encoder());

            final Field fieldBuilder = getFieldBuilderByName(highlightBuilder, field.field());
            assertNotNull("expected a highlight builder for field " + field.field(), fieldBuilder);
            final FieldOptions fieldOptions = field.fieldOptions();

            // Compare each builder setting with the matching built option.
            final BiConsumer<Function<AbstractHighlighterBuilder<?>, Object>, Function<FieldOptions, Object>> verify = mergeBeforeChek(highlightBuilder, fieldBuilder, fieldOptions);
            verify.accept(AbstractHighlighterBuilder::boundaryChars, FieldOptions::boundaryChars);
            verify.accept(AbstractHighlighterBuilder::boundaryScannerType, FieldOptions::boundaryScannerType);
            verify.accept(AbstractHighlighterBuilder::boundaryMaxScan, FieldOptions::boundaryMaxScan);
            verify.accept(AbstractHighlighterBuilder::fragmentSize, FieldOptions::fragmentCharSize);
            verify.accept(AbstractHighlighterBuilder::fragmenter, FieldOptions::fragmenter);
            verify.accept(AbstractHighlighterBuilder::requireFieldMatch, FieldOptions::requireFieldMatch);
            verify.accept(AbstractHighlighterBuilder::noMatchSize, FieldOptions::noMatchSize);
            verify.accept(AbstractHighlighterBuilder::numOfFragments, FieldOptions::numberOfFragments);
            verify.accept(AbstractHighlighterBuilder::phraseLimit, FieldOptions::phraseLimit);
            verify.accept(AbstractHighlighterBuilder::highlighterType, FieldOptions::highlighterType);
            verify.accept(AbstractHighlighterBuilder::highlightFilter, FieldOptions::highlightFilter);
            verify.accept(AbstractHighlighterBuilder::preTags, FieldOptions::preTags);
            verify.accept(AbstractHighlighterBuilder::postTags, FieldOptions::postTags);
            verify.accept(AbstractHighlighterBuilder::options, FieldOptions::options);
            verify.accept(AbstractHighlighterBuilder::order, options -> options.scoreOrdered() ? Order.SCORE : Order.NONE);

            assertEquals(fieldBuilder.fragmentOffset, fieldOptions.fragmentOffset());

            if (fieldBuilder.matchedFields == null) {
                assertNull(fieldOptions.matchedFields());
            } else {
                // Compare as sorted arrays so the order of the matched fields does not matter.
                final String[] sortedExpected = Arrays.copyOf(fieldBuilder.matchedFields, fieldBuilder.matchedFields.length);
                Arrays.sort(sortedExpected);
                final String[] sortedActual = new TreeSet<>(fieldOptions.matchedFields()).toArray(new String[fieldOptions.matchedFields().size()]);
                assertArrayEquals(sortedExpected, sortedActual);
            }

            // A field-level highlight query takes precedence over the builder-level one.
            final QueryBuilder querySource = fieldBuilder.highlightQuery != null ? fieldBuilder.highlightQuery : highlightBuilder.highlightQuery;
            Query expectedQuery = null;
            if (querySource != null) {
                expectedQuery = Rewriteable.rewrite(querySource, mockShardContext).toQuery(mockShardContext);
            }
            assertEquals(expectedQuery, fieldOptions.highlightQuery());
        }
    }
}
Also used : Query(org.apache.lucene.search.Query) IndexSettings(org.opensearch.index.IndexSettings) QueryBuilder(org.opensearch.index.query.QueryBuilder) IdsQueryBuilder(org.opensearch.index.query.IdsQueryBuilder) TermQueryBuilder(org.opensearch.index.query.TermQueryBuilder) XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) MatchAllQueryBuilder(org.opensearch.index.query.MatchAllQueryBuilder) FieldOptions(org.opensearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions) Index(org.opensearch.index.Index) Matchers.containsString(org.hamcrest.Matchers.containsString) ContentPath(org.opensearch.index.mapper.ContentPath) Field(org.opensearch.search.fetch.subphase.highlight.HighlightBuilder.Field) Function(java.util.function.Function) TreeSet(java.util.TreeSet) QueryShardContext(org.opensearch.index.query.QueryShardContext) Settings(org.opensearch.common.settings.Settings) IndexSettings(org.opensearch.index.IndexSettings) TextFieldMapper(org.opensearch.index.mapper.TextFieldMapper)

Example 88 with QueryShardContext

use of org.opensearch.index.query.QueryShardContext in project OpenSearch by opensearch-project.

the class ValuesSourceConfigTests method testUnmappedKeyword.

/**
 * Resolves a values source config for the field "bytes", which is not declared in the index
 * mapping, first without and then with a missing value of "abc".
 */
public void testUnmappedKeyword() throws Exception {
    final IndexService service = createIndex("index", Settings.EMPTY, "type");
    client().prepareIndex("index").setId("1").setSource().setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE).get();

    try (Engine.Searcher searcher = service.getShard(0).acquireSearcher("test")) {
        final QueryShardContext shardContext = service.newQueryShardContext(0, searcher, () -> 42L, null);

        // Without a missing value: a values source is still returned, but the config reports no values.
        final ValuesSourceConfig plainConfig = ValuesSourceConfig.resolve(shardContext, ValueType.STRING, "bytes", null, null, null, null, CoreValuesSourceType.BYTES);
        final ValuesSource.Bytes plainSource = (ValuesSource.Bytes) plainConfig.getValuesSource();
        assertNotNull(plainSource);
        assertFalse(plainConfig.hasValues());

        // With a missing value of "abc": document 0 is expected to yield exactly that value.
        final ValuesSourceConfig missingConfig = ValuesSourceConfig.resolve(shardContext, ValueType.STRING, "bytes", null, "abc", null, null, CoreValuesSourceType.BYTES);
        final ValuesSource.Bytes missingSource = (ValuesSource.Bytes) missingConfig.getValuesSource();
        final LeafReaderContext firstLeaf = searcher.getIndexReader().leaves().get(0);
        final SortedBinaryDocValues docValues = missingSource.bytesValues(firstLeaf);
        assertTrue(docValues.advanceExact(0));
        assertEquals(1, docValues.docValueCount());
        assertEquals(new BytesRef("abc"), docValues.nextValue());
    }
}
Also used : IndexService(org.opensearch.index.IndexService) QueryShardContext(org.opensearch.index.query.QueryShardContext) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Engine(org.opensearch.index.engine.Engine) BytesRef(org.apache.lucene.util.BytesRef) SortedBinaryDocValues(org.opensearch.index.fielddata.SortedBinaryDocValues)

Example 89 with QueryShardContext

use of org.opensearch.index.query.QueryShardContext in project OpenSearch by opensearch-project.

the class ValuesSourceConfigTests method testFieldAlias.

/**
 * Resolves a values source config through the field alias "alias" (path "field") and checks
 * that the value indexed under "field" is read back for document 0.
 */
public void testFieldAlias() throws Exception {
    final IndexService service = createIndex("index", Settings.EMPTY, "type", "field", "type=keyword", "alias", "type=alias,path=field");
    client().prepareIndex("index").setId("1").setSource("field", "value").setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE).get();

    try (Engine.Searcher searcher = service.getShard(0).acquireSearcher("test")) {
        final QueryShardContext shardContext = service.newQueryShardContext(0, searcher, () -> 42L, null);

        // Resolve by alias name rather than by the concrete field name.
        final ValuesSourceConfig aliasConfig = ValuesSourceConfig.resolve(shardContext, ValueType.STRING, "alias", null, null, null, null, CoreValuesSourceType.BYTES);
        final ValuesSource.Bytes aliasSource = (ValuesSource.Bytes) aliasConfig.getValuesSource();

        final LeafReaderContext firstLeaf = searcher.getIndexReader().leaves().get(0);
        final SortedBinaryDocValues docValues = aliasSource.bytesValues(firstLeaf);
        assertTrue(docValues.advanceExact(0));
        assertEquals(1, docValues.docValueCount());
        assertEquals(new BytesRef("value"), docValues.nextValue());
    }
}
Also used : IndexService(org.opensearch.index.IndexService) QueryShardContext(org.opensearch.index.query.QueryShardContext) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Engine(org.opensearch.index.engine.Engine) BytesRef(org.apache.lucene.util.BytesRef) SortedBinaryDocValues(org.opensearch.index.fielddata.SortedBinaryDocValues)

Example 90 with QueryShardContext

use of org.opensearch.index.query.QueryShardContext in project OpenSearch by opensearch-project.

the class ValuesSourceConfigTests method testUnmappedBoolean.

/**
 * Resolves a values source config for the field "bool", which is not declared in the index
 * mapping, first without and then with a missing value of {@code true}.
 */
public void testUnmappedBoolean() throws Exception {
    final IndexService service = createIndex("index", Settings.EMPTY, "type");
    client().prepareIndex("index").setId("1").setSource().setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE).get();

    try (Engine.Searcher searcher = service.getShard(0).acquireSearcher("test")) {
        final QueryShardContext shardContext = service.newQueryShardContext(0, searcher, () -> 42L, null);

        // Without a missing value: a values source is still returned, but the config reports no values.
        final ValuesSourceConfig plainConfig = ValuesSourceConfig.resolve(shardContext, ValueType.BOOLEAN, "bool", null, null, null, null, CoreValuesSourceType.BYTES);
        final ValuesSource.Numeric plainSource = (ValuesSource.Numeric) plainConfig.getValuesSource();
        assertNotNull(plainSource);
        assertFalse(plainConfig.hasValues());

        // With a missing value of true: document 0 is expected to yield the numeric value 1.
        final ValuesSourceConfig missingConfig = ValuesSourceConfig.resolve(shardContext, ValueType.BOOLEAN, "bool", null, true, null, null, CoreValuesSourceType.BYTES);
        final ValuesSource.Numeric missingSource = (ValuesSource.Numeric) missingConfig.getValuesSource();
        final LeafReaderContext firstLeaf = searcher.getIndexReader().leaves().get(0);
        final SortedNumericDocValues docValues = missingSource.longValues(firstLeaf);
        assertTrue(docValues.advanceExact(0));
        assertEquals(1, docValues.docValueCount());
        assertEquals(1, docValues.nextValue());
    }
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) IndexService(org.opensearch.index.IndexService) QueryShardContext(org.opensearch.index.query.QueryShardContext) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Engine(org.opensearch.index.engine.Engine)

Aggregations

QueryShardContext (org.opensearch.index.query.QueryShardContext)135 Query (org.apache.lucene.search.Query)46 QueryBuilder (org.opensearch.index.query.QueryBuilder)29 TermQuery (org.apache.lucene.search.TermQuery)27 IndexSettings (org.opensearch.index.IndexSettings)25 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)23 Settings (org.opensearch.common.settings.Settings)22 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)21 IndexService (org.opensearch.index.IndexService)20 Term (org.apache.lucene.index.Term)17 BooleanQuery (org.apache.lucene.search.BooleanQuery)17 Directory (org.apache.lucene.store.Directory)17 XContentBuilder (org.opensearch.common.xcontent.XContentBuilder)17 Engine (org.opensearch.index.engine.Engine)17 BytesRef (org.apache.lucene.util.BytesRef)16 MatchAllQueryBuilder (org.opensearch.index.query.MatchAllQueryBuilder)16 SortField (org.apache.lucene.search.SortField)15 IndexSearcher (org.apache.lucene.search.IndexSearcher)14 IndexReader (org.apache.lucene.index.IndexReader)13 Matchers.containsString (org.hamcrest.Matchers.containsString)13