Search in sources:

Example 1 with TextFieldType

use of org.opensearch.index.mapper.TextFieldMapper.TextFieldType in project OpenSearch by opensearch-project.

In the class TextFieldMapperTests, the method registerParameters.

/**
 * Registers the mapping-parameter checks for the text field mapper with {@code checker}.
 *
 * <p>Each {@code registerUpdateCheck} call pairs a mapping change with an assertion on the resulting
 * mapper. Each {@code registerConflictCheck} call names a parameter together with a mapping change
 * for it; presumably that change is expected to be rejected as a conflict (confirm against
 * {@code ParameterChecker}).
 *
 * @param checker collector that receives the update and conflict checks
 * @throws IOException declared by the overridden method
 */
@Override
protected void registerParameters(ParameterChecker checker) throws IOException {
    // fielddata on its own
    checker.registerUpdateCheck(b -> b.field("fielddata", true), m -> {
        TextFieldType ft = (TextFieldType) m.fieldType();
        assertTrue(ft.fielddata());
    });
    // fielddata together with its frequency filter; every filter setting must be visible on the field type
    checker.registerUpdateCheck(b -> {
        b.field("fielddata", true);
        b.startObject("fielddata_frequency_filter");
        {
            b.field("min", 10);
            b.field("max", 20);
            b.field("min_segment_size", 100);
        }
        b.endObject();
    }, m -> {
        TextFieldType ft = (TextFieldType) m.fieldType();
        assertEquals(10, ft.fielddataMinFrequency(), 0);
        assertEquals(20, ft.fielddataMaxFrequency(), 0);
        assertEquals(100, ft.fielddataMinSegmentSize());
    });
    checker.registerUpdateCheck(b -> b.field("eager_global_ordinals", "true"), m -> assertTrue(m.fieldType().eagerGlobalOrdinals()));
    // search analyzer and search quote analyzer, each verified by analyzer name
    checker.registerUpdateCheck(b -> {
        b.field("analyzer", "default");
        b.field("search_analyzer", "keyword");
    }, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchAnalyzer().name()));
    checker.registerUpdateCheck(b -> {
        b.field("analyzer", "default");
        b.field("search_analyzer", "keyword");
        b.field("search_quote_analyzer", "keyword");
    }, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchQuoteAnalyzer().name()));
    // parameters registered as conflict checks
    checker.registerConflictCheck("index", b -> b.field("index", false));
    checker.registerConflictCheck("store", b -> b.field("store", true));
    checker.registerConflictCheck("index_phrases", b -> b.field("index_phrases", true));
    checker.registerConflictCheck("index_prefixes", b -> b.startObject("index_prefixes").endObject());
    checker.registerConflictCheck("index_options", b -> b.field("index_options", "docs"));
    checker.registerConflictCheck("similarity", b -> b.field("similarity", "boolean"));
    checker.registerConflictCheck("analyzer", b -> b.field("analyzer", "keyword"));
    checker.registerConflictCheck("term_vector", b -> b.field("term_vector", "yes"));
    checker.registerConflictCheck("position_increment_gap", b -> b.field("position_increment_gap", 10));
    // norms can be set from true to false, but not vice versa
    checker.registerConflictCheck("norms", fieldMapping(b -> {
        b.field("type", "text");
        b.field("norms", false);
    }), fieldMapping(b -> {
        b.field("type", "text");
        b.field("norms", true);
    }));
    checker.registerUpdateCheck(b -> {
        b.field("type", "text");
        b.field("norms", true);
    }, b -> {
        b.field("type", "text");
        b.field("norms", false);
    }, m -> assertFalse(m.fieldType().getTextSearchInfo().hasNorms()));
    // expected value goes first so that a failure message reports expected/actual the right way round
    checker.registerUpdateCheck(b -> b.field("boost", 2.0), m -> assertEquals(2.0, m.fieldType().boost(), 0));
}
Also used : Query(org.apache.lucene.search.Query) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Arrays(java.util.Arrays) FieldType(org.apache.lucene.document.FieldType) IndexableField(org.apache.lucene.index.IndexableField) ToXContent(org.opensearch.common.xcontent.ToXContent) Term(org.apache.lucene.index.Term) PhraseQuery(org.apache.lucene.search.PhraseQuery) AnalyzerScope(org.opensearch.index.analysis.AnalyzerScope) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) Strings(org.opensearch.common.Strings) TermsEnum(org.apache.lucene.index.TermsEnum) Map(java.util.Map) XContentFactory(org.opensearch.common.xcontent.XContentFactory) Is.is(org.hamcrest.core.Is.is) CustomAnalyzer(org.opensearch.index.analysis.CustomAnalyzer) EnglishAnalyzer(org.apache.lucene.analysis.en.EnglishAnalyzer) BytesRef(org.apache.lucene.util.BytesRef) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Matchers.equalTo(org.hamcrest.Matchers.equalTo) IndexSettings(org.opensearch.index.IndexSettings) QueryShardContext(org.opensearch.index.query.QueryShardContext) Matchers.containsString(org.hamcrest.Matchers.containsString) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) MatchQuery(org.opensearch.index.search.MatchQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory) HashMap(java.util.HashMap) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) Token(org.apache.lucene.analysis.Token) MockSynonymAnalyzer(org.apache.lucene.analysis.MockSynonymAnalyzer) IndexableFieldType(org.apache.lucene.index.IndexableFieldType) CharFilterFactory(org.opensearch.index.analysis.CharFilterFactory) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) 
StandardTokenizerFactory(org.opensearch.index.analysis.StandardTokenizerFactory) MatchPhraseQueryBuilder(org.opensearch.index.query.MatchPhraseQueryBuilder) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) StopFilter(org.apache.lucene.analysis.StopFilter) NamedAnalyzer(org.opensearch.index.analysis.NamedAnalyzer) PostingsEnum(org.apache.lucene.index.PostingsEnum) TokenStream(org.apache.lucene.analysis.TokenStream) Analyzer(org.apache.lucene.analysis.Analyzer) IOException(java.io.IOException) MatchPhrasePrefixQueryBuilder(org.opensearch.index.query.MatchPhrasePrefixQueryBuilder) MultiPhrasePrefixQuery(org.opensearch.common.lucene.search.MultiPhrasePrefixQuery) BooleanClause(org.apache.lucene.search.BooleanClause) XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) TermQuery(org.apache.lucene.search.TermQuery) TextFieldType(org.opensearch.index.mapper.TextFieldMapper.TextFieldType) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) DocValuesType(org.apache.lucene.index.DocValuesType) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) IndexOptions(org.apache.lucene.index.IndexOptions) Collections(java.util.Collections) TextFieldType(org.opensearch.index.mapper.TextFieldMapper.TextFieldType)

Example 2 with TextFieldType

use of org.opensearch.index.mapper.TextFieldMapper.TextFieldType in project OpenSearch by opensearch-project.

In the class TextFieldTypeTests, the method testFuzzyQuery.

/**
 * Exercises {@code fuzzyQuery} on a text field type in three situations: a field type obtained
 * from {@code createFieldType()}, a field type built with both boolean constructor flags set to
 * {@code false}, and a query shard context named {@code MOCK_QSC_DISALLOW_EXPENSIVE}.
 */
public void testFuzzyQuery() {
    final MappedFieldType searchable = createFieldType();
    final Fuzziness twoEdits = Fuzziness.fromEdits(2);

    // The field type must produce the equivalent Lucene fuzzy query.
    final FuzzyQuery expectedQuery = new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true);
    assertEquals(expectedQuery, searchable.fuzzyQuery("foo", twoEdits, 1, 50, true, MOCK_QSC));

    // The same call on the field type built with both flags false must be rejected.
    final MappedFieldType notIndexed = new TextFieldType("field", false, false, Collections.emptyMap());
    final IllegalArgumentException notIndexedError = expectThrows(
        IllegalArgumentException.class,
        () -> notIndexed.fuzzyQuery("foo", twoEdits, 1, 50, true, MOCK_QSC)
    );
    assertEquals("Cannot search on field [field] since it is not indexed.", notIndexedError.getMessage());

    // With the disallow-expensive context the query must be refused whatever the (randomised) arguments are.
    final OpenSearchException expensiveError = expectThrows(
        OpenSearchException.class,
        () -> searchable.fuzzyQuery("foo", Fuzziness.AUTO, randomInt(10) + 1, randomInt(10) + 1, randomBoolean(), MOCK_QSC_DISALLOW_EXPENSIVE)
    );
    assertEquals("[fuzzy] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", expensiveError.getMessage());
}
Also used : FuzzyQuery(org.apache.lucene.search.FuzzyQuery) OpenSearchException(org.opensearch.OpenSearchException) Term(org.apache.lucene.index.Term) TextFieldType(org.opensearch.index.mapper.TextFieldMapper.TextFieldType)

Example 3 with TextFieldType

use of org.opensearch.index.mapper.TextFieldMapper.TextFieldType in project OpenSearch by opensearch-project.

In the class TextFieldTypeTests, the method testTermsQuery.

/**
 * Exercises {@code termsQuery} on a text field type: the field type from {@code createFieldType()}
 * must yield a {@code TermInSetQuery} over the given values, and a field type built with both
 * boolean constructor flags set to {@code false} must reject the call.
 */
public void testTermsQuery() {
    final List<String> values = Arrays.asList("foo", "bar");

    final MappedFieldType searchable = createFieldType();
    final List<BytesRef> expectedTerms = new ArrayList<>(Arrays.asList(new BytesRef("foo"), new BytesRef("bar")));
    assertEquals(new TermInSetQuery("field", expectedTerms), searchable.termsQuery(values, null));

    final MappedFieldType notIndexed = new TextFieldType("field", false, false, Collections.emptyMap());
    final IllegalArgumentException failure = expectThrows(
        IllegalArgumentException.class,
        () -> notIndexed.termsQuery(Arrays.asList("foo", "bar"), null)
    );
    assertEquals("Cannot search on field [field] since it is not indexed.", failure.getMessage());
}
Also used : TermInSetQuery(org.apache.lucene.search.TermInSetQuery) ArrayList(java.util.ArrayList) TextFieldType(org.opensearch.index.mapper.TextFieldMapper.TextFieldType) BytesRef(org.apache.lucene.util.BytesRef)

Example 4 with TextFieldType

use of org.opensearch.index.mapper.TextFieldMapper.TextFieldType in project OpenSearch by opensearch-project.

In the class SamplerAggregatorTests, the method testRidiculousSize.

/**
 * Runs a sampler aggregation whose shard size is {@code Integer.MAX_VALUE} over a small index and
 * checks that the {@code min} sub-aggregation still returns the expected value.
 *
 * @throws IOException if the index cannot be written or read
 */
public void testRidiculousSize() throws IOException {
    final TextFieldType textType = new TextFieldType("text");
    final MappedFieldType intType = new NumberFieldMapper.NumberFieldType("int", NumberFieldMapper.NumberType.LONG);

    final IndexWriterConfig writerConfig = newIndexWriterConfig();
    // Both buffer limits are set to 100; the test asserts further down that this gave a single segment.
    writerConfig.setMaxBufferedDocs(100);
    writerConfig.setRAMBufferSizeMB(100);

    final long[] values = { 7, 3, -10, -6, 5, 50 };
    try (Directory directory = newDirectory(); IndexWriter writer = new IndexWriter(directory, writerConfig)) {
        for (long value : values) {
            // "good " is repeated value times; for zero or negative values the text stays empty,
            // so those documents do not contain the term "good" at all.
            final StringBuilder goods = new StringBuilder();
            for (long remaining = value; remaining > 0; remaining--) {
                goods.append("good ");
            }
            final Document document = new Document();
            document.add(new Field("text", goods.toString(), TextFieldMapper.Defaults.FIELD_TYPE));
            document.add(new SortedNumericDocValuesField("int", value));
            writer.addDocument(document);
        }

        // An outrageously large shard size, to make sure the maxDoc protection works
        final MinAggregationBuilder minOfInt = new MinAggregationBuilder("min").field("int");
        final SamplerAggregationBuilder samplerBuilder = new SamplerAggregationBuilder("sampler").shardSize(Integer.MAX_VALUE)
            .subAggregation(minOfInt);

        try (IndexReader reader = DirectoryReader.open(writer)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            final IndexSearcher searcher = new IndexSearcher(reader);
            final TermQuery goodQuery = new TermQuery(new Term("text", "good"));
            final InternalSampler result = searchAndReduce(searcher, goodQuery, samplerBuilder, textType, intType);
            // Among the documents containing "good" (7, 3, 5, 50) the smallest value is 3.
            final Min minimum = result.getAggregations().get("min");
            assertEquals(3.0, minimum.getValue(), 0);
            assertTrue(AggregationInspectionHelper.hasValue(result));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) TextFieldType(org.opensearch.index.mapper.TextFieldMapper.TextFieldType) Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) Field(org.apache.lucene.document.Field) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) Min(org.opensearch.search.aggregations.metrics.Min) IndexWriter(org.apache.lucene.index.IndexWriter) MappedFieldType(org.opensearch.index.mapper.MappedFieldType) MinAggregationBuilder(org.opensearch.search.aggregations.metrics.MinAggregationBuilder) IndexReader(org.apache.lucene.index.IndexReader) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Directory(org.apache.lucene.store.Directory)

Example 5 with TextFieldType

use of org.opensearch.index.mapper.TextFieldMapper.TextFieldType in project OpenSearch by opensearch-project.

In the class SignificantTextAggregatorTests, the method testIncludeExcludes.

/**
 * Runs the significant text aggregation on a text field twice, first with an include list and then
 * with an exclude list for the same term, and checks which terms show up as buckets in each case.
 *
 * @throws IOException if the index cannot be written or read
 */
public void testIncludeExcludes() throws IOException {
    final TextFieldType textType = new TextFieldType("text");
    final NamedAnalyzer indexAnalyzer = new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer());
    textType.setIndexAnalyzer(indexAnalyzer);

    final IndexWriterConfig writerConfig = newIndexWriterConfig();
    // Both buffer limits are set to 100; the test asserts further down that this gave a single segment.
    writerConfig.setMaxBufferedDocs(100);
    writerConfig.setRAMBufferSizeMB(100);

    try (Directory directory = newDirectory(); IndexWriter writer = new IndexWriter(directory, writerConfig)) {
        indexDocuments(writer);
        final String[] selectedTerms = { "duplicate" };

        try (IndexReader reader = DirectoryReader.open(writer)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            final IndexSearcher searcher = new IndexSearcher(reader);

            // Phase 1: only the selected terms are included.
            final SignificantTextAggregationBuilder includingAgg = new SignificantTextAggregationBuilder("sig_text", "text")
                .includeExclude(new IncludeExclude(selectedTerms, null));
            final SamplerAggregationBuilder includingSampler = new SamplerAggregationBuilder("sampler").subAggregation(includingAgg);
            if (randomBoolean()) {
                includingAgg.sourceFieldNames(Arrays.asList("json_only_field"));
            }
            // Query for "even", whose matches should contain duplication
            final TermQuery includingQuery = new TermQuery(new Term("text", "even"));
            final InternalSampler includingResult = searchAndReduce(searcher, includingQuery, includingSampler, textType);
            final SignificantTerms includedTerms = includingResult.getAggregations().get("sig_text");
            assertNull(includedTerms.getBucketByKey("even"));
            assertNotNull(includedTerms.getBucketByKey("duplicate"));
            assertTrue(AggregationInspectionHelper.hasValue(includingResult));

            // Phase 2: the selected terms are excluded.
            final SignificantTextAggregationBuilder excludingAgg = new SignificantTextAggregationBuilder("sig_text", "text")
                .includeExclude(new IncludeExclude(null, selectedTerms));
            final SamplerAggregationBuilder excludingSampler = new SamplerAggregationBuilder("sampler").subAggregation(excludingAgg);
            if (randomBoolean()) {
                excludingAgg.sourceFieldNames(Arrays.asList("json_only_field"));
            }
            // Query for "even", whose matches should contain duplication
            final TermQuery excludingQuery = new TermQuery(new Term("text", "even"));
            final InternalSampler excludingResult = searchAndReduce(searcher, excludingQuery, excludingSampler, textType);
            final SignificantTerms remainingTerms = excludingResult.getAggregations().get("sig_text");
            assertNotNull(remainingTerms.getBucketByKey("even"));
            assertNull(remainingTerms.getBucketByKey("duplicate"));
            assertTrue(AggregationInspectionHelper.hasValue(excludingResult));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) NamedAnalyzer(org.opensearch.index.analysis.NamedAnalyzer) Term(org.apache.lucene.index.Term) TextFieldType(org.opensearch.index.mapper.TextFieldMapper.TextFieldType) InternalSampler(org.opensearch.search.aggregations.bucket.sampler.InternalSampler) IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) SamplerAggregationBuilder(org.opensearch.search.aggregations.bucket.sampler.SamplerAggregationBuilder) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Directory(org.apache.lucene.store.Directory)

Aggregations

TextFieldType (org.opensearch.index.mapper.TextFieldMapper.TextFieldType) 20, Term (org.apache.lucene.index.Term) 16, TermQuery (org.apache.lucene.search.TermQuery) 14, IndexReader (org.apache.lucene.index.IndexReader) 11, IndexWriter (org.apache.lucene.index.IndexWriter) 11, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig) 11, IndexSearcher (org.apache.lucene.search.IndexSearcher) 11, Directory (org.apache.lucene.store.Directory) 11, StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer) 10, NamedAnalyzer (org.opensearch.index.analysis.NamedAnalyzer) 10, Document (org.apache.lucene.document.Document) 4, Field (org.apache.lucene.document.Field) 4, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter) 4, BytesRef (org.apache.lucene.util.BytesRef) 4, SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField) 3, IOException (java.io.IOException) 2, Arrays (java.util.Arrays) 2, Collections (java.util.Collections) 2, HashMap (java.util.HashMap) 2, Map (java.util.Map) 2