Search in sources :

Example 11 with NamedAnalyzer

use of org.elasticsearch.index.analysis.NamedAnalyzer in project elasticsearch by elastic.

the class SamplerAggregatorTests method testSampler.

/**
 * Runs a sampler aggregation capped at 3 documents per shard over a term query and verifies the
 * minimum of the numeric "int" field computed from the sampled documents.
 */
public void testSampler() throws IOException {
    TextFieldType textType = new TextFieldType();
    NamedAnalyzer indexAnalyzer = new NamedAnalyzer("foo", AnalyzerScope.GLOBAL, new StandardAnalyzer());
    textType.setIndexAnalyzer(indexAnalyzer);
    MappedFieldType longType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    longType.setName("int");

    // Generous buffer limits: the test depends on all documents ending up in a single segment
    // with predictable docIds (asserted once the reader is opened).
    IndexWriterConfig writerConfig = newIndexWriterConfig();
    writerConfig.setMaxBufferedDocs(100);
    writerConfig.setRAMBufferSizeMB(100);

    try (Directory directory = newDirectory();
        IndexWriter writer = new IndexWriter(directory, writerConfig)) {
        long[] values = { 7, 3, -10, -6, 5, 50 };
        for (long value : values) {
            // "good " is repeated value times; zero or negative values produce an empty text.
            StringBuilder repeated = new StringBuilder();
            for (long remaining = value; remaining > 0; remaining--) {
                repeated.append("good ");
            }
            Document document = new Document();
            document.add(new Field("text", repeated.toString(), textType));
            document.add(new SortedNumericDocValuesField("int", value));
            writer.addDocument(document);
        }

        SamplerAggregationBuilder samplerBuilder = new SamplerAggregationBuilder("sampler")
            .shardSize(3)
            .subAggregation(new MinAggregationBuilder("min").field("int"));

        try (IndexReader indexReader = DirectoryReader.open(writer)) {
            assertEquals("test expects a single segment", 1, indexReader.leaves().size());
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            TermQuery goodQuery = new TermQuery(new Term("text", "good"));
            Sampler sampled = searchAndReduce(indexSearcher, goodQuery, samplerBuilder, textType, longType);
            Min minimum = sampled.getAggregations().get("min");
            assertEquals(5.0, minimum.getValue(), 0);
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) Term(org.apache.lucene.index.Term) TextFieldType(org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType) Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) Field(org.apache.lucene.document.Field) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) Min(org.elasticsearch.search.aggregations.metrics.min.Min) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) MinAggregationBuilder(org.elasticsearch.search.aggregations.metrics.min.MinAggregationBuilder) IndexReader(org.apache.lucene.index.IndexReader) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Directory(org.apache.lucene.store.Directory)

Example 12 with NamedAnalyzer

use of org.elasticsearch.index.analysis.NamedAnalyzer in project elasticsearch by elastic.

the class DocumentFieldMapperTests method testAnalyzers.

/**
 * Verifies that DocumentFieldMappers resolves analyzers per field: "field1" carries its own
 * index/search/search-quote analyzers, while "field2" sets none and is expected to resolve to the
 * defaults handed to the DocumentFieldMappers constructor.
 */
public void testAnalyzers() throws IOException {
    // field1: every analyzer configured explicitly on the field type
    FakeFieldType configuredType = new FakeFieldType();
    configuredType.setName("field1");
    configuredType.setIndexAnalyzer(new NamedAnalyzer("foo", AnalyzerScope.INDEX, new FakeAnalyzer("index")));
    configuredType.setSearchAnalyzer(new NamedAnalyzer("bar", AnalyzerScope.INDEX, new FakeAnalyzer("search")));
    configuredType.setSearchQuoteAnalyzer(new NamedAnalyzer("baz", AnalyzerScope.INDEX, new FakeAnalyzer("search_quote")));
    FieldMapper configuredMapper = new FakeFieldMapper("field1", configuredType);

    // field2: no analyzers set on the field type
    FakeFieldType bareType = new FakeFieldType();
    bareType.setName("field2");
    FieldMapper bareMapper = new FakeFieldMapper("field2", bareType);

    Analyzer fallbackIndex = new FakeAnalyzer("default_index");
    Analyzer fallbackSearch = new FakeAnalyzer("default_search");
    Analyzer fallbackSearchQuote = new FakeAnalyzer("default_search_quote");
    DocumentFieldMappers mappers = new DocumentFieldMappers(
        Arrays.asList(configuredMapper, bareMapper),
        fallbackIndex,
        fallbackSearch,
        fallbackSearchQuote);

    // explicitly configured analyzers are used for field1
    assertAnalyzes(mappers.indexAnalyzer(), "field1", "index");
    assertAnalyzes(mappers.searchAnalyzer(), "field1", "search");
    assertAnalyzes(mappers.searchQuoteAnalyzer(), "field1", "search_quote");

    // the constructor-supplied defaults are used for field2
    assertAnalyzes(mappers.indexAnalyzer(), "field2", "default_index");
    assertAnalyzes(mappers.searchAnalyzer(), "field2", "default_search");
    assertAnalyzes(mappers.searchQuoteAnalyzer(), "field2", "default_search_quote");
}
Also used : NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer)

Example 13 with NamedAnalyzer

use of org.elasticsearch.index.analysis.NamedAnalyzer in project elasticsearch by elastic.

the class CompletionFieldMapperTests method testDefaultConfiguration.

/**
 * Parses a mapping containing a single "completion" field with no extra options and checks the
 * resulting index and search analyzers: both are named "simple", wrap a CompletionAnalyzer, and
 * preserve separators and position increments.
 */
public void testDefaultConfiguration() throws IOException {
    String mappingJson = jsonBuilder()
        .startObject()
            .startObject("type1")
                .startObject("properties")
                    .startObject("completion")
                        .field("type", "completion")
                    .endObject()
                .endObject()
            .endObject()
        .endObject()
        .string();
    DocumentMapper documentMapper = createIndex("test")
        .mapperService()
        .documentMapperParser()
        .parse("type1", new CompressedXContent(mappingJson));

    FieldMapper completionMapper = documentMapper.mappers().getMapper("completion");
    assertThat(completionMapper, instanceOf(CompletionFieldMapper.class));
    MappedFieldType completionType = completionMapper.fieldType();

    // index-time analyzer
    NamedAnalyzer namedIndexAnalyzer = completionType.indexAnalyzer();
    assertThat(namedIndexAnalyzer.name(), equalTo("simple"));
    assertThat(namedIndexAnalyzer.analyzer(), instanceOf(CompletionAnalyzer.class));
    CompletionAnalyzer indexCompletionAnalyzer = (CompletionAnalyzer) namedIndexAnalyzer.analyzer();
    assertThat(indexCompletionAnalyzer.preservePositionIncrements(), equalTo(true));
    assertThat(indexCompletionAnalyzer.preserveSep(), equalTo(true));

    // search-time analyzer
    NamedAnalyzer namedSearchAnalyzer = completionType.searchAnalyzer();
    assertThat(namedSearchAnalyzer.name(), equalTo("simple"));
    assertThat(namedSearchAnalyzer.analyzer(), instanceOf(CompletionAnalyzer.class));
    CompletionAnalyzer searchCompletionAnalyzer = (CompletionAnalyzer) namedSearchAnalyzer.analyzer();
    assertThat(searchCompletionAnalyzer.preservePositionIncrements(), equalTo(true));
    assertThat(searchCompletionAnalyzer.preserveSep(), equalTo(true));
}
Also used : NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) CompressedXContent(org.elasticsearch.common.compress.CompressedXContent) CompletionAnalyzer(org.apache.lucene.search.suggest.document.CompletionAnalyzer) Matchers.containsString(org.hamcrest.Matchers.containsString)

Example 14 with NamedAnalyzer

use of org.elasticsearch.index.analysis.NamedAnalyzer in project elasticsearch by elastic.

the class KeywordFieldTypeTests method testTermQueryWithNormalizer.

/**
 * Checks that termQuery on a field type whose search analyzer lower-cases its input produces a
 * lower-cased term, and that termQuery is rejected with an IllegalArgumentException once the
 * field's index options are set to NONE.
 */
public void testTermQueryWithNormalizer() {
    MappedFieldType fieldType = createDefaultFieldType();
    fieldType.setName("field");
    fieldType.setIndexOptions(IndexOptions.DOCS);

    // Analyzer that lower-cases both in its regular token stream and in its normalize hook.
    Analyzer lowercasing = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new WhitespaceTokenizer();
            TokenFilter sink = new LowerCaseFilter(source);
            return new TokenStreamComponents(source, sink);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new LowerCaseFilter(in);
        }
    };
    fieldType.setSearchAnalyzer(new NamedAnalyzer("my_normalizer", AnalyzerScope.INDEX, lowercasing));

    // The whole value becomes one lower-cased term; the whitespace is kept inside the term.
    TermQuery expectedQuery = new TermQuery(new Term("field", "foo bar"));
    assertEquals(expectedQuery, fieldType.termQuery("fOo BaR", null));

    // With index options NONE the field can no longer be searched.
    fieldType.setIndexOptions(IndexOptions.NONE);
    IllegalArgumentException failure = expectThrows(
        IllegalArgumentException.class,
        () -> fieldType.termQuery("bar", null));
    assertEquals("Cannot search on field [field] since it is not indexed.", failure.getMessage());
}
Also used : WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) TermQuery(org.apache.lucene.search.TermQuery) TokenStream(org.apache.lucene.analysis.TokenStream) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) Term(org.apache.lucene.index.Term) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) LowerCaseFilter(org.apache.lucene.analysis.LowerCaseFilter) TokenFilter(org.apache.lucene.analysis.TokenFilter)

Example 15 with NamedAnalyzer

use of org.elasticsearch.index.analysis.NamedAnalyzer in project elasticsearch by elastic.

the class MetaDataIndexUpgradeService method checkMappingsCompatibility.

/**
 * Checks the mappings of the given index for compatibility with the current version by merging
 * them into a throw-away {@link MapperService} that is backed by placeholder analyzers.
 *
 * @param indexMetaData metadata of the index whose mappings are checked
 * @throws IllegalStateException wrapping any exception raised while building the service or
 *         merging the mappings; the message carries the index name
 */
private void checkMappingsCompatibility(IndexMetaData indexMetaData) {
    try {
        // A real analysis service cannot be instantiated at this point because the node might not
        // have been started yet. Real analyzers are not needed at this stage, so they are faked.
        IndexSettings indexSettings = new IndexSettings(indexMetaData, this.settings);
        SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());

        final Analyzer neverUsed = new Analyzer() {

            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                throw new UnsupportedOperationException("shouldn't be here");
            }
        };
        final NamedAnalyzer placeholder = new NamedAnalyzer("fake_default", AnalyzerScope.INDEX, neverUsed);

        // Fake map that answers every possible string key with an analyzer of that name.
        // entrySet is not a faithful view of those answers; it is implemented (as empty) because
        // IndexAnalyzers iterates over all analyzers internally in order to close them.
        final Map<String, NamedAnalyzer> anyNameAnalyzers = new AbstractMap<String, NamedAnalyzer>() {

            @Override
            public NamedAnalyzer get(Object key) {
                assert key instanceof String : "key must be a string but was: " + key.getClass();
                final String analyzerName = (String) key;
                return new NamedAnalyzer(analyzerName, AnalyzerScope.INDEX, placeholder.analyzer());
            }

            @Override
            public Set<Entry<String, NamedAnalyzer>> entrySet() {
                return Collections.emptySet();
            }
        };

        try (IndexAnalyzers fakeIndexAnalyzers = new IndexAnalyzers(
                indexSettings, placeholder, placeholder, placeholder, anyNameAnalyzers, anyNameAnalyzers)) {
            MapperService mapperService = new MapperService(
                indexSettings, fakeIndexAnalyzers, xContentRegistry, similarityService, mapperRegistry, () -> null);
            mapperService.merge(indexMetaData, MapperService.MergeReason.MAPPING_RECOVERY, false);
        }
    } catch (Exception e) {
        // Wrap the inner exception so the index name shows up in the exception message
        final String message = "unable to upgrade the mappings for the index [" + indexMetaData.getIndex() + "]";
        throw new IllegalStateException(message, e);
    }
}
Also used : NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) IndexSettings(org.elasticsearch.index.IndexSettings) Analyzer(org.apache.lucene.analysis.Analyzer) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) AbstractMap(java.util.AbstractMap) SimilarityService(org.elasticsearch.index.similarity.SimilarityService) IndexAnalyzers(org.elasticsearch.index.analysis.IndexAnalyzers) MapperService(org.elasticsearch.index.mapper.MapperService)

Aggregations

NamedAnalyzer (org.elasticsearch.index.analysis.NamedAnalyzer)15 Analyzer (org.apache.lucene.analysis.Analyzer)4 TokenStream (org.apache.lucene.analysis.TokenStream)3 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)3 CompressedXContent (org.elasticsearch.common.compress.CompressedXContent)3 IndexSettings (org.elasticsearch.index.IndexSettings)3 HashSet (java.util.HashSet)2 Map (java.util.Map)2 Tokenizer (org.apache.lucene.analysis.Tokenizer)2 Field (org.apache.lucene.document.Field)2 Term (org.apache.lucene.index.Term)2 TermQuery (org.apache.lucene.search.TermQuery)2 CompletionAnalyzer (org.apache.lucene.search.suggest.document.CompletionAnalyzer)2 CustomAnalyzer (org.elasticsearch.index.analysis.CustomAnalyzer)2 IndexAnalyzers (org.elasticsearch.index.analysis.IndexAnalyzers)2 TokenFilterFactory (org.elasticsearch.index.analysis.TokenFilterFactory)2 Reader (java.io.Reader)1 AbstractMap (java.util.AbstractMap)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1