Search in sources:

Example 1 with PreBuiltAnalyzerProviderFactory

use of org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory in project elasticsearch-opennlp-plugin by spinscale.

The class OpenNlpMappingTest, method setupMapperParser.

/**
 * Prepares the {@code mapperParser} fixture before each test.
 *
 * <p>Steps, in order: build an analysis service for the index "test" that is given a single
 * "keyword" analyzer factory, create the document mapper parser on top of it, configure logging
 * and start an {@code OpenNlpService} from settings that point at three model files
 * (name, date, location), and finally register the OpenNLP type parser on the mapper parser.
 */
@Before
public void setupMapperParser() {
    final Index testIndex = new Index("test");

    // The analysis service receives exactly one analyzer factory: "keyword".
    final PreBuiltAnalyzerProviderFactory keywordFactory =
            new PreBuiltAnalyzerProviderFactory("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer());
    final Map<String, AnalyzerProviderFactory> analyzerFactories = Maps.newHashMap();
    analyzerFactories.put("keyword", keywordFactory);

    final AnalysisService analysis = new AnalysisService(
            testIndex,
            ImmutableSettings.Builder.EMPTY_SETTINGS,
            null,
            analyzerFactories,
            null,
            null,
            null);

    mapperParser = new DocumentMapperParser(
            testIndex,
            analysis,
            new PostingsFormatService(testIndex),
            new SimilarityLookupService(testIndex, ImmutableSettings.Builder.EMPTY_SETTINGS));

    // Model file locations are relative paths under src/test/resources.
    final Settings openNlpSettings = settingsBuilder()
            .put("opennlp.models.name.file", "src/test/resources/models/en-ner-person.bin")
            .put("opennlp.models.date.file", "src/test/resources/models/en-ner-date.bin")
            .put("opennlp.models.location.file", "src/test/resources/models/en-ner-location.bin")
            .build();
    LogConfigurator.configure(openNlpSettings);

    final OpenNlpService nlpService = new OpenNlpService(openNlpSettings);
    nlpService.start();

    mapperParser.putTypeParser(
            OpenNlpMapper.CONTENT_TYPE,
            new OpenNlpMapper.TypeParser(analysis, nlpService));
}
Also used : KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) PostingsFormatService(org.elasticsearch.index.codec.postingsformat.PostingsFormatService) PreBuiltAnalyzerProviderFactory(org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory) Index(org.elasticsearch.index.Index) Matchers.containsString(org.hamcrest.Matchers.containsString) AnalyzerProviderFactory(org.elasticsearch.index.analysis.AnalyzerProviderFactory) PreBuiltAnalyzerProviderFactory(org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory) DocumentMapperParser(org.elasticsearch.index.mapper.DocumentMapperParser) OpenNlpService(org.elasticsearch.service.opennlp.OpenNlpService) SimilarityLookupService(org.elasticsearch.index.similarity.SimilarityLookupService) OpenNlpMapper(org.elasticsearch.index.mapper.opennlp.OpenNlpMapper) AnalysisService(org.elasticsearch.index.analysis.AnalysisService) ImmutableSettings(org.elasticsearch.common.settings.ImmutableSettings) Settings(org.elasticsearch.common.settings.Settings) Before(org.junit.Before)

Example 2 with PreBuiltAnalyzerProviderFactory

use of org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory in project crate by crate.

The class CommonAnalysisPlugin, method getPreBuiltAnalyzerProviderFactories.

/**
 * Builds the list of pre-built analyzer provider factories contributed by this plugin.
 *
 * <p>Each entry pairs an analyzer name with a {@code CachingStrategy} and a supplier that
 * creates the analyzer. Entries are added in a fixed order: three general-purpose analyzers
 * first, followed by the language analyzers.
 *
 * @return a newly created, mutable list of factories in registration order
 */
@Override
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
    List<PreBuiltAnalyzerProviderFactory> factories = new ArrayList<>();

    // General-purpose analyzers.
    factories.add(new PreBuiltAnalyzerProviderFactory(
            "standard_html_strip",
            CachingStrategy.ELASTICSEARCH,
            () -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
    // The original inline note names PatternAnalyzer.NON_WORD_PATTERN next to this regex.
    factories.add(new PreBuiltAnalyzerProviderFactory(
            "pattern",
            CachingStrategy.ELASTICSEARCH,
            () -> new PatternAnalyzer(Regex.compile("\\W+", null), true, CharArraySet.EMPTY_SET)));
    factories.add(new PreBuiltAnalyzerProviderFactory(
            "snowball",
            CachingStrategy.LUCENE,
            () -> new SnowballAnalyzer("English", EnglishAnalyzer.ENGLISH_STOP_WORDS_SET)));

    // Language analyzers:
    factories.add(new PreBuiltAnalyzerProviderFactory("arabic", CachingStrategy.LUCENE, ArabicAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("armenian", CachingStrategy.LUCENE, ArmenianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("basque", CachingStrategy.LUCENE, BasqueAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("bengali", CachingStrategy.LUCENE, BengaliAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("brazilian", CachingStrategy.LUCENE, BrazilianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("bulgarian", CachingStrategy.LUCENE, BulgarianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("catalan", CachingStrategy.LUCENE, CatalanAnalyzer::new));
    // chinese analyzer: only for old indices, best effort
    factories.add(new PreBuiltAnalyzerProviderFactory("chinese", CachingStrategy.ONE, StandardAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("cjk", CachingStrategy.LUCENE, CJKAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("czech", CachingStrategy.LUCENE, CzechAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("danish", CachingStrategy.LUCENE, DanishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("dutch", CachingStrategy.LUCENE, DutchAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("english", CachingStrategy.LUCENE, EnglishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("finnish", CachingStrategy.LUCENE, FinnishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("french", CachingStrategy.LUCENE, FrenchAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("galician", CachingStrategy.LUCENE, GalicianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("german", CachingStrategy.LUCENE, GermanAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("greek", CachingStrategy.LUCENE, GreekAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("hindi", CachingStrategy.LUCENE, HindiAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("hungarian", CachingStrategy.LUCENE, HungarianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("indonesian", CachingStrategy.LUCENE, IndonesianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("irish", CachingStrategy.LUCENE, IrishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("italian", CachingStrategy.LUCENE, ItalianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("latvian", CachingStrategy.LUCENE, LatvianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("lithuanian", CachingStrategy.LUCENE, LithuanianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("norwegian", CachingStrategy.LUCENE, NorwegianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("persian", CachingStrategy.LUCENE, PersianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("portuguese", CachingStrategy.LUCENE, PortugueseAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("romanian", CachingStrategy.LUCENE, RomanianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("russian", CachingStrategy.LUCENE, RussianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("sorani", CachingStrategy.LUCENE, SoraniAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("spanish", CachingStrategy.LUCENE, SpanishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("swedish", CachingStrategy.LUCENE, SwedishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("turkish", CachingStrategy.LUCENE, TurkishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("thai", CachingStrategy.LUCENE, ThaiAnalyzer::new));

    return factories;
}
Also used : PreBuiltAnalyzerProviderFactory(org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory) ArrayList(java.util.ArrayList)

Aggregations

PreBuiltAnalyzerProviderFactory (org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory)2 ArrayList (java.util.ArrayList)1 KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer)1 ImmutableSettings (org.elasticsearch.common.settings.ImmutableSettings)1 Settings (org.elasticsearch.common.settings.Settings)1 Index (org.elasticsearch.index.Index)1 AnalysisService (org.elasticsearch.index.analysis.AnalysisService)1 AnalyzerProviderFactory (org.elasticsearch.index.analysis.AnalyzerProviderFactory)1 PostingsFormatService (org.elasticsearch.index.codec.postingsformat.PostingsFormatService)1 DocumentMapperParser (org.elasticsearch.index.mapper.DocumentMapperParser)1 OpenNlpMapper (org.elasticsearch.index.mapper.opennlp.OpenNlpMapper)1 SimilarityLookupService (org.elasticsearch.index.similarity.SimilarityLookupService)1 OpenNlpService (org.elasticsearch.service.opennlp.OpenNlpService)1 Matchers.containsString (org.hamcrest.Matchers.containsString)1 Before (org.junit.Before)1