Search in sources :

Example 76 with IndexSettings

use of org.opensearch.index.IndexSettings in project OpenSearch by opensearch-project.

the class CompoundAnalysisTests method analyze.

/**
 * Runs {@code text} through the named analyzer built from {@code settings} and collects the emitted terms.
 *
 * @param settings     settings used to create both the index settings and the analysis module
 * @param analyzerName name of the analyzer to look up in the built index analyzers
 * @param text         text to analyze
 * @return the term text of every token, in the order the token stream produced them
 * @throws IOException propagated from building the analyzers or consuming the token stream
 */
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = createAnalysisModule(settings);
    IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
    List<String> terms = new ArrayList<>();
    // The TokenStream consumer workflow is reset -> incrementToken* -> end -> close.
    // try-with-resources guarantees close() even if tokenization throws.
    try (TokenStream stream = analyzer.tokenStream("", text)) {
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            terms.add(termAtt.toString());
        }
        // Signal end-of-stream so final state (e.g. end offset) is applied before closing.
        stream.end();
    }
    return terms;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) IndexSettings(org.opensearch.index.IndexSettings) ArrayList(java.util.ArrayList) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) Analyzer(org.apache.lucene.analysis.Analyzer)

Example 77 with IndexSettings

use of org.opensearch.index.IndexSettings in project OpenSearch by opensearch-project.

the class EdgeNGramTokenizerTests method buildAnalyzers.

/**
 * Builds the index analyzers for an index named "index" whose {@code my_analyzer} uses the given tokenizer.
 *
 * @param version   value stored under the index's version-created setting
 * @param tokenizer tokenizer name assigned to {@code my_analyzer}
 * @return the analyzers produced by an analysis module loaded with {@link CommonAnalysisPlugin}
 * @throws IOException propagated from environment, module, or analyzer construction
 */
private IndexAnalyzers buildAnalyzers(Version version, String tokenizer) throws IOException {
    // Node-level settings: only the home path, pointing at a fresh temporary directory.
    Settings nodeSettings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();

    // Index-level settings: creation version plus the analyzer under test.
    Settings analyzerSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, version)
        .put("index.analysis.analyzer.my_analyzer.tokenizer", tokenizer)
        .build();
    IndexSettings indexLevelSettings = IndexSettingsModule.newIndexSettings("index", analyzerSettings);

    Environment environment = TestEnvironment.newEnvironment(nodeSettings);
    AnalysisModule module = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisPlugin()));
    return module.getAnalysisRegistry().build(indexLevelSettings);
}
Also used : IndexSettings(org.opensearch.index.IndexSettings) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) Settings(org.opensearch.common.settings.Settings)

Example 78 with IndexSettings

use of org.opensearch.index.IndexSettings in project OpenSearch by opensearch-project.

the class WhitespaceTokenizerFactoryTests method testMaxTokenLength.

/**
 * Checks how {@link WhitespaceTokenizerFactory} honours its max-token-length setting:
 * an explicit limit of 2, the default when no setting is given, and rejection of
 * out-of-range values (too large and negative).
 */
public void testMaxTokenLength() throws IOException {
    final String factoryName = "whitespace_maxlen";
    final Settings versionOnly = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
    final IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(new Index("test", "_na_"), versionOnly);

    // Explicit limit of 2: every whitespace-delimited chunk is cut into pieces of at most two chars.
    final Settings limitOfTwo = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 2).build();
    WhitespaceTokenizer cappedTokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, factoryName, limitOfTwo).create();
    try (Reader input = new StringReader("one, two, three")) {
        cappedTokenizer.setReader(input);
        String[] expectedPieces = new String[] { "on", "e,", "tw", "o,", "th", "re", "e" };
        assertTokenStreamContents(cappedTokenizer, expectedPieces);
    }

    // No explicit limit: a 256-char token is expected to be split after 255 chars.
    final Settings noOverrides = Settings.EMPTY;
    WhitespaceTokenizer defaultTokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, factoryName, noOverrides).create();
    String longToken = RandomStrings.randomAsciiAlphanumOfLength(random(), 256);
    try (Reader input = new StringReader(longToken)) {
        defaultTokenizer.setReader(input);
        String[] expectedSplit = new String[] { longToken.substring(0, 255), longToken.substring(255) };
        assertTokenStreamContents(defaultTokenizer, expectedSplit);
    }

    // A limit above 1024 * 1024 must be rejected.
    final Settings oversized = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 1024 * 1024 + 1).build();
    IllegalArgumentException oversizedFailure = expectThrows(
        IllegalArgumentException.class,
        () -> new WhitespaceTokenizerFactory(indexProperties, null, factoryName, oversized).create()
    );
    assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 1048577", oversizedFailure.getMessage());

    // A negative limit must be rejected as well.
    final Settings negative = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, -1).build();
    IllegalArgumentException negativeFailure = expectThrows(
        IllegalArgumentException.class,
        () -> new WhitespaceTokenizerFactory(indexProperties, null, factoryName, negative).create()
    );
    assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", negativeFailure.getMessage());
}
Also used : WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) IndexSettings(org.opensearch.index.IndexSettings) StringReader(java.io.StringReader) Reader(java.io.Reader) Index(org.opensearch.index.Index) Settings(org.opensearch.common.settings.Settings)

Example 79 with IndexSettings

use of org.opensearch.index.IndexSettings in project OpenSearch by opensearch-project.

the class SynonymsAnalysisTests method testPreconfigured.

/**
 * Configures {@code my_analyzer} (standard tokenizer, then lowercase, asciifolding and a
 * "würst, sausage" synonym filter) and asserts that analyzing "würst" yields
 * "wurst" and "sausage".
 */
public void testPreconfigured() throws IOException {
    Settings.Builder config = Settings.builder();
    config.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT);
    config.put("path.home", createTempDir().toString());
    // Synonym filter definition.
    config.put("index.analysis.filter.synonyms.type", "synonym");
    config.putList("index.analysis.filter.synonyms.synonyms", "würst, sausage");
    // Analyzer that applies the synonym filter last.
    config.put("index.analysis.analyzer.my_analyzer.tokenizer", "standard");
    config.putList("index.analysis.analyzer.my_analyzer.filter", "lowercase", "asciifolding", "synonyms");
    Settings settings = config.build();

    IndexSettings indexLevelSettings = IndexSettingsModule.newIndexSettings("index", settings);
    indexAnalyzers = createTestAnalysis(indexLevelSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;

    String[] expectedTerms = new String[] { "wurst", "sausage" };
    int[] expectedPosIncrements = new int[] { 1, 0 };
    BaseTokenStreamTestCase.assertAnalyzesTo(indexAnalyzers.get("my_analyzer"), "würst", expectedTerms, expectedPosIncrements);
}
Also used : IndexSettings(org.opensearch.index.IndexSettings) Settings(org.opensearch.common.settings.Settings)

Example 80 with IndexSettings

use of org.opensearch.index.IndexSettings in project OpenSearch by opensearch-project.

the class SynonymsAnalysisTests method testChainedSynonymFilters.

/**
 * Configures the {@code syn} analyzer with two synonym filters applied in sequence
 * ("term1, term2" then "term1, term3") and asserts that analyzing "term1" yields
 * "term1", "term3" and "term2".
 */
public void testChainedSynonymFilters() throws IOException {
    Settings.Builder config = Settings.builder();
    config.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT);
    config.put("path.home", createTempDir().toString());
    // First synonym filter.
    config.put("index.analysis.filter.synonyms1.type", "synonym");
    config.putList("index.analysis.filter.synonyms1.synonyms", "term1, term2");
    // Second synonym filter, chained after the first.
    config.put("index.analysis.filter.synonyms2.type", "synonym");
    config.putList("index.analysis.filter.synonyms2.synonyms", "term1, term3");
    // Analyzer wiring both filters behind lowercase.
    config.put("index.analysis.analyzer.syn.tokenizer", "standard");
    config.putList("index.analysis.analyzer.syn.filter", "lowercase", "synonyms1", "synonyms2");
    Settings settings = config.build();

    IndexSettings indexLevelSettings = IndexSettingsModule.newIndexSettings("index", settings);
    indexAnalyzers = createTestAnalysis(indexLevelSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;

    String[] expectedTerms = new String[] { "term1", "term3", "term2" };
    int[] expectedPosIncrements = new int[] { 1, 0, 0 };
    BaseTokenStreamTestCase.assertAnalyzesTo(indexAnalyzers.get("syn"), "term1", expectedTerms, expectedPosIncrements);
}
Also used : IndexSettings(org.opensearch.index.IndexSettings) Settings(org.opensearch.common.settings.Settings)

Aggregations

IndexSettings (org.opensearch.index.IndexSettings): 195, Settings (org.opensearch.common.settings.Settings): 137, IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata): 72, IOException (java.io.IOException): 39, Index (org.opensearch.index.Index): 32, Matchers.containsString (org.hamcrest.Matchers.containsString): 25, Version (org.opensearch.Version): 23, Store (org.opensearch.index.store.Store): 23, Map (java.util.Map): 22, QueryShardContext (org.opensearch.index.query.QueryShardContext): 22, OpenSearchException (org.opensearch.OpenSearchException): 21, MapperService (org.opensearch.index.mapper.MapperService): 20, ShardId (org.opensearch.index.shard.ShardId): 19, HashMap (java.util.HashMap): 18, ArrayList (java.util.ArrayList): 17, List (java.util.List): 17, IndexService (org.opensearch.index.IndexService): 17, IndexShard (org.opensearch.index.shard.IndexShard): 17, HashSet (java.util.HashSet): 16, Query (org.apache.lucene.search.Query): 16