Example use of org.opensearch.index.IndexSettings in the OpenSearch project (by opensearch-project).
From the class CompoundAnalysisTests, method analyze.
/**
 * Runs {@code text} through the analyzer registered under {@code analyzerName} and returns the
 * terms it emits, in emission order.
 *
 * @param settings     settings used both to build the index settings and the analysis module
 * @param analyzerName name of the analyzer to look up in the built index analyzers
 * @param text         the text to analyze
 * @return the string form of every term produced by the analyzer
 * @throws IOException if building the analyzers or consuming the token stream fails
 */
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = createAnalysisModule(settings);
    IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
    List<String> terms = new ArrayList<>();
    // Follow the full TokenStream consumer workflow: addAttribute -> reset -> incrementToken* -> end -> close.
    // try-with-resources guarantees close() even if consumption throws.
    try (TokenStream stream = analyzer.tokenStream("", text)) {
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            terms.add(termAtt.toString());
        }
        // Signal end-of-stream so the stream can perform its end-of-stream operations before being closed.
        stream.end();
    }
    return terms;
}
Example use of org.opensearch.index.IndexSettings in the OpenSearch project (by opensearch-project).
From the class EdgeNGramTokenizerTests, method buildAnalyzers.
/**
 * Builds the index analyzers for an index named "index" whose only custom analyzer,
 * {@code my_analyzer}, uses the given tokenizer.
 *
 * @param version   value stored under {@link IndexMetadata#SETTING_VERSION_CREATED}
 * @param tokenizer tokenizer name assigned to {@code my_analyzer}
 * @return the analyzers built by an {@link AnalysisModule} loaded with {@link CommonAnalysisPlugin}
 * @throws IOException if the analysis module or the analyzers cannot be built
 */
private IndexAnalyzers buildAnalyzers(Version version, String tokenizer) throws IOException {
    // Node-level settings: only a home path pointing at a fresh temp directory.
    Settings nodeSettings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();

    // Index-level settings: creation version plus the analyzer under test.
    Settings indexLevelSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, version)
        .put("index.analysis.analyzer.my_analyzer.tokenizer", tokenizer)
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexLevelSettings);

    AnalysisModule module = new AnalysisModule(
        TestEnvironment.newEnvironment(nodeSettings),
        Collections.singletonList(new CommonAnalysisPlugin())
    );
    return module.getAnalysisRegistry().build(idxSettings);
}
Example use of org.opensearch.index.IndexSettings in the OpenSearch project (by opensearch-project).
From the class WhitespaceTokenizerFactoryTests, method testMaxTokenLength.
/**
 * Exercises {@link WhitespaceTokenizerFactory} with four max-token-length configurations:
 * an explicit limit of 2, no setting at all, a value of 1024 * 1024 + 1, and -1.
 * The first two are checked through the tokens produced; the last two are expected to be
 * rejected with an {@link IllegalArgumentException} carrying a specific message.
 */
public void testMaxTokenLength() throws IOException {
    final Settings createdNow = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
    final IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(new Index("test", "_na_"), createdNow);

    // Explicit limit of 2: the expected tokens are all at most two characters long.
    final Settings limitOfTwo = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 2).build();
    final WhitespaceTokenizer shortTokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(
        indexProperties,
        null,
        "whitespace_maxlen",
        limitOfTwo
    ).create();
    try (Reader input = new StringReader("one, two, three")) {
        shortTokenizer.setReader(input);
        assertTokenStreamContents(shortTokenizer, new String[] { "on", "e,", "tw", "o,", "th", "re", "e" });
    }

    // No setting: a 256-character token is expected to split into 255 characters plus 1.
    final WhitespaceTokenizer defaultTokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(
        indexProperties,
        null,
        "whitespace_maxlen",
        Settings.EMPTY
    ).create();
    final String veryLongToken = RandomStrings.randomAsciiAlphanumOfLength(random(), 256);
    try (Reader input = new StringReader(veryLongToken)) {
        defaultTokenizer.setReader(input);
        final String head = veryLongToken.substring(0, 255);
        final String tail = veryLongToken.substring(255);
        assertTokenStreamContents(defaultTokenizer, new String[] { head, tail });
    }

    // Value of 1024 * 1024 + 1: creation must be rejected.
    final Settings tooLongSettings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 1024 * 1024 + 1).build();
    final IllegalArgumentException tooLongFailure = expectThrows(
        IllegalArgumentException.class,
        () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", tooLongSettings).create()
    );
    assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 1048577", tooLongFailure.getMessage());

    // Negative value: creation must be rejected.
    final Settings negativeSettings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, -1).build();
    final IllegalArgumentException negativeFailure = expectThrows(
        IllegalArgumentException.class,
        () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", negativeSettings).create()
    );
    assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", negativeFailure.getMessage());
}
Example use of org.opensearch.index.IndexSettings in the OpenSearch project (by opensearch-project).
From the class SynonymsAnalysisTests, method testPreconfigured.
/**
 * Configures {@code my_analyzer} as a standard tokenizer followed by the {@code lowercase},
 * {@code asciifolding} and a custom {@code synonyms} filter, then checks that analyzing
 * "würst" yields "wurst" and "sausage".
 */
public void testPreconfigured() throws IOException {
    Settings analysisConfig = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("path.home", createTempDir().toString())
        .put("index.analysis.filter.synonyms.type", "synonym")
        .putList("index.analysis.filter.synonyms.synonyms", "würst, sausage")
        .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard")
        .putList("index.analysis.analyzer.my_analyzer.filter", "lowercase", "asciifolding", "synonyms")
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", analysisConfig);
    indexAnalyzers = createTestAnalysis(idxSettings, analysisConfig, new CommonAnalysisPlugin()).indexAnalyzers;

    String[] expectedTerms = new String[] { "wurst", "sausage" };
    // Fourth argument of assertAnalyzesTo — presumably the per-token position increments; confirm against BaseTokenStreamTestCase.
    int[] expectedIncrements = new int[] { 1, 0 };
    BaseTokenStreamTestCase.assertAnalyzesTo(indexAnalyzers.get("my_analyzer"), "würst", expectedTerms, expectedIncrements);
}
Example use of org.opensearch.index.IndexSettings in the OpenSearch project (by opensearch-project).
From the class SynonymsAnalysisTests, method testChainedSynonymFilters.
/**
 * Configures the {@code syn} analyzer with two synonym filters applied one after the other
 * ({@code synonyms1}: "term1, term2"; {@code synonyms2}: "term1, term3") and checks that
 * analyzing "term1" yields "term1", "term3" and "term2".
 */
public void testChainedSynonymFilters() throws IOException {
    Settings analysisConfig = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put("path.home", createTempDir().toString())
        .put("index.analysis.filter.synonyms1.type", "synonym")
        .putList("index.analysis.filter.synonyms1.synonyms", "term1, term2")
        .put("index.analysis.filter.synonyms2.type", "synonym")
        .putList("index.analysis.filter.synonyms2.synonyms", "term1, term3")
        .put("index.analysis.analyzer.syn.tokenizer", "standard")
        .putList("index.analysis.analyzer.syn.filter", "lowercase", "synonyms1", "synonyms2")
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", analysisConfig);
    indexAnalyzers = createTestAnalysis(idxSettings, analysisConfig, new CommonAnalysisPlugin()).indexAnalyzers;

    String[] expectedTerms = new String[] { "term1", "term3", "term2" };
    // Fourth argument of assertAnalyzesTo — presumably the per-token position increments; confirm against BaseTokenStreamTestCase.
    int[] expectedIncrements = new int[] { 1, 0, 0 };
    BaseTokenStreamTestCase.assertAnalyzesTo(indexAnalyzers.get("syn"), "term1", expectedTerms, expectedIncrements);
}
Aggregations