Use of org.opensearch.index.analysis.TokenFilterFactory in the OpenSearch project by opensearch-project.
From the class ConcatenateGraphTokenFilterFactoryTests, method testOldLuceneVersionNoSeparator:
/**
 * On indices created with older (legacy 7.x) versions, {@code concatenate_graph}
 * ignores {@code token_separator} entirely; with {@code preserve_separator=false}
 * the tokens are concatenated with no separator at all.
 */
public void testOldLuceneVersionNoSeparator() throws IOException {
    Settings settings = Settings.builder()
        .put(
            IndexMetadata.SETTING_VERSION_CREATED,
            VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2)
        )
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .put("index.analysis.filter.my_concatenate_graph.type", "concatenate_graph")
        // this will be ignored on these legacy index versions
        .put("index.analysis.filter.my_concatenate_graph.token_separator", "+")
        .put("index.analysis.filter.my_concatenate_graph.preserve_separator", "false")
        .build();
    OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
    TokenFilterFactory concatenateGraph = analysis.tokenFilter.get("my_concatenate_graph");

    Tokenizer whitespace = new WhitespaceTokenizer();
    whitespace.setReader(new StringReader("PowerShot Is AweSome"));

    // earlier Lucene version will not add separator if preserve_separator is false
    assertTokenStreamContents(concatenateGraph.create(whitespace), new String[] { "PowerShotIsAweSome" });
}
Use of org.opensearch.index.analysis.TokenFilterFactory in the OpenSearch project by opensearch-project.
From the class ConcatenateGraphTokenFilterFactoryTests, method testOldLuceneVersionSeparator:
/**
 * On indices created with older (legacy 7.x) versions, a configured
 * {@code token_separator} is ignored and Lucene's default separator is used
 * between the concatenated tokens instead.
 */
public void testOldLuceneVersionSeparator() throws IOException {
    Settings settings = Settings.builder()
        .put(
            IndexMetadata.SETTING_VERSION_CREATED,
            VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2)
        )
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .put("index.analysis.filter.my_concatenate_graph.type", "concatenate_graph")
        // this will be ignored on these legacy index versions
        .put("index.analysis.filter.my_concatenate_graph.token_separator", "+")
        .build();
    OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
    TokenFilterFactory concatenateGraph = analysis.tokenFilter.get("my_concatenate_graph");

    Tokenizer whitespace = new WhitespaceTokenizer();
    whitespace.setReader(new StringReader("PowerShot Is AweSome"));

    // earlier Lucene version will only use Lucene's default separator
    String expected = "PowerShot"
        + ConcatenateGraphFilter.DEFAULT_TOKEN_SEPARATOR
        + "Is"
        + ConcatenateGraphFilter.DEFAULT_TOKEN_SEPARATOR
        + "AweSome";
    assertTokenStreamContents(concatenateGraph.create(whitespace), new String[] { expected });
}
Use of org.opensearch.index.analysis.TokenFilterFactory in the OpenSearch project by opensearch-project.
From the class EdgeNGramTokenFilterFactoryTests, method testPreserveOriginal:
/**
 * With {@code preserve_original=true}, the {@code edge_ngram} filter emits the
 * full original token ("foo") alongside its edge n-grams ("f", "fo").
 */
public void testPreserveOriginal() throws IOException {
    Settings settings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .put("index.analysis.filter.my_edge_ngram.type", "edge_ngram")
        .put("index.analysis.filter.my_edge_ngram.preserve_original", true)
        .build();
    OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
    TokenFilterFactory edgeNGram = analysis.tokenFilter.get("my_edge_ngram");

    Tokenizer standard = new StandardTokenizer();
    standard.setReader(new StringReader("foo"));

    assertTokenStreamContents(edgeNGram.create(standard), new String[] { "f", "fo", "foo" });
}
Use of org.opensearch.index.analysis.TokenFilterFactory in the OpenSearch project by opensearch-project.
From the class KeepFilterFactoryTests, method testLoadWithoutSettings:
/**
 * When no settings configure a {@code keep} filter, the analysis registry
 * must not expose one under that name.
 */
public void testLoadWithoutSettings() throws IOException {
    OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromClassPath(createTempDir(), RESOURCE, new CommonAnalysisPlugin());
    Assert.assertNull(analysis.tokenFilter.get("keep"));
}
Use of org.opensearch.index.analysis.TokenFilterFactory in the OpenSearch project by opensearch-project.
From the class ScriptedConditionTokenFilterFactory, method getChainAwareTokenFilterFactory:
/**
 * Resolves every filter named in {@code filterNames} and wraps them in a
 * factory whose {@link TokenStream}s apply the resolved filters only when the
 * scripted condition matches.
 *
 * <p>Each referenced filter is itself made chain-aware: the chain handed to it
 * grows as resolution proceeds, so every factory sees all filters that precede
 * it (including {@code previousTokenFilters}).
 *
 * @throws IllegalArgumentException if a referenced filter name is undefined
 */
@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters, List<TokenFilterFactory> previousTokenFilters, Function<String, TokenFilterFactory> allFilters) {
    List<TokenFilterFactory> resolvedFilters = new ArrayList<>();
    List<TokenFilterFactory> chainSoFar = new ArrayList<>(previousTokenFilters);
    for (String filterName : filterNames) {
        TokenFilterFactory lookedUp = allFilters.apply(filterName);
        if (lookedUp == null) {
            throw new IllegalArgumentException("ScriptedConditionTokenFilter [" + name() + "] refers to undefined token filter [" + filterName + "]");
        }
        TokenFilterFactory chainAware = lookedUp.getChainAwareTokenFilterFactory(tokenizer, charFilters, chainSoFar, allFilters);
        resolvedFilters.add(chainAware);
        chainSoFar.add(chainAware);
    }
    return new TokenFilterFactory() {
        @Override
        public String name() {
            return ScriptedConditionTokenFilterFactory.this.name();
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            // Compose all resolved filters into one transform; the scripted
            // condition decides per-token whether the transform is applied.
            Function<TokenStream, TokenStream> composedFilter = stream -> {
                TokenStream wrapped = stream;
                for (TokenFilterFactory resolved : resolvedFilters) {
                    wrapped = resolved.create(wrapped);
                }
                return wrapped;
            };
            return new ScriptedConditionTokenFilter(tokenStream, composedFilter, factory.newInstance());
        }
    };
}
Aggregations