Use of org.apache.lucene.analysis.core.WhitespaceTokenizer in project elasticsearch by elastic.
Class BaseWordDelimiterTokenFilterFactoryTestCase, method testDefault. With default settings, the word_delimiter filter splits tokens on case changes ("PowerShot"), letter/number transitions ("j2se"), and intra-word delimiters ("wi-fi"), and strips the English possessive ("O'Neil's").
public void testDefault() throws IOException {
    ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
            Settings.builder()
                    .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                    .put("index.analysis.filter.my_word_delimiter.type", type)
                    .build());
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
    String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
    String[] expected = new String[] {
            "Power", "Shot", "500", "42", "wi", "fi", "wi", "fi", "4000", "j", "2", "se", "O", "Neil" };
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader(source));
    assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
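To run the same pipeline outside the Elasticsearch test harness, here is a minimal standalone Lucene sketch. The flag set is an assumption: it is inferred from the expected tokens above to correspond to the filter's defaults (generate word and number parts, split on case changes and numerics, strip English possessives), not taken from this test.

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WordDelimiterDemo {
    public static void main(String[] args) throws Exception {
        // Assumed default flag set (see note above).
        int flags = WordDelimiterFilter.GENERATE_WORD_PARTS
                | WordDelimiterFilter.GENERATE_NUMBER_PARTS
                | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
                | WordDelimiterFilter.SPLIT_ON_NUMERICS
                | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;
        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"));
        // Third argument is the protected-words set; null means every token may be split.
        try (TokenStream ts = new WordDelimiterFilter(tokenizer, flags, null)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term); // Power, Shot, 500, 42, wi, fi, ...
            }
            ts.end();
        }
    }
}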
Use of org.apache.lucene.analysis.core.WhitespaceTokenizer in project elasticsearch by elastic.
Class BaseWordDelimiterTokenFilterFactoryTestCase, method testCatenateWords. With catenate_words enabled and generate_word_parts disabled, runs of word parts are joined back together ("wi-fi" becomes "wifi", "O'Neil's" becomes "ONeil") while number parts are still emitted individually.
public void testCatenateWords() throws IOException {
    ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
            Settings.builder()
                    .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                    .put("index.analysis.filter.my_word_delimiter.type", type)
                    .put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
                    .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false")
                    .build());
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
    String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
    String[] expected = new String[] {
            "PowerShot", "500", "42", "wifi", "wifi", "4000", "j", "2", "se", "ONeil" };
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader(source));
    assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
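The standalone sketch above reproduces this case by swapping the flag set. Assuming the index settings map one-to-one onto Lucene flags, catenate_words=true adds CATENATE_WORDS and generate_word_parts=false removes GENERATE_WORD_PARTS:

// Assumed flag mapping for catenate_words=true, generate_word_parts=false.
int flags = WordDelimiterFilter.GENERATE_NUMBER_PARTS
        | WordDelimiterFilter.CATENATE_WORDS
        | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
        | WordDelimiterFilter.SPLIT_ON_NUMERICS
        | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;
// "wi-fi" -> "wifi" and "O'Neil's" -> "ONeil", while "500-42" still yields 500, 42.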
Use of org.apache.lucene.analysis.core.WhitespaceTokenizer in project elasticsearch by elastic.
Class BaseWordDelimiterTokenFilterFactoryTestCase, method testCatenateNumbers. With catenate_numbers enabled and generate_number_parts disabled, runs of number parts are joined ("500-42" becomes "50042") while word parts are still emitted individually.
public void testCatenateNumbers() throws IOException {
    ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
            Settings.builder()
                    .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                    .put("index.analysis.filter.my_word_delimiter.type", type)
                    .put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false")
                    .put("index.analysis.filter.my_word_delimiter.catenate_numbers", "true")
                    .build());
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
    String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
    String[] expected = new String[] {
            "Power", "Shot", "50042", "wi", "fi", "wi", "fi", "4000", "j", "2", "se", "O", "Neil" };
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader(source));
    assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
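The analogous swap covers the number side. Again assuming a one-to-one flag mapping, catenate_numbers=true adds CATENATE_NUMBERS and generate_number_parts=false removes GENERATE_NUMBER_PARTS:

// Assumed flag mapping for catenate_numbers=true, generate_number_parts=false.
int flags = WordDelimiterFilter.GENERATE_WORD_PARTS
        | WordDelimiterFilter.CATENATE_NUMBERS
        | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
        | WordDelimiterFilter.SPLIT_ON_NUMERICS
        | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;
// "500-42" -> "50042", while "wi-fi" still yields wi, fi.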
Use of org.apache.lucene.analysis.core.WhitespaceTokenizer in project elasticsearch by elastic.
Class KeepFilterFactoryTests, method testCaseInsensitiveMapping. The keep filter retains only tokens found in its configured word list; here "hello" and "world" survive, "small" is dropped, and the assertion also verifies the position increments (1 and 2, since the dropped token leaves a gap).
public void testCaseInsensitiveMapping() throws IOException {
    ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromClassPath(createTempDir(), RESOURCE);
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_keep_filter");
    assertThat(tokenFilter, instanceOf(KeepWordFilterFactory.class));
    String source = "hello small world";
    String[] expected = new String[] { "hello", "world" };
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader(source));
    assertTokenStreamContents(tokenFilter.create(tokenizer), expected, new int[] { 1, 2 });
}
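A standalone equivalent can be sketched with Lucene's KeepWordFilter. The real word list lives in the RESOURCE settings file, which is not shown here, so the two-word set below is a hypothetical stand-in; the boolean passed to CharArraySet controls case-insensitivity. (On Lucene 7+ the import is org.apache.lucene.analysis.CharArraySet instead.)

import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharArraySet;

public class KeepWordDemo {
    public static void main(String[] args) throws Exception {
        // Stand-in word list; true = ignore case, as in the case-insensitive mapping.
        CharArraySet keep = new CharArraySet(Arrays.asList("hello", "world"), true);
        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("hello small world"));
        try (TokenStream ts = new KeepWordFilter(tokenizer, keep)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                // Prints: hello (+1), world (+2); dropping "small" leaves a position gap.
                System.out.println(term + " (+" + posInc.getPositionIncrement() + ")");
            }
            ts.end();
        }
    }
}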
Use of org.apache.lucene.analysis.core.WhitespaceTokenizer in project elasticsearch by elastic.
Class KeepFilterFactoryTests, method testCaseSensitiveMapping. The case-sensitive variant matches the configured word list exactly, so only "Hello" survives, with a position increment of 1.
public void testCaseSensitiveMapping() throws IOException {
    ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromClassPath(createTempDir(), RESOURCE);
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_case_sensitive_keep_filter");
    assertThat(tokenFilter, instanceOf(KeepWordFilterFactory.class));
    String source = "Hello small world";
    String[] expected = new String[] { "Hello" };
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader(source));
    assertTokenStreamContents(tokenFilter.create(tokenizer), expected, new int[] { 1 });
}
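In the standalone sketch above, the case-sensitive behavior corresponds to building the set with ignoreCase set to false, so only exact-case entries match:

// Case-sensitive stand-in list: "Hello" matches, a lowercase "hello" token would not.
CharArraySet keep = new CharArraySet(Arrays.asList("Hello"), false);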