Usage example of org.apache.lucene.analysis.TokenStream in the elastic/elasticsearch project: class SimpleUkrainianAnalyzerTests, method testAnalyzer.
/**
 * Runs the plugin-provided "ukrainian" analyzer over {@code source} and asserts that it
 * emits exactly the given terms, in order, with no trailing tokens.
 *
 * @param source        the raw text to analyze
 * @param expectedTerms the exact token sequence the analyzer must produce
 * @throws IOException if the token stream fails
 */
private static void testAnalyzer(String source, String... expectedTerms) throws IOException {
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisUkrainianPlugin());
    Analyzer analyzer = analysis.indexAnalyzers.get("ukrainian").analyzer();
    // Follow the TokenStream workflow contract: reset -> incrementToken* -> end -> close.
    // try-with-resources guarantees close() even when an assertion fails mid-stream.
    try (TokenStream ts = analyzer.tokenStream("test", source)) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        for (String expected : expectedTerms) {
            assertThat(ts.incrementToken(), equalTo(true));
            assertThat(term.toString(), equalTo(expected));
        }
        // The stream must be exhausted: no tokens beyond the expected ones.
        assertThat(ts.incrementToken(), equalTo(false));
        ts.end();
    }
}
Usage example of org.apache.lucene.analysis.TokenStream in the elastic/elasticsearch project: class NGramTokenizerFactoryTests, method testBackwardsCompatibilityEdgeNgramTokenFilter.
/**
 * Verifies the edge-ngram filter factory's backwards-compatibility behavior across random
 * index versions: with {@code side=back} the factory wraps output in a
 * {@link ReverseStringFilter}; otherwise it produces a plain {@link EdgeNGramTokenFilter}.
 */
public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception {
    final int rounds = scaledRandomIntBetween(20, 100);
    for (int round = 0; round < rounds; round++) {
        final Index index = new Index("test", "_na_");
        final String filterName = "ngr";
        Version indexVersion = randomVersion(random());
        Builder filterSettings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3);
        boolean backSide = random().nextBoolean();
        if (backSide) {
            filterSettings.put("side", "back");
        }
        Settings settings = filterSettings.build();
        Settings indexSettings = newAnalysisSettingsBuilder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, indexVersion.id)
            .build();
        Tokenizer tokenizer = new MockTokenizer();
        tokenizer.setReader(new StringReader("foo bar"));
        TokenStream filtered = new EdgeNGramTokenFilterFactory(
            IndexSettingsModule.newIndexSettings(index, indexSettings), null, filterName, settings)
            .create(tokenizer);
        // "back" side is implemented by reversing the input, so the outermost filter differs.
        if (backSide) {
            assertThat(filtered, instanceOf(ReverseStringFilter.class));
        } else {
            assertThat(filtered, instanceOf(EdgeNGramTokenFilter.class));
        }
    }
}
Usage example of org.apache.lucene.analysis.TokenStream in the elastic/elasticsearch project: class CompoundAnalysisTests, method analyze.
/**
 * Analyzes {@code text} with the named analyzer (built from the given settings plus a test
 * plugin registering the "myfilter" token filter) and returns the emitted terms in order.
 *
 * @param settings     index settings used to build the analysis registry
 * @param analyzerName name of the analyzer to look up
 * @param text         text to tokenize
 * @return the terms produced by the analyzer, in stream order
 * @throws IOException if the token stream fails
 */
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {
        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
        }
    }));
    IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
    // NOTE: the original built an AllEntries and added the text to it, but never used it —
    // the stream below is created directly from `text`. The dead object has been removed.
    List<String> terms = new ArrayList<>();
    // Honor the TokenStream contract (reset -> incrementToken* -> end -> close);
    // try-with-resources guarantees close() on any exit path.
    try (TokenStream stream = AllTokenStream.allTokenStream("_all", text, 1.0f, analyzer)) {
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            terms.add(termAtt.toString());
        }
        stream.end();
    }
    return terms;
}
Usage example of org.apache.lucene.analysis.TokenStream in the elastic/elasticsearch project: class StemmerTokenFilterFactoryTests, method testEnglishFilterFactory.
/**
 * Verifies that a "stemmer" filter configured with {@code language=english} is created as a
 * {@link StemmerTokenFilterFactory} producing a {@link PorterStemFilter}, and that the
 * resulting analyzer stems English correctly — across random index versions.
 */
public void testEnglishFilterFactory() throws IOException {
    int iters = scaledRandomIntBetween(20, 100);
    for (int i = 0; i < iters; i++) {
        Version v = VersionUtils.randomVersion(random());
        Settings settings = Settings.builder()
            .put("index.analysis.filter.my_english.type", "stemmer")
            .put("index.analysis.filter.my_english.language", "english")
            .put("index.analysis.analyzer.my_english.tokenizer", "whitespace")
            .put("index.analysis.analyzer.my_english.filter", "my_english")
            .put(SETTING_VERSION_CREATED, v)
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
        ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
        TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_english");
        assertThat(tokenFilter, instanceOf(StemmerTokenFilterFactory.class));
        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("foo bar"));
        // Close the produced stream (which also closes the wrapped tokenizer) — the
        // original leaked it; only the instance check is needed here.
        try (TokenStream stemmed = tokenFilter.create(tokenizer)) {
            assertThat(stemmed, instanceOf(PorterStemFilter.class));
        }
        IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers;
        NamedAnalyzer analyzer = indexAnalyzers.get("my_english");
        // "consolingly" -> "consolingli" is the Porter stemmer's expected output.
        assertAnalyzesTo(analyzer, "consolingly", new String[] { "consolingli" });
    }
}
Usage example of org.apache.lucene.analysis.TokenStream in the elastic/elasticsearch project: class DocumentFieldMapperTests, method assertAnalyzes.
/**
 * Asserts that the analyzer produces {@code output} as its first token for {@code field}.
 * The empty reader suggests the analyzers under test emit fixed output regardless of
 * input — TODO confirm against the fixture analyzers used by callers.
 *
 * @throws IOException if the token stream fails
 */
private void assertAnalyzes(Analyzer analyzer, String field, String output) throws IOException {
    try (TokenStream tok = analyzer.tokenStream(field, new StringReader(""))) {
        CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
        // The TokenStream contract requires reset() before the first incrementToken();
        // the original skipped it, which well-behaved streams reject.
        tok.reset();
        assertTrue(tok.incrementToken());
        assertEquals(output, term.toString());
        tok.end();
    }
}
Aggregations