Use of org.apache.lucene.analysis.core.WhitespaceTokenizer in project elasticsearch by elastic.
The class AnalysisModuleTests, method assertTokenFilter.

private void assertTokenFilter(String name, Class<?> clazz) throws IOException {
    Settings settings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
    TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get(name);
    // Run a trivial whitespace-tokenized stream through the filter under test
    // and check that the factory produced the expected TokenStream class.
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("foo bar"));
    TokenStream stream = tokenFilter.create(tokenizer);
    assertThat(stream, instanceOf(clazz));
}
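
A typical call site for this helper might look as follows (a hypothetical example; the filter name and expected class are illustrative, not taken from the test):

    assertTokenFilter("lowercase", LowerCaseFilter.class);

This asserts that the token filter registered under "lowercase" wraps a whitespace-tokenized stream in a LowerCaseFilter.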
Use of org.apache.lucene.analysis.core.WhitespaceTokenizer in project elasticsearch by elastic.
The class KeywordFieldTypeTests, method testTermQueryWithNormalizer.

public void testTermQueryWithNormalizer() {
    MappedFieldType ft = createDefaultFieldType();
    ft.setName("field");
    ft.setIndexOptions(IndexOptions.DOCS);
    Analyzer normalizer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer in = new WhitespaceTokenizer();
            TokenFilter out = new LowerCaseFilter(in);
            return new TokenStreamComponents(in, out);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new LowerCaseFilter(in);
        }
    };
    ft.setSearchAnalyzer(new NamedAnalyzer("my_normalizer", AnalyzerScope.INDEX, normalizer));
    // Normalization treats the query text as a single token, so only the
    // lowercasing from normalize() applies: "fOo BaR" becomes "foo bar".
    assertEquals(new TermQuery(new Term("field", "foo bar")), ft.termQuery("fOo BaR", null));
    ft.setIndexOptions(IndexOptions.NONE);
    IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> ft.termQuery("bar", null));
    assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
}
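
The single-term expectation follows from Analyzer#normalize, which bypasses the tokenizer and runs only the normalize() chain. A minimal sketch, assuming the normalizer variable from the test above:

    // normalize() never splits on whitespace, so the full string is one term.
    BytesRef normalized = normalizer.normalize("field", "fOo BaR");
    assert "foo bar".equals(normalized.utf8ToString());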
Use of org.apache.lucene.analysis.core.WhitespaceTokenizer in project lucene-solr-analysis-turkish by iorixxx.
The class Zemberek2DeASCIIfyFilterFactory, method main.

public static void main(String[] args) throws IOException {
    StringReader reader = new StringReader("kus asisi ortaklar çekişme masali");
    Map<String, String> map = new HashMap<>();
    Zemberek2DeASCIIfyFilterFactory factory = new Zemberek2DeASCIIfyFilterFactory(map);
    // Tokenize on whitespace, then run each token through the de-ASCIIfy filter.
    WhitespaceTokenizer whitespaceTokenizer = new WhitespaceTokenizer();
    whitespaceTokenizer.setReader(reader);
    TokenStream stream = factory.create(whitespaceTokenizer);
    CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
    // Standard Lucene consume loop: reset, iterate, end, close.
    stream.reset();
    while (stream.incrementToken()) {
        String term = termAttribute.toString();
        System.out.println(term);
    }
    stream.end();
    stream.close(); // releases analysis resources, including the wrapped reader
    reader.close();
}
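
The same consume pattern reads more idiomatically in try-with-resources form; here is a sketch using the variables above (TokenStream is Closeable, so end-of-scope cleanup replaces the manual close calls):

    try (TokenStream ts = factory.create(whitespaceTokenizer)) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString());
        }
        ts.end();
    }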
Use of org.apache.lucene.analysis.core.WhitespaceTokenizer in project lucene-solr-analysis-turkish by iorixxx.
The class Zemberek2StemFilterFactory, method main.

public static void main(String[] args) throws IOException {
    StringReader reader = new StringReader("elması utansın ortaklar çekişme ile");
    Map<String, String> map = new HashMap<>();
    // "strategy" selects how the stemmer picks among candidate stems.
    map.put("strategy", "frequency");
    Zemberek2StemFilterFactory factory = new Zemberek2StemFilterFactory(map);
    WhitespaceTokenizer whitespaceTokenizer = new WhitespaceTokenizer();
    whitespaceTokenizer.setReader(reader);
    TokenStream stream = factory.create(whitespaceTokenizer);
    CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        String term = termAttribute.toString();
        System.out.println(term);
    }
    stream.end();
    stream.close();
    reader.close();
}
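
Both main() methods above repeat the same consume loop; a small helper would factor it out (a sketch only — printTokens is not part of the project):

    static void printTokens(TokenStream stream) throws IOException {
        CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println(term.toString());
        }
        stream.end();
        stream.close();
    }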
Use of org.apache.lucene.analysis.core.WhitespaceTokenizer in project neo4j by neo4j.
The class CustomAnalyzer, method createComponents.

@Override
protected TokenStreamComponents createComponents(String fieldName) {
    // Flag inspected by the surrounding test to verify the analyzer was invoked.
    called = true;
    Tokenizer source = new WhitespaceTokenizer();
    return new TokenStreamComponents(source, new LowerCaseFilter(source));
}
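
For reference, the same whitespace-plus-lowercase pipeline as a self-contained analyzer with a small smoke test (a minimal sketch, not the neo4j class; note the LowerCaseFilter import path varies by Lucene version):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter; // org.apache.lucene.analysis.core.LowerCaseFilter on older Lucene
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WhitespaceLowercaseAnalyzer extends Analyzer {

    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new WhitespaceTokenizer();
        return new TokenStreamComponents(source, new LowerCaseFilter(source));
    }

    public static void main(String[] args) throws IOException {
        try (Analyzer analyzer = new WhitespaceLowercaseAnalyzer();
             TokenStream ts = analyzer.tokenStream("field", "Foo BAR baz")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term.toString()); // prints: foo, bar, baz
            }
            ts.end();
        }
    }
}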