Use of org.apache.lucene.analysis.TokenStream in project lucene-solr-analysis-turkish by iorixxx.
Class TestTurkishDeASCIIfyFilter, method testDeAscii2:
/**
 * Feeds six whitespace-separated ASCII-only words through {@code TurkishDeASCIIfyFilter}
 * and checks that each one comes out with its Turkish diacritics restored.
 */
public void testDeAscii2() throws Exception {
  final String asciiInput = "tatlises akgunduz sakip cernobil baslattigi dayanikliklarini";
  final String[] expectedTerms = { "tatlıses", "akgündüz", "sakıp", "çernobil", "başlattığı", "dayanıklıklarını" };

  TokenStream source = whitespaceMockTokenizer(asciiInput);
  TokenStream deasciified = new TurkishDeASCIIfyFilter(source, false);

  assertTokenStreamContents(deasciified, expectedTerms);
}
Use of org.apache.lucene.analysis.TokenStream in project languagetool by languagetool-org.
Class LanguageToolAnalyzer, method createComponents:
/**
 * Assembles the analysis chain: an {@code AnyCharTokenizer} as the source, wrapped in a
 * {@code LanguageToolFilter} that receives this analyzer's {@code languageTool} and
 * {@code toLowerCase} values.
 *
 * @param s not read by this implementation
 * @return the tokenizer paired with the filter that wraps it
 */
@Override
protected TokenStreamComponents createComponents(String s) {
  final Tokenizer source = new AnyCharTokenizer();
  return new TokenStreamComponents(
      source,
      new LanguageToolFilter(source, languageTool, toLowerCase));
}
Use of org.apache.lucene.analysis.TokenStream in project che by eclipse.
Class LuceneSearcher, method makeAnalyzer:
/**
 * Creates an analyzer whose chain is a {@code WhitespaceTokenizer} followed by a
 * {@code LowerCaseFilter}.
 *
 * @return a new anonymous {@code Analyzer} instance on every call
 */
protected Analyzer makeAnalyzer() {
  final Analyzer lowerCasingWhitespaceAnalyzer = new Analyzer() {
    /** Builds the tokenizer/filter pair; {@code fieldName} is not consulted. */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer whitespaceSource = new WhitespaceTokenizer();
      final TokenStream lowerCased = new LowerCaseFilter(whitespaceSource);
      return new TokenStreamComponents(whitespaceSource, lowerCased);
    }
  };
  return lowerCasingWhitespaceAnalyzer;
}
Use of org.apache.lucene.analysis.TokenStream in project textdb by TextDB.
Class DataflowUtils, method tokenizeQuery:
/**
 * Tokenizes the query string using the given analyzer.
 *
 * <p>The stream is consumed in the order the Lucene {@code TokenStream} contract requires
 * (reset, incrementToken until exhausted, end, close) and is closed even when tokenization
 * fails part-way, so the analyzer is not left holding an unclosed stream.
 *
 * <p>Failures while consuming the stream are handled best-effort: the stack trace is printed
 * and the terms collected up to that point are returned.
 *
 * @param luceneAnalyzer analyzer used to produce the token stream
 * @param query the query text to tokenize
 * @return the terms emitted by the analyzer, in stream order
 */
public static ArrayList<String> tokenizeQuery(Analyzer luceneAnalyzer, String query) {
  ArrayList<String> result = new ArrayList<String>();
  // Obtained outside the try so that a failure to create the stream still reaches the caller.
  TokenStream tokenStream = luceneAnalyzer.tokenStream(null, new StringReader(query));
  // try-with-resources guarantees close() on both the success and the failure path.
  try (TokenStream stream = tokenStream) {
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      result.add(term.toString());
    }
    // Signals end-of-stream; required by the consumer workflow before close().
    stream.end();
  } catch (Exception e) {
    // Deliberately best-effort: report the problem and return what was tokenized so far.
    e.printStackTrace();
  }
  return result;
}
Use of org.apache.lucene.analysis.TokenStream in project lucene-solr by apache.
Class TestSuggestField, method testTokenStream:
/**
 * Builds the expected payload by hand (VInt length of the surface form, its bytes, the VInt
 * {@code 1 + 1}, then the {@code SuggestField.TYPE} byte) and asserts that the field's token
 * stream, wrapped in {@code PayloadAttrToTypeAttrFilter}, produces the single term "input"
 * carrying that payload string. The check is run once with a plain {@code MockAnalyzer} and
 * once with a {@code CompletionAnalyzer} wrapping it.
 */
@Test
public void testTokenStream() throws Exception {
  Analyzer mockAnalyzer = new MockAnalyzer(random());
  SuggestField field = new SuggestField("field", "input", 1);

  // Hand-encode the payload the field is expected to emit.
  BytesRef surface = new BytesRef("input");
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  try (OutputStreamDataOutput out = new OutputStreamDataOutput(buffer)) {
    out.writeVInt(surface.length);
    out.writeBytes(surface.bytes, surface.offset, surface.length);
    out.writeVInt(1 + 1);
    out.writeByte(SuggestField.TYPE);
  }
  BytesRef expectedPayload = new BytesRef(buffer.toByteArray());
  String expectedType = expectedPayload.utf8ToString();

  // Plain analyzer.
  TokenStream plainStream =
      new CompletionTokenStreamTest.PayloadAttrToTypeAttrFilter(field.tokenStream(mockAnalyzer, null));
  assertTokenStreamContents(
      plainStream, new String[] { "input" }, null, null, new String[] { expectedType }, new int[] { 1 }, null, null);

  // Same expectation when the analyzer is wrapped in a CompletionAnalyzer.
  CompletionAnalyzer wrappedAnalyzer = new CompletionAnalyzer(mockAnalyzer);
  TokenStream wrappedStream =
      new CompletionTokenStreamTest.PayloadAttrToTypeAttrFilter(field.tokenStream(wrappedAnalyzer, null));
  assertTokenStreamContents(
      wrappedStream, new String[] { "input" }, null, null, new String[] { expectedType }, new int[] { 1 }, null, null);
}
Aggregations