Use of org.apache.lucene.analysis.TokenStream in the lucene-solr project (Apache): class TestSimplePatternTokenizer, method testBasic.
/**
 * Verifies that SimplePatternTokenizer emits only the text matched by the
 * supplied regular expression, for two representative patterns: a literal
 * delimiter and a capturing quoted-span pattern.
 */
public void testBasic() throws Exception {
  // Pattern that matches single-quoted spans such as 'bbb'.
  String quotedPattern = "\\'([^\\']+)\\'";
  // Each row: { tokenizer pattern, input text, expected space-joined tokens }.
  String[][] cases = {
      { ":", "boo:and:foo", ": :" },
      { quotedPattern, "aaa 'bbb' 'ccc'", "'bbb' 'ccc'" }
  };
  for (String[] testCase : cases) {
    TokenStream ts = new SimplePatternTokenizer(testCase[0]);
    ((Tokenizer) ts).setReader(new StringReader(testCase[1]));
    String actual = tsToString(ts);
    assertEquals("pattern: " + testCase[0] + " with input: " + testCase[1],
        testCase[2], actual);
  }
}
Use of org.apache.lucene.analysis.TokenStream in the lucene-solr project (Apache): class TestReverseStringFilter, method testFilterWithMark.
public void testFilterWithMark() throws Exception {
// 1-4 length string
TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
((Tokenizer) stream).setReader(new StringReader("Do have a nice day"));
ReverseStringFilter filter = new ReverseStringFilter(stream, '');
assertTokenStreamContents(filter, new String[] { "oD", "evah", "a", "ecin", "yad" });
}
Use of org.apache.lucene.analysis.TokenStream in the lucene-solr project (Apache): class TestDelimitedPayloadTokenFilterFactory, method testDelim.
/**
 * Verifies that the DelimitedPayload factory, configured with a '*' delimiter
 * and a float encoder, attaches the numeric suffix of each token as a
 * float-encoded payload.
 */
public void testDelim() throws Exception {
  TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  ((Tokenizer) ts).setReader(new StringReader("the*0.1 quick*0.1 red*0.1"));
  ts = tokenFilterFactory("DelimitedPayload", "encoder", "float", "delimiter", "*").create(ts);
  ts.reset();
  while (ts.incrementToken()) {
    // Every token carries a payload decoding to the 0.1 written after '*'.
    PayloadAttribute attr = ts.getAttribute(PayloadAttribute.class);
    assertNotNull(attr);
    byte[] bytes = attr.getPayload().bytes;
    assertNotNull(bytes);
    assertEquals(0.1f, PayloadHelper.decodeFloat(bytes), 0.0f);
  }
  ts.end();
  ts.close();
}
Use of org.apache.lucene.analysis.TokenStream in the lucene-solr project (Apache): class TestDelimitedPayloadTokenFilterFactory, method testEncoder.
/**
 * Verifies that the DelimitedPayload factory with the float encoder and the
 * default '|' delimiter attaches each token's numeric suffix as a
 * float-encoded payload.
 */
public void testEncoder() throws Exception {
  TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  ((Tokenizer) ts).setReader(new StringReader("the|0.1 quick|0.1 red|0.1"));
  ts = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(ts);
  ts.reset();
  while (ts.incrementToken()) {
    // Every token carries a payload decoding to the 0.1 written after '|'.
    PayloadAttribute attr = ts.getAttribute(PayloadAttribute.class);
    assertNotNull(attr);
    byte[] bytes = attr.getPayload().bytes;
    assertNotNull(bytes);
    assertEquals(0.1f, PayloadHelper.decodeFloat(bytes), 0.0f);
  }
  ts.end();
  ts.close();
}
Use of org.apache.lucene.analysis.TokenStream in the lucene-solr project (Apache): class TestRussianLightStemFilter, method testKeyword.
/**
 * Verifies that a term listed in the keyword-exclusion set passes through the
 * Russian light stemmer unchanged: the SetKeywordMarkerFilter marks it as a
 * keyword, so the stemmer must leave it alone.
 */
public void testKeyword() throws IOException {
  final CharArraySet exclusions = new CharArraySet(asSet("энергии"), false);
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // Mark excluded words as keywords so the stemmer skips them.
      TokenStream marked = new SetKeywordMarkerFilter(tokenizer, exclusions);
      return new TokenStreamComponents(tokenizer, new RussianLightStemFilter(marked));
    }
  };
  checkOneTerm(analyzer, "энергии", "энергии");
  analyzer.close();
}
Aggregations