Usage of org.apache.lucene.analysis.Tokenizer in the lucene-solr project (Apache):
class TestPortugueseMinimalStemFilter, method testKeyword.
/**
 * Verifies that a term listed in the keyword exclusion set is not stemmed:
 * "quilométricas" must come out of the analyzer unchanged.
 *
 * @throws IOException if analysis fails
 */
public void testKeyword() throws IOException {
  // case-sensitive exclusion set (second ctor arg: ignoreCase = false)
  final CharArraySet exclusionSet = new CharArraySet(asSet("quilométricas"), false);
  // try-with-resources so the Analyzer is closed even if checkOneTerm throws
  // (the original only closed it on the success path)
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // mark excluded terms as keywords so the stemmer skips them
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(sink));
    }
  }) {
    checkOneTerm(a, "quilométricas", "quilométricas");
  }
}
Usage of org.apache.lucene.analysis.Tokenizer in the lucene-solr project (Apache):
class TestPortugueseLightStemFilter, method testEmptyTerm.
/**
 * Verifies the filter handles a zero-length term gracefully: an empty
 * input term must be emitted unchanged rather than causing an exception.
 *
 * @throws IOException if analysis fails
 */
public void testEmptyTerm() throws IOException {
  // try-with-resources guarantees the Analyzer is released even on assertion
  // failure (the original leaked it if checkOneTerm threw)
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      // KeywordTokenizer emits the whole input as a single token,
      // including the empty string
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new PortugueseLightStemFilter(tokenizer));
    }
  }) {
    checkOneTerm(a, "", "");
  }
}
Usage of org.apache.lucene.analysis.Tokenizer in the lucene-solr project (Apache):
class TestPortugueseMinimalStemFilterFactory, method testStemming.
/**
 * Verifies the factory-created filter stems a Portuguese plural:
 * "questões" must be reduced to "questão".
 *
 * @throws Exception if factory lookup or analysis fails
 */
public void testStemming() throws Exception {
  Reader reader = new StringReader("questões");
  // declare as Tokenizer directly — avoids the awkward downcast the
  // original needed just to call setReader()
  Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(reader);
  TokenStream stream = tokenFilterFactory("PortugueseMinimalStem").create(tokenizer);
  assertTokenStreamContents(stream, new String[] { "questão" });
}
Usage of org.apache.lucene.analysis.Tokenizer in the lucene-solr project (Apache):
class TestPortugueseStemFilter, method testKeyword.
/**
 * Verifies that a term listed in the keyword exclusion set bypasses the
 * full Portuguese stemmer: "quilométricas" must be returned unchanged.
 *
 * @throws IOException if analysis fails
 */
public void testKeyword() throws IOException {
  // case-sensitive exclusion set (second ctor arg: ignoreCase = false)
  final CharArraySet exclusionSet = new CharArraySet(asSet("quilométricas"), false);
  // try-with-resources so the Analyzer is closed even if checkOneTerm throws
  // (the original only closed it on the success path)
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // mark excluded terms as keywords so the stemmer skips them
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new PortugueseStemFilter(sink));
    }
  }) {
    checkOneTerm(a, "quilométricas", "quilométricas");
  }
}
Usage of org.apache.lucene.analysis.Tokenizer in the lucene-solr project (Apache):
class TestSimplePatternSplitTokenizer, method testEndLookahead.
/**
 * Splitting "aba" on the pattern "(ab)+" consumes the leading "ab" as a
 * delimiter, so only the trailing "a" survives as a token (offsets 2..3).
 *
 * @throws Exception if analysis fails
 */
public void testEndLookahead() throws Exception {
  Tokenizer tokenizer = new SimplePatternSplitTokenizer("(ab)+");
  tokenizer.setReader(new StringReader("aba"));
  assertTokenStreamContents(
      tokenizer,
      new String[] { "a" }, // expected tokens
      new int[] { 2 },      // start offsets
      new int[] { 3 },      // end offsets
      3);                   // final offset
}
Aggregations