Use of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in the Apache lucene-solr project.
Class TestFrenchLightStemFilter, method testKeyword.
/**
 * Checks that a term listed in the keyword exclusion set comes out of
 * {@link FrenchLightStemFilter} unchanged: the input "chevaux" is expected
 * to be emitted as "chevaux".
 *
 * @throws IOException propagated from the analysis check
 */
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("chevaux"), false);
  // try-with-resources: the analyzer is closed even when the assertion below fails,
  // instead of only on the success path.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // The keyword marker sits between the tokenizer and the stem filter.
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new FrenchLightStemFilter(sink));
    }
  }) {
    checkOneTerm(a, "chevaux", "chevaux");
  }
}
Use of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in the Apache lucene-solr project.
Class TestFrenchMinimalStemFilter, method testKeyword.
/**
 * Checks that a term listed in the keyword exclusion set comes out of
 * {@link FrenchMinimalStemFilter} unchanged: the input "chevaux" is expected
 * to be emitted as "chevaux".
 *
 * @throws IOException propagated from the analysis check
 */
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("chevaux"), false);
  // try-with-resources: the analyzer is closed even when the assertion below fails,
  // instead of only on the success path.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // The keyword marker sits between the tokenizer and the stem filter.
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new FrenchMinimalStemFilter(sink));
    }
  }) {
    checkOneTerm(a, "chevaux", "chevaux");
  }
}
Use of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in the Apache lucene-solr project.
Class TestPorterStemFilter, method testWithKeywordAttribute.
/**
 * Runs "yourselves yours" through a keyword marker followed by
 * {@link PorterStemFilter} and expects the terms "yourselves" and "your":
 * the word present in the keyword set is emitted as-is, the other one is stemmed.
 *
 * @throws IOException propagated from the token stream assertion
 */
public void testWithKeywordAttribute() throws IOException {
  // Keyword set holding the single protected word.
  CharArraySet keywords = new CharArraySet(1, true);
  keywords.add("yourselves");

  // Whitespace tokenizer fed with the two-word input.
  Tokenizer whitespaceSource = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  whitespaceSource.setReader(new StringReader("yourselves yours"));

  // Chain: tokenizer -> keyword marker -> Porter stemmer.
  TokenStream marked = new SetKeywordMarkerFilter(whitespaceSource, keywords);
  TokenStream stemmed = new PorterStemFilter(marked);

  String[] expectedTerms = { "yourselves", "your" };
  assertTokenStreamContents(stemmed, expectedTerms);
}
Use of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in the Apache lucene-solr project.
Class TestJapaneseNumberFilter, method testName.
/**
 * Checks how a personal name containing numeral characters is analyzed.
 * With the shared {@code analyzer}, "京一" is expected to come out as the
 * number "10000000000000001"; with an analyzer that puts "京一" in a keyword
 * set ahead of {@link JapaneseNumberFilter}, it is expected to come out as "京一".
 *
 * @throws IOException propagated from the analysis assertions
 */
@Test
public void testName() throws IOException {
  // Test name that normalises to number
  assertAnalyzesTo(analyzer, "田中京一", // 京一 is normalized to a number
      new String[] { "田中", "10000000000000001" }, new int[] { 0, 2 }, new int[] { 2, 4 }, new int[] { 1, 1 });
  // An analyzer that marks 京一 as a keyword.
  // try-with-resources: it is closed even when the assertion below fails,
  // instead of only on the success path.
  try (Analyzer keywordMarkingAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      CharArraySet set = new CharArraySet(1, false);
      set.add("京一");
      Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), null, false, JapaneseTokenizer.Mode.SEARCH);
      return new TokenStreamComponents(tokenizer, new JapaneseNumberFilter(new SetKeywordMarkerFilter(tokenizer, set)));
    }
  }) {
    assertAnalyzesTo(keywordMarkingAnalyzer, "田中京一", // 京一 is not normalized
        new String[] { "田中", "京一" }, new int[] { 0, 2 }, new int[] { 2, 4 }, new int[] { 1, 1 });
  }
}
Use of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in the Apache lucene-solr project.
Class TestNorwegianLightStemFilter, method testKeyword.
/**
 * Checks that a term listed in the keyword exclusion set comes out of
 * {@link NorwegianLightStemFilter} unchanged: the input "sekretæren" is
 * expected to be emitted as "sekretæren".
 *
 * @throws IOException propagated from the analysis check
 */
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("sekretæren"), false);
  // try-with-resources: the analyzer is closed even when the assertion below fails,
  // instead of only on the success path.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // The keyword marker sits between the tokenizer and the stem filter.
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new NorwegianLightStemFilter(sink));
    }
  }) {
    checkOneTerm(a, "sekretæren", "sekretæren");
  }
}
Aggregations