use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.
the class TestGermanAnalyzer method testStemExclusionTable.
/**
 * Builds a {@code GermanAnalyzer} with an empty first set and a second set that
 * contains only "tischen", then checks that analyzing "tischen" produces the
 * single term "tischen".
 */
public void testStemExclusionTable() throws Exception {
  CharArraySet stemExclusions = new CharArraySet(asSet("tischen"), false);
  // try-with-resources closes the analyzer even when the check below fails.
  try (GermanAnalyzer a = new GermanAnalyzer(CharArraySet.EMPTY_SET, stemExclusions)) {
    checkOneTerm(a, "tischen", "tischen");
  }
}
use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.
the class TestGermanMinimalStemFilter method testKeyword.
/**
 * Runs "sängerinnen" through a whitespace {@code MockTokenizer}, a
 * {@code SetKeywordMarkerFilter} configured with a set holding that same word,
 * and a {@code GermanMinimalStemFilter}, and checks that the term comes out
 * as "sängerinnen".
 */
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("sängerinnen"), false);
  // try-with-resources closes the analyzer even when the check below fails.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // The keyword marker sits between the tokenizer and the stem filter.
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new GermanMinimalStemFilter(sink));
    }
  }) {
    checkOneTerm(a, "sängerinnen", "sängerinnen");
  }
}
use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.
the class TestStopFilterFactory method testInform.
/**
 * Checks the stop-word set and the ignore-case flag reported by
 * {@code StopFilterFactory} for four configurations: a single word file, a
 * comma-separated pair of word files, a file read with {@code format=snowball},
 * and a factory created with no arguments.
 */
public void testInform() throws Exception {
  // `new` never yields null, so this only confirms the loader can be constructed.
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", loader != null);

  // Single word file.
  StopFilterFactory factory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-1.txt", "ignoreCase", "true");
  CharArraySet words = factory.getStopWords();
  assertTrue("words is null and it shouldn't be", words != null);
  // assertEquals reports expected vs. actual on failure, matching the snowball check below.
  assertEquals("unexpected number of stop words", 2, words.size());
  assertTrue("ignoreCase should be true", factory.isIgnoreCase());

  // Two word files given as one comma-separated argument.
  factory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
  words = factory.getStopWords();
  assertTrue("words is null and it shouldn't be", words != null);
  assertEquals("unexpected number of stop words", 4, words.size());
  assertTrue("ignoreCase should be true", factory.isIgnoreCase());

  // Word file read with format=snowball.
  factory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
  words = factory.getStopWords();
  // Same null guard as the two sections above, so a missing set fails with a clear message.
  assertTrue("words is null and it shouldn't be", words != null);
  assertEquals(8, words.size());
  assertTrue(words.contains("he"));
  assertTrue(words.contains("him"));
  assertTrue(words.contains("his"));
  assertTrue(words.contains("himself"));
  assertTrue(words.contains("she"));
  assertTrue(words.contains("her"));
  assertTrue(words.contains("hers"));
  assertTrue(words.contains("herself"));

  // defaults
  factory = (StopFilterFactory) tokenFilterFactory("Stop");
  assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords());
  assertEquals(false, factory.isIgnoreCase());
}
use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.
the class TestCzechAnalyzer method testWithStemExclusionSet.
/**
 * Builds a {@code CzechAnalyzer} with an empty first set and a second set that
 * contains only "hole", then checks that "hole desek" analyzes to the terms
 * "hole" and "desk".
 */
public void testWithStemExclusionSet() throws IOException {
  CharArraySet set = new CharArraySet(1, true);
  set.add("hole");
  // try-with-resources closes the analyzer even when the assertion below fails.
  try (CzechAnalyzer cz = new CzechAnalyzer(CharArraySet.EMPTY_SET, set)) {
    assertAnalyzesTo(cz, "hole desek", new String[] { "hole", "desk" });
  }
}
use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.
the class TestSwedishAnalyzer method testExclude.
/**
 * Tests use of the exclusion set: with "jaktkarlarne" in the set passed to
 * {@code SwedishAnalyzer}, that word is expected to come out unchanged, while
 * "jaktkarlens" (not in the set) is expected to come out as "jaktkarl".
 */
public void testExclude() throws IOException {
  CharArraySet exclusionSet = new CharArraySet(asSet("jaktkarlarne"), false);
  // try-with-resources closes the analyzer even when one of the checks below fails.
  try (Analyzer a = new SwedishAnalyzer(SwedishAnalyzer.getDefaultStopSet(), exclusionSet)) {
    checkOneTerm(a, "jaktkarlarne", "jaktkarlarne");
    checkOneTerm(a, "jaktkarlens", "jaktkarl");
  }
}
Aggregations