Use of org.apache.lucene.analysis.CharArraySet in the project lucene-solr by Apache.
From the class TestHunspellStemFilter, method testKeywordAttribute.
/**
 * Checks how {@link HunspellStemFilter} treats tokens that carry the keyword attribute.
 *
 * <p>The same input is analyzed twice: once directly, and once behind a
 * {@link SetKeywordMarkerFilter}. The expected token lists differ only in that the
 * extra "lucen" token disappears when "lucene" has been marked as a keyword.
 *
 * @throws IOException if consuming either token stream fails
 */
public void testKeywordAttribute() throws IOException {
  // Phase 1: no keyword marking. "lucene" is expected to yield an additional
  // "lucen" token; the second array gives the per-token increments, with 0
  // for that additional token.
  MockTokenizer plainSource = whitespaceMockTokenizer("lucene is awesome");
  plainSource.setEnableChecks(true);
  HunspellStemFilter plainStemmer = new HunspellStemFilter(plainSource, dictionary);
  final String[] stemmedTerms = { "lucene", "lucen", "is", "awesome" };
  final int[] stemmedIncrements = { 1, 0, 1, 1 };
  assertTokenStreamContents(plainStemmer, stemmedTerms, stemmedIncrements);

  // Phase 2: assert with keyword marker. The set holds "Lucene" and is built with
  // the boolean flag set to true, and the lowercase input token is expected to
  // pass through without the extra stem.
  MockTokenizer markedSource = whitespaceMockTokenizer("lucene is awesome");
  CharArraySet keywords = new CharArraySet(Arrays.asList("Lucene"), true);
  SetKeywordMarkerFilter marker = new SetKeywordMarkerFilter(markedSource, keywords);
  HunspellStemFilter markedStemmer = new HunspellStemFilter(marker, dictionary);
  final String[] keywordTerms = { "lucene", "is", "awesome" };
  final int[] keywordIncrements = { 1, 1, 1 };
  assertTokenStreamContents(markedStemmer, keywordTerms, keywordIncrements);
}
Use of org.apache.lucene.analysis.CharArraySet in the project lucene-solr by Apache.
From the class TestFilesystemResourceLoader, method assertClasspathDelegation.
/**
 * Asserts that the given loader can open a stopwords resource and instantiate a class
 * by name, both identified by classpath-style names.
 *
 * @param rl the resource loader under test
 * @throws Exception if the resource cannot be opened or read, or the class cannot be created
 */
private void assertClasspathDelegation(ResourceLoader rl) throws Exception {
  final String stopwordsResource = "org/apache/lucene/analysis/snowball/english_stop.txt";
  final String bufferClassName = "org.apache.lucene.analysis.util.RollingCharBuffer";

  // try a stopwords file from classpath
  InputStreamReader stopwordsReader =
      new InputStreamReader(rl.openResource(stopwordsResource), StandardCharsets.UTF_8);
  CharArraySet stopwords = WordlistLoader.getSnowballWordSet(stopwordsReader);
  assertTrue(stopwords.contains("you"));

  // try to load a class; compare by name only, because the classloader may be different
  Object created = rl.newInstance(bufferClassName, Object.class);
  String createdClassName = created.getClass().getName();
  assertEquals(bufferClassName, createdClassName);
}
Use of org.apache.lucene.analysis.CharArraySet in the project lucene-solr by Apache.
From the class TestHungarianLightStemFilter, method testKeyword.
/**
 * Verifies that a term placed in the keyword exclusion set comes out of
 * {@link HungarianLightStemFilter} unchanged.
 *
 * <p>The analyzer is managed with try-with-resources so it is closed even when the
 * assertion fails; previously a failing check skipped the trailing {@code close()}.
 *
 * @throws IOException if analysis of the test term fails
 */
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("babakocsi"), false);
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // Mark excluded terms as keywords before the stemmer sees them.
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new HungarianLightStemFilter(sink));
    }
  }) {
    // Input and expected output are the same: the keyword must pass through as-is.
    checkOneTerm(a, "babakocsi", "babakocsi");
  }
}
Use of org.apache.lucene.analysis.CharArraySet in the project lucene-solr by Apache.
From the class TestFrenchAnalyzer, method testExclusionTableViaCtor.
/**
 * Verifies that a set passed as the second {@link FrenchAnalyzer} constructor argument
 * keeps its terms unchanged: "habitable" is expected verbatim while "chiste" is
 * expected as "chist".
 *
 * <p>Each analyzer is managed with try-with-resources so it is closed even when the
 * assertion fails; previously a failing check skipped the following {@code close()}.
 *
 * @throws Exception if analysis fails
 */
public void testExclusionTableViaCtor() throws Exception {
  CharArraySet set = new CharArraySet(1, true);
  set.add("habitable");

  try (FrenchAnalyzer fa = new FrenchAnalyzer(CharArraySet.EMPTY_SET, set)) {
    assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable", "chist" });
  }

  // Second pass with a freshly constructed analyzer over the same set; it repeats
  // the first pass exactly and is kept as in the original test.
  try (FrenchAnalyzer fa = new FrenchAnalyzer(CharArraySet.EMPTY_SET, set)) {
    assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable", "chist" });
  }
}
Use of org.apache.lucene.analysis.CharArraySet in the project lucene-solr by Apache.
From the class TestGalicianMinimalStemFilter, method testKeyword.
/**
 * Verifies that a term placed in the keyword exclusion set comes out of
 * {@link GalicianMinimalStemFilter} unchanged.
 *
 * <p>The analyzer is managed with try-with-resources so it is closed even when the
 * assertion fails; previously a failing check skipped the trailing {@code close()}.
 *
 * @throws IOException if analysis of the test term fails
 */
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("elefantes"), false);
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // Mark excluded terms as keywords before the stemmer sees them.
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new GalicianMinimalStemFilter(sink));
    }
  }) {
    // Input and expected output are the same: the keyword must pass through as-is.
    checkOneTerm(a, "elefantes", "elefantes");
  }
}
Aggregations