Use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by Apache.
Class AbstractAnalysisFactory, method getWordSet.
/**
 * Builds one {@link CharArraySet} containing the lines of every listed word file.
 *
 * @param loader     resource loader handed to {@code getLines} for each file
 * @param wordFiles  file name(s) to read; may be a comma-separated list of filenames
 * @param ignoreCase case flag passed both to the returned set and to each per-file set
 * @return the merged word set, or {@code null} when {@code splitFileNames} yields no names
 * @throws IOException declared by this method; presumably raised while reading a file
 */
protected final CharArraySet getWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase) throws IOException {
  final List<String> fileNames = splitFileNames(wordFiles);
  if (fileNames.isEmpty()) {
    // Nothing to load: callers receive null rather than an empty set.
    return null;
  }

  // The default stopword list holds roughly 35 words, but start with a
  // smaller initial capacity than that.
  final CharArraySet merged = new CharArraySet(fileNames.size() * 10, ignoreCase);
  for (final String fileName : fileNames) {
    final List<String> lines = getLines(loader, fileName.trim());
    merged.addAll(StopFilter.makeStopSet(lines, ignoreCase));
  }
  return merged;
}
Use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by Apache.
Class TestCatalanAnalyzer, method testExclude.
/**
 * Test use of the exclusion set: a term listed in the set is expected to come
 * out unchanged, while a term that is not listed is expected to be shortened.
 *
 * @throws IOException declared by this test; presumably from analysis or close
 */
public void testExclude() throws IOException {
  CharArraySet exclusionSet = new CharArraySet(asSet("llengües"), false);
  // try-with-resources closes the analyzer even when an assertion below fails,
  // so a failing check no longer leaks it.
  try (Analyzer a = new CatalanAnalyzer(CatalanAnalyzer.getDefaultStopSet(), exclusionSet)) {
    // Listed in the exclusion set: expected output equals the input.
    checkOneTerm(a, "llengües", "llengües");
    // Not listed: expected output is the shortened form.
    checkOneTerm(a, "llengua", "llengu");
  }
}
Use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by Apache.
Class TestArabicAnalyzer, method testWithStemExclusionSet.
/**
 * Checks that a term placed in the stem exclusion set is emitted unchanged,
 * and that the same term is shortened when the exclusion set is empty.
 *
 * @throws IOException declared by this test; presumably from analysis or close
 */
public void testWithStemExclusionSet() throws IOException {
  CharArraySet set = new CharArraySet(asSet("ساهدهات"), false);
  // Each analyzer lives in its own try-with-resources block so that it is
  // closed even when one of the assertions fails.
  try (ArabicAnalyzer a = new ArabicAnalyzer(CharArraySet.EMPTY_SET, set)) {
    // The assertion is intentionally issued twice on the same analyzer
    // (presumably to exercise reuse of the analyzer -- confirm).
    assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير", "the", "quick", "ساهدهات" });
    assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير", "the", "quick", "ساهدهات" });
  }
  // Same input with an empty exclusion set: the last term is expected to be shortened.
  try (ArabicAnalyzer a = new ArabicAnalyzer(CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET)) {
    assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير", "the", "quick", "ساهد" });
    assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير", "the", "quick", "ساهد" });
  }
}
Use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by Apache.
Class TestArabicAnalyzer, method testCustomStopwords.
/**
 * Test that custom stopwords work, and are not case-sensitive.
 *
 * <p>The stopword set itself is created with {@code ignoreCase == false}, yet the
 * capitalised input token "The" is expected to be removed (presumably because
 * the analyzer normalises case before stopword filtering -- confirm).
 *
 * @throws Exception declared by this test; presumably from analysis or close
 */
public void testCustomStopwords() throws Exception {
  CharArraySet set = new CharArraySet(asSet("the", "and", "a"), false);
  // try-with-resources closes the analyzer even when the assertion fails.
  try (ArabicAnalyzer a = new ArabicAnalyzer(set)) {
    assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick", "brown", "fox" });
  }
}
Use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by Apache.
Class TestKeepFilterFactory, method testInform.
/**
 * Checks the word set exposed by a KeepWord filter factory: configured with
 * {@code keep-1.txt} alone it must hold 2 words, and configured with
 * {@code keep-1.txt, keep-2.txt} it must hold 4 words.
 *
 * @throws Exception declared by this test; presumably from factory creation
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", loader != null);

  // Scenario 1: a single word file.
  final int expectedSingle = 2;
  KeepWordFilterFactory singleFileFactory =
      (KeepWordFilterFactory) tokenFilterFactory("KeepWord", "words", "keep-1.txt", "ignoreCase", "true");
  CharArraySet singleFileWords = singleFileFactory.getWords();
  assertTrue("words is null and it shouldn't be", singleFileWords != null);
  assertTrue("words Size: " + singleFileWords.size() + " is not: " + expectedSingle,
      singleFileWords.size() == expectedSingle);

  // Scenario 2: two word files given as a comma-separated list.
  final int expectedCombined = 4;
  KeepWordFilterFactory twoFileFactory =
      (KeepWordFilterFactory) tokenFilterFactory("KeepWord", "words", "keep-1.txt, keep-2.txt", "ignoreCase", "true");
  CharArraySet combinedWords = twoFileFactory.getWords();
  assertTrue("words is null and it shouldn't be", combinedWords != null);
  assertTrue("words Size: " + combinedWords.size() + " is not: " + expectedCombined,
      combinedWords.size() == expectedCombined);
}
Aggregations