Use of org.apache.lucene.analysis.CharArraySet in the Apache lucene-solr project.
Class TestCharArraySet, method testContainsWithNull.
/**
 * Verifies that each {@code contains} variant throws a {@link NullPointerException} when it is
 * handed a null argument.
 */
public void testContainsWithNull() {
  CharArraySet charSet = new CharArraySet(1, true);
  // The casts select which contains overload each call exercises.
  expectThrows(NullPointerException.class, () -> charSet.contains((char[]) null, 0, 10));
  expectThrows(NullPointerException.class, () -> charSet.contains((CharSequence) null));
  expectThrows(NullPointerException.class, () -> charSet.contains((Object) null));
}
Use of org.apache.lucene.analysis.CharArraySet in the Apache lucene-solr project.
Class TestCommonGramsFilterFactory, method testInform.
/**
 * Checks that the CommonGrams factory builds its common-word set from the resources named by the
 * "words" argument in three configurations: a single file, a comma-separated pair of files, and
 * a file read with format "snowball".
 *
 * @throws Exception if creating a factory fails
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", loader != null);

  // Single word file: two entries expected.
  CommonGramsFilterFactory factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader, "words", "stop-1.txt", "ignoreCase", "true");
  CharArraySet words = factory.getCommonWords();
  assertTrue("words is null and it shouldn't be", words != null);
  // assertEquals reports both expected and actual size on failure, matching the snowball check below.
  assertEquals(2, words.size());
  assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase());

  // Two comma-separated word files: four entries expected.
  factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader, "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
  words = factory.getCommonWords();
  assertTrue("words is null and it shouldn't be", words != null);
  assertEquals(4, words.size());
  assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase());

  // Snowball-format word file: exactly these eight entries expected.
  factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader, "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
  words = factory.getCommonWords();
  assertEquals(8, words.size());
  for (String expected : new String[] { "he", "him", "his", "himself", "she", "her", "hers", "herself" }) {
    // Name the missing word so a failure does not require a debugger to diagnose.
    assertTrue("missing common word: " + expected, words.contains(expected));
  }
}
Use of org.apache.lucene.analysis.CharArraySet in the Apache lucene-solr project.
Class TestCommonGramsQueryFilterFactory, method testDefaults.
/**
 * With no "words" argument the factory falls back to a default set of English stopwords; this
 * checks that the set contains "the" and that the resulting filter emits the expected grams.
 */
public void testDefaults() throws Exception {
  CommonGramsQueryFilterFactory queryFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery");

  CharArraySet commonWords = queryFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  assertTrue(commonWords.contains("the"));

  // Feed a three-word input through the filter and compare against the expected grams.
  TokenStream grams = queryFactory.create(whitespaceMockTokenizer("testing the factory"));
  String[] expectedGrams = { "testing_the", "the_factory" };
  assertTokenStreamContents(grams, expectedGrams);
}
Use of org.apache.lucene.analysis.CharArraySet in the Apache lucene-solr project.
Class TestCommonGramsQueryFilterFactory, method testInform.
/**
 * Checks that the CommonGramsQuery factory builds its common-word set from the resources named
 * by the "words" argument in three configurations: a single file, a comma-separated pair of
 * files, and a file read with format "snowball".
 *
 * @throws Exception if creating a factory fails
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", loader != null);

  // Single word file: two entries expected.
  CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader, "words", "stop-1.txt", "ignoreCase", "true");
  CharArraySet words = factory.getCommonWords();
  assertTrue("words is null and it shouldn't be", words != null);
  // assertEquals reports both expected and actual size on failure, matching the snowball check below.
  assertEquals(2, words.size());
  assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase());

  // Two comma-separated word files: four entries expected.
  factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader, "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
  words = factory.getCommonWords();
  assertTrue("words is null and it shouldn't be", words != null);
  assertEquals(4, words.size());
  assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase());

  // Snowball-format word file: exactly these eight entries expected.
  factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader, "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
  words = factory.getCommonWords();
  assertEquals(8, words.size());
  for (String expected : new String[] { "he", "him", "his", "himself", "she", "her", "hers", "herself" }) {
    // Name the missing word so a failure does not require a debugger to diagnose.
    assertTrue("missing common word: " + expected, words.contains(expected));
  }
}
Use of org.apache.lucene.analysis.CharArraySet in the Apache lucene-solr project.
Class TestCompoundWordTokenFilter, method testDumbCompoundWordsSELongestMatch.
/**
 * Runs a {@code DictionaryCompoundWordTokenFilter} over one compound token, with the final
 * constructor flag set to {@code true}, and checks the tokens it emits.
 * NOTE(review): the flag is presumably "only longest match", going by the test name — confirm.
 */
public void testDumbCompoundWordsSELongestMatch() throws Exception {
  final String compound = "Basfiolsfodralmakaregesäll";

  CharArraySet dictionary = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiols", "Makare", "Gesäll", "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral");
  DictionaryCompoundWordTokenFilter filter = new DictionaryCompoundWordTokenFilter(
      whitespaceMockTokenizer(compound),
      dictionary,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
      true);

  // The original compound comes first, followed by the expected sub-words.
  String[] expectedTokens = { compound, "Bas", "fiolsfodral", "fodral", "makare", "gesäll" };
  // Presumably start offsets, end offsets and position increments, in that order —
  // confirm against the assertTokenStreamContents overload in the test base class.
  int[] startOffsets = { 0, 0, 0, 0, 0, 0 };
  int[] endOffsets = { 26, 26, 26, 26, 26, 26 };
  int[] posIncrements = { 1, 0, 0, 0, 0, 0 };

  assertTokenStreamContents(filter, expectedTokens, startOffsets, endOffsets, posIncrements);
}
Aggregations