Use of org.apache.lucene.analysis.util.ClasspathResourceLoader in the lucene-solr project by Apache.
From the class TestKeepFilterFactory, method testInform.
/**
 * Builds KeepWord factories from one word file and then from two word files,
 * checking each time that the factory exposes a non-null word set of the
 * expected size (2, then 4).
 */
public void testInform() throws Exception {
  final ResourceLoader resourceLoader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", resourceLoader != null);

  // Single word file: two entries expected.
  KeepWordFilterFactory keepFactory =
      (KeepWordFilterFactory) tokenFilterFactory("KeepWord", "words", "keep-1.txt", "ignoreCase", "true");
  CharArraySet keepWords = keepFactory.getWords();
  assertTrue("words is null and it shouldn't be", keepWords != null);
  int wordCount = keepWords.size();
  assertTrue("words Size: " + wordCount + " is not: " + 2, wordCount == 2);

  // Two comma-separated word files: four entries expected.
  keepFactory =
      (KeepWordFilterFactory) tokenFilterFactory("KeepWord", "words", "keep-1.txt, keep-2.txt", "ignoreCase", "true");
  keepWords = keepFactory.getWords();
  assertTrue("words is null and it shouldn't be", keepWords != null);
  wordCount = keepWords.size();
  assertTrue("words Size: " + wordCount + " is not: " + 4, wordCount == 4);
}
Use of org.apache.lucene.analysis.util.ClasspathResourceLoader in the lucene-solr project by Apache.
From the class TestCommonGramsFilterFactory, method testInform.
/**
 * Builds CommonGrams factories from one word file, two word files, and a
 * snowball-format file. After each build it checks the size of the common-word
 * set; the first two builds also check that ignoreCase is reported as true,
 * and the snowball build checks the eight expected entries by name.
 */
public void testInform() throws Exception {
  final ResourceLoader resourceLoader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", resourceLoader != null);

  // Single word file: two entries expected.
  CommonGramsFilterFactory gramsFactory = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", Version.LATEST, resourceLoader, "words", "stop-1.txt", "ignoreCase", "true");
  CharArraySet commonWords = gramsFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  int wordCount = commonWords.size();
  assertTrue("words Size: " + wordCount + " is not: " + 2, wordCount == 2);
  boolean ignoresCase = gramsFactory.isIgnoreCase();
  assertTrue(ignoresCase + " does not equal: " + true, ignoresCase);

  // Two comma-separated word files: four entries expected.
  gramsFactory = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", Version.LATEST, resourceLoader, "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
  commonWords = gramsFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  wordCount = commonWords.size();
  assertTrue("words Size: " + wordCount + " is not: " + 4, wordCount == 4);
  ignoresCase = gramsFactory.isIgnoreCase();
  assertTrue(ignoresCase + " does not equal: " + true, ignoresCase);

  // Snowball-format word file: exactly these eight entries expected.
  gramsFactory = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", Version.LATEST, resourceLoader,
      "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
  commonWords = gramsFactory.getCommonWords();
  assertEquals(8, commonWords.size());
  final String[] expectedEntries = {"he", "him", "his", "himself", "she", "her", "hers", "herself"};
  for (String entry : expectedEntries) {
    assertTrue(commonWords.contains(entry));
  }
}
Use of org.apache.lucene.analysis.util.ClasspathResourceLoader in the lucene-solr project by Apache.
From the class TestCommonGramsQueryFilterFactory, method testInform.
/**
 * Builds CommonGramsQuery factories from one word file, two word files, and a
 * snowball-format file. After each build it checks the size of the common-word
 * set; the first two builds also check that ignoreCase is reported as true,
 * and the snowball build checks the eight expected entries by name.
 */
public void testInform() throws Exception {
  final ResourceLoader stopLoader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", stopLoader != null);

  // Single word file: two entries expected.
  CommonGramsQueryFilterFactory queryFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", Version.LATEST, stopLoader, "words", "stop-1.txt", "ignoreCase", "true");
  CharArraySet loadedWords = queryFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", loadedWords != null);
  int loadedCount = loadedWords.size();
  assertTrue("words Size: " + loadedCount + " is not: " + 2, loadedCount == 2);
  boolean caseIgnored = queryFactory.isIgnoreCase();
  assertTrue(caseIgnored + " does not equal: " + true, caseIgnored);

  // Two comma-separated word files: four entries expected.
  queryFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", Version.LATEST, stopLoader, "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
  loadedWords = queryFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", loadedWords != null);
  loadedCount = loadedWords.size();
  assertTrue("words Size: " + loadedCount + " is not: " + 4, loadedCount == 4);
  caseIgnored = queryFactory.isIgnoreCase();
  assertTrue(caseIgnored + " does not equal: " + true, caseIgnored);

  // Snowball-format word file: exactly these eight entries expected.
  queryFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", Version.LATEST, stopLoader,
      "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
  loadedWords = queryFactory.getCommonWords();
  assertEquals(8, loadedWords.size());
  final String[] requiredWords = {"he", "him", "his", "himself", "she", "her", "hers", "herself"};
  for (String required : requiredWords) {
    assertTrue(loadedWords.contains(required));
  }
}
Use of org.apache.lucene.analysis.util.ClasspathResourceLoader in the lucene-solr project by Apache.
From the class TestICUTokenizerFactory, method testMixedText.
/**
 * Feeds a string mixing Thai, English and Lao text through a tokenizer created
 * by an ICUTokenizerFactory configured with no arguments, and checks the exact
 * token sequence produced.
 */
public void testMixedText() throws Exception {
  final String mixedText = "การที่ได้ต้องแสดงว่างานดี This is a test ກວ່າດອກ";
  final String[] expectedTokens = {
    "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี",
    "This", "is", "a", "test",
    "ກວ່າ", "ດອກ"
  };

  // Empty argument map: the factory is built without any configuration.
  ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(new HashMap<String, String>());
  tokenizerFactory.inform(new ClasspathResourceLoader(getClass()));

  Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());
  Reader input = new StringReader(mixedText);
  tokenizer.setReader(input);

  assertTokenStreamContents(tokenizer, expectedTokens);
}
Use of org.apache.lucene.analysis.util.ClasspathResourceLoader in the lucene-solr project by Apache.
From the class TestICUTokenizerFactory, method testTokenizeLatinOnWhitespaceOnly.
/**
 * Configures an ICUTokenizerFactory with the rule file
 * {@code Latin-break-only-on-whitespace.rbbi} for the {@code Latn} script and
 * checks both the tokens and the token types produced for punctuation-heavy
 * Latin input.
 */
public void testTokenizeLatinOnWhitespaceOnly() throws Exception {
  // “ U+201C LEFT DOUBLE QUOTATION MARK; ” U+201D RIGHT DOUBLE QUOTATION MARK
  final String punctuatedText = " Don't,break.at?/(punct)! “nice”\r\n\r\n85_At:all; `really\" +2=3$5,&813 !@#%$^)(*@#$ ";
  final String[] expectedTokens = {
    "Don't,break.at?/(punct)!", "“nice”", "85_At:all;", "`really\"", "+2=3$5,&813", "!@#%$^)(*@#$"
  };
  final String[] expectedTypes = {
    "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<OTHER>"
  };

  final Map<String, String> factoryArgs = new HashMap<>();
  factoryArgs.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-break-only-on-whitespace.rbbi");
  ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(factoryArgs);
  tokenizerFactory.inform(new ClasspathResourceLoader(this.getClass()));

  Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());
  Reader input = new StringReader(punctuatedText);
  tokenizer.setReader(input);

  assertTokenStreamContents(tokenizer, expectedTokens, expectedTypes);
}
Aggregations