Search in sources :

Example 1 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.

the class TestKeepFilterFactory method testInform.

/**
 * Builds a "KeepWord" filter factory twice and checks the size of the word
 * set it exposes: two entries for {@code keep-1.txt} and four entries for
 * the list {@code keep-1.txt, keep-2.txt}.
 */
public void testInform() throws Exception {
    // NOTE(review): this loader is only null-checked; it is not passed to tokenFilterFactory below.
    ResourceLoader resourceLoader = new ClasspathResourceLoader(getClass());
    assertTrue("loader is null and it shouldn't be", null != resourceLoader);

    // Single word file.
    KeepWordFilterFactory singleFileFactory = (KeepWordFilterFactory) tokenFilterFactory(
            "KeepWord",
            "words", "keep-1.txt",
            "ignoreCase", "true");
    CharArraySet singleFileWords = singleFileFactory.getWords();
    assertTrue("words is null and it shouldn't be", null != singleFileWords);
    int singleFileCount = singleFileWords.size();
    assertTrue("words Size: " + singleFileCount + " is not: " + 2, 2 == singleFileCount);

    // Two word files given as one comma-separated argument.
    KeepWordFilterFactory twoFileFactory = (KeepWordFilterFactory) tokenFilterFactory(
            "KeepWord",
            "words", "keep-1.txt, keep-2.txt",
            "ignoreCase", "true");
    CharArraySet twoFileWords = twoFileFactory.getWords();
    assertTrue("words is null and it shouldn't be", null != twoFileWords);
    int twoFileCount = twoFileWords.size();
    assertTrue("words Size: " + twoFileCount + " is not: " + 4, 4 == twoFileCount);
}
Also used : ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 2 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.

the class TestCommonGramsFilterFactory method testInform.

/**
 * Builds a "CommonGrams" filter factory from three word-file configurations
 * and checks the common-word set each one exposes: two entries for
 * {@code stop-1.txt}, four for {@code stop-1.txt, stop-2.txt}, and eight
 * specific pronouns for {@code stop-snowball.txt} read with
 * {@code format=snowball}.
 */
public void testInform() throws Exception {
    ResourceLoader stopLoader = new ClasspathResourceLoader(TestStopFilterFactory.class);
    assertTrue("loader is null and it shouldn't be", null != stopLoader);

    // Single word file.
    CommonGramsFilterFactory gramsFactory = (CommonGramsFilterFactory) tokenFilterFactory(
            "CommonGrams", Version.LATEST, stopLoader,
            "words", "stop-1.txt",
            "ignoreCase", "true");
    CharArraySet commonWords = gramsFactory.getCommonWords();
    assertTrue("words is null and it shouldn't be", null != commonWords);
    int wordCount = commonWords.size();
    assertTrue("words Size: " + wordCount + " is not: " + 2, 2 == wordCount);
    boolean ignoringCase = gramsFactory.isIgnoreCase();
    assertTrue(ignoringCase + " does not equal: " + true, ignoringCase);

    // Two word files given as one comma-separated argument.
    gramsFactory = (CommonGramsFilterFactory) tokenFilterFactory(
            "CommonGrams", Version.LATEST, stopLoader,
            "words", "stop-1.txt, stop-2.txt",
            "ignoreCase", "true");
    commonWords = gramsFactory.getCommonWords();
    assertTrue("words is null and it shouldn't be", null != commonWords);
    wordCount = commonWords.size();
    assertTrue("words Size: " + wordCount + " is not: " + 4, 4 == wordCount);
    ignoringCase = gramsFactory.isIgnoreCase();
    assertTrue(ignoringCase + " does not equal: " + true, ignoringCase);

    // Word file read with format=snowball.
    gramsFactory = (CommonGramsFilterFactory) tokenFilterFactory(
            "CommonGrams", Version.LATEST, stopLoader,
            "words", "stop-snowball.txt",
            "format", "snowball",
            "ignoreCase", "true");
    commonWords = gramsFactory.getCommonWords();
    assertEquals(8, commonWords.size());
    String[] expectedPronouns = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
    for (String pronoun : expectedPronouns) {
        assertTrue(commonWords.contains(pronoun));
    }
}
Also used : ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 3 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.

the class TestCommonGramsQueryFilterFactory method testInform.

/**
 * Builds a "CommonGramsQuery" filter factory from three word-file
 * configurations and checks the common-word set each one exposes: two
 * entries for {@code stop-1.txt}, four for {@code stop-1.txt, stop-2.txt},
 * and eight specific pronouns for {@code stop-snowball.txt} read with
 * {@code format=snowball}.
 */
public void testInform() throws Exception {
    ResourceLoader stopLoader = new ClasspathResourceLoader(TestStopFilterFactory.class);
    assertTrue("loader is null and it shouldn't be", null != stopLoader);

    // Single word file.
    CommonGramsQueryFilterFactory queryFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory(
            "CommonGramsQuery", Version.LATEST, stopLoader,
            "words", "stop-1.txt",
            "ignoreCase", "true");
    CharArraySet commonWords = queryFactory.getCommonWords();
    assertTrue("words is null and it shouldn't be", null != commonWords);
    int wordCount = commonWords.size();
    assertTrue("words Size: " + wordCount + " is not: " + 2, 2 == wordCount);
    boolean ignoringCase = queryFactory.isIgnoreCase();
    assertTrue(ignoringCase + " does not equal: " + true, ignoringCase);

    // Two word files given as one comma-separated argument.
    queryFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory(
            "CommonGramsQuery", Version.LATEST, stopLoader,
            "words", "stop-1.txt, stop-2.txt",
            "ignoreCase", "true");
    commonWords = queryFactory.getCommonWords();
    assertTrue("words is null and it shouldn't be", null != commonWords);
    wordCount = commonWords.size();
    assertTrue("words Size: " + wordCount + " is not: " + 4, 4 == wordCount);
    ignoringCase = queryFactory.isIgnoreCase();
    assertTrue(ignoringCase + " does not equal: " + true, ignoringCase);

    // Word file read with format=snowball.
    queryFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory(
            "CommonGramsQuery", Version.LATEST, stopLoader,
            "words", "stop-snowball.txt",
            "format", "snowball",
            "ignoreCase", "true");
    commonWords = queryFactory.getCommonWords();
    assertEquals(8, commonWords.size());
    String[] expectedPronouns = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
    for (String pronoun : expectedPronouns) {
        assertTrue(commonWords.contains(pronoun));
    }
}
Also used : ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 4 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.

the class TestICUTokenizerFactory method testMixedText.

/**
 * Runs a tokenizer created by an {@code ICUTokenizerFactory} with no
 * arguments over a string containing Thai, English and Lao text, and
 * checks the exact sequence of tokens produced.
 */
public void testMixedText() throws Exception {
    // Factory built from an empty argument map, then informed with a classpath loader.
    ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(new HashMap<String, String>());
    tokenizerFactory.inform(new ClasspathResourceLoader(getClass()));
    Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());

    Reader input = new StringReader("การที่ได้ต้องแสดงว่างานดี  This is a test ກວ່າດອກ");
    tokenizer.setReader(input);

    String[] expectedTokens = {
        "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี",
        "This", "is", "a", "test",
        "ກວ່າ", "ດອກ"
    };
    assertTokenStreamContents(tokenizer, expectedTokens);
}
Also used : StringReader(java.io.StringReader) Reader(java.io.Reader) ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) Tokenizer(org.apache.lucene.analysis.Tokenizer)

Example 5 with ClasspathResourceLoader

use of org.apache.lucene.analysis.util.ClasspathResourceLoader in project lucene-solr by apache.

the class TestICUTokenizerFactory method testTokenizeLatinOnWhitespaceOnly.

/**
 * Configures an {@code ICUTokenizerFactory} with the RULEFILES argument
 * {@code Latn:Latin-break-only-on-whitespace.rbbi} and checks both the
 * tokens and the token types produced for a punctuation-heavy input.
 */
public void testTokenizeLatinOnWhitespaceOnly() throws Exception {
    final Map<String, String> factoryArgs = new HashMap<>();
    factoryArgs.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-break-only-on-whitespace.rbbi");

    ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(factoryArgs);
    tokenizerFactory.inform(new ClasspathResourceLoader(this.getClass()));
    Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());

    // “ U+201C LEFT DOUBLE QUOTATION MARK; ” U+201D RIGHT DOUBLE QUOTATION MARK
    Reader input = new StringReader("  Don't,break.at?/(punct)!  “nice”\r\n\r\n85_At:all; `really\" +2=3$5,&813 !@#%$^)(*@#$   ");
    tokenizer.setReader(input);

    // Expected token text, in order.
    String[] expectedTokens = {
        "Don't,break.at?/(punct)!",
        "“nice”",
        "85_At:all;",
        "`really\"",
        "+2=3$5,&813",
        "!@#%$^)(*@#$"
    };
    // Expected token type for each entry of expectedTokens, position by position.
    String[] expectedTypes = {
        "<ALPHANUM>",
        "<ALPHANUM>",
        "<ALPHANUM>",
        "<ALPHANUM>",
        "<NUM>",
        "<OTHER>"
    };
    assertTokenStreamContents(tokenizer, expectedTokens, expectedTypes);
}
Also used : HashMap(java.util.HashMap) StringReader(java.io.StringReader) Reader(java.io.Reader) ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) Tokenizer(org.apache.lucene.analysis.Tokenizer)

Aggregations

ClasspathResourceLoader (org.apache.lucene.analysis.util.ClasspathResourceLoader): 25, HashMap (java.util.HashMap): 16, ResourceLoader (org.apache.lucene.analysis.util.ResourceLoader): 7, StringReader (java.io.StringReader): 5, CharArraySet (org.apache.lucene.analysis.CharArraySet): 5, Tokenizer (org.apache.lucene.analysis.Tokenizer): 5, Reader (java.io.Reader): 4, JsonElement (com.google.gson.JsonElement): 3, JsonObject (com.google.gson.JsonObject): 3, ResourceLoaderAware (org.apache.lucene.analysis.util.ResourceLoaderAware): 3, JsonArray (com.google.gson.JsonArray): 2, LinkedList (java.util.LinkedList): 2, Caverphone2 (org.apache.commons.codec.language.Caverphone2): 2, Metaphone (org.apache.commons.codec.language.Metaphone): 2, TokenStream (org.apache.lucene.analysis.TokenStream): 2, IOException (java.io.IOException): 1, NodeStateResourceLoader (org.apache.jackrabbit.oak.plugins.index.lucene.NodeStateAnalyzerFactory.NodeStateResourceLoader): 1, NodeBuilder (org.apache.jackrabbit.oak.spi.state.NodeBuilder): 1, LimitTokenCountFilterFactory (org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory): 1, CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory): 1