Search in sources :

Example 76 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

the class TestGermanAnalyzer method testStemExclusionTable.

public void testStemExclusionTable() throws Exception {
    GermanAnalyzer a = new GermanAnalyzer(CharArraySet.EMPTY_SET, new CharArraySet(asSet("tischen"), false));
    checkOneTerm(a, "tischen", "tischen");
    a.close();
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 77 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

the class TestGermanMinimalStemFilter method testKeyword.

public void testKeyword() throws IOException {
    final CharArraySet exclusionSet = new CharArraySet(asSet("sängerinnen"), false);
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
            return new TokenStreamComponents(source, new GermanMinimalStemFilter(sink));
        }
    };
    checkOneTerm(a, "sängerinnen", "sängerinnen");
    a.close();
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) CharArraySet(org.apache.lucene.analysis.CharArraySet) TokenStream(org.apache.lucene.analysis.TokenStream) SetKeywordMarkerFilter(org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter) Analyzer(org.apache.lucene.analysis.Analyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) KeywordTokenizer(org.apache.lucene.analysis.core.KeywordTokenizer)

Example 78 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

the class TestStopFilterFactory method testInform.

public void testInform() throws Exception {
    ResourceLoader loader = new ClasspathResourceLoader(getClass());
    assertTrue("loader is null and it shouldn't be", loader != null);
    StopFilterFactory factory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-1.txt", "ignoreCase", "true");
    CharArraySet words = factory.getStopWords();
    assertTrue("words is null and it shouldn't be", words != null);
    assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);
    assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true);
    factory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
    words = factory.getStopWords();
    assertTrue("words is null and it shouldn't be", words != null);
    assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
    assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true);
    factory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
    words = factory.getStopWords();
    assertEquals(8, words.size());
    assertTrue(words.contains("he"));
    assertTrue(words.contains("him"));
    assertTrue(words.contains("his"));
    assertTrue(words.contains("himself"));
    assertTrue(words.contains("she"));
    assertTrue(words.contains("her"));
    assertTrue(words.contains("hers"));
    assertTrue(words.contains("herself"));
    // defaults
    factory = (StopFilterFactory) tokenFilterFactory("Stop");
    assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords());
    assertEquals(false, factory.isIgnoreCase());
}
Also used : ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) CharArraySet(org.apache.lucene.analysis.CharArraySet) ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader)

Example 79 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

the class TestCzechAnalyzer method testWithStemExclusionSet.

public void testWithStemExclusionSet() throws IOException {
    CharArraySet set = new CharArraySet(1, true);
    set.add("hole");
    CzechAnalyzer cz = new CzechAnalyzer(CharArraySet.EMPTY_SET, set);
    assertAnalyzesTo(cz, "hole desek", new String[] { "hole", "desk" });
    cz.close();
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 80 with CharArraySet

use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.

the class TestSwedishAnalyzer method testExclude.

/** test use of exclusion set */
public void testExclude() throws IOException {
    CharArraySet exclusionSet = new CharArraySet(asSet("jaktkarlarne"), false);
    Analyzer a = new SwedishAnalyzer(SwedishAnalyzer.getDefaultStopSet(), exclusionSet);
    checkOneTerm(a, "jaktkarlarne", "jaktkarlarne");
    checkOneTerm(a, "jaktkarlens", "jaktkarl");
    a.close();
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet) Analyzer(org.apache.lucene.analysis.Analyzer)

Aggregations

CharArraySet (org.apache.lucene.analysis.CharArraySet)137 Analyzer (org.apache.lucene.analysis.Analyzer)54 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)46 Tokenizer (org.apache.lucene.analysis.Tokenizer)43 TokenStream (org.apache.lucene.analysis.TokenStream)37 KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer)34 SetKeywordMarkerFilter (org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter)26 StringReader (java.io.StringReader)23 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)9 StopFilter (org.apache.lucene.analysis.StopFilter)7 TokenFilter (org.apache.lucene.analysis.TokenFilter)6 WordDelimiterFilter (org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter)5 WordDelimiterGraphFilter (org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter)5 ClasspathResourceLoader (org.apache.lucene.analysis.util.ClasspathResourceLoader)5 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)4 HyphenationTree (org.apache.lucene.analysis.compound.hyphenation.HyphenationTree)4 ResourceLoader (org.apache.lucene.analysis.util.ResourceLoader)4 InputSource (org.xml.sax.InputSource)4 Reader (java.io.Reader)3 ArrayList (java.util.ArrayList)3