Search in sources :

Example 41 with MockTokenizer

use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

the class TestGermanLightStemFilter method testKeyword.

/**
 * Checks that a term present in the keyword exclusion set comes out of the
 * {@code SetKeywordMarkerFilter} + {@code GermanLightStemFilter} chain as the
 * same text that went in.
 *
 * @throws IOException propagated from the analysis check
 */
public void testKeyword() throws IOException {
    final CharArraySet exclusionSet = new CharArraySet(asSet("sängerinnen"), false);
    // try-with-resources guarantees the analyzer is closed even when the
    // assertion inside checkOneTerm fails.
    try (Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            // The marker filter sits between the tokenizer and the stemmer.
            TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
            return new TokenStreamComponents(source, new GermanLightStemFilter(sink));
        }
    }) {
        checkOneTerm(a, "sängerinnen", "sängerinnen");
    }
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) CharArraySet(org.apache.lucene.analysis.CharArraySet) TokenStream(org.apache.lucene.analysis.TokenStream) SetKeywordMarkerFilter(org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter) Analyzer(org.apache.lucene.analysis.Analyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) KeywordTokenizer(org.apache.lucene.analysis.core.KeywordTokenizer)

Example 42 with MockTokenizer

use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

the class TestCzechStemFilterFactory method testStemming.

/**
 * Verifies that the filter produced by the "CzechStem" factory really stems its input:
 * the single token "angličtí" is expected to come out as "anglick".
 */
public void testStemming() throws Exception {
    final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    tokenizer.setReader(new StringReader("angličtí"));
    // Wrap the tokenizer with the factory-created filter and check its output.
    final TokenStream stemmed = tokenFilterFactory("CzechStem").create(tokenizer);
    assertTokenStreamContents(stemmed, new String[] { "anglick" });
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TokenStream(org.apache.lucene.analysis.TokenStream) StringReader(java.io.StringReader) Reader(java.io.Reader) Tokenizer(org.apache.lucene.analysis.Tokenizer)

Example 43 with MockTokenizer

use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

the class TestCzechStemmer method testWithKeywordAttribute.

/**
 * Feeds "hole desek" through a {@code SetKeywordMarkerFilter} (whose set contains
 * "hole") followed by a {@code CzechStemFilter}, and expects the tokens
 * "hole" and "desk".
 *
 * @throws IOException propagated from the token stream check
 */
public void testWithKeywordAttribute() throws IOException {
    final CharArraySet keywords = new CharArraySet(1, true);
    keywords.add("hole");

    final MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    tokenizer.setReader(new StringReader("hole desek"));

    final SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(tokenizer, keywords);
    final CzechStemFilter stemmed = new CzechStemFilter(marked);
    assertTokenStreamContents(stemmed, new String[] { "hole", "desk" });
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) CharArraySet(org.apache.lucene.analysis.CharArraySet) SetKeywordMarkerFilter(org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter) StringReader(java.io.StringReader)

Example 44 with MockTokenizer

use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

the class TestGermanNormalizationFilter method setUp.

/**
 * Builds the shared {@code analyzer}: a whitespace {@code MockTokenizer}
 * wrapped in a {@code GermanNormalizationFilter}.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String field) {
            final Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(source, new GermanNormalizationFilter(source));
        }
    };
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TokenStream(org.apache.lucene.analysis.TokenStream) Analyzer(org.apache.lucene.analysis.Analyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) KeywordTokenizer(org.apache.lucene.analysis.core.KeywordTokenizer)

Example 45 with MockTokenizer

use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

the class TestSnowball method checkRandomStrings.

/**
 * Runs the random-data analysis check against a {@code SnowballFilter}
 * configured for the given language.
 *
 * @param snowballLanguage language name handed to the {@code SnowballFilter} constructor
 * @throws IOException propagated from the random-data check
 */
public void checkRandomStrings(final String snowballLanguage) throws IOException {
    // try-with-resources guarantees the analyzer is closed even when
    // checkRandomData throws.
    try (Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer();
            return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
        }
    }) {
        checkRandomData(random(), a, 100 * RANDOM_MULTIPLIER);
    }
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Analyzer(org.apache.lucene.analysis.Analyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) KeywordTokenizer(org.apache.lucene.analysis.core.KeywordTokenizer)

Aggregations

MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 280, Tokenizer (org.apache.lucene.analysis.Tokenizer): 204, Analyzer (org.apache.lucene.analysis.Analyzer): 161, StringReader (java.io.StringReader): 118, TokenStream (org.apache.lucene.analysis.TokenStream): 116, KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer): 106, Reader (java.io.Reader): 59, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 54, CharArraySet (org.apache.lucene.analysis.CharArraySet): 44, Directory (org.apache.lucene.store.Directory): 36, Document (org.apache.lucene.document.Document): 31, BytesRef (org.apache.lucene.util.BytesRef): 25, SetKeywordMarkerFilter (org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter): 21, TextField (org.apache.lucene.document.TextField): 20, CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 18, Field (org.apache.lucene.document.Field): 17, FieldType (org.apache.lucene.document.FieldType): 14, StringField (org.apache.lucene.document.StringField): 11, Input (org.apache.lucene.search.suggest.Input): 11, InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator): 11