Search in sources :

Example 86 with TokenStream

use of org.apache.lucene.analysis.TokenStream in project lucene-solr by apache.

the class TestPortugueseLightStemFilterFactory method testStemming.

public void testStemming() throws Exception {
    Reader reader = new StringReader("evidentemente");
    TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    ((Tokenizer) stream).setReader(reader);
    stream = tokenFilterFactory("PortugueseLightStem").create(stream);
    assertTokenStreamContents(stream, new String[] { "evident" });
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TokenStream(org.apache.lucene.analysis.TokenStream) StringReader(java.io.StringReader) StringReader(java.io.StringReader) Reader(java.io.Reader) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer)

Example 87 with TokenStream

use of org.apache.lucene.analysis.TokenStream in project lucene-solr by apache.

the class TestPortugueseLightStemFilter method testKeyword.

public void testKeyword() throws IOException {
    final CharArraySet exclusionSet = new CharArraySet(asSet("quilométricas"), false);
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
            return new TokenStreamComponents(source, new PortugueseLightStemFilter(sink));
        }
    };
    checkOneTerm(a, "quilométricas", "quilométricas");
    a.close();
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) CharArraySet(org.apache.lucene.analysis.CharArraySet) TokenStream(org.apache.lucene.analysis.TokenStream) SetKeywordMarkerFilter(org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter) Analyzer(org.apache.lucene.analysis.Analyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) KeywordTokenizer(org.apache.lucene.analysis.core.KeywordTokenizer)

Example 88 with TokenStream

use of org.apache.lucene.analysis.TokenStream in project lucene-solr by apache.

the class TestPortugueseMinimalStemFilter method testKeyword.

public void testKeyword() throws IOException {
    final CharArraySet exclusionSet = new CharArraySet(asSet("quilométricas"), false);
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
            return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(sink));
        }
    };
    checkOneTerm(a, "quilométricas", "quilométricas");
    a.close();
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) CharArraySet(org.apache.lucene.analysis.CharArraySet) TokenStream(org.apache.lucene.analysis.TokenStream) SetKeywordMarkerFilter(org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter) Analyzer(org.apache.lucene.analysis.Analyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) KeywordTokenizer(org.apache.lucene.analysis.core.KeywordTokenizer)

Example 89 with TokenStream

use of org.apache.lucene.analysis.TokenStream in project lucene-solr by apache.

the class TestPortugueseMinimalStemFilterFactory method testStemming.

public void testStemming() throws Exception {
    Reader reader = new StringReader("questões");
    TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    ((Tokenizer) stream).setReader(reader);
    stream = tokenFilterFactory("PortugueseMinimalStem").create(stream);
    assertTokenStreamContents(stream, new String[] { "questão" });
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TokenStream(org.apache.lucene.analysis.TokenStream) StringReader(java.io.StringReader) StringReader(java.io.StringReader) Reader(java.io.Reader) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer)

Example 90 with TokenStream

use of org.apache.lucene.analysis.TokenStream in project lucene-solr by apache.

the class TestPortugueseStemFilter method testKeyword.

public void testKeyword() throws IOException {
    final CharArraySet exclusionSet = new CharArraySet(asSet("quilométricas"), false);
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
            return new TokenStreamComponents(source, new PortugueseStemFilter(sink));
        }
    };
    checkOneTerm(a, "quilométricas", "quilométricas");
    a.close();
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) CharArraySet(org.apache.lucene.analysis.CharArraySet) TokenStream(org.apache.lucene.analysis.TokenStream) SetKeywordMarkerFilter(org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter) Analyzer(org.apache.lucene.analysis.Analyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) KeywordTokenizer(org.apache.lucene.analysis.core.KeywordTokenizer)

Aggregations

TokenStream (org.apache.lucene.analysis.TokenStream)849 StringReader (java.io.StringReader)337 Tokenizer (org.apache.lucene.analysis.Tokenizer)244 Reader (java.io.Reader)175 CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute)141 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)128 Analyzer (org.apache.lucene.analysis.Analyzer)121 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)94 LowerCaseFilter (org.apache.lucene.analysis.LowerCaseFilter)88 IOException (java.io.IOException)86 StandardFilter (org.apache.lucene.analysis.standard.StandardFilter)73 Term (org.apache.lucene.index.Term)66 Document (org.apache.lucene.document.Document)64 ArrayList (java.util.ArrayList)59 StandardTokenizer (org.apache.lucene.analysis.standard.StandardTokenizer)59 StopFilter (org.apache.lucene.analysis.StopFilter)58 KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer)57 SetKeywordMarkerFilter (org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter)53 Test (org.junit.Test)53 OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute)47