Usage of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in the Apache lucene-solr project.
From the class UkrainianMorfologikAnalyzer, method createComponents:
/**
 * Creates a
 * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 * which tokenizes all the text in the provided {@link Reader}.
 *
 * @return A
 *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         built from a {@link StandardTokenizer} filtered with
 *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter},
 *         {@link SetKeywordMarkerFilter} if a stem exclusion set is
 *         provided, and {@link MorfologikFilter} on the Ukrainian dictionary.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer source = new StandardTokenizer();
  TokenStream result = new StandardFilter(source);
  result = new LowerCaseFilter(result);
  result = new StopFilter(result, stopwords);
  // Mark excluded terms as keywords so the morphological stemmer below
  // leaves them untouched.
  if (!stemExclusionSet.isEmpty()) {
    result = new SetKeywordMarkerFilter(result, stemExclusionSet);
  }
  result = new MorfologikFilter(result, getDictionary());
  return new TokenStreamComponents(source, result);
}
Usage of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in the Apache lucene-solr project.
From the class SoraniAnalyzer, method createComponents:
/**
 * Creates a
 * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 * which tokenizes all the text in the provided {@link Reader}.
 *
 * @return A
 *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         built from a {@link StandardTokenizer} filtered with
 *         {@link StandardFilter}, {@link SoraniNormalizationFilter},
 *         {@link LowerCaseFilter}, {@link DecimalDigitFilter},
 *         {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem
 *         exclusion set is provided, and {@link SoraniStemFilter}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer source = new StandardTokenizer();
  TokenStream result = new StandardFilter(source);
  result = new SoraniNormalizationFilter(result);
  result = new LowerCaseFilter(result);
  result = new DecimalDigitFilter(result);
  result = new StopFilter(result, stopwords);
  // Mark excluded terms as keywords so the stemmer below leaves them
  // untouched.
  if (!stemExclusionSet.isEmpty()) {
    result = new SetKeywordMarkerFilter(result, stemExclusionSet);
  }
  result = new SoraniStemFilter(result);
  return new TokenStreamComponents(source, result);
}
Usage of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in the Apache lucene-solr project.
From the class TestGermanAnalyzer, method testWithKeywordAttribute:
/**
 * Verifies that a term present in the keyword set ("fischen") passes through
 * {@link GermanStemFilter} unchanged, while an unmarked term ("trinken") is
 * stemmed to "trink".
 */
public void testWithKeywordAttribute() throws IOException {
  final CharArraySet keywords = new CharArraySet(1, true);
  keywords.add("fischen");
  final LowerCaseTokenizer tokenizer = new LowerCaseTokenizer();
  tokenizer.setReader(new StringReader("Fischen Trinken"));
  // Mark the keyword term before stemming so the stemmer skips it.
  final TokenStream marked = new SetKeywordMarkerFilter(tokenizer, keywords);
  final GermanStemFilter stemmed = new GermanStemFilter(marked);
  assertTokenStreamContents(stemmed, new String[] { "fischen", "trink" });
}
Usage of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in the Apache lucene-solr project.
From the class TestGermanMinimalStemFilter, method testKeyword:
/**
 * Ensures that a term marked as a keyword via {@link SetKeywordMarkerFilter}
 * is passed through {@link GermanMinimalStemFilter} unchanged.
 */
public void testKeyword() throws IOException {
  final CharArraySet keywords = new CharArraySet(asSet("sängerinnen"), false);
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // Flag keyword terms so the stemmer below leaves them untouched.
      final TokenStream marked = new SetKeywordMarkerFilter(tokenizer, keywords);
      return new TokenStreamComponents(tokenizer, new GermanMinimalStemFilter(marked));
    }
  };
  checkOneTerm(analyzer, "sängerinnen", "sängerinnen");
  analyzer.close();
}
Usage of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in the Apache lucene-solr project.
From the class TestSwedishLightStemFilter, method testKeyword:
/**
 * Ensures that a term marked as a keyword via {@link SetKeywordMarkerFilter}
 * is passed through {@link SwedishLightStemFilter} unchanged.
 */
public void testKeyword() throws IOException {
  final CharArraySet keywords = new CharArraySet(asSet("jaktkarlens"), false);
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // Flag keyword terms so the stemmer below leaves them untouched.
      final TokenStream marked = new SetKeywordMarkerFilter(tokenizer, keywords);
      return new TokenStreamComponents(tokenizer, new SwedishLightStemFilter(marked));
    }
  };
  checkOneTerm(analyzer, "jaktkarlens", "jaktkarlens");
  analyzer.close();
}
Aggregations