Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by apache: class UkrainianMorfologikAnalyzer, method createComponents.
/**
 * Creates a {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 * which tokenizes all the text in the provided {@link Reader}.
 *
 * @return A {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         built from a {@link StandardTokenizer} filtered with
 *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter},
 *         {@link SetKeywordMarkerFilter} (if a stem exclusion set is provided)
 *         and {@link MorfologikFilter} on the Ukrainian dictionary.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  Tokenizer source = new StandardTokenizer();
  TokenStream result = new StandardFilter(source);
  result = new LowerCaseFilter(result);
  result = new StopFilter(result, stopwords);
  if (stemExclusionSet.isEmpty() == false) {
    result = new SetKeywordMarkerFilter(result, stemExclusionSet);
  }
  result = new MorfologikFilter(result, getDictionary());
  return new TokenStreamComponents(source, result);
}
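For reference, a consumer would typically pull tokens from such an analyzer through the standard Analyzer API. The following is a minimal sketch, not taken from lucene-solr itself; the field name, the sample Ukrainian sentence, and the class name UkrainianAnalyzerDemo are illustrative assumptions.

// Minimal sketch, assuming the analyzers-morfologik (Ukrainian) module is on the classpath.
// Field name, sample text and class name are illustrative, not from the original source.
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.uk.UkrainianMorfologikAnalyzer;

public class UkrainianAnalyzerDemo {
  public static void main(String[] args) throws IOException {
    try (Analyzer analyzer = new UkrainianMorfologikAnalyzer();
         TokenStream ts = analyzer.tokenStream("text", "Це приклад українського тексту")) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();                              // required before incrementToken()
      while (ts.incrementToken()) {
        System.out.println(term.toString());   // lower-cased, stop-filtered, lemmatized terms
      }
      ts.end();
    }
  }
}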
Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by apache: class TestJapaneseTokenizerFactory, method testMode.
/**
* Test mode parameter: specifying normal mode
*/
public void testMode() throws IOException {
  Map<String, String> args = new HashMap<>();
  args.put("mode", "normal");
  JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args);
  factory.inform(new StringMockResourceLoader(""));
  TokenStream ts = factory.create(newAttributeFactory());
  ((Tokenizer) ts).setReader(new StringReader("シニアソフトウェアエンジニア"));
  assertTokenStreamContents(ts, new String[] { "シニアソフトウェアエンジニア" });
}
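The factory's "mode" argument maps to JapaneseTokenizer.Mode. As an illustration only, the sketch below constructs the tokenizer directly in NORMAL mode, which keeps the compound above as a single token, whereas the default SEARCH mode also emits its parts (see testDefaults below); the demo class name is an assumption.

// Minimal sketch (illustrative): roughly what the factory builds for mode="normal".
// null = no user dictionary, true = discard punctuation.
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.ja.JapaneseTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class NormalModeDemo {
  public static void main(String[] args) throws IOException {
    JapaneseTokenizer tok = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.NORMAL);
    tok.setReader(new StringReader("シニアソフトウェアエンジニア"));
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    tok.reset();
    while (tok.incrementToken()) {
      System.out.println(term);   // NORMAL mode: the whole compound as one token
    }
    tok.end();
    tok.close();
  }
}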
Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by apache: class TestJapaneseTokenizerFactory, method testSimple.
public void testSimple() throws IOException {
  JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(new HashMap<String, String>());
  factory.inform(new StringMockResourceLoader(""));
  TokenStream ts = factory.create(newAttributeFactory());
  ((Tokenizer) ts).setReader(new StringReader("これは本ではない"));
  assertTokenStreamContents(ts,
      new String[] { "これ", "は", "本", "で", "は", "ない" },
      new int[] { 0, 2, 3, 4, 5, 6 },
      new int[] { 2, 3, 4, 5, 6, 8 });
}
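The assertion above also checks start and end character offsets (the second and third arrays). As an illustration only, the helper below reads the same attributes directly; the class and method names are assumptions, not part of the test.

// Illustrative helper (not from the test class): prints each term with the start/end
// offsets that assertTokenStreamContents verifies. For the input above, "本" prints as 本 [3,4).
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class TokenDumper {
  static void dump(TokenStream ts) throws IOException {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
    }
    ts.end();
    ts.close();
  }
}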
Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by apache: class TestJapaneseTokenizerFactory, method testDefaults.
/**
* Test that search mode is enabled and working by default
*/
public void testDefaults() throws IOException {
  JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(new HashMap<String, String>());
  factory.inform(new StringMockResourceLoader(""));
  TokenStream ts = factory.create(newAttributeFactory());
  ((Tokenizer) ts).setReader(new StringReader("シニアソフトウェアエンジニア"));
  assertTokenStreamContents(ts, new String[] { "シニア", "シニアソフトウェアエンジニア", "ソフトウェア", "エンジニア" });
}
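Since search mode is the default, passing the mode explicitly should give the same output. The sketch below is a hedged variant written in the style of the tests above; the method name is hypothetical and "search" as the argument value mirrors "normal" in testMode.

// Sketch of an explicit-mode variant (assumption: "search" is accepted the same way
// "normal" is above). Expected output matches testDefaults.
public void testExplicitSearchMode() throws IOException {
  Map<String, String> args = new HashMap<>();
  args.put("mode", "search");
  JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args);
  factory.inform(new StringMockResourceLoader(""));
  TokenStream ts = factory.create(newAttributeFactory());
  ((Tokenizer) ts).setReader(new StringReader("シニアソフトウェアエンジニア"));
  assertTokenStreamContents(ts, new String[] { "シニア", "シニアソフトウェアエンジニア", "ソフトウェア", "エンジニア" });
}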
Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by apache: class AnalyzerFactory, method create.
public Analyzer create() {
  return new Analyzer() {
    private final Integer positionIncrementGap = AnalyzerFactory.this.positionIncrementGap;
    private final Integer offsetGap = AnalyzerFactory.this.offsetGap;

    @Override
    public Reader initReader(String fieldName, Reader reader) {
      if (charFilterFactories != null && charFilterFactories.size() > 0) {
        Reader wrappedReader = reader;
        for (CharFilterFactory charFilterFactory : charFilterFactories) {
          wrappedReader = charFilterFactory.create(wrappedReader);
        }
        reader = wrappedReader;
      }
      return reader;
    }

    @Override
    protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer tokenizer = tokenizerFactory.create();
      TokenStream tokenStream = tokenizer;
      for (TokenFilterFactory filterFactory : tokenFilterFactories) {
        tokenStream = filterFactory.create(tokenStream);
      }
      return new TokenStreamComponents(tokenizer, tokenStream);
    }

    @Override
    public int getPositionIncrementGap(String fieldName) {
      return null == positionIncrementGap ? super.getPositionIncrementGap(fieldName) : positionIncrementGap;
    }

    @Override
    public int getOffsetGap(String fieldName) {
      return null == offsetGap ? super.getOffsetGap(fieldName) : offsetGap;
    }
  };
}
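For context, factories like the ones consumed here can also be resolved by their SPI names. The sketch below builds an Analyzer of the same shape by hand; the factory names "standard" and "lowercase", the empty argument maps, and the class name are assumptions for illustration, not taken from AnalyzerFactory itself.

// Minimal sketch (illustrative): composing an Analyzer from analysis factories looked up
// by SPI name, in the same shape as the anonymous Analyzer above.
import java.util.HashMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;

public class FactoryComposedAnalyzer {
  public static Analyzer build() {
    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = TokenizerFactory.forName("standard", new HashMap<>()).create();
        TokenStream stream = TokenFilterFactory.forName("lowercase", new HashMap<>()).create(tokenizer);
        return new TokenStreamComponents(tokenizer, stream);
      }
    };
  }
}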