Use of org.apache.lucene.analysis.util.TokenFilterFactory in the project jackrabbit-oak by Apache.
Class TokenizerChain, method toString().
@Override
public String toString() {
// Renders as "TokenizerChain(cf1, cf2, tokenizer, tf1, tf2)":
// char filters first, then the tokenizer, then the token filters.
StringBuilder out = new StringBuilder("TokenizerChain(");
for (CharFilterFactory charFilter : charFilters) {
out.append(charFilter).append(", ");
}
out.append(tokenizer);
for (TokenFilterFactory tokenFilter : filters) {
out.append(", ").append(tokenFilter);
}
return out.append(')').toString();
}
Use of org.apache.lucene.analysis.util.TokenFilterFactory in the project jackrabbit-oak by Apache.
Class TokenizerChain, method createComponents().
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
// Build the analysis chain: the tokenizer is the source, and each
// configured token filter wraps the stream produced by the previous stage.
final Tokenizer source = tokenizer.create(reader);
TokenStream sink = source;
for (TokenFilterFactory factory : filters) {
sink = factory.create(sink);
}
return new TokenStreamComponents(source, sink);
}
Use of org.apache.lucene.analysis.util.TokenFilterFactory in the project jackrabbit-oak by Apache.
Class NodeStateAnalyzerFactory, method composeAnalyzer().
private Analyzer composeAnalyzer(NodeState state) {
// Assemble the analyzer pieces from the child nodes of the index
// definition: char filters, the tokenizer, and the token filters.
CharFilterFactory[] charFilterFactories =
loadCharFilterFactories(state.getChildNode(LuceneIndexConstants.ANL_CHAR_FILTERS));
TokenizerFactory tokenizerFactory =
loadTokenizer(state.getChildNode(LuceneIndexConstants.ANL_TOKENIZER));
TokenFilterFactory[] tokenFilterFactories =
loadTokenFilterFactories(state.getChildNode(LuceneIndexConstants.ANL_FILTERS));
return new TokenizerChain(charFilterFactories, tokenizerFactory, tokenFilterFactories);
}
Use of org.apache.lucene.analysis.util.TokenFilterFactory in the project lucene-solr by Apache.
Class TestAsciiFoldingFilterFactory, method testMultiTermAnalysis().
public void testMultiTermAnalysis() throws IOException {
// Default configuration: the accented token is replaced by its folded form.
TokenFilterFactory factory = new ASCIIFoldingFilterFactory(Collections.emptyMap());
TokenStream stream = factory.create(new CannedTokenStream(new Token("Été", 0, 3)));
assertTokenStreamContents(stream, new String[] { "Ete" });
// The multi-term component derived from the default factory folds the same way.
factory = (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
stream = factory.create(new CannedTokenStream(new Token("Été", 0, 3)));
assertTokenStreamContents(stream, new String[] { "Ete" });
// With preserveOriginal=true, both the folded and the original token are emitted.
factory = new ASCIIFoldingFilterFactory(new HashMap<>(Collections.singletonMap("preserveOriginal", "true")));
stream = factory.create(new CannedTokenStream(new Token("Été", 0, 3)));
assertTokenStreamContents(stream, new String[] { "Ete", "Été" });
// ...but its multi-term component still produces only the folded form.
factory = (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
stream = factory.create(new CannedTokenStream(new Token("Été", 0, 3)));
assertTokenStreamContents(stream, new String[] { "Ete" });
}
Use of org.apache.lucene.analysis.util.TokenFilterFactory in the project lucene-solr by Apache.
Class TestSynonymFilterFactory, method testAnalyzer().
/** Verifies that the "analyzer" and "tokenizerFactory" arguments cannot both be specified. */
public void testAnalyzer() throws Exception {
final String analyzer = CJKAnalyzer.class.getName();
final String tokenizerFactory = PatternTokenizerFactory.class.getName();
// Supplying only "analyzer" is accepted and yields a usable factory.
TokenFilterFactory factory = tokenFilterFactory("Synonym", "synonyms", "synonyms2.txt", "analyzer", analyzer);
assertNotNull(factory);
// Supplying both "analyzer" and "tokenizerFactory" must be rejected.
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class,
() -> tokenFilterFactory("Synonym", "synonyms", "synonyms.txt", "analyzer", analyzer, "tokenizerFactory", tokenizerFactory));
assertTrue(expected.getMessage().contains("Analyzer and TokenizerFactory can't be specified both"));
}
Aggregations