
Example 1 with TokenFilterFactory

Use of org.apache.lucene.analysis.util.TokenFilterFactory in project jackrabbit-oak by Apache.

From the class TokenizerChain, method toString().

@Override
public String toString() {
    StringBuilder sb = new StringBuilder("TokenizerChain(");
    for (CharFilterFactory filter : charFilters) {
        sb.append(filter);
        sb.append(", ");
    }
    sb.append(tokenizer);
    for (TokenFilterFactory filter : filters) {
        sb.append(", ");
        sb.append(filter);
    }
    sb.append(')');
    return sb.toString();
}
Also used: CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory), TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory)
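
A minimal companion sketch (not from the Oak sources) of what this toString() produces. It assumes the three-argument TokenizerChain constructor shown in Example 3, an empty char-filter array, and two factories that do not require a luceneMatchVersion argument; the class name and factory choices here are illustrative only.

import java.util.HashMap;

import org.apache.jackrabbit.oak.plugins.index.lucene.util.TokenizerChain;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;

public class TokenizerChainToStringDemo {
    public static void main(String[] args) {
        // No char filters; a keyword tokenizer followed by an ASCII-folding token filter.
        TokenizerChain chain = new TokenizerChain(
                new CharFilterFactory[0],
                new KeywordTokenizerFactory(new HashMap<>()),
                new TokenFilterFactory[] { new ASCIIFoldingFilterFactory(new HashMap<>()) });
        // Prints something like (factories use the default Object.toString()):
        // TokenizerChain(org.apache.lucene.analysis.core.KeywordTokenizerFactory@1b2c3d, org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory@4e5f6a)
        System.out.println(chain);
    }
}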

Example 2 with TokenFilterFactory

Use of org.apache.lucene.analysis.util.TokenFilterFactory in project jackrabbit-oak by Apache.

From the class TokenizerChain, method createComponents().

@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer tk = tokenizer.create(reader);
    TokenStream ts = tk;
    for (TokenFilterFactory filter : filters) {
        ts = filter.create(ts);
    }
    return new TokenStreamComponents(tk, ts);
}
Also used: TokenStream (org.apache.lucene.analysis.TokenStream), Tokenizer (org.apache.lucene.analysis.Tokenizer), TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory)
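
To see what createComponents() wires together, here is a hedged sketch of the standard Lucene consumer loop over any Analyzer built this way; the helper name is illustrative and not part of the Oak sources. Analyzer.tokenStream() invokes createComponents() internally (cached per thread), so the terms returned below are exactly what the configured tokenizer-plus-filters chain produces.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenDump {
    /** Runs the analyzer over the text and returns the produced terms. */
    static List<String> terms(Analyzer analyzer, String text) throws IOException {
        List<String> out = new ArrayList<>();
        // Standard TokenStream contract: reset, incrementToken in a loop, end, close.
        try (TokenStream ts = analyzer.tokenStream("field", text)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                out.add(term.toString());
            }
            ts.end();
        }
        return out;
    }
}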

Example 3 with TokenFilterFactory

Use of org.apache.lucene.analysis.util.TokenFilterFactory in project jackrabbit-oak by Apache.

From the class NodeStateAnalyzerFactory, method composeAnalyzer().

private Analyzer composeAnalyzer(NodeState state) {
    // Compose an Analyzer from the tokenizer, char-filter and token-filter
    // child nodes of the analyzer configuration node.
    TokenizerFactory tf = loadTokenizer(state.getChildNode(LuceneIndexConstants.ANL_TOKENIZER));
    CharFilterFactory[] cfs = loadCharFilterFactories(state.getChildNode(LuceneIndexConstants.ANL_CHAR_FILTERS));
    TokenFilterFactory[] tffs = loadTokenFilterFactories(state.getChildNode(LuceneIndexConstants.ANL_FILTERS));
    return new TokenizerChain(cfs, tf, tffs);
}
Also used: TokenizerFactory (org.apache.lucene.analysis.util.TokenizerFactory), TokenizerChain (org.apache.jackrabbit.oak.plugins.index.lucene.util.TokenizerChain), CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory), TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory)
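
composeAnalyzer() resolves its factories from node state; for comparison, here is a hedged sketch of composing a similar chain by hand through Lucene's name-based SPI lookups (CharFilterFactory.forName, TokenizerFactory.forName, TokenFilterFactory.forName). The chosen factory names and parameters are illustrative, not what any particular Oak index definition uses; note that each factory consumes (removes) the entries it understands from its args map.

import java.util.HashMap;
import java.util.Map;

import org.apache.jackrabbit.oak.plugins.index.lucene.util.TokenizerChain;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;

public class ManualChainComposition {
    public static Analyzer composeByName() {
        // SPI lookup by short name (case-insensitive).
        CharFilterFactory htmlStrip = CharFilterFactory.forName("HTMLStrip", new HashMap<>());
        TokenizerFactory tokenizer = TokenizerFactory.forName("Keyword", new HashMap<>());
        Map<String, String> foldArgs = new HashMap<>();
        foldArgs.put("preserveOriginal", "false");
        TokenFilterFactory folding = TokenFilterFactory.forName("ASCIIFolding", foldArgs);
        return new TokenizerChain(
                new CharFilterFactory[] { htmlStrip },
                tokenizer,
                new TokenFilterFactory[] { folding });
    }
}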

Example 4 with TokenFilterFactory

Use of org.apache.lucene.analysis.util.TokenFilterFactory in project lucene-solr by Apache.

From the class TestAsciiFoldingFilterFactory, method testMultiTermAnalysis().

public void testMultiTermAnalysis() throws IOException {
    // Default factory: the accented token is folded to its ASCII form.
    TokenFilterFactory factory = new ASCIIFoldingFilterFactory(Collections.emptyMap());
    TokenStream stream = new CannedTokenStream(new Token("Été", 0, 3));
    stream = factory.create(stream);
    assertTokenStreamContents(stream, new String[] { "Ete" });
    // Its multi-term component behaves the same way.
    factory = (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
    stream = new CannedTokenStream(new Token("Été", 0, 3));
    stream = factory.create(stream);
    assertTokenStreamContents(stream, new String[] { "Ete" });
    // With preserveOriginal=true the regular factory emits the folded term plus the original...
    factory = new ASCIIFoldingFilterFactory(new HashMap<>(Collections.singletonMap("preserveOriginal", "true")));
    stream = new CannedTokenStream(new Token("Été", 0, 3));
    stream = factory.create(stream);
    assertTokenStreamContents(stream, new String[] { "Ete", "Été" });
    // ...but its multi-term component still emits only the folded form.
    factory = (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
    stream = new CannedTokenStream(new Token("Été", 0, 3));
    stream = factory.create(stream);
    assertTokenStreamContents(stream, new String[] { "Ete" });
}
Also used: CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream), TokenStream (org.apache.lucene.analysis.TokenStream), MultiTermAwareComponent (org.apache.lucene.analysis.util.MultiTermAwareComponent), HashMap (java.util.HashMap), Token (org.apache.lucene.analysis.Token), TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory)
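
A standalone sketch of the same behaviour outside the test harness, using the same CannedTokenStream idiom from Lucene's test framework plus a plain consumer loop. The point the test makes is that the multi-term component, used for normalizing wildcard/prefix query terms, ignores preserveOriginal and emits only the folded form; the class and method names below are illustrative.

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;

public class MultiTermFoldingDemo {
    // Consumes a TokenStream and prints each produced term.
    static void dump(TokenStream ts) throws IOException {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.print(term.toString() + " ");
        }
        ts.end();
        ts.close();
        System.out.println();
    }

    public static void main(String[] args) throws IOException {
        TokenFilterFactory factory = new ASCIIFoldingFilterFactory(
                new HashMap<>(Collections.singletonMap("preserveOriginal", "true")));
        // Index-time chain: folded term plus the preserved original.
        dump(factory.create(new CannedTokenStream(new Token("Été", 0, 3)))); // Ete Été

        // Multi-term (query-time) variant: only the folded term.
        TokenFilterFactory multiTerm =
                (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
        dump(multiTerm.create(new CannedTokenStream(new Token("Été", 0, 3)))); // Ete
    }
}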

Example 5 with TokenFilterFactory

Use of org.apache.lucene.analysis.util.TokenFilterFactory in project lucene-solr by Apache.

From the class TestSynonymFilterFactory, method testAnalyzer().

/** Test that analyzer and tokenizerFactory cannot both be specified. */
public void testAnalyzer() throws Exception {
    final String analyzer = CJKAnalyzer.class.getName();
    final String tokenizerFactory = PatternTokenizerFactory.class.getName();
    TokenFilterFactory factory = null;
    factory = tokenFilterFactory("Synonym", "synonyms", "synonyms2.txt", "analyzer", analyzer);
    assertNotNull(factory);
    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
        tokenFilterFactory("Synonym", "synonyms", "synonyms.txt", "analyzer", analyzer, "tokenizerFactory", tokenizerFactory);
    });
    assertTrue(expected.getMessage().contains("Analyzer and TokenizerFactory can't be specified both"));
}
Also used: TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory)
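
For context, a rough standalone sketch of the same constraint outside the test harness (the test's tokenFilterFactory() helper is private to Lucene's test framework). It assumes a Lucene version in which SynonymFilterFactory validates its args map in the constructor; the synonyms file is only resolved later via inform(ResourceLoader), so it does not need to exist for the construction itself, and older Lucene versions may additionally require a "luceneMatchVersion" entry in the args map. The class name below is illustrative.

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.pattern.PatternTokenizerFactory;
import org.apache.lucene.analysis.synonym.SynonymFilterFactory;

public class SynonymFactoryArgsDemo {
    public static void main(String[] args) {
        // Valid: synonym rules will later be parsed with a full Analyzer.
        Map<String, String> ok = new HashMap<>();
        ok.put("synonyms", "synonyms.txt"); // loaded later via inform(ResourceLoader)
        ok.put("analyzer", CJKAnalyzer.class.getName());
        SynonymFilterFactory valid = new SynonymFilterFactory(ok);
        System.out.println("constructed: " + valid);

        // Invalid: "analyzer" and "tokenizerFactory" are mutually exclusive.
        Map<String, String> bad = new HashMap<>();
        bad.put("synonyms", "synonyms.txt");
        bad.put("analyzer", CJKAnalyzer.class.getName());
        bad.put("tokenizerFactory", PatternTokenizerFactory.class.getName());
        try {
            new SynonymFilterFactory(bad);
        } catch (IllegalArgumentException expected) {
            System.out.println(expected.getMessage()); // "...can't be specified both..."
        }
    }
}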

Aggregations

TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory): 40 usages
CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory): 16 usages
Analyzer (org.apache.lucene.analysis.Analyzer): 12 usages
TokenizerChain (org.apache.solr.analysis.TokenizerChain): 12 usages
TokenizerFactory (org.apache.lucene.analysis.util.TokenizerFactory): 11 usages
TokenStream (org.apache.lucene.analysis.TokenStream): 10 usages
ArrayList (java.util.ArrayList): 7 usages
HashMap (java.util.HashMap): 7 usages
Tokenizer (org.apache.lucene.analysis.Tokenizer): 6 usages
MultiTermAwareComponent (org.apache.lucene.analysis.util.MultiTermAwareComponent): 5 usages
IOException (java.io.IOException): 4 usages
StringReader (java.io.StringReader): 4 usages
Test (org.junit.Test): 4 usages
Reader (java.io.Reader): 3 usages
Map (java.util.Map): 3 usages
CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 3 usages
KeywordTokenizerFactory (org.apache.lucene.analysis.core.KeywordTokenizerFactory): 3 usages
ResourceLoaderAware (org.apache.lucene.analysis.util.ResourceLoaderAware): 3 usages
SolrException (org.apache.solr.common.SolrException): 3 usages
JsonElement (com.google.gson.JsonElement): 2 usages