Search in sources :

Example 21 with CharFilterFactory

use of org.apache.lucene.analysis.util.CharFilterFactory in project lucene-solr by apache.

the class TestFactories method doTestCharFilter.

private void doTestCharFilter(String charfilter) throws IOException {
    Class<? extends CharFilterFactory> factoryClazz = CharFilterFactory.lookupClass(charfilter);
    CharFilterFactory factory = (CharFilterFactory) initialize(factoryClazz);
    if (factory != null) {
        // if it implements MultiTermAware, sanity check its impl
        if (factory instanceof MultiTermAwareComponent) {
            AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
            assertNotNull(mtc);
            // it's not ok to return a tokenizer or tokenfilter here, this makes no sense
            assertTrue(mtc instanceof CharFilterFactory);
        }
        // beast it just a little, it shouldnt throw exceptions:
        // (it should have thrown them in initialize)
        Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
        checkRandomData(random(), a, 20, 20, false, false);
        a.close();
    }
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) AbstractAnalysisFactory(org.apache.lucene.analysis.util.AbstractAnalysisFactory) Analyzer(org.apache.lucene.analysis.Analyzer)

Example 22 with CharFilterFactory

use of org.apache.lucene.analysis.util.CharFilterFactory in project lucene-solr by apache.

the class FieldTypePluginLoader method constructMultiTermAnalyzer.

// The point here is that, if no multiterm analyzer was specified in the schema file, do one of several things:
// 1> If legacyMultiTerm == false, assemble a new analyzer composed of all of the charfilters,
//    lowercase filters and asciifoldingfilter.
// 2> If legacyMultiTerm == true just construct the analyzer from a KeywordTokenizer. That should mimic current behavior.
//    Do the same if they've specified that the old behavior is required (legacyMultiTerm="true")
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) {
    if (queryAnalyzer == null)
        return null;
    if (!(queryAnalyzer instanceof TokenizerChain)) {
        return new KeywordAnalyzer();
    }
    TokenizerChain tc = (TokenizerChain) queryAnalyzer;
    MultiTermChainBuilder builder = new MultiTermChainBuilder();
    CharFilterFactory[] charFactories = tc.getCharFilterFactories();
    for (CharFilterFactory fact : charFactories) {
        builder.add(fact);
    }
    builder.add(tc.getTokenizerFactory());
    for (TokenFilterFactory fact : tc.getTokenFilterFactories()) {
        builder.add(fact);
    }
    return builder.build();
}
Also used : KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) TokenizerChain(org.apache.solr.analysis.TokenizerChain) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory)

Example 23 with CharFilterFactory

use of org.apache.lucene.analysis.util.CharFilterFactory in project lucene-solr by apache.

the class AnalyzerFactory method toString.

@Override
public String toString() {
    StringBuilder sb = new StringBuilder("AnalyzerFactory(");
    if (null != name) {
        sb.append("name:");
        sb.append(name);
        sb.append(", ");
    }
    if (null != positionIncrementGap) {
        sb.append("positionIncrementGap:");
        sb.append(positionIncrementGap);
        sb.append(", ");
    }
    if (null != offsetGap) {
        sb.append("offsetGap:");
        sb.append(offsetGap);
        sb.append(", ");
    }
    for (CharFilterFactory charFilterFactory : charFilterFactories) {
        sb.append(charFilterFactory);
        sb.append(", ");
    }
    sb.append(tokenizerFactory);
    for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
        sb.append(", ");
        sb.append(tokenFilterFactory);
    }
    sb.append(')');
    return sb.toString();
}
Also used : CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory)

Example 24 with CharFilterFactory

use of org.apache.lucene.analysis.util.CharFilterFactory in project lucene-solr by apache.

the class TestAllAnalyzersHaveFactories method test.

public void test() throws Exception {
    List<Class<?>> analysisClasses = TestRandomChains.getClassesForPackage("org.apache.lucene.analysis");
    for (final Class<?> c : analysisClasses) {
        final int modifiers = c.getModifiers();
        if (// don't waste time with abstract classes
        Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers) || c.isSynthetic() || c.isAnonymousClass() || c.isMemberClass() || c.isInterface() || testComponents.contains(c) || crazyComponents.contains(c) || oddlyNamedComponents.contains(c) || tokenFiltersWithoutFactory.contains(c) || // deprecated ones are typically back compat hacks
        c.isAnnotationPresent(Deprecated.class) || !(Tokenizer.class.isAssignableFrom(c) || TokenFilter.class.isAssignableFrom(c) || CharFilter.class.isAssignableFrom(c))) {
            continue;
        }
        Map<String, String> args = new HashMap<>();
        args.put("luceneMatchVersion", Version.LATEST.toString());
        if (Tokenizer.class.isAssignableFrom(c)) {
            String clazzName = c.getSimpleName();
            assertTrue(clazzName.endsWith("Tokenizer"));
            String simpleName = clazzName.substring(0, clazzName.length() - 9);
            assertNotNull(TokenizerFactory.lookupClass(simpleName));
            TokenizerFactory instance = null;
            try {
                instance = TokenizerFactory.forName(simpleName, args);
                assertNotNull(instance);
                if (instance instanceof ResourceLoaderAware) {
                    ((ResourceLoaderAware) instance).inform(loader);
                }
                assertSame(c, instance.create().getClass());
            } catch (IllegalArgumentException e) {
            // TODO: For now pass because some factories have not yet a default config that always works
            }
        } else if (TokenFilter.class.isAssignableFrom(c)) {
            String clazzName = c.getSimpleName();
            assertTrue(clazzName.endsWith("Filter"));
            String simpleName = clazzName.substring(0, clazzName.length() - (clazzName.endsWith("TokenFilter") ? 11 : 6));
            assertNotNull(TokenFilterFactory.lookupClass(simpleName));
            TokenFilterFactory instance = null;
            try {
                instance = TokenFilterFactory.forName(simpleName, args);
                assertNotNull(instance);
                if (instance instanceof ResourceLoaderAware) {
                    ((ResourceLoaderAware) instance).inform(loader);
                }
                Class<? extends TokenStream> createdClazz = instance.create(new KeywordTokenizer()).getClass();
                // only check instance if factory have wrapped at all!
                if (KeywordTokenizer.class != createdClazz) {
                    assertSame(c, createdClazz);
                }
            } catch (IllegalArgumentException e) {
            // TODO: For now pass because some factories have not yet a default config that always works
            }
        } else if (CharFilter.class.isAssignableFrom(c)) {
            String clazzName = c.getSimpleName();
            assertTrue(clazzName.endsWith("CharFilter"));
            String simpleName = clazzName.substring(0, clazzName.length() - 10);
            assertNotNull(CharFilterFactory.lookupClass(simpleName));
            CharFilterFactory instance = null;
            try {
                instance = CharFilterFactory.forName(simpleName, args);
                assertNotNull(instance);
                if (instance instanceof ResourceLoaderAware) {
                    ((ResourceLoaderAware) instance).inform(loader);
                }
                Class<? extends Reader> createdClazz = instance.create(new StringReader("")).getClass();
                // only check instance if factory have wrapped at all!
                if (StringReader.class != createdClazz) {
                    assertSame(c, createdClazz);
                }
            } catch (IllegalArgumentException e) {
            // TODO: For now pass because some factories have not yet a default config that always works
            }
        }
    }
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) HashMap(java.util.HashMap) IdentityHashMap(java.util.IdentityHashMap) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory) StringReader(java.io.StringReader) ResourceLoaderAware(org.apache.lucene.analysis.util.ResourceLoaderAware) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) ReversePathHierarchyTokenizer(org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer) TeeSinkTokenFilter(org.apache.lucene.analysis.sinks.TeeSinkTokenFilter) CachingTokenFilter(org.apache.lucene.analysis.CachingTokenFilter) MockGraphTokenFilter(org.apache.lucene.analysis.MockGraphTokenFilter) ValidatingTokenFilter(org.apache.lucene.analysis.ValidatingTokenFilter) MockRandomLookaheadTokenFilter(org.apache.lucene.analysis.MockRandomLookaheadTokenFilter) CrankyTokenFilter(org.apache.lucene.analysis.CrankyTokenFilter) MockTokenFilter(org.apache.lucene.analysis.MockTokenFilter) TokenFilter(org.apache.lucene.analysis.TokenFilter) MockHoleInjectingTokenFilter(org.apache.lucene.analysis.MockHoleInjectingTokenFilter)

Example 25 with CharFilterFactory

use of org.apache.lucene.analysis.util.CharFilterFactory in project lucene-solr by apache.

the class TestFactories method doTestCharFilter.

private void doTestCharFilter(String charfilter) throws IOException {
    Class<? extends CharFilterFactory> factoryClazz = CharFilterFactory.lookupClass(charfilter);
    CharFilterFactory factory = (CharFilterFactory) initialize(factoryClazz);
    if (factory != null) {
        // if it implements MultiTermAware, sanity check its impl
        if (factory instanceof MultiTermAwareComponent) {
            AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
            assertNotNull(mtc);
            // it's not ok to return a tokenizer or tokenfilter here, this makes no sense
            assertTrue(mtc instanceof CharFilterFactory);
        }
        // beast it just a little, it shouldnt throw exceptions:
        // (it should have thrown them in initialize)
        Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
        checkRandomData(random(), a, 20, 20, false, false);
        a.close();
    }
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) AbstractAnalysisFactory(org.apache.lucene.analysis.util.AbstractAnalysisFactory) Analyzer(org.apache.lucene.analysis.Analyzer)

Aggregations

CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory)26 TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory)16 TokenizerFactory (org.apache.lucene.analysis.util.TokenizerFactory)12 Analyzer (org.apache.lucene.analysis.Analyzer)7 MultiTermAwareComponent (org.apache.lucene.analysis.util.MultiTermAwareComponent)6 TokenizerChain (org.apache.solr.analysis.TokenizerChain)5 Reader (java.io.Reader)4 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 AbstractAnalysisFactory (org.apache.lucene.analysis.util.AbstractAnalysisFactory)4 StringReader (java.io.StringReader)3 Map (java.util.Map)3 TokenStream (org.apache.lucene.analysis.TokenStream)3 Tokenizer (org.apache.lucene.analysis.Tokenizer)3 ResourceLoaderAware (org.apache.lucene.analysis.util.ResourceLoaderAware)3 SolrException (org.apache.solr.common.SolrException)3 JsonElement (com.google.gson.JsonElement)2 JsonObject (com.google.gson.JsonObject)2 IOException (java.io.IOException)2 KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer)2