Search in sources :

Example 6 with MultiTermAwareComponent

use of org.apache.lucene.analysis.util.MultiTermAwareComponent in project lucene-solr by apache.

the class TestFactories method doTestTokenizer.

/**
 * Looks up the tokenizer factory registered under {@code tokenizer}, instantiates it through
 * {@code initialize}, and smoke-tests it with random data.
 *
 * <p>If {@code initialize} returns {@code null} the method does nothing further. When the factory
 * is a {@link MultiTermAwareComponent}, its multi-term component must be non-null and must not be
 * a {@link CharFilterFactory}.
 *
 * @param tokenizer name handed to {@code TokenizerFactory.lookupClass}
 * @throws IOException propagated from the analysis calls made while exercising the factory
 */
private void doTestTokenizer(String tokenizer) throws IOException {
    Class<? extends TokenizerFactory> factoryClazz = TokenizerFactory.lookupClass(tokenizer);
    TokenizerFactory factory = (TokenizerFactory) initialize(factoryClazz);
    if (factory == null) {
        // NOTE(review): presumably initialize() returns null for factories it cannot build
        // with default arguments -- confirm against initialize().
        return;
    }
    // if it implements MultiTermAware, sanity check its impl
    if (factory instanceof MultiTermAwareComponent) {
        AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
        assertNotNull(mtc);
        // it's not ok to return e.g. a charfilter here: but a tokenizer could wrap a filter around it
        assertFalse(mtc instanceof CharFilterFactory);
    }
    // beast it just a little, it shouldn't throw exceptions:
    // (it should have thrown them in initialize)
    // try-with-resources so the analyzer is closed even when checkRandomData fails.
    try (Analyzer a = new FactoryAnalyzer(factory, null, null)) {
        checkRandomData(random(), a, 20, 20, false, false);
    }
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) AbstractAnalysisFactory(org.apache.lucene.analysis.util.AbstractAnalysisFactory) Analyzer(org.apache.lucene.analysis.Analyzer)

Example 7 with MultiTermAwareComponent

use of org.apache.lucene.analysis.util.MultiTermAwareComponent in project lucene-solr by apache.

the class CustomAnalyzer method normalize.

/**
 * Builds the normalization chain by wrapping {@code in} with the multi-term component of every
 * configured token filter that implements {@link MultiTermAwareComponent}, in iteration order.
 * Filters that are not multi-term aware are skipped.
 *
 * @param fieldName not used by this implementation
 * @param in the stream to wrap
 * @return the wrapped stream, or {@code in} itself when no filter is multi-term aware
 */
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
    TokenStream stream = in;
    for (TokenFilterFactory candidate : tokenFilters) {
        if (!(candidate instanceof MultiTermAwareComponent)) {
            continue;
        }
        MultiTermAwareComponent aware = (MultiTermAwareComponent) candidate;
        TokenFilterFactory multiTermFilter = (TokenFilterFactory) aware.getMultiTermComponent();
        stream = multiTermFilter.create(stream);
    }
    return stream;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory)

Example 8 with MultiTermAwareComponent

use of org.apache.lucene.analysis.util.MultiTermAwareComponent in project lucene-solr by apache.

the class TokenizerChain method initReaderForNormalization.

/**
 * Wraps {@code reader} with the multi-term component of every configured char filter that
 * implements {@link MultiTermAwareComponent}, in array order. Char filters that are not
 * multi-term aware are skipped.
 *
 * @param fieldName not used by this implementation
 * @param reader the reader to wrap
 * @return the wrapped reader, or {@code reader} unchanged when there are no char filters or none
 *     of them is multi-term aware
 */
@Override
protected Reader initReaderForNormalization(String fieldName, Reader reader) {
    if (charFilters == null || charFilters.length == 0) {
        return reader;
    }
    Reader wrapped = reader;
    for (CharFilterFactory candidate : charFilters) {
        if (!(candidate instanceof MultiTermAwareComponent)) {
            continue;
        }
        MultiTermAwareComponent aware = (MultiTermAwareComponent) candidate;
        CharFilterFactory multiTermCharFilter = (CharFilterFactory) aware.getMultiTermComponent();
        wrapped = multiTermCharFilter.create(wrapped);
    }
    return wrapped;
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory)

Example 9 with MultiTermAwareComponent

use of org.apache.lucene.analysis.util.MultiTermAwareComponent in project lucene-solr by apache.

the class TestFactories method doTestTokenFilter.

/**
 * Looks up the token filter factory registered under {@code tokenfilter}, instantiates it through
 * {@code initialize}, and smoke-tests it with random data behind {@code assertingTokenizer}.
 *
 * <p>If {@code initialize} returns {@code null} the method does nothing further. When the factory
 * is a {@link MultiTermAwareComponent}, its multi-term component must be non-null and must itself
 * be a {@link TokenFilterFactory}.
 *
 * @param tokenfilter name handed to {@code TokenFilterFactory.lookupClass}
 * @throws IOException propagated from the analysis calls made while exercising the factory
 */
private void doTestTokenFilter(String tokenfilter) throws IOException {
    Class<? extends TokenFilterFactory> factoryClazz = TokenFilterFactory.lookupClass(tokenfilter);
    TokenFilterFactory factory = (TokenFilterFactory) initialize(factoryClazz);
    if (factory == null) {
        // NOTE(review): presumably initialize() returns null for factories it cannot build
        // with default arguments -- confirm against initialize().
        return;
    }
    // if it implements MultiTermAware, sanity check its impl
    if (factory instanceof MultiTermAwareComponent) {
        AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
        assertNotNull(mtc);
        // it's not ok to return a charfilter or tokenizer here, this makes no sense
        assertTrue(mtc instanceof TokenFilterFactory);
    }
    // beast it just a little, it shouldn't throw exceptions:
    // (it should have thrown them in initialize)
    // try-with-resources so the analyzer is closed even when checkRandomData fails.
    try (Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null)) {
        checkRandomData(random(), a, 20, 20, false, false);
    }
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) AbstractAnalysisFactory(org.apache.lucene.analysis.util.AbstractAnalysisFactory) Analyzer(org.apache.lucene.analysis.Analyzer) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory)

Example 10 with MultiTermAwareComponent

use of org.apache.lucene.analysis.util.MultiTermAwareComponent in project lucene-solr by apache.

the class TestFactories method doTestCharFilter.

/**
 * Looks up the char filter factory registered under {@code charfilter}, instantiates it through
 * {@code initialize}, and smoke-tests it with random data in front of {@code assertingTokenizer}.
 *
 * <p>If {@code initialize} returns {@code null} the method does nothing further. When the factory
 * is a {@link MultiTermAwareComponent}, its multi-term component must be non-null and must itself
 * be a {@link CharFilterFactory}.
 *
 * @param charfilter name handed to {@code CharFilterFactory.lookupClass}
 * @throws IOException propagated from the analysis calls made while exercising the factory
 */
private void doTestCharFilter(String charfilter) throws IOException {
    Class<? extends CharFilterFactory> factoryClazz = CharFilterFactory.lookupClass(charfilter);
    CharFilterFactory factory = (CharFilterFactory) initialize(factoryClazz);
    if (factory == null) {
        // NOTE(review): presumably initialize() returns null for factories it cannot build
        // with default arguments -- confirm against initialize().
        return;
    }
    // if it implements MultiTermAware, sanity check its impl
    if (factory instanceof MultiTermAwareComponent) {
        AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
        assertNotNull(mtc);
        // it's not ok to return a tokenizer or tokenfilter here, this makes no sense
        assertTrue(mtc instanceof CharFilterFactory);
    }
    // beast it just a little, it shouldn't throw exceptions:
    // (it should have thrown them in initialize)
    // try-with-resources so the analyzer is closed even when checkRandomData fails.
    try (Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory)) {
        checkRandomData(random(), a, 20, 20, false, false);
    }
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) AbstractAnalysisFactory(org.apache.lucene.analysis.util.AbstractAnalysisFactory) Analyzer(org.apache.lucene.analysis.Analyzer)

Aggregations

MultiTermAwareComponent (org.apache.lucene.analysis.util.MultiTermAwareComponent): 11, Analyzer (org.apache.lucene.analysis.Analyzer): 6, AbstractAnalysisFactory (org.apache.lucene.analysis.util.AbstractAnalysisFactory): 6, CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory): 6, TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory): 5, TokenStream (org.apache.lucene.analysis.TokenStream): 2, TokenizerFactory (org.apache.lucene.analysis.util.TokenizerFactory): 2, HashMap (java.util.HashMap): 1, CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 1, Token (org.apache.lucene.analysis.Token): 1