Search in sources :

Example 1 with MultiTermAwareComponent

use of org.apache.lucene.analysis.util.MultiTermAwareComponent in project lucene-solr by apache.

the class TokenizerChain method normalize.

/**
 * Wraps {@code in} with the multi-term variant of every configured token filter
 * that implements {@link MultiTermAwareComponent}; filters that do not implement
 * it are skipped.
 *
 * @param fieldName name of the field being normalized (not used by this implementation)
 * @param in        the token stream to normalize
 * @return {@code in} wrapped by all applicable multi-term filters, in the order
 *         they appear in {@code filters}; {@code in} itself if none apply
 */
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
    TokenStream result = in;
    for (TokenFilterFactory filter : filters) {
        if (filter instanceof MultiTermAwareComponent) {
            TokenFilterFactory multiTermFilter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
            // Wrap the stream built so far, not the raw input: creating each filter
            // over 'in' would discard every previously applied filter and leave only
            // the last multi-term-aware filter in the returned chain.
            result = multiTermFilter.create(result);
        }
    }
    return result;
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory)

Example 2 with MultiTermAwareComponent

use of org.apache.lucene.analysis.util.MultiTermAwareComponent in project lucene-solr by apache.

the class TestFactories method doTestTokenFilter.

/**
 * Looks up the token filter factory registered under {@code tokenfilter},
 * initializes it, sanity checks its multi-term component (if it has one) and
 * runs a short random-data check through an analyzer built around it.
 * Nothing is checked when {@code initialize} returns {@code null}.
 *
 * @param tokenfilter name passed to {@code TokenFilterFactory.lookupClass}
 * @throws IOException declared because the random-data check performs analysis I/O
 */
private void doTestTokenFilter(String tokenfilter) throws IOException {
    Class<? extends TokenFilterFactory> factoryClazz = TokenFilterFactory.lookupClass(tokenfilter);
    TokenFilterFactory factory = (TokenFilterFactory) initialize(factoryClazz);
    if (factory == null) {
        return;
    }
    // if it implements MultiTermAware, sanity check its impl
    if (factory instanceof MultiTermAwareComponent) {
        AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
        assertNotNull(mtc);
        // it's not ok to return a charfilter or tokenizer here, this makes no sense
        assertTrue(mtc instanceof TokenFilterFactory);
    }
    // beast it just a little, it shouldn't throw exceptions:
    // (it should have thrown them in initialize)
    // try-with-resources so the analyzer is closed even when the random check fails
    try (Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null)) {
        checkRandomData(random(), a, 20, 20, false, false);
    }
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) AbstractAnalysisFactory(org.apache.lucene.analysis.util.AbstractAnalysisFactory) Analyzer(org.apache.lucene.analysis.Analyzer) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory)

Example 3 with MultiTermAwareComponent

use of org.apache.lucene.analysis.util.MultiTermAwareComponent in project lucene-solr by apache.

the class TestFactories method doTestTokenizer.

/**
 * Looks up the tokenizer factory registered under {@code tokenizer},
 * initializes it, sanity checks its multi-term component (if it has one) and
 * runs a short random-data check through an analyzer built around it.
 * Nothing is checked when {@code initialize} returns {@code null}.
 *
 * @param tokenizer name passed to {@code TokenizerFactory.lookupClass}
 * @throws IOException declared because the random-data check performs analysis I/O
 */
private void doTestTokenizer(String tokenizer) throws IOException {
    Class<? extends TokenizerFactory> factoryClazz = TokenizerFactory.lookupClass(tokenizer);
    TokenizerFactory factory = (TokenizerFactory) initialize(factoryClazz);
    if (factory == null) {
        return;
    }
    // if it implements MultiTermAware, sanity check its impl
    if (factory instanceof MultiTermAwareComponent) {
        AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
        assertNotNull(mtc);
        // it's not ok to return e.g. a charfilter here: but a tokenizer could wrap a filter around it
        assertFalse(mtc instanceof CharFilterFactory);
    }
    // beast it just a little, it shouldn't throw exceptions:
    // (it should have thrown them in initialize)
    // try-with-resources so the analyzer is closed even when the random check fails
    try (Analyzer a = new FactoryAnalyzer(factory, null, null)) {
        checkRandomData(random(), a, 20, 20, false, false);
    }
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) AbstractAnalysisFactory(org.apache.lucene.analysis.util.AbstractAnalysisFactory) Analyzer(org.apache.lucene.analysis.Analyzer)

Example 4 with MultiTermAwareComponent

use of org.apache.lucene.analysis.util.MultiTermAwareComponent in project lucene-solr by apache.

the class TestAsciiFoldingFilterFactory method testMultiTermAnalysis.

/**
 * Checks ASCII folding of the single token "Été" through four factories in turn:
 * a default factory, that factory's multi-term component, a factory configured
 * with {@code preserveOriginal=true}, and the multi-term component of the latter.
 * Only the {@code preserveOriginal} factory itself is expected to also emit the
 * original token; its multi-term component is expected to emit just the folded form.
 */
public void testMultiTermAnalysis() throws IOException {
    // Default configuration: folded token only.
    final TokenFilterFactory plain = new ASCIIFoldingFilterFactory(Collections.emptyMap());
    final TokenStream plainOut = plain.create(new CannedTokenStream(new Token("Été", 0, 3)));
    assertTokenStreamContents(plainOut, new String[] { "Ete" });

    // Multi-term component of the default configuration.
    final TokenFilterFactory plainMultiTerm = (TokenFilterFactory) ((MultiTermAwareComponent) plain).getMultiTermComponent();
    final TokenStream plainMultiTermOut = plainMultiTerm.create(new CannedTokenStream(new Token("Été", 0, 3)));
    assertTokenStreamContents(plainMultiTermOut, new String[] { "Ete" });

    // preserveOriginal=true: folded token followed by the original.
    final TokenFilterFactory preserving = new ASCIIFoldingFilterFactory(new HashMap<>(Collections.singletonMap("preserveOriginal", "true")));
    final TokenStream preservingOut = preserving.create(new CannedTokenStream(new Token("Été", 0, 3)));
    assertTokenStreamContents(preservingOut, new String[] { "Ete", "Été" });

    // Multi-term component of the preserving configuration: folded token only.
    final TokenFilterFactory preservingMultiTerm = (TokenFilterFactory) ((MultiTermAwareComponent) preserving).getMultiTermComponent();
    final TokenStream preservingMultiTermOut = preservingMultiTerm.create(new CannedTokenStream(new Token("Été", 0, 3)));
    assertTokenStreamContents(preservingMultiTermOut, new String[] { "Ete" });
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) HashMap(java.util.HashMap) Token(org.apache.lucene.analysis.Token) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory)

Example 5 with MultiTermAwareComponent

use of org.apache.lucene.analysis.util.MultiTermAwareComponent in project lucene-solr by apache.

the class CustomAnalyzer method initReaderForNormalization.

/**
 * Wraps {@code reader} with the multi-term variant of every configured char
 * filter that implements {@link MultiTermAwareComponent}, in the order they
 * appear in {@code charFilters}; other char filters are skipped.
 *
 * @param fieldName name of the field being normalized (not used by this implementation)
 * @param reader    the reader to wrap
 * @return the wrapped reader, or {@code reader} itself if no char filter applies
 */
@Override
protected Reader initReaderForNormalization(String fieldName, Reader reader) {
    Reader wrapped = reader;
    for (final CharFilterFactory candidate : charFilters) {
        if (!(candidate instanceof MultiTermAwareComponent)) {
            continue;
        }
        final CharFilterFactory multiTermFilter = (CharFilterFactory) ((MultiTermAwareComponent) candidate).getMultiTermComponent();
        // Each filter wraps the reader produced by the previous one.
        wrapped = multiTermFilter.create(wrapped);
    }
    return wrapped;
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory)

Aggregations

MultiTermAwareComponent (org.apache.lucene.analysis.util.MultiTermAwareComponent)11 Analyzer (org.apache.lucene.analysis.Analyzer)6 AbstractAnalysisFactory (org.apache.lucene.analysis.util.AbstractAnalysisFactory)6 CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory)6 TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory)5 TokenStream (org.apache.lucene.analysis.TokenStream)2 TokenizerFactory (org.apache.lucene.analysis.util.TokenizerFactory)2 HashMap (java.util.HashMap)1 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)1 Token (org.apache.lucene.analysis.Token)1