Search in sources :

Example 1 with PreBuiltTokenizers

Use of org.elasticsearch.indices.analysis.PreBuiltTokenizers in the elasticsearch project by elastic.

The method testPreBuiltMultiTermAware of the class AnalysisFactoryTestCase.

/**
 * Verifies that pre-built tokenizers, token filters and char filters are multi-term aware
 * exactly when the Lucene factory class they are mapped to is multi-term aware.
 *
 * <p>For each entry of {@code PREBUILT_TOKENIZERS}, {@code PREBUILT_TOKENFILTERS} and
 * {@code PREBUILT_CHARFILTERS}, entries mapped to {@code Void.class} are skipped. Every
 * other entry must map to a class assignable to the matching Lucene factory base type.
 * The component is then recorded in two sets: one for components whose factory for
 * {@code Version.CURRENT} is an instance of the unqualified {@code MultiTermAwareComponent},
 * and one for components whose mapped class is assignable to
 * {@code org.apache.lucene.analysis.util.MultiTermAwareComponent}. The test fails if the
 * two sets differ in either direction.
 */
public void testPreBuiltMultiTermAware() {
    // Components whose mapped Lucene factory class is multi-term aware.
    Set<Object> awareInLucene = new HashSet<>();
    // Components whose own factory instance is multi-term aware.
    Set<Object> awareHere = new HashSet<>();

    for (Map.Entry<PreBuiltTokenizers, Class<?>> mapping : PREBUILT_TOKENIZERS.entrySet()) {
        PreBuiltTokenizers preBuilt = mapping.getKey();
        Class<?> luceneClass = mapping.getValue();
        // Entries mapped to Void.class are not compared at all.
        if (luceneClass != Void.class) {
            assertTrue(TokenizerFactory.class.isAssignableFrom(luceneClass));
            if (preBuilt.getTokenizerFactory(Version.CURRENT) instanceof MultiTermAwareComponent) {
                awareHere.add(preBuilt);
            }
            if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneClass)) {
                awareInLucene.add(preBuilt);
            }
        }
    }

    for (Map.Entry<PreBuiltTokenFilters, Class<?>> mapping : PREBUILT_TOKENFILTERS.entrySet()) {
        PreBuiltTokenFilters preBuilt = mapping.getKey();
        Class<?> luceneClass = mapping.getValue();
        if (luceneClass != Void.class) {
            assertTrue(TokenFilterFactory.class.isAssignableFrom(luceneClass));
            if (preBuilt.getTokenFilterFactory(Version.CURRENT) instanceof MultiTermAwareComponent) {
                awareHere.add(preBuilt);
            }
            if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneClass)) {
                awareInLucene.add(preBuilt);
            }
        }
    }

    for (Map.Entry<PreBuiltCharFilters, Class<?>> mapping : PREBUILT_CHARFILTERS.entrySet()) {
        PreBuiltCharFilters preBuilt = mapping.getKey();
        Class<?> luceneClass = mapping.getValue();
        if (luceneClass != Void.class) {
            assertTrue(CharFilterFactory.class.isAssignableFrom(luceneClass));
            if (preBuilt.getCharFilterFactory(Version.CURRENT) instanceof MultiTermAwareComponent) {
                awareHere.add(preBuilt);
            }
            if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneClass)) {
                awareInLucene.add(preBuilt);
            }
        }
    }

    // Aware on the Lucene side but not here: multi-term support is missing.
    Set<Object> lackingSupport = new HashSet<>(awareInLucene);
    lackingSupport.removeAll(awareHere);
    assertTrue("Pre-built components are missing multi-term support: " + lackingSupport,
            lackingSupport.isEmpty());

    // Aware here but not on the Lucene side: multi-term support is unexpected.
    Set<Object> unexpectedSupport = new HashSet<>(awareHere);
    unexpectedSupport.removeAll(awareInLucene);
    assertTrue("Pre-built components should not have multi-term support: " + unexpectedSupport,
            unexpectedSupport.isEmpty());
}
Also used : PreBuiltTokenizers(org.elasticsearch.indices.analysis.PreBuiltTokenizers) MultiTermAwareComponent(org.elasticsearch.index.analysis.MultiTermAwareComponent) TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) ThaiTokenizerFactory(org.elasticsearch.index.analysis.ThaiTokenizerFactory) PathHierarchyTokenizerFactory(org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory) LowerCaseTokenizerFactory(org.elasticsearch.index.analysis.LowerCaseTokenizerFactory) WhitespaceTokenizerFactory(org.elasticsearch.index.analysis.WhitespaceTokenizerFactory) ClassicTokenizerFactory(org.elasticsearch.index.analysis.ClassicTokenizerFactory) KeywordTokenizerFactory(org.elasticsearch.index.analysis.KeywordTokenizerFactory) StandardTokenizerFactory(org.elasticsearch.index.analysis.StandardTokenizerFactory) LetterTokenizerFactory(org.elasticsearch.index.analysis.LetterTokenizerFactory) PatternTokenizerFactory(org.elasticsearch.index.analysis.PatternTokenizerFactory) EdgeNGramTokenizerFactory(org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory) UAX29URLEmailTokenizerFactory(org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory) NGramTokenizerFactory(org.elasticsearch.index.analysis.NGramTokenizerFactory) MappingCharFilterFactory(org.elasticsearch.index.analysis.MappingCharFilterFactory) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) PatternReplaceCharFilterFactory(org.elasticsearch.index.analysis.PatternReplaceCharFilterFactory) HtmlStripCharFilterFactory(org.elasticsearch.index.analysis.HtmlStripCharFilterFactory) PreBuiltTokenFilters(org.elasticsearch.indices.analysis.PreBuiltTokenFilters) PreBuiltCharFilters(org.elasticsearch.indices.analysis.PreBuiltCharFilters) NGramTokenFilterFactory(org.elasticsearch.index.analysis.NGramTokenFilterFactory) StemmerOverrideTokenFilterFactory(org.elasticsearch.index.analysis.StemmerOverrideTokenFilterFactory) 
ShingleTokenFilterFactory(org.elasticsearch.index.analysis.ShingleTokenFilterFactory) TruncateTokenFilterFactory(org.elasticsearch.index.analysis.TruncateTokenFilterFactory) LengthTokenFilterFactory(org.elasticsearch.index.analysis.LengthTokenFilterFactory) HunspellTokenFilterFactory(org.elasticsearch.index.analysis.HunspellTokenFilterFactory) FlattenGraphTokenFilterFactory(org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory) PatternReplaceTokenFilterFactory(org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory) StandardTokenFilterFactory(org.elasticsearch.index.analysis.StandardTokenFilterFactory) StopTokenFilterFactory(org.elasticsearch.index.analysis.StopTokenFilterFactory) BrazilianStemTokenFilterFactory(org.elasticsearch.index.analysis.BrazilianStemTokenFilterFactory) DictionaryCompoundWordTokenFilterFactory(org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory) StemmerTokenFilterFactory(org.elasticsearch.index.analysis.StemmerTokenFilterFactory) GermanStemTokenFilterFactory(org.elasticsearch.index.analysis.GermanStemTokenFilterFactory) ASCIIFoldingTokenFilterFactory(org.elasticsearch.index.analysis.ASCIIFoldingTokenFilterFactory) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory) PorterStemTokenFilterFactory(org.elasticsearch.index.analysis.PorterStemTokenFilterFactory) LowerCaseTokenFilterFactory(org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory) MinHashTokenFilterFactory(org.elasticsearch.index.analysis.MinHashTokenFilterFactory) CommonGramsTokenFilterFactory(org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory) SynonymTokenFilterFactory(org.elasticsearch.index.analysis.SynonymTokenFilterFactory) TrimTokenFilterFactory(org.elasticsearch.index.analysis.TrimTokenFilterFactory) SynonymGraphTokenFilterFactory(org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory) CzechStemTokenFilterFactory(org.elasticsearch.index.analysis.CzechStemTokenFilterFactory) 
KStemTokenFilterFactory(org.elasticsearch.index.analysis.KStemTokenFilterFactory) UpperCaseTokenFilterFactory(org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory) WordDelimiterTokenFilterFactory(org.elasticsearch.index.analysis.WordDelimiterTokenFilterFactory) DelimitedPayloadTokenFilterFactory(org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory) ArabicStemTokenFilterFactory(org.elasticsearch.index.analysis.ArabicStemTokenFilterFactory) ReverseTokenFilterFactory(org.elasticsearch.index.analysis.ReverseTokenFilterFactory) HyphenationCompoundWordTokenFilterFactory(org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory) ElisionTokenFilterFactory(org.elasticsearch.index.analysis.ElisionTokenFilterFactory) PatternCaptureGroupTokenFilterFactory(org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory) EdgeNGramTokenFilterFactory(org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory) SnowballTokenFilterFactory(org.elasticsearch.index.analysis.SnowballTokenFilterFactory) KeywordMarkerTokenFilterFactory(org.elasticsearch.index.analysis.KeywordMarkerTokenFilterFactory) Map(java.util.Map) HashMap(java.util.HashMap) HashSet(java.util.HashSet)

Aggregations

HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Map (java.util.Map)1 CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory)1 TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory)1 TokenizerFactory (org.apache.lucene.analysis.util.TokenizerFactory)1 ASCIIFoldingTokenFilterFactory (org.elasticsearch.index.analysis.ASCIIFoldingTokenFilterFactory)1 ArabicStemTokenFilterFactory (org.elasticsearch.index.analysis.ArabicStemTokenFilterFactory)1 BrazilianStemTokenFilterFactory (org.elasticsearch.index.analysis.BrazilianStemTokenFilterFactory)1 ClassicTokenizerFactory (org.elasticsearch.index.analysis.ClassicTokenizerFactory)1 CommonGramsTokenFilterFactory (org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory)1 CzechStemTokenFilterFactory (org.elasticsearch.index.analysis.CzechStemTokenFilterFactory)1 DelimitedPayloadTokenFilterFactory (org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory)1 EdgeNGramTokenFilterFactory (org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory)1 EdgeNGramTokenizerFactory (org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory)1 ElisionTokenFilterFactory (org.elasticsearch.index.analysis.ElisionTokenFilterFactory)1 FlattenGraphTokenFilterFactory (org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory)1 GermanStemTokenFilterFactory (org.elasticsearch.index.analysis.GermanStemTokenFilterFactory)1 HtmlStripCharFilterFactory (org.elasticsearch.index.analysis.HtmlStripCharFilterFactory)1 HunspellTokenFilterFactory (org.elasticsearch.index.analysis.HunspellTokenFilterFactory)1