Search in sources :

Example 1 with PatternCaptureGroupTokenFilter

Use of org.apache.lucene.analysis.pattern.PatternCaptureGroupTokenFilter in the Apache jackrabbit-oak project.

From the class DefaultAnalyzersConfigurationTest, method setUp.

/**
 * Builds the seven path analyzers used by the tests and stores them in the
 * corresponding fields. Four of them emit the raw input as a single keyword
 * token; the remaining three apply the filter chains spelled out below.
 *
 * @throws Exception declared for the benefit of the test framework
 */
@Before
public void setUp() throws Exception {
    // Analyzers that leave the input untouched (one keyword token, no filters).
    this.exactPathAnalyzer = keywordOnlyAnalyzer();
    this.parentPathIndexingAnalyzer = keywordOnlyAnalyzer();

    this.parentPathSearchingAnalyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new KeywordTokenizer();
            // Reverse, strip one "<non-slash chars>/" run, then reverse back.
            TokenStream reversed = new ReverseStringFilter(tokenizer);
            TokenStream stripped = new PatternReplaceFilter(reversed, Pattern.compile("[^\\/]+\\/"), "", false);
            TokenStream restored = new ReverseStringFilter(stripped);
            return new TokenStreamComponents(tokenizer, restored);
        }
    };

    this.directChildrenPathIndexingAnalyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new KeywordTokenizer();
            TokenStream reversed = new ReverseStringFilter(tokenizer);
            // Only tokens of length 2 or more continue down the chain.
            TokenStream longEnough = new LengthFilter(reversed, 2, Integer.MAX_VALUE);
            // Two replacements on the reversed text, each keeping capture group 2.
            TokenStream firstPass = new PatternReplaceFilter(longEnough, Pattern.compile("([^\\/]+)(\\/)"), "$2", false);
            TokenStream secondPass = new PatternReplaceFilter(firstPass, Pattern.compile("(\\/)(.+)"), "$2", false);
            TokenStream restored = new ReverseStringFilter(secondPass);
            return new TokenStreamComponents(tokenizer, restored);
        }
    };

    this.directChildrenPathSearchingAnalyzer = keywordOnlyAnalyzer();

    this.allChildrenPathIndexingAnalyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer hierarchy = new PathHierarchyTokenizer();
            // Capture-group filter over the hierarchy tokens, followed by de-duplication.
            TokenStream captured = new PatternCaptureGroupTokenFilter(hierarchy, false, Pattern.compile("((\\/).*)"));
            TokenStream unique = new RemoveDuplicatesTokenFilter(captured);
            return new TokenStreamComponents(hierarchy, unique);
        }
    };

    this.allChildrenPathSearchingAnalyzer = keywordOnlyAnalyzer();
}

/**
 * Creates a fresh analyzer whose only component is a {@link KeywordTokenizer},
 * with no token filters attached.
 *
 * @return a new, independent analyzer instance on every call
 */
private Analyzer keywordOnlyAnalyzer() {
    return new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new KeywordTokenizer());
        }
    };
}
Also used : RemoveDuplicatesTokenFilter(org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter) TokenStream(org.apache.lucene.analysis.TokenStream) PathHierarchyTokenizer(org.apache.lucene.analysis.path.PathHierarchyTokenizer) Analyzer(org.apache.lucene.analysis.Analyzer) KeywordTokenizer(org.apache.lucene.analysis.core.KeywordTokenizer) PatternCaptureGroupTokenFilter(org.apache.lucene.analysis.pattern.PatternCaptureGroupTokenFilter) LengthFilter(org.apache.lucene.analysis.miscellaneous.LengthFilter) ReverseStringFilter(org.apache.lucene.analysis.reverse.ReverseStringFilter) Tokenizer(org.apache.lucene.analysis.Tokenizer) KeywordTokenizer(org.apache.lucene.analysis.core.KeywordTokenizer) PathHierarchyTokenizer(org.apache.lucene.analysis.path.PathHierarchyTokenizer) PatternReplaceFilter(org.apache.lucene.analysis.pattern.PatternReplaceFilter) Before(org.junit.Before)

Aggregations

Analyzer (org.apache.lucene.analysis.Analyzer)1 TokenStream (org.apache.lucene.analysis.TokenStream)1 Tokenizer (org.apache.lucene.analysis.Tokenizer)1 KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer)1 LengthFilter (org.apache.lucene.analysis.miscellaneous.LengthFilter)1 RemoveDuplicatesTokenFilter (org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter)1 PathHierarchyTokenizer (org.apache.lucene.analysis.path.PathHierarchyTokenizer)1 PatternCaptureGroupTokenFilter (org.apache.lucene.analysis.pattern.PatternCaptureGroupTokenFilter)1 PatternReplaceFilter (org.apache.lucene.analysis.pattern.PatternReplaceFilter)1 ReverseStringFilter (org.apache.lucene.analysis.reverse.ReverseStringFilter)1 Before (org.junit.Before)1