Use of org.apache.lucene.analysis.core.KeywordTokenizer in project lucene-solr by apache.
Class TestICUFoldingFilter, method testEmptyTerm:
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer));
    }
  };
  // KeywordTokenizer turns the empty input into a single zero-length token;
  // the folding filter must pass it through without failing
  checkOneTerm(a, "", "");
  a.close();
}
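The same analyzer can also be checked with a non-empty input. A hedged follow-up assertion (not part of the original test; it would go before a.close()): ICU folding applies case folding and diacritic removal, so an accented term folds to its plain lowercase form.

  checkOneTerm(a, "Résumé", "resume");  // hypothetical extra assertion: case and accents folded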
Use of org.apache.lucene.analysis.core.KeywordTokenizer in project lucene-solr by apache.
Class TestICUTransformFilter, method testOptimizerSurrogate:
public void testOptimizerSurrogate() throws Exception {
  // convert CJK UNIFIED IDEOGRAPH-20087 (a supplementary character) to an x
  String rules = "\\U00020087 > x;";
  Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
  assertTrue(custom.getFilter() == null);
  final KeywordTokenizer input = new KeywordTokenizer();
  input.setReader(new StringReader(""));
  new ICUTransformFilter(input, custom);
  // constructing the filter installs a UnicodeSet filter on the transliterator,
  // so that only characters the rules can actually match are inspected
  assertTrue(custom.getFilter().equals(new UnicodeSet("[\\U00020087]")));
}
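A hedged sketch of what the filter does when the stream is actually consumed (not in the original test; assertTokenStreamContents is the BaseTokenStreamTestCase helper used elsewhere in these tests):

  // hypothetical: feed the supplementary character through the filter and expect "x"
  KeywordTokenizer ts = new KeywordTokenizer();
  ts.setReader(new StringReader(new String(Character.toChars(0x20087))));
  assertTokenStreamContents(new ICUTransformFilter(ts, custom), new String[] { "x" });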
Use of org.apache.lucene.analysis.core.KeywordTokenizer in project jackrabbit-oak by apache.
Class DefaultAnalyzersConfigurationTest, method setUp:
@Before
public void setUp() throws Exception {
  // exact path: the whole path is a single keyword token
  this.exactPathAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new KeywordTokenizer();
      return new TokenStreamComponents(source);
    }
  };
  // parent path, indexing side: the whole path as a single token
  this.parentPathIndexingAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new KeywordTokenizer();
      return new TokenStreamComponents(source);
    }
  };
  // parent path, searching side: reverse the path, drop the first segment
  // (originally the last), and reverse back, e.g. "/a/b/c" -> "/a/b"
  this.parentPathSearchingAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new KeywordTokenizer();
      TokenStream filter = new ReverseStringFilter(source);
      filter = new PatternReplaceFilter(filter, Pattern.compile("[^\\/]+\\/"), "", false);
      filter = new ReverseStringFilter(filter);
      return new TokenStreamComponents(source, filter);
    }
  };
  // direct children, indexing side: strip the node's own name so each node
  // is indexed under its parent path, e.g. "/a/b/c" -> "/a/b"
  this.directChildrenPathIndexingAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new KeywordTokenizer();
      TokenStream filter = new ReverseStringFilter(source);
      filter = new LengthFilter(filter, 2, Integer.MAX_VALUE);
      filter = new PatternReplaceFilter(filter, Pattern.compile("([^\\/]+)(\\/)"), "$2", false);
      filter = new PatternReplaceFilter(filter, Pattern.compile("(\\/)(.+)"), "$2", false);
      filter = new ReverseStringFilter(filter);
      return new TokenStreamComponents(source, filter);
    }
  };
  // direct children, searching side: the query path as a single token
  this.directChildrenPathSearchingAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new KeywordTokenizer();
      return new TokenStreamComponents(source);
    }
  };
  // all children, indexing side: emit the ancestor prefixes of the path,
  // with duplicate tokens removed
  this.allChildrenPathIndexingAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new PathHierarchyTokenizer();
      TokenStream filter = new PatternCaptureGroupTokenFilter(source, false, Pattern.compile("((\\/).*)"));
      filter = new RemoveDuplicatesTokenFilter(filter);
      return new TokenStreamComponents(source, filter);
    }
  };
  // all children, searching side: the query path as a single token
  this.allChildrenPathSearchingAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new KeywordTokenizer();
      return new TokenStreamComponents(source);
    }
  };
}
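A minimal sketch of how these analyzers could be exercised (hypothetical helper, not from the test class; assumes org.apache.lucene.analysis.tokenattributes.CharTermAttribute). With parentPathSearchingAnalyzer, for example, "/a/b/c" should come out as the single token "/a/b":

  // hypothetical: print the tokens an analyzer emits for a given path
  static void printTokens(Analyzer analyzer, String path) throws IOException {
    try (TokenStream ts = analyzer.tokenStream("path", path)) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(term);
      }
      ts.end();
    }
  }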
Use of org.apache.lucene.analysis.core.KeywordTokenizer in project stargate-core by tuplejump.
Class CaseInsensitiveKeywordAnalyzer, method createComponents:
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  // the whole input becomes one token, which is then lowercased
  KeywordTokenizer source = new KeywordTokenizer();
  LowerCaseFilter filter = new LowerCaseFilter(source);
  return new TokenStreamComponents(source, filter);
}
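A hedged usage sketch (assuming a no-argument constructor): the analyzer emits the entire input as one lowercased token, which makes exact-match fields case-insensitive:

  // hypothetical: "Hello World" is analyzed to the single token "hello world"
  Analyzer analyzer = new CaseInsensitiveKeywordAnalyzer();
  try (TokenStream ts = analyzer.tokenStream("key", "Hello World")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    assertTrue(ts.incrementToken());
    assertEquals("hello world", term.toString());  // one token: lowercased, whitespace preserved
    assertFalse(ts.incrementToken());
    ts.end();
  }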
Use of org.apache.lucene.analysis.core.KeywordTokenizer in project lucene-skos by behas.
Class SKOSAnalyzer, method createComponents:
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  if (expansionType.equals(ExpansionType.URI)) {
    // URI expansion: the whole input is one keyword token, resolved as a SKOS concept URI
    final KeywordTokenizer src = new KeywordTokenizer();
    TokenStream tok = new SKOSURIFilter(src, skosEngine, new StandardAnalyzer(), types);
    tok = new LowerCaseFilter(tok);
    return new TokenStreamComponents(src, tok);
  } else {
    // label expansion: classic StandardAnalyzer-style chain plus SKOS label expansion
    final StandardTokenizer src = new StandardTokenizer();
    src.setMaxTokenLength(maxTokenLength);
    TokenStream tok = new StandardFilter(src);  // classic behavior; StandardFilter does it for us
    tok = new SKOSLabelFilter(tok, skosEngine, new StandardAnalyzer(), bufferSize, types);
    tok = new LowerCaseFilter(tok);
    tok = new StopFilter(tok, stopwords);
    tok = new RemoveDuplicatesTokenFilter(tok);
    return new TokenStreamComponents(src, tok) {
      @Override
      protected void setReader(final Reader reader) throws IOException {
        // re-apply maxTokenLength on every reuse of the cached components
        src.setMaxTokenLength(maxTokenLength);
        super.setReader(reader);
      }
    };
  }
}
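The setReader override mirrors what StandardAnalyzer itself does: it re-applies maxTokenLength before each reuse of the cached components, so a changed limit takes effect on the next use. For reference, a minimal standalone sketch of the label branch's core chain without the SKOS-specific filter (which needs a configured skosEngine); the stop set shown is an assumption, the original uses the analyzer's own stopwords field:

  Analyzer plain = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      StandardTokenizer src = new StandardTokenizer();
      TokenStream tok = new StandardFilter(src);
      tok = new LowerCaseFilter(tok);
      tok = new StopFilter(tok, StandardAnalyzer.STOP_WORDS_SET);  // assumed stop set
      tok = new RemoveDuplicatesTokenFilter(tok);
      return new TokenStreamComponents(src, tok);
    }
  };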