Use of org.apache.jackrabbit.oak.plugins.index.lucene.util.TokenizerChain in project jackrabbit-oak by apache.
The class NodeStateAnalyzerFactory, method composeAnalyzer.
private Analyzer composeAnalyzer(NodeState state) {
    // char filters are applied to the input first, then the tokenizer, then the token filters
    TokenizerFactory tf = loadTokenizer(state.getChildNode(LuceneIndexConstants.ANL_TOKENIZER));
    CharFilterFactory[] cfs = loadCharFilterFactories(state.getChildNode(LuceneIndexConstants.ANL_CHAR_FILTERS));
    TokenFilterFactory[] tffs = loadTokenFilterFactories(state.getChildNode(LuceneIndexConstants.ANL_FILTERS));
    return new TokenizerChain(cfs, tf, tffs);
}
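The factory thus reads the analyzer definition from a small node tree. A minimal sketch of that tree, built with a NodeBuilder as in the tests below; the factory names "standard", "htmlStrip" and "lowercase" are illustrative assumptions (any name registered with Lucene's analysis SPI would do):

NodeBuilder anl = EMPTY_NODE.builder();
// the tokenizer child is mandatory; its name property selects the TokenizerFactory
anl.child(LuceneIndexConstants.ANL_TOKENIZER).setProperty(LuceneIndexConstants.ANL_NAME, "standard");
// char filters and token filters are optional; one child node per factory
anl.child(LuceneIndexConstants.ANL_CHAR_FILTERS).child("htmlStrip");
anl.child(LuceneIndexConstants.ANL_FILTERS).child("lowercase");
Analyzer analyzer = factory.createInstance(anl.getNodeState());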
Use of org.apache.jackrabbit.oak.plugins.index.lucene.util.TokenizerChain in project jackrabbit-oak by apache.
The class IndexDefinition, method createAnalyzer.
// ~---------------------------------------------------< Analyzer >
private Analyzer createAnalyzer() {
    Analyzer result;
    Analyzer defaultAnalyzer = LuceneIndexConstants.ANALYZER;
    if (analyzers.containsKey(LuceneIndexConstants.ANL_DEFAULT)) {
        defaultAnalyzer = analyzers.get(LuceneIndexConstants.ANL_DEFAULT);
    }
    if (!evaluatePathRestrictions()) {
        result = defaultAnalyzer;
    } else {
        // give the :ancestors field its own path-aware analyzer so that
        // path restrictions can be evaluated against the index
        Map<String, Analyzer> analyzerMap = ImmutableMap.<String, Analyzer>builder()
                .put(FieldNames.ANCESTORS,
                        new TokenizerChain(new PathHierarchyTokenizerFactory(Collections.<String, String>emptyMap())))
                .build();
        result = new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzerMap);
    }
    // a negative maxFieldLength means no limit is applied
    if (maxFieldLength < 0) {
        return result;
    }
    return new LimitTokenCountAnalyzer(result, maxFieldLength);
}
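The :ancestors entry above exists because a PathHierarchyTokenizer emits one token per ancestor path, which is what lets descendant-path restrictions be answered from the index. A minimal sketch of that behaviour, assuming the Lucene 4.x TokenStream API used by this Oak version:

Analyzer ancestors = new TokenizerChain(new PathHierarchyTokenizerFactory(Collections.<String, String>emptyMap()));
try (TokenStream ts = ancestors.tokenStream(FieldNames.ANCESTORS, "/content/site/page")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        // prints "/content", then "/content/site", then "/content/site/page"
        System.out.println(term.toString());
    }
    ts.end();
}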
Use of org.apache.jackrabbit.oak.plugins.index.lucene.util.TokenizerChain in project jackrabbit-oak by apache.
The class NodeStateAnalyzerFactoryTest, method analyzerByComposition_TokenFilter.
@Test
public void analyzerByComposition_TokenFilter() throws Exception {
    NodeBuilder nb = EMPTY_NODE.builder();
    nb.child(ANL_TOKENIZER).setProperty(ANL_NAME, "whitespace");
    NodeBuilder filters = nb.child(ANL_FILTERS);
    filters.setProperty(OAK_CHILD_ORDER, ImmutableList.of("stop", "LowerCase"), NAMES);
    filters.child("LowerCase").setProperty(ANL_NAME, "LowerCase");
    filters.child("LowerCase").setProperty(JCR_PRIMARYTYPE, "nt:unstructured");
    // the name property is optional; it is derived from the node name
    filters.child("stop").setProperty(ANL_LUCENE_MATCH_VERSION, Version.LUCENE_31.toString());
    TokenizerChain analyzer = (TokenizerChain) factory.createInstance(nb.getNodeState());
    assertEquals(2, analyzer.getFilters().length);
    // the filters must be applied in the order given by OAK_CHILD_ORDER
    assertEquals(StopFilterFactory.class.getName(), analyzer.getFilters()[0].getClassArg());
    assertEquals(LowerCaseFilterFactory.class.getName(), analyzer.getFilters()[1].getClassArg());
    assertTrue(analyzer.getFilters()[0].isExplicitLuceneMatchVersion());
}
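A hedged sketch of what this composed chain does at analysis time, assuming the default English stop word set (note the ordering consequence: the stop filter runs before lower-casing, so an upper-case "The" would survive):

List<String> tokens = new ArrayList<>();
try (TokenStream ts = analyzer.tokenStream("text", "the Quick fox")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        tokens.add(term.toString());
    }
    ts.end();
}
// "the" is dropped by the stop filter, "Quick" is lower-cased afterwards
assertEquals(ImmutableList.of("quick", "fox"), tokens);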
Use of org.apache.jackrabbit.oak.plugins.index.lucene.util.TokenizerChain in project jackrabbit-oak by apache.
The class NodeStateAnalyzerFactoryTest, method analyzerByComposition_FileResource.
@Test
public void analyzerByComposition_FileResource() throws Exception {
    NodeBuilder nb = EMPTY_NODE.builder();
    nb.child(ANL_TOKENIZER).setProperty(ANL_NAME, "whitespace");
    NodeBuilder filters = nb.child(ANL_FILTERS);
    // the name property is optional; it is derived from the node name
    NodeBuilder stop = filters.child("stop");
    stop.setProperty("words", "set1.txt, set2.txt");
    createFileNode(stop, "set1.txt", newCharArraySet("foo", "bar"));
    createFileNode(stop, "set2.txt", newCharArraySet("foo1", "bar1"));
    TokenizerChain analyzer = (TokenizerChain) factory.createInstance(nb.getNodeState());
    assertEquals(1, analyzer.getFilters().length);
    assertEquals(StopFilterFactory.class.getName(), analyzer.getFilters()[0].getClassArg());
    StopFilterFactory sff = (StopFilterFactory) analyzer.getFilters()[0];
    // stop words from both referenced files must be loaded
    assertTrue(sff.getStopWords().contains("foo"));
    assertTrue(sff.getStopWords().contains("foo1"));
}
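createFileNode and newCharArraySet are helpers local to this test class. A hedged sketch of what createFileNode plausibly does, assuming the factory resolves the file names against an nt:file-like child layout (the word list stored under jcr:content/jcr:data, one word per line); the exact implementation lives in NodeStateAnalyzerFactoryTest:

private static NodeBuilder createFileNode(NodeBuilder parent, String fileName, CharArraySet words) {
    StringBuilder sb = new StringBuilder();
    for (Object w : words) {
        sb.append(new String((char[]) w)).append('\n'); // CharArraySet iterates char[] entries
    }
    Blob data = new ArrayBasedBlob(sb.toString().getBytes(StandardCharsets.UTF_8));
    parent.child(fileName).child(JcrConstants.JCR_CONTENT).setProperty(JcrConstants.JCR_DATA, data, Type.BINARY);
    return parent.child(fileName);
}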
Use of org.apache.jackrabbit.oak.plugins.index.lucene.util.TokenizerChain in project jackrabbit-oak by apache.
The class NodeStateAnalyzerFactoryTest, method analyzerByComposition_Tokenizer.
@Test
public void analyzerByComposition_Tokenizer() throws Exception {
    NodeBuilder nb = EMPTY_NODE.builder();
    nb.child(ANL_TOKENIZER).setProperty(ANL_NAME, "whitespace");
    TokenizerChain analyzer = (TokenizerChain) factory.createInstance(nb.getNodeState());
    assertEquals(WhitespaceTokenizerFactory.class.getName(), analyzer.getTokenizer().getClassArg());
    // reconfiguring the tokenizer node yields a different tokenizer factory
    nb.child(ANL_TOKENIZER).setProperty(ANL_NAME, "pathhierarchy").setProperty("delimiter", "#");
    analyzer = (TokenizerChain) factory.createInstance(nb.getNodeState());
    assertEquals(PathHierarchyTokenizerFactory.class.getName(), analyzer.getTokenizer().getClassArg());
    assertEquals('#', getValue(analyzer.getTokenizer(), "delimiter"));
}
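getValue is likewise a local helper; a hedged sketch, assuming it reflectively reads the named field from the factory (PathHierarchyTokenizerFactory keeps the parsed delimiter in a private field), using java.lang.reflect.Field:

private static Object getValue(TokenizerFactory factory, String fieldName) throws Exception {
    Field f = factory.getClass().getDeclaredField(fieldName); // e.g. the private char delimiter
    f.setAccessible(true);
    return f.get(factory);
}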