use of org.apache.lucene.analysis.util.TokenFilterFactory in project lucene-solr by apache.
the class FieldTypePluginLoader method readAnalyzer.
//
// <analyzer><charFilter class="...."/><tokenizer class="...."/><filter class="...." arg="...."/></analyzer>
//
//
/**
 * Builds the {@link Analyzer} described by an {@code <analyzer>} schema node.
 *
 * <p>Two mutually exclusive configurations are supported:
 * <ul>
 *   <li>a {@code class} attribute naming an {@link Analyzer} implementation, which is
 *       instantiated through its no-argument constructor and given a Lucene match version;</li>
 *   <li>nested {@code <charFilter>}, {@code <tokenizer>} and {@code <filter>} children, which
 *       are loaded as factories and combined into a {@link TokenizerChain}. Exactly one
 *       tokenizer must be present.</li>
 * </ul>
 *
 * @param node the analyzer node; may be {@code null}
 * @return the configured analyzer, or {@code null} when {@code node} is {@code null}
 * @throws XPathExpressionException if evaluating the child-node XPath expressions fails
 * @throws SolrException with {@code SERVER_ERROR} when an explicit class is combined with
 *         nested factories, when the explicit class cannot be loaded or configured, when
 *         more than one tokenizer is declared, or when neither a class nor a tokenizer is given
 */
private Analyzer readAnalyzer(Node node) throws XPathExpressionException {
  final SolrResourceLoader loader = schema.getResourceLoader();
  if (node == null) {
    return null;
  }
  NamedNodeMap attrs = node.getAttributes();
  String analyzerName = DOMUtil.getAttr(attrs, "class");

  // check for all of these up front, so we can error if used in
  // conjunction with an explicit analyzer class.
  NodeList charFilterNodes = (NodeList) xpath.evaluate("./charFilter", node, XPathConstants.NODESET);
  NodeList tokenizerNodes = (NodeList) xpath.evaluate("./tokenizer", node, XPathConstants.NODESET);
  NodeList tokenFilterNodes = (NodeList) xpath.evaluate("./filter", node, XPathConstants.NODESET);

  if (analyzerName != null) {
    // Only nested analysis factories are rejected here (rather than any child node at all),
    // because the user might have their own custom nodes (ie: <description> or something like that)
    if (0 != charFilterNodes.getLength() || 0 != tokenizerNodes.getLength() || 0 != tokenFilterNodes.getLength()) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Configuration Error: Analyzer class='" + analyzerName + "' can not be combined with nested analysis factories");
    }
    try {
      // No need to be core-aware as Analyzers are not in the core-aware list
      final Class<? extends Analyzer> clazz = loader.findClass(analyzerName, Analyzer.class);
      // Class.newInstance() is deprecated because it rethrows the constructor's checked
      // exceptions without declaring them. The documented replacement wraps constructor
      // failures in InvocationTargetException, which the catch below still handles.
      Analyzer analyzer = clazz.getDeclaredConstructor().newInstance();
      final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM);
      // An explicit attribute on the node wins over the schema-wide default.
      final Version luceneMatchVersion = (matchVersionStr == null) ? schema.getDefaultLuceneMatchVersion() : Config.parseLuceneVersionString(matchVersionStr);
      if (luceneMatchVersion == null) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Configuration Error: Analyzer '" + clazz.getName() + "' needs a 'luceneMatchVersion' parameter");
      }
      analyzer.setVersion(luceneMatchVersion);
      return analyzer;
    } catch (Exception e) {
      // Note: the missing-version SolrException thrown above also lands here and is
      // reported as the cause of the "Cannot load analyzer" error.
      log.error("Cannot load analyzer: " + analyzerName, e);
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Cannot load analyzer: " + analyzerName, e);
    }
  }

  // Load the CharFilters
  final ArrayList<CharFilterFactory> charFilters = new ArrayList<>();
  AbstractPluginLoader<CharFilterFactory> charFilterLoader = new AbstractPluginLoader<CharFilterFactory>("[schema.xml] analyzer/charFilter", CharFilterFactory.class, false, false) {
    @Override
    protected CharFilterFactory create(SolrResourceLoader loader, String name, String className, Node node) throws Exception {
      final Map<String, String> params = DOMUtil.toMap(node.getAttributes());
      // Replace the raw attribute with the parsed version, remembering whether it was explicit.
      String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
      params.put(LUCENE_MATCH_VERSION_PARAM, parseConfiguredVersion(configuredVersion, CharFilterFactory.class.getSimpleName()).toString());
      CharFilterFactory factory = loader.newInstance(className, CharFilterFactory.class, getDefaultPackages(), new Class[] { Map.class }, new Object[] { params });
      factory.setExplicitLuceneMatchVersion(null != configuredVersion);
      return factory;
    }

    @Override
    protected void init(CharFilterFactory plugin, Node node) throws Exception {
      // Collect the factories into the enclosing method's list.
      if (plugin != null) {
        charFilters.add(plugin);
      }
    }

    @Override
    protected CharFilterFactory register(String name, CharFilterFactory plugin) {
      // used for map registration
      return null;
    }
  };
  charFilterLoader.load(loader, charFilterNodes);

  // Load the Tokenizer
  // Although an analyzer only allows a single Tokenizer, we load a list to make sure
  // the configuration is ok
  final ArrayList<TokenizerFactory> tokenizers = new ArrayList<>(1);
  AbstractPluginLoader<TokenizerFactory> tokenizerLoader = new AbstractPluginLoader<TokenizerFactory>("[schema.xml] analyzer/tokenizer", TokenizerFactory.class, false, false) {
    @Override
    protected TokenizerFactory create(SolrResourceLoader loader, String name, String className, Node node) throws Exception {
      final Map<String, String> params = DOMUtil.toMap(node.getAttributes());
      String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
      params.put(LUCENE_MATCH_VERSION_PARAM, parseConfiguredVersion(configuredVersion, TokenizerFactory.class.getSimpleName()).toString());
      TokenizerFactory factory = loader.newInstance(className, TokenizerFactory.class, getDefaultPackages(), new Class[] { Map.class }, new Object[] { params });
      factory.setExplicitLuceneMatchVersion(null != configuredVersion);
      return factory;
    }

    @Override
    protected void init(TokenizerFactory plugin, Node node) throws Exception {
      // A second tokenizer is a configuration error.
      if (!tokenizers.isEmpty()) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The schema defines multiple tokenizers for: " + node);
      }
      tokenizers.add(plugin);
    }

    @Override
    protected TokenizerFactory register(String name, TokenizerFactory plugin) {
      // used for map registration
      return null;
    }
  };
  tokenizerLoader.load(loader, tokenizerNodes);

  // Make sure something was loaded
  if (tokenizers.isEmpty()) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "analyzer without class or tokenizer");
  }

  // Load the Filters
  final ArrayList<TokenFilterFactory> filters = new ArrayList<>();
  AbstractPluginLoader<TokenFilterFactory> filterLoader = new AbstractPluginLoader<TokenFilterFactory>("[schema.xml] analyzer/filter", TokenFilterFactory.class, false, false) {
    @Override
    protected TokenFilterFactory create(SolrResourceLoader loader, String name, String className, Node node) throws Exception {
      final Map<String, String> params = DOMUtil.toMap(node.getAttributes());
      String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
      params.put(LUCENE_MATCH_VERSION_PARAM, parseConfiguredVersion(configuredVersion, TokenFilterFactory.class.getSimpleName()).toString());
      TokenFilterFactory factory = loader.newInstance(className, TokenFilterFactory.class, getDefaultPackages(), new Class[] { Map.class }, new Object[] { params });
      factory.setExplicitLuceneMatchVersion(null != configuredVersion);
      return factory;
    }

    @Override
    protected void init(TokenFilterFactory plugin, Node node) throws Exception {
      if (plugin != null) {
        filters.add(plugin);
      }
    }

    @Override
    protected TokenFilterFactory register(String name, TokenFilterFactory plugin) throws Exception {
      // used for map registration
      return null;
    }
  };
  filterLoader.load(loader, tokenFilterNodes);

  return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]), tokenizers.get(0), filters.toArray(new TokenFilterFactory[filters.size()]));
}
use of org.apache.lucene.analysis.util.TokenFilterFactory in project lucene-solr by apache.
the class AnalyzerFactory method create.
/**
 * Creates an anonymous {@link Analyzer} driven by this factory's configuration.
 *
 * <p>The returned analyzer wraps its input reader with each configured char filter in order,
 * builds its token stream from the tokenizer factory followed by each token filter factory,
 * and reports the factory's position-increment and offset gaps when they are set (falling
 * back to the superclass values when they are {@code null}).
 *
 * @return a new analyzer instance
 */
public Analyzer create() {
  return new Analyzer() {
    // Gap settings are copied from the factory when the analyzer is created;
    // null means "defer to the superclass".
    private final Integer positionGapOverride = AnalyzerFactory.this.positionIncrementGap;
    private final Integer offsetGapOverride = AnalyzerFactory.this.offsetGap;

    @Override
    public Reader initReader(String fieldName, Reader reader) {
      // Nothing to wrap: hand the reader back untouched.
      if (charFilterFactories == null || charFilterFactories.size() == 0) {
        return reader;
      }
      Reader chained = reader;
      for (CharFilterFactory nextCharFilter : charFilterFactories) {
        chained = nextCharFilter.create(chained);
      }
      return chained;
    }

    @Override
    protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = tokenizerFactory.create();
      // Each filter wraps the stream produced by the previous stage.
      TokenStream sink = source;
      for (TokenFilterFactory nextFilter : tokenFilterFactories) {
        sink = nextFilter.create(sink);
      }
      return new TokenStreamComponents(source, sink);
    }

    @Override
    public int getPositionIncrementGap(String fieldName) {
      if (positionGapOverride != null) {
        return positionGapOverride;
      }
      return super.getPositionIncrementGap(fieldName);
    }

    @Override
    public int getOffsetGap(String fieldName) {
      if (offsetGapOverride != null) {
        return offsetGapOverride;
      }
      return super.getOffsetGap(fieldName);
    }
  };
}
use of org.apache.lucene.analysis.util.TokenFilterFactory in project lucene-solr by apache.
the class MultiTermTest method testMultiFound.
/**
 * Checks the analyzers of the {@code content_multi} field: the multi-term analyzer must be a
 * whitespace-tokenizing chain whose filters are ASCII-folding or lower-casing, and the index
 * analyzer must be a whitespace-tokenizing chain whose filters are ASCII-folding or trimming,
 * with no char filters.
 */
@Test
public void testMultiFound() {
  final SchemaField schemaField = h.getCore().getLatestSchema().getField("content_multi");

  // Multi-term analyzer.
  Analyzer underTest = ((TextField) schemaField.getType()).getMultiTermAnalyzer();
  assertTrue(underTest instanceof TokenizerChain);
  TokenizerChain chain = (TokenizerChain) underTest;
  assertTrue(chain.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
  for (TokenFilterFactory filter : chain.getTokenFilterFactories()) {
    final boolean allowed = filter instanceof ASCIIFoldingFilterFactory
        || filter instanceof LowerCaseFilterFactory;
    assertTrue(allowed);
  }

  // Index analyzer.
  underTest = schemaField.getType().getIndexAnalyzer();
  assertTrue(underTest instanceof TokenizerChain);
  chain = (TokenizerChain) underTest;
  assertTrue(chain.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
  for (TokenFilterFactory filter : chain.getTokenFilterFactories()) {
    final boolean allowed = filter instanceof ASCIIFoldingFilterFactory
        || filter instanceof TrimFilterFactory;
    assertTrue(allowed);
  }
  assertTrue(chain.getCharFilterFactories().length == 0);
}
use of org.apache.lucene.analysis.util.TokenFilterFactory in project lucene-solr by apache.
the class MultiTermTest method testDefaultCopiedToMulti.
/**
 * Checks the multi-term analyzer of the {@code content_ws} field: it must be a
 * {@link TokenizerChain} using the keyword tokenizer, contain only ASCII-folding or
 * lower-casing filters, and have no char filters.
 */
@Test
public void testDefaultCopiedToMulti() {
  final SchemaField schemaField = h.getCore().getLatestSchema().getField("content_ws");
  final Analyzer multiTerm = ((TextField) schemaField.getType()).getMultiTermAnalyzer();

  assertTrue(multiTerm instanceof TokenizerChain);
  final TokenizerChain chain = (TokenizerChain) multiTerm;
  assertTrue(chain.getTokenizerFactory() instanceof KeywordTokenizerFactory);

  for (TokenFilterFactory filter : chain.getTokenFilterFactories()) {
    final boolean allowed = filter instanceof ASCIIFoldingFilterFactory
        || filter instanceof LowerCaseFilterFactory;
    assertTrue(allowed);
  }
  assertTrue(chain.getCharFilterFactories().length == 0);
}
use of org.apache.lucene.analysis.util.TokenFilterFactory in project lucene-solr by apache.
the class TestFactories method doTestTokenFilter.
/**
 * Looks up the named token filter factory, initializes it, and runs a short randomized
 * analysis pass through it.
 *
 * <p>Nothing is checked when {@code initialize} returns {@code null} for the factory class.
 *
 * @param tokenfilter the name under which the token filter factory is looked up
 * @throws IOException if the randomized analysis check fails with an I/O error
 */
private void doTestTokenFilter(String tokenfilter) throws IOException {
  Class<? extends TokenFilterFactory> factoryClazz = TokenFilterFactory.lookupClass(tokenfilter);
  TokenFilterFactory factory = (TokenFilterFactory) initialize(factoryClazz);
  if (factory != null) {
    // if it implements MultiTermAware, sanity check its impl
    if (factory instanceof MultiTermAwareComponent) {
      AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
      assertNotNull(mtc);
      // it's not ok to return a charfilter or tokenizer here, this makes no sense
      assertTrue(mtc instanceof TokenFilterFactory);
    }
    // beast it just a little, it shouldn't throw exceptions:
    // (it should have thrown them in initialize)
    Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
    try {
      checkRandomData(random(), a, 20, 20, false, false);
    } finally {
      // Close the analyzer even when the random-data check fails, so it is not leaked.
      a.close();
    }
  }
}
Aggregations