Use of org.apache.lucene.analysis.util.CharFilterFactory in the lucene-solr project (Apache): class ManagedIndexSchema, method informResourceLoaderAwareObjectsInChain.
/**
 * After creating a new FieldType, it may contain components that implement
 * the ResourceLoaderAware interface, which need to be informed after they
 * are loaded (as they depend on this callback to complete initialization work).
 *
 * @param chain the analysis chain whose char filter, tokenizer, and token
 *              filter factories should be informed
 */
protected void informResourceLoaderAwareObjectsInChain(TokenizerChain chain) {
    for (CharFilterFactory charFilter : chain.getCharFilterFactories()) {
        informIfResourceLoaderAware(charFilter);
    }
    informIfResourceLoaderAware(chain.getTokenizerFactory());
    for (TokenFilterFactory tokenFilter : chain.getTokenFilterFactories()) {
        informIfResourceLoaderAware(tokenFilter);
    }
}

/**
 * Informs the given analysis factory with this schema's resource loader
 * if (and only if) it implements ResourceLoaderAware.
 *
 * @param factory a char filter, tokenizer, or token filter factory
 * @throws SolrException wrapping any IOException raised during inform()
 */
private void informIfResourceLoaderAware(Object factory) {
    if (factory instanceof ResourceLoaderAware) {
        try {
            ((ResourceLoaderAware) factory).inform(loader);
        } catch (IOException e) {
            // Component initialization failed; surface as a server error.
            throw new SolrException(ErrorCode.SERVER_ERROR, e);
        }
    }
}
Use of org.apache.lucene.analysis.util.CharFilterFactory in the lucene-solr project (Apache): class FieldType, method getAnalyzerProperties.
/**
 * Returns a description of the given analyzer, by either reporting the Analyzer class
 * name (and optionally luceneMatchVersion) if it's not a TokenizerChain, or if it is,
 * querying each analysis factory for its name and args.
 *
 * @param analyzer the analyzer to describe
 * @return an ordered map describing the analyzer's components and their arguments
 */
protected static SimpleOrderedMap<Object> getAnalyzerProperties(Analyzer analyzer) {
    SimpleOrderedMap<Object> analyzerProps = new SimpleOrderedMap<>();
    if (analyzer instanceof TokenizerChain) {
        TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
        // Char filters (optional): only report the section when at least one exists.
        CharFilterFactory[] charFilterFactories = tokenizerChain.getCharFilterFactories();
        if (0 < charFilterFactories.length) {
            List<SimpleOrderedMap<Object>> charFilterProps = new ArrayList<>();
            for (CharFilterFactory charFilterFactory : charFilterFactories) {
                SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
                props.add(CLASS_NAME, charFilterFactory.getClassArg());
                addFactoryArgs(props, charFilterFactory.getOriginalArgs(),
                    charFilterFactory.isExplicitLuceneMatchVersion());
                charFilterProps.add(props);
            }
            analyzerProps.add(CHAR_FILTERS, charFilterProps);
        }
        // Tokenizer (always present in a TokenizerChain).
        SimpleOrderedMap<Object> tokenizerProps = new SimpleOrderedMap<>();
        TokenizerFactory tokenizerFactory = tokenizerChain.getTokenizerFactory();
        tokenizerProps.add(CLASS_NAME, tokenizerFactory.getClassArg());
        addFactoryArgs(tokenizerProps, tokenizerFactory.getOriginalArgs(),
            tokenizerFactory.isExplicitLuceneMatchVersion());
        analyzerProps.add(TOKENIZER, tokenizerProps);
        // Token filters (optional): only report the section when at least one exists.
        TokenFilterFactory[] filterFactories = tokenizerChain.getTokenFilterFactories();
        if (0 < filterFactories.length) {
            List<SimpleOrderedMap<Object>> filterProps = new ArrayList<>();
            for (TokenFilterFactory filterFactory : filterFactories) {
                SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
                props.add(CLASS_NAME, filterFactory.getClassArg());
                addFactoryArgs(props, filterFactory.getOriginalArgs(),
                    filterFactory.isExplicitLuceneMatchVersion());
                filterProps.add(props);
            }
            analyzerProps.add(FILTERS, filterProps);
        }
    } else {
        // analyzer is not instanceof TokenizerChain
        analyzerProps.add(CLASS_NAME, analyzer.getClass().getName());
        if (analyzer.getVersion() != Version.LATEST) {
            analyzerProps.add(LUCENE_MATCH_VERSION_PARAM, analyzer.getVersion().toString());
        }
    }
    return analyzerProps;
}

/**
 * Copies an analysis factory's original configuration args into {@code props},
 * skipping the class name and including luceneMatchVersion only when it was
 * explicitly configured (implicit versions are not part of the user's config).
 *
 * @param props                      destination map for the args
 * @param factoryArgs                the factory's original args; may be null
 * @param explicitLuceneMatchVersion whether luceneMatchVersion was explicitly set
 */
private static void addFactoryArgs(SimpleOrderedMap<Object> props,
                                   Map<String, String> factoryArgs,
                                   boolean explicitLuceneMatchVersion) {
    if (null == factoryArgs) {
        return;
    }
    for (Map.Entry<String, String> entry : factoryArgs.entrySet()) {
        String key = entry.getKey();
        if (CLASS_NAME.equals(key)) {
            continue; // already reported separately
        }
        if (LUCENE_MATCH_VERSION_PARAM.equals(key) && !explicitLuceneMatchVersion) {
            continue; // implicit match version is not user-visible config
        }
        props.add(key, entry.getValue());
    }
}
Use of org.apache.lucene.analysis.util.CharFilterFactory in the lucene-solr project (Apache): class FieldTypePluginLoader, method readAnalyzer.
//
// <analyzer><tokenizer class="...."/><tokenizer class="...." arg="....">
//
//
/**
 * Reads an {@code <analyzer>} DOM node into an Analyzer: either an explicit
 * analyzer class (via the "class" attribute), or a TokenizerChain assembled
 * from nested charFilter/tokenizer/filter factory elements.
 *
 * @param node the analyzer node, or null (returns null)
 * @return the configured Analyzer, or null if node is null
 * @throws XPathExpressionException if the nested factory XPath queries fail
 * @throws SolrException on any configuration error (missing tokenizer, both a
 *         class and nested factories, unloadable analyzer class, etc.)
 */
private Analyzer readAnalyzer(Node node) throws XPathExpressionException {
    final SolrResourceLoader loader = schema.getResourceLoader();
    if (node == null)
        return null;
    NamedNodeMap attrs = node.getAttributes();
    String analyzerName = DOMUtil.getAttr(attrs, "class");
    // check for all of these up front, so we can error if used in
    // conjunction with an explicit analyzer class.
    NodeList charFilterNodes = (NodeList) xpath.evaluate("./charFilter", node, XPathConstants.NODESET);
    NodeList tokenizerNodes = (NodeList) xpath.evaluate("./tokenizer", node, XPathConstants.NODESET);
    NodeList tokenFilterNodes = (NodeList) xpath.evaluate("./filter", node, XPathConstants.NODESET);
    if (analyzerName != null) {
        // own custom nodes (ie: <description> or something like that)
        if (0 != charFilterNodes.getLength() || 0 != tokenizerNodes.getLength() || 0 != tokenFilterNodes.getLength()) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Configuration Error: Analyzer class='" + analyzerName + "' can not be combined with nested analysis factories");
        }
        try {
            // No need to be core-aware as Analyzers are not in the core-aware list
            final Class<? extends Analyzer> clazz = loader.findClass(analyzerName, Analyzer.class);
            // Use the no-arg constructor reflectively; Class.newInstance() is
            // deprecated and swallows checked constructor exceptions.
            Analyzer analyzer = clazz.getConstructor().newInstance();
            final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM);
            final Version luceneMatchVersion = (matchVersionStr == null) ? schema.getDefaultLuceneMatchVersion() : Config.parseLuceneVersionString(matchVersionStr);
            if (luceneMatchVersion == null) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Configuration Error: Analyzer '" + clazz.getName() + "' needs a 'luceneMatchVersion' parameter");
            }
            analyzer.setVersion(luceneMatchVersion);
            return analyzer;
        } catch (Exception e) {
            log.error("Cannot load analyzer: " + analyzerName, e);
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Cannot load analyzer: " + analyzerName, e);
        }
    }
    // Load the CharFilters
    final ArrayList<CharFilterFactory> charFilters = new ArrayList<>();
    AbstractPluginLoader<CharFilterFactory> charFilterLoader = new AbstractPluginLoader<CharFilterFactory>("[schema.xml] analyzer/charFilter", CharFilterFactory.class, false, false) {
        @Override
        protected CharFilterFactory create(SolrResourceLoader loader, String name, String className, Node node) throws Exception {
            final Map<String, String> params = DOMUtil.toMap(node.getAttributes());
            String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
            params.put(LUCENE_MATCH_VERSION_PARAM, parseConfiguredVersion(configuredVersion, CharFilterFactory.class.getSimpleName()).toString());
            CharFilterFactory factory = loader.newInstance(className, CharFilterFactory.class, getDefaultPackages(), new Class[] { Map.class }, new Object[] { params });
            // Remember whether the version came from explicit config or a default.
            factory.setExplicitLuceneMatchVersion(null != configuredVersion);
            return factory;
        }
        @Override
        protected void init(CharFilterFactory plugin, Node node) throws Exception {
            if (plugin != null) {
                charFilters.add(plugin);
            }
        }
        @Override
        protected CharFilterFactory register(String name, CharFilterFactory plugin) {
            // used for map registration
            return null;
        }
    };
    charFilterLoader.load(loader, charFilterNodes);
    // Load the Tokenizer
    // Although an analyzer only allows a single Tokenizer, we load a list to make sure
    // the configuration is ok
    final ArrayList<TokenizerFactory> tokenizers = new ArrayList<>(1);
    AbstractPluginLoader<TokenizerFactory> tokenizerLoader = new AbstractPluginLoader<TokenizerFactory>("[schema.xml] analyzer/tokenizer", TokenizerFactory.class, false, false) {
        @Override
        protected TokenizerFactory create(SolrResourceLoader loader, String name, String className, Node node) throws Exception {
            final Map<String, String> params = DOMUtil.toMap(node.getAttributes());
            String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
            params.put(LUCENE_MATCH_VERSION_PARAM, parseConfiguredVersion(configuredVersion, TokenizerFactory.class.getSimpleName()).toString());
            TokenizerFactory factory = loader.newInstance(className, TokenizerFactory.class, getDefaultPackages(), new Class[] { Map.class }, new Object[] { params });
            factory.setExplicitLuceneMatchVersion(null != configuredVersion);
            return factory;
        }
        @Override
        protected void init(TokenizerFactory plugin, Node node) throws Exception {
            if (!tokenizers.isEmpty()) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The schema defines multiple tokenizers for: " + node);
            }
            tokenizers.add(plugin);
        }
        @Override
        protected TokenizerFactory register(String name, TokenizerFactory plugin) {
            // used for map registration
            return null;
        }
    };
    tokenizerLoader.load(loader, tokenizerNodes);
    // Make sure something was loaded
    if (tokenizers.isEmpty()) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "analyzer without class or tokenizer");
    }
    // Load the Filters
    final ArrayList<TokenFilterFactory> filters = new ArrayList<>();
    AbstractPluginLoader<TokenFilterFactory> filterLoader = new AbstractPluginLoader<TokenFilterFactory>("[schema.xml] analyzer/filter", TokenFilterFactory.class, false, false) {
        @Override
        protected TokenFilterFactory create(SolrResourceLoader loader, String name, String className, Node node) throws Exception {
            final Map<String, String> params = DOMUtil.toMap(node.getAttributes());
            String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
            params.put(LUCENE_MATCH_VERSION_PARAM, parseConfiguredVersion(configuredVersion, TokenFilterFactory.class.getSimpleName()).toString());
            TokenFilterFactory factory = loader.newInstance(className, TokenFilterFactory.class, getDefaultPackages(), new Class[] { Map.class }, new Object[] { params });
            factory.setExplicitLuceneMatchVersion(null != configuredVersion);
            return factory;
        }
        @Override
        protected void init(TokenFilterFactory plugin, Node node) throws Exception {
            if (plugin != null) {
                filters.add(plugin);
            }
        }
        @Override
        protected TokenFilterFactory register(String name, TokenFilterFactory plugin) throws Exception {
            // used for map registration
            return null;
        }
    };
    filterLoader.load(loader, tokenFilterNodes);
    return new TokenizerChain(charFilters.toArray(new CharFilterFactory[0]), tokenizers.get(0), filters.toArray(new TokenFilterFactory[0]));
}
Use of org.apache.lucene.analysis.util.CharFilterFactory in the tika project (Apache): class AnalyzerDeserializer, method buildCharFilters.
/**
 * Builds an array of CharFilterFactory instances from a JSON array of
 * {"factory": ..., "params": {...}} objects.
 *
 * @param el           JSON element describing the char filters; may be null
 * @param analyzerName name of the enclosing analyzer, used in error messages
 * @return the configured factories, or null if {@code el} is null/JSON null
 * @throws IOException              if a factory's inform() fails
 * @throws IllegalArgumentException on any malformed configuration
 */
private static CharFilterFactory[] buildCharFilters(JsonElement el, String analyzerName) throws IOException {
    if (el == null || el.isJsonNull()) {
        // Note: callers must tolerate null here (distinct from an empty array).
        return null;
    }
    if (!el.isJsonArray()) {
        throw new IllegalArgumentException("Expecting array for charfilters, but got:" + el.toString() + " for " + analyzerName);
    }
    JsonArray jsonArray = (JsonArray) el;
    List<CharFilterFactory> ret = new LinkedList<>();
    for (JsonElement filterMap : jsonArray) {
        if (!(filterMap instanceof JsonObject)) {
            throw new IllegalArgumentException("Expecting a map with \"factory\" string and \"params\" map in char filter factory;" + " not: " + filterMap.toString() + " in " + analyzerName);
        }
        JsonElement factoryEl = ((JsonObject) filterMap).get(FACTORY);
        if (factoryEl == null || !factoryEl.isJsonPrimitive()) {
            throw new IllegalArgumentException("Expecting value for factory in char filter factory builder in:" + analyzerName);
        }
        // Expand the "oala." shorthand to the full lucene analysis package.
        String factoryName = factoryEl.getAsString();
        factoryName = factoryName.replaceAll("oala.", "org.apache.lucene.analysis.");
        JsonElement paramsEl = ((JsonObject) filterMap).get(PARAMS);
        Map<String, String> params = mapify(paramsEl);
        // Map the fully qualified class name back to its SPI name.
        String spiName = "";
        for (String s : CharFilterFactory.availableCharFilters()) {
            Class<?> clazz = CharFilterFactory.lookupClass(s);
            if (clazz.getName().equals(factoryName)) {
                spiName = s;
                break;
            }
        }
        if (spiName.isEmpty()) {
            throw new IllegalArgumentException("A SPI class of type org.apache.lucene.analysis.util.CharFilterFactory with name " + "'" + factoryName + "' does not exist.");
        }
        try {
            CharFilterFactory charFilterFactory = CharFilterFactory.forName(spiName, params);
            if (charFilterFactory instanceof ResourceLoaderAware) {
                // Resolve resources (stopword files, etc.) from the classpath.
                ((ResourceLoaderAware) charFilterFactory).inform(new ClasspathResourceLoader(AnalyzerDeserializer.class));
            }
            ret.add(charFilterFactory);
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("While trying to load " + analyzerName + ": " + e.getMessage(), e);
        }
    }
    // toArray on an empty list already yields an empty array.
    return ret.toArray(new CharFilterFactory[0]);
}
Use of org.apache.lucene.analysis.util.CharFilterFactory in the stanbol project (Apache): class LuceneLabelTokenizer, method activate.
@Activate
protected void activate(ComponentContext ctx) throws ConfigurationException {
    //init the Solr ResourceLoader used for initialising the components
    resourceLoader = new StanbolResourceLoader(parentResourceLoader);
    //init the Solr CharFilterFactory (optional)
    Object value = ctx.getProperties().get(PROPERTY_CHAR_FILTER_FACTORY);
    if (value != null && !value.toString().isEmpty() && !DEFAULT_CLASS_NAME_CONFIG.equals(value)) {
        Entry<String, Map<String, String>> charFilterConfig = parseConfigLine(PROPERTY_CHAR_FILTER_FACTORY, value.toString());
        charFilterFactory = initAnalyzer(PROPERTY_CHAR_FILTER_FACTORY, charFilterConfig.getKey(), CharFilterFactory.class, charFilterConfig.getValue());
    } else {
        charFilterFactory = null;
    }
    //now initialise the TokenizerFactory (required)
    value = ctx.getProperties().get(PROPERTY_TOKENIZER_FACTORY);
    if (value == null || value.toString().isEmpty() || DEFAULT_CLASS_NAME_CONFIG.equals(value)) {
        throw new ConfigurationException(PROPERTY_TOKENIZER_FACTORY, "The class name of the Lucene Tokemizer MUST BE configured");
    }
    // NOTE(review): the original passed PROPERTY_CHAR_FILTER_FACTORY here, which
    // mis-attributes parse errors of the tokenizer config to the char filter property.
    Entry<String, Map<String, String>> tokenizerConfig = parseConfigLine(PROPERTY_TOKENIZER_FACTORY, value.toString());
    tokenizerFactory = initAnalyzer(PROPERTY_TOKENIZER_FACTORY, tokenizerConfig.getKey(), TokenizerFactory.class, tokenizerConfig.getValue());
    //initialise the list of Token Filters
    Collection<String> values;
    value = ctx.getProperties().get(PROPERTY_TOKEN_FILTER_FACTORY);
    if (value == null) {
        values = Collections.emptyList();
    } else if (value instanceof Collection<?>) {
        values = new ArrayList<String>(((Collection<?>) value).size());
        // Iterate with a wildcard to avoid an unchecked cast to Collection<Object>.
        for (Object v : (Collection<?>) value) {
            values.add(v.toString());
        }
    } else if (value instanceof String[]) {
        values = Arrays.asList((String[]) value);
    } else if (value instanceof String) {
        values = Collections.singleton((String) value);
    } else {
        throw new ConfigurationException(PROPERTY_TOKEN_FILTER_FACTORY, "The type '" + value.getClass() + "' of the parsed value is not supported (supported are " + "Collections, String[] and String values)!");
    }
    for (String filterConfigLine : values) {
        if (filterConfigLine == null || filterConfigLine.isEmpty() || DEFAULT_CLASS_NAME_CONFIG.equals(filterConfigLine)) {
            //ignore null, empty and the default value
            continue;
        }
        // NOTE(review): same copy-paste fix — report errors against the token
        // filter property, not the char filter property.
        Entry<String, Map<String, String>> filterConfig = parseConfigLine(PROPERTY_TOKEN_FILTER_FACTORY, filterConfigLine);
        TokenFilterFactory tff = initAnalyzer(PROPERTY_TOKEN_FILTER_FACTORY, filterConfig.getKey(), TokenFilterFactory.class, filterConfig.getValue());
        filterFactories.add(tff);
    }
    //init the language configuration
    value = ctx.getProperties().get(LabelTokenizer.SUPPORTED_LANUAGES);
    if (value == null) {
        throw new ConfigurationException(LabelTokenizer.SUPPORTED_LANUAGES, "The language " + "configuration MUST BE present!");
    }
    langConf.setConfiguration(ctx.getProperties());
}
Aggregations