Use of org.apache.jena.query.text.assembler.Params.ParamSpec in project jena by apache.
The class GenericFilterAssembler, method open.
/*
    <#indexLucene> a text:TextIndexLucene ;
        text:directory <file:Lucene> ;
        text:entityMap <#entMap> ;
        text:defineAnalyzers (
            [text:addLang "sa-x-iast" ;
             text:analyzer [ . . . ]]
            [text:defineAnalyzer <#foo> ;
             text:analyzer [ . . . ]]
            [text:defineFilter <#bar> ;
             text:filter [
                 a text:GenericFilter ;
                 text:class "org.apache.jena.query.text.filter.SelectiveFoldingFilter" ;
                 text:params (
                     [ text:paramName "whitelisted" ;
                       text:paramType text:TypeSet ;
                       text:paramValue ("ç") ]
                     )
                 ]
            ]
        )
 */
@Override
public FilterSpec open(Assembler a, Resource root, Mode mode) {
    if (root.hasProperty(TextVocab.pClass)) {
        // text:class is expected to be a string literal
        String className = root.getProperty(TextVocab.pClass).getString();
        // is the class accessible?
        Class<?> clazz = null;
        try {
            clazz = Class.forName(className);
        } catch (ClassNotFoundException e) {
            Log.error(this, "Filter class " + className + " not found. " + e.getMessage(), e);
            return null;
        }
        // is the class a TokenFilter?
        if (!TokenFilter.class.isAssignableFrom(clazz)) {
            Log.error(this, clazz.getName() + " has to be a subclass of " + TokenFilter.class.getName());
            return null;
        }
        if (root.hasProperty(TextVocab.pParams)) {
            RDFNode node = root.getProperty(TextVocab.pParams).getObject();
            if (!node.isResource()) {
                throw new TextIndexException("text:params must be a list of parameter resources: " + node);
            }
            List<ParamSpec> specs = Params.getParamSpecs((Resource) node);
            // Split the param specs into classes and values for constructor lookup.
            // Add an initial param for the TokenStream source. The source value is
            // set to null here; the actual value is supplied by ConfigurableAnalyzer
            // when the filter is used.
            final Class<?>[] paramClasses = new Class<?>[specs.size() + 1];
            paramClasses[0] = TokenStream.class;
            final Object[] paramValues = new Object[specs.size() + 1];
            paramValues[0] = null;
            for (int i = 0; i < specs.size(); i++) {
                ParamSpec spec = specs.get(i);
                paramClasses[i + 1] = spec.getValueClass();
                paramValues[i + 1] = spec.getValue();
            }
            // create the spec for the new filter
            return new FilterSpec(clazz, paramClasses, paramValues);
        } else {
            // use the TokenStream-only constructor for the new filter
            return new FilterSpec(clazz, new Class<?>[] { TokenStream.class }, new Object[] { null });
        }
    } else {
        throw new TextIndexException("text:class property is required by GenericFilter: " + root);
    }
}
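A filter named by text:class has to be a Lucene TokenFilter whose public constructor takes a TokenStream first, followed by one argument per text:params entry; the null placeholder added above reserves that TokenStream slot until ConfigurableAnalyzer supplies the real stream. The sketch below shows what such a filter could look like. WhitelistLowerCaseFilter is a hypothetical name, not part of Jena or Lucene, and it assumes a single text:TypeSet parameter is handed to the constructor as a Set<String>, like the whitelist in the example configuration.

import java.io.IOException;
import java.util.Locale;
import java.util.Set;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical filter (illustration only): lower-cases every token unless the
// token is in the whitelist. The (TokenStream, Set<String>) constructor is the
// shape GenericFilterAssembler would look up for a single text:TypeSet parameter.
public class WhitelistLowerCaseFilter extends TokenFilter {
    private final Set<String> whitelisted;
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    public WhitelistLowerCaseFilter(TokenStream input, Set<String> whitelisted) {
        super(input);
        this.whitelisted = whitelisted;
    }

    @Override
    public final boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
            return false;
        }
        String term = termAtt.toString();
        if (!whitelisted.contains(term)) {
            termAtt.setEmpty().append(term.toLowerCase(Locale.ROOT));
        }
        return true;
    }
}

Under those assumptions, the filter would be referenced with its fully qualified name in text:class and a text:params list containing one entry of text:paramType text:TypeSet.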
Use of org.apache.jena.query.text.assembler.Params.ParamSpec in project jena by apache.
The class GenericTokenizerAssembler, method open.
/*
    <#indexLucene> a text:TextIndexLucene ;
        text:directory <file:Lucene> ;
        text:entityMap <#entMap> ;
        text:defineAnalyzers (
            [text:addLang "sa-x-iast" ;
             text:analyzer [ . . . ]]
            [text:defineAnalyzer <#foo> ;
             text:analyzer [ . . . ]]
            [text:defineTokenizer <#bar> ;
             text:tokenizer [
                 a text:GenericTokenizer ;
                 text:class "org.apache.lucene.analysis.ngram.NGramTokenizer" ;
                 text:params (
                     [ text:paramName "minGram" ;
                       text:paramType text:TypeInt ;
                       text:paramValue 3 ]
                     [ text:paramName "maxGram" ;
                       text:paramType text:TypeInt ;
                       text:paramValue 7 ]
                     )
                 ]
            ]
        )
 */
@Override
public TokenizerSpec open(Assembler a, Resource root, Mode mode) {
    if (root.hasProperty(TextVocab.pClass)) {
        // text:class is expected to be a string literal
        String className = root.getProperty(TextVocab.pClass).getString();
        // is the class accessible?
        Class<?> clazz = null;
        try {
            clazz = Class.forName(className);
        } catch (ClassNotFoundException e) {
            Log.error(this, "Tokenizer class " + className + " not found. " + e.getMessage(), e);
            return null;
        }
        // is the class a Tokenizer?
        if (!Tokenizer.class.isAssignableFrom(clazz)) {
            Log.error(this, clazz.getName() + " has to be a subclass of " + Tokenizer.class.getName());
            return null;
        }
        if (root.hasProperty(TextVocab.pParams)) {
            RDFNode node = root.getProperty(TextVocab.pParams).getObject();
            if (!node.isResource()) {
                throw new TextIndexException("text:params must be a list of parameter resources: " + node);
            }
            List<ParamSpec> specs = Params.getParamSpecs((Resource) node);
            // split the param specs into classes and values for constructor lookup
            final Class<?>[] paramClasses = new Class<?>[specs.size()];
            final Object[] paramValues = new Object[specs.size()];
            for (int i = 0; i < specs.size(); i++) {
                ParamSpec spec = specs.get(i);
                paramClasses[i] = spec.getValueClass();
                paramValues[i] = spec.getValue();
            }
            // create the spec for the new tokenizer
            return new TokenizerSpec(clazz, paramClasses, paramValues);
        } else {
            // use the nullary Tokenizer constructor
            return new TokenizerSpec(clazz, new Class<?>[0], new Object[0]);
        }
    } else {
        throw new TextIndexException("text:class property is required by GenericTokenizer: " + root);
    }
}
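The TokenizerSpec returned here only records the target class plus the parameter classes and values; the tokenizer itself is constructed later from that information. The demo below is a minimal sketch, not Jena code, of how such a reflective constructor lookup can work; it assumes the two text:TypeInt parameters from the example resolve to primitive int, and TokenizerSpecDemo and its instantiate helper are hypothetical names.

import java.lang.reflect.Constructor;
import org.apache.lucene.analysis.Tokenizer;

// Hypothetical demo (not Jena API): build a Tokenizer from a class plus the
// constructor argument classes/values that the assembler collected.
public final class TokenizerSpecDemo {

    static Tokenizer instantiate(Class<?> clazz, Class<?>[] paramClasses, Object[] paramValues) throws Exception {
        // Look up the constructor matching the parameter classes and invoke it.
        Constructor<?> ctor = clazz.getConstructor(paramClasses);
        return (Tokenizer) ctor.newInstance(paramValues);
    }

    public static void main(String[] args) throws Exception {
        // Mirrors the example configuration: NGramTokenizer with minGram 3, maxGram 7.
        Class<?> clazz = Class.forName("org.apache.lucene.analysis.ngram.NGramTokenizer");
        Class<?>[] paramClasses = { int.class, int.class };   // assumed mapping of text:TypeInt
        Object[] paramValues = { 3, 7 };
        Tokenizer tokenizer = instantiate(clazz, paramClasses, paramValues);
        System.out.println("Created " + tokenizer.getClass().getName());
    }
}

Running main constructs NGramTokenizer(3, 7), i.e. the minGram/maxGram values from the example configuration.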