Use of org.apache.jena.query.text.TextIndexException in project jena by apache.
From the class ConfigurableAnalyzerAssembler, the open method:
/*
text:map (
     [ text:field "text" ;
       text:predicate rdfs:label ;
       text:analyzer [
           a text:ConfigurableAnalyzer ;
           text:tokenizer text:LetterTokenizer ;
           text:filters (text:LowerCaseFilter)
       ]
     ]
    ) .
*/
@Override
public Analyzer open(Assembler a, Resource root, Mode mode) {
    if (root.hasProperty(TextVocab.pTokenizer)) {
        Resource tokenizerResource = root.getPropertyResourceValue(TextVocab.pTokenizer);
        String tokenizer = tokenizerResource.getURI();
        List<String> filters;
        if (root.hasProperty(TextVocab.pFilters)) {
            Resource filtersResource = root.getPropertyResourceValue(TextVocab.pFilters);
            filters = toFilterList(filtersResource);
        } else {
            filters = new ArrayList<>();
        }
        return new ConfigurableAnalyzer(tokenizer, filters);
    } else {
        throw new TextIndexException("text:tokenizer setting is required by ConfigurableAnalyzer");
    }
}
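The toFilterList helper called above is not part of this snippet. A minimal sketch of such an rdf:List walk, assuming the filters are named by resources such as text:LowerCaseFilter (the body below is illustrative, not copied from the Jena source):

// Illustrative sketch only: collect the URIs of the filter resources from the
// rdf:List given as the object of text:filters.
private List<String> toFilterList(Resource list) {
    List<String> filters = new ArrayList<>();
    Resource current = list;
    while (current != null && !current.equals(RDF.nil)) {
        Resource filter = current.getPropertyResourceValue(RDF.first);
        if (filter == null) {
            throw new TextIndexException("text:filters list is not well formed: " + current);
        }
        filters.add(filter.getURI());   // e.g. the URI behind text:LowerCaseFilter
        current = current.getPropertyResourceValue(RDF.rest);
    }
    return filters;
}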
Use of org.apache.jena.query.text.TextIndexException in project jena by apache.
From the class GenericAnalyzerAssembler, the open method:
/*
text:map (
     [ text:field "text" ;
       text:predicate rdfs:label ;
       text:analyzer [
           a text:GenericAnalyzer ;
           text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
           text:params (
               [ text:paramName "stopwords" ;
                 text:paramType text:TypeSet ;
                 text:paramValue ("the" "a" "an") ]
               [ text:paramName "stemExclusionSet" ;
                 text:paramType text:TypeSet ;
                 text:paramValue ("ing" "ed") ]
           )
       ]
     ]
    ) .
*/
@Override
public Analyzer open(Assembler a, Resource root, Mode mode) {
    if (root.hasProperty(TextVocab.pClass)) {
        // text:class is expected to be a string literal
        String className = root.getProperty(TextVocab.pClass).getString();
        // is the class accessible?
        Class<?> clazz = null;
        try {
            clazz = Class.forName(className);
        } catch (ClassNotFoundException e) {
            Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
            return null;
        }
        // is the class an Analyzer?
        if (!Analyzer.class.isAssignableFrom(clazz)) {
            Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
            return null;
        }
        if (root.hasProperty(TextVocab.pParams)) {
            RDFNode node = root.getProperty(TextVocab.pParams).getObject();
            if (!node.isResource()) {
                throw new TextIndexException("text:params must be a list of parameter resources: " + node);
            }
            List<Params.ParamSpec> specs = Params.getParamSpecs((Resource) node);
            // split the param specs into classes and values for constructor lookup
            final Class<?>[] paramClasses = new Class<?>[specs.size()];
            final Object[] paramValues = new Object[specs.size()];
            for (int i = 0; i < specs.size(); i++) {
                Params.ParamSpec spec = specs.get(i);
                paramClasses[i] = spec.getValueClass();
                paramValues[i] = spec.getValue();
            }
            // create the new analyzer
            return newAnalyzer(clazz, paramClasses, paramValues);
        } else {
            // use the nullary Analyzer constructor
            return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
        }
    } else {
        throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
    }
}
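The newAnalyzer helper is not shown in this snippet. A plausible sketch of the reflective constructor lookup it implies, using java.lang.reflect.Constructor and the same Log helper seen above (illustrative only, not the actual Jena implementation):

// Illustrative sketch only: find a constructor matching the declared text:params
// and instantiate the analyzer, logging and returning null on failure.
private Analyzer newAnalyzer(Class<?> clazz, Class<?>[] paramClasses, Object[] paramValues) {
    String className = clazz.getName();
    try {
        Constructor<?> cstr = clazz.getDeclaredConstructor(paramClasses);
        return (Analyzer) cstr.newInstance(paramValues);
    } catch (NoSuchMethodException e) {
        Log.error(this, "No constructor of " + className + " matches the declared text:params", e);
    } catch (ReflectiveOperationException | IllegalArgumentException e) {
        Log.error(this, "Could not instantiate analyzer " + className + ": " + e.getMessage(), e);
    }
    return null;
}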
Use of org.apache.jena.query.text.TextIndexException in project jena by apache.
From the class GenericTokenizerAssembler, the open method:
/*
<#indexLucene> a text:TextIndexLucene ;
    text:directory <file:Lucene> ;
    text:entityMap <#entMap> ;
    text:defineAnalyzers (
        [ text:addLang "sa-x-iast" ;
          text:analyzer [ . . . ] ]
        [ text:defineAnalyzer <#foo> ;
          text:analyzer [ . . . ] ]
        [ text:defineTokenizer <#bar> ;
          text:tokenizer [
              a text:GenericTokenizer ;
              text:class "org.apache.lucene.analysis.ngram.NGramTokenizer" ;
              text:params (
                  [ text:paramName "minGram" ;
                    text:paramType text:TypeInt ;
                    text:paramValue 3 ]
                  [ text:paramName "maxGram" ;
                    text:paramType text:TypeInt ;
                    text:paramValue 7 ]
              )
          ]
        ]
    ) .
*/
@Override
public TokenizerSpec open(Assembler a, Resource root, Mode mode) {
    if (root.hasProperty(TextVocab.pClass)) {
        // text:class is expected to be a string literal
        String className = root.getProperty(TextVocab.pClass).getString();
        // is the class accessible?
        Class<?> clazz = null;
        try {
            clazz = Class.forName(className);
        } catch (ClassNotFoundException e) {
            Log.error(this, "Tokenizer class " + className + " not found. " + e.getMessage(), e);
            return null;
        }
        // is the class a Tokenizer?
        if (!Tokenizer.class.isAssignableFrom(clazz)) {
            Log.error(this, clazz.getName() + " has to be a subclass of " + Tokenizer.class.getName());
            return null;
        }
        if (root.hasProperty(TextVocab.pParams)) {
            RDFNode node = root.getProperty(TextVocab.pParams).getObject();
            if (!node.isResource()) {
                throw new TextIndexException("text:params must be a list of parameter resources: " + node);
            }
            List<ParamSpec> specs = Params.getParamSpecs((Resource) node);
            // split the param specs into classes and values for constructor lookup
            final Class<?>[] paramClasses = new Class<?>[specs.size()];
            final Object[] paramValues = new Object[specs.size()];
            for (int i = 0; i < specs.size(); i++) {
                ParamSpec spec = specs.get(i);
                paramClasses[i] = spec.getValueClass();
                paramValues[i] = spec.getValue();
            }
            // create the new tokenizer spec
            return new TokenizerSpec(clazz, paramClasses, paramValues);
        } else {
            // use the nullary Tokenizer constructor
            return new TokenizerSpec(clazz, new Class<?>[0], new Object[0]);
        }
    } else {
        throw new TextIndexException("text:class property is required by GenericTokenizer: " + root);
    }
}
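Unlike the analyzer assemblers, this one returns a TokenizerSpec rather than an instantiated Tokenizer; the spec is consumed elsewhere. A minimal sketch of the value holder implied by the constructor calls above (field names are illustrative, not taken from the Jena source):

// Illustrative sketch only: a plain holder pairing the Tokenizer subclass with the
// constructor parameter types and values derived from text:params.
public static final class TokenizerSpec {
    final Class<?> clazz;            // e.g. org.apache.lucene.analysis.ngram.NGramTokenizer
    final Class<?>[] paramClasses;   // e.g. { int.class, int.class }
    final Object[] paramValues;      // e.g. { 3, 7 }

    public TokenizerSpec(Class<?> clazz, Class<?>[] paramClasses, Object[] paramValues) {
        this.clazz = clazz;
        this.paramClasses = paramClasses;
        this.paramValues = paramValues;
    }
}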
Use of org.apache.jena.query.text.TextIndexException in project jena by apache.
From the class LocalizedAnalyzerAssembler, the open method:
/*
text:map (
     [ text:field "text" ;
       text:predicate rdfs:label ;
       text:analyzer [
           a lucene:LocalizedAnalyzer ;
           text:language "en" ;
       ]
     ]
    ) .
*/
@Override
public Analyzer open(Assembler a, Resource root, Mode mode) {
    if (root.hasProperty(TextVocab.pLanguage)) {
        RDFNode node = root.getProperty(TextVocab.pLanguage).getObject();
        if (!node.isLiteral()) {
            throw new TextIndexException("text:language property must be a string : " + node);
        }
        String lang = node.toString();
        return Util.getLocalizedAnalyzer(lang);
    } else {
        return new StandardAnalyzer();
    }
}
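Util.getLocalizedAnalyzer is not shown here; roughly, it maps a language tag onto one of Lucene's per-language analyzers. A sketch of that idea, using a few standard Lucene analyzer classes (illustrative only, not the actual Util implementation):

// Illustrative sketch only: pick a Lucene analyzer for a language tag,
// falling back to StandardAnalyzer when the tag is not recognised.
static Analyzer localizedAnalyzer(String lang) {
    switch (lang) {
        case "en": return new org.apache.lucene.analysis.en.EnglishAnalyzer();
        case "fr": return new org.apache.lucene.analysis.fr.FrenchAnalyzer();
        case "de": return new org.apache.lucene.analysis.de.GermanAnalyzer();
        default:   return new StandardAnalyzer();
    }
}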
Use of org.apache.jena.query.text.TextIndexException in project jena by apache.
From the class Params, the toStrings method:
protected static List<String> toStrings(Resource list) {
    List<String> result = new ArrayList<>();
    Resource current = list;
    while (current != null && !current.equals(RDF.nil)) {
        Statement firstStmt = current.getProperty(RDF.first);
        if (firstStmt == null) {
            throw new TextIndexException("param spec of type set not well formed");
        }
        RDFNode first = firstStmt.getObject();
        if (!first.isLiteral()) {
            throw new TextIndexException("param spec of type set item is not a literal: " + first);
        }
        result.add(((Literal) first).getLexicalForm());
        Statement restStmt = current.getProperty(RDF.rest);
        if (restStmt == null) {
            throw new TextIndexException("param spec of type set not terminated by rdf:nil");
        }
        RDFNode rest = restStmt.getObject();
        if (!rest.isResource()) {
            throw new TextIndexException("param spec of type set rest is not a resource: " + rest);
        }
        current = (Resource) rest;
    }
    return result;
}
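For illustration, an rdf:List such as the ("the" "a" "an") value from the GenericAnalyzer example can be built with Jena's Model API and read back by exactly this rdf:first/rdf:rest walk. toStrings itself is protected, so the direct call below is only to show the expected result:

// Illustrative usage: build the list ("the" "a" "an") and walk it back to strings.
Model m = ModelFactory.createDefaultModel();
RDFNode[] members = { m.createLiteral("the"), m.createLiteral("a"), m.createLiteral("an") };
RDFList stopwords = m.createList(members);          // rdf:first/rdf:rest chain ending in rdf:nil
List<String> values = Params.toStrings(stopwords);  // ["the", "a", "an"]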