use of org.apache.jena.query.text.TextIndexException in project jena by apache.
the class EntityDefinitionAssembler method open.
/*
<#entMap> a text:EntityMap ;
text:entityField "uri" ;
text:defaultField "text" ;
text:map (
[ text:field "text" ; text:predicate rdfs:label ]
[ text:field "type" ; text:predicate rdfs:type ]
) .
*/
/**
 * Builds an {@link EntityDefinition} from the description rooted at {@code root}.
 *
 * <p>The scalar settings (entity field, default field and the optional graph,
 * language and unique-id fields) are read with a SPARQL query over the model
 * that contains {@code root}. The field/predicate mappings, together with any
 * per-field noIndex flag and analyzer, are read by walking the RDF list that is
 * the value of {@code TextVocab.pMap}.
 *
 * @param a    assembler used to open the analyzer resource of a map entry
 * @param root resource describing the entity map
 * @param mode not used by this method
 * @return the populated entity definition
 * @throws TextIndexException if no single match is found for {@code root}, if
 *         the map list or one of its entries is malformed, or if the default
 *         field has no mapping in the list
 */
@Override
public EntityDefinition open(Assembler a, Resource root, Mode mode) {
    String prologue = "PREFIX : <" + NS + "> PREFIX list: <http://jena.apache.org/ARQ/list#> ";
    Model model = root.getModel();
    String qs1 = StrUtils.strjoinNL(prologue, "SELECT * {", " ?eMap :entityField ?entityField ;", " :map ?map ;", " :defaultField ?dftField .", " OPTIONAL {", " ?eMap :graphField ?graphField", " }", " OPTIONAL {", " ?eMap :langField ?langField", " }", " OPTIONAL {", " ?eMap :uidField ?uidField", " }", "}");
    ParameterizedSparqlString pss = new ParameterizedSparqlString(qs1);
    pss.setIri("eMap", root.getURI());
    Query query1 = QueryFactory.create(pss.toString());

    // Copy the rows out and release the query execution straight away; the
    // original code never closed it.
    List<QuerySolution> results;
    try (QueryExecution qexec1 = QueryExecutionFactory.create(query1, model)) {
        ResultSet rs1 = qexec1.execSelect();
        results = ResultSetFormatter.toList(rs1);
    }

    if (results.size() == 0) {
        Log.warn(this, "Failed to find a valid EntityMap for : " + root);
        throw new TextIndexException("Failed to find a valid EntityMap for : " + root);
    }
    if (results.size() != 1) {
        Log.warn(this, "Multiple matches for EntityMap for : " + root);
        throw new TextIndexException("Multiple matches for EntityMap for : " + root);
    }

    QuerySolution qsol1 = results.get(0);
    String entityField = qsol1.getLiteral("entityField").getLexicalForm();
    String graphField = qsol1.contains("graphField") ? qsol1.getLiteral("graphField").getLexicalForm() : null;
    String langField = qsol1.contains("langField") ? qsol1.getLiteral("langField").getLexicalForm() : null;
    String defaultField = qsol1.contains("dftField") ? qsol1.getLiteral("dftField").getLexicalForm() : null;
    String uniqueIdField = qsol1.contains("uidField") ? qsol1.getLiteral("uidField").getLexicalForm() : null;

    Multimap<String, Node> mapDefs = HashMultimap.create();
    Map<String, Analyzer> analyzerDefs = new HashMap<>();
    Map<String, Boolean> noIndexDefs = new HashMap<>();

    // Walk the RDF list cell by cell: rdf:first holds the entry, rdf:rest the next cell.
    Statement listStmt = root.getProperty(TextVocab.pMap);
    while (listStmt != null) {
        RDFNode n = listStmt.getObject();
        if (!n.isResource()) {
            throw new TextIndexException("Text list node is not a resource : " + n);
        }
        Resource listResource = n.asResource();
        if (listResource.equals(RDF.nil)) {
            // end of the list
            break;
        }

        Statement listEntryStmt = listResource.getProperty(RDF.first);
        if (listEntryStmt == null) {
            throw new TextIndexException("Text map list is not well formed. No rdf:first property");
        }
        n = listEntryStmt.getObject();
        if (!n.isResource()) {
            throw new TextIndexException("Text map list entry is not a resource : " + n);
        }
        Resource listEntry = n.asResource();

        // Mandatory: the field name, a literal.
        Statement fieldStatement = listEntry.getProperty(TextVocab.pField);
        if (fieldStatement == null) {
            throw new TextIndexException("Text map entry has no field property");
        }
        n = fieldStatement.getObject();
        if (!n.isLiteral()) {
            throw new TextIndexException("Text map entry field property has no literal value : " + n);
        }
        String field = n.asLiteral().getLexicalForm();

        // Mandatory: the predicate, a URI resource.
        Statement predicateStatement = listEntry.getProperty(TextVocab.pPredicate);
        if (predicateStatement == null) {
            throw new TextIndexException("Text map entry has no predicate property");
        }
        n = predicateStatement.getObject();
        if (!n.isURIResource()) {
            throw new TextIndexException("Text map entry predicate property has non resource value : " + n);
        }
        mapDefs.put(field, n.asNode());

        // Optional: noIndex flag for the field.
        Statement noIndexStatement = listEntry.getProperty(TextVocab.pNoIndex);
        if (noIndexStatement != null) {
            n = noIndexStatement.getObject();
            if (!n.isLiteral()) {
                throw new TextIndexException("Text map entry noIndex property must be a boolean : " + n);
            }
            boolean noInx = n.asLiteral().getBoolean();
            noIndexDefs.put(field, noInx);
        }

        // Optional: analyzer for the field, opened through the assembler.
        Statement analyzerStatement = listEntry.getProperty(TextVocab.pAnalyzer);
        if (analyzerStatement != null) {
            n = analyzerStatement.getObject();
            if (!n.isResource()) {
                throw new TextIndexException("Text map entry analyzer property is not a resource : " + n);
            }
            Resource analyzerResource = n.asResource();
            Analyzer analyzer = (Analyzer) a.open(analyzerResource);
            analyzerDefs.put(field, analyzer);
        }

        // move on to the next element in the list
        listStmt = listResource.getProperty(RDF.rest);
    }

    // Primary field/predicate
    if (defaultField != null) {
        Collection<Node> c = mapDefs.get(defaultField);
        if (c.isEmpty()) {
            throw new TextIndexException("No definition of primary field '" + defaultField + "'");
        }
    }

    EntityDefinition docDef = new EntityDefinition(entityField, defaultField);
    docDef.setGraphField(graphField);
    docDef.setLangField(langField);
    docDef.setUidField(uniqueIdField);

    // keySet() yields each field once; keys() would repeat a field once per
    // mapped predicate and register the same pairs again and again.
    for (String f : mapDefs.keySet()) {
        for (Node p : mapDefs.get(f)) {
            docDef.set(f, p);
        }
    }
    for (Map.Entry<String, Boolean> e : noIndexDefs.entrySet()) {
        docDef.setNoIndex(e.getKey(), e.getValue());
    }
    for (Map.Entry<String, Analyzer> e : analyzerDefs.entrySet()) {
        docDef.setAnalyzer(e.getKey(), e.getValue());
    }
    return docDef;
}
use of org.apache.jena.query.text.TextIndexException in project jena by apache.
the class GenericFilterAssembler method open.
/*
<#indexLucene> a text:TextIndexLucene ;
text:directory <file:Lucene> ;
text:entityMap <#entMap> ;
text:defineAnalyzers (
[text:addLang "sa-x-iast" ;
text:analyzer [ . . . ]]
[text:defineAnalyzer <#foo> ;
text:analyzer [ . . . ]]
[text:defineFilter <#bar> ;
text:filter [
a text:GenericFilter ;
text:class "org.apache.jena.query.text.filter.SelectiveFoldingFilter" ;
text:params (
[ text:paramName "whitelisted" ;
text:paramType text:TypeSet ;
text:paramValue ("ç") ]
)
]
]
)
*/
/**
 * Builds a {@link FilterSpec} for the class named by the {@code TextVocab.pClass}
 * property of {@code root}.
 *
 * <p>The spec always carries a leading {@link TokenStream} constructor parameter
 * whose value is {@code null} here; any entries listed under
 * {@code TextVocab.pParams} are appended after it.
 *
 * @param a    not used by this method
 * @param root resource describing the filter
 * @param mode not used by this method
 * @return the filter spec, or {@code null} (after logging an error) when the
 *         class cannot be loaded or is not a {@link TokenFilter} subclass
 * @throws TextIndexException if {@code root} has no class property, or if the
 *         params value is not a resource
 */
@Override
public FilterSpec open(Assembler a, Resource root, Mode mode) {
    if (!root.hasProperty(TextVocab.pClass)) {
        throw new TextIndexException("text:class property is required by GenericFilter: " + root);
    }

    // text:class is expected to be a string literal
    String className = root.getProperty(TextVocab.pClass).getString();

    // The class must be loadable ...
    Class<?> filterClass;
    try {
        filterClass = Class.forName(className);
    } catch (ClassNotFoundException e) {
        Log.error(this, "Filter class " + className + " not found. " + e.getMessage(), e);
        return null;
    }

    // ... and it must be a TokenFilter.
    boolean isTokenFilter = TokenFilter.class.isAssignableFrom(filterClass);
    if (!isTokenFilter) {
        Log.error(this, filterClass.getName() + " has to be a subclass of " + TokenFilter.class.getName());
        return null;
    }

    if (!root.hasProperty(TextVocab.pParams)) {
        // No extra parameters: use the single-argument TokenStream constructor.
        Class<?>[] onlySourceClass = { TokenStream.class };
        Object[] onlySourceValue = { null };
        return new FilterSpec(filterClass, onlySourceClass, onlySourceValue);
    }

    RDFNode paramsNode = root.getProperty(TextVocab.pParams).getObject();
    if (!paramsNode.isResource()) {
        throw new TextIndexException("text:params must be a list of parameter resources: " + paramsNode);
    }
    List<ParamSpec> specs = Params.getParamSpecs(paramsNode.asResource());

    // Split the specs into parallel class/value arrays for constructor lookup.
    // Slot 0 is reserved for the TokenStream source; its value stays null here
    // and is supplied in ConfigurableAnalyzer when the filter is used.
    int arity = specs.size() + 1;
    Class<?>[] ctorTypes = new Class<?>[arity];
    Object[] ctorArgs = new Object[arity];
    ctorTypes[0] = TokenStream.class;
    ctorArgs[0] = null;

    int slot = 1;
    for (ParamSpec spec : specs) {
        ctorTypes[slot] = spec.getValueClass();
        ctorArgs[slot] = spec.getValue();
        slot++;
    }

    return new FilterSpec(filterClass, ctorTypes, ctorArgs);
}
use of org.apache.jena.query.text.TextIndexException in project jena by apache.
the class PropListsAssembler method getPropsList.
/**
 * Collects the members of the RDF list that is the object of {@code stmt}.
 *
 * @param stmt statement whose object is the head of the list
 * @return the list members, in list order
 * @throws TextIndexException if the object is not a resource, a cell has no
 *         rdf:first or rdf:rest, a member is not a URI resource, or an
 *         rdf:rest value is not a resource
 */
private static List<Resource> getPropsList(Statement stmt) {
    List<Resource> members = new ArrayList<>();

    RDFNode head = stmt.getObject();
    if (!head.isResource()) {
        throw new TextIndexException("text:props is not a list : " + head);
    }

    Resource cell = head.asResource();
    while (true) {
        // rdf:nil marks the end of the list.
        if (cell == null || cell.equals(RDF.nil)) {
            return members;
        }

        Statement firstStmt = cell.getProperty(RDF.first);
        if (firstStmt == null) {
            throw new TextIndexException("text:props list not well formed: " + cell);
        }
        RDFNode member = firstStmt.getObject();
        if (!member.isURIResource()) {
            throw new TextIndexException("text:props list item is not a Resource : " + member);
        }
        members.add(member.asResource());

        Statement restStmt = cell.getProperty(RDF.rest);
        if (restStmt == null) {
            throw new TextIndexException("text:props list not terminated by rdf:nil");
        }
        RDFNode tail = restStmt.getObject();
        if (!tail.isResource()) {
            throw new TextIndexException("text:props list rest node is not a resource : " + tail);
        }
        cell = tail.asResource();
    }
}
use of org.apache.jena.query.text.TextIndexException in project jena by apache.
the class StandardAnalyzerAssembler method analyzerWithStopWords.
/**
 * Creates a {@link StandardAnalyzer} using the stop words given as the value
 * of {@code TextVocab.pStopWords} on {@code root}.
 *
 * @param root resource carrying the stop-words property
 * @return a standard analyzer built with the converted stop-word set
 * @throws TextIndexException if the property value is not a resource
 */
private Analyzer analyzerWithStopWords(Resource root) {
    Statement stopWordsStmt = root.getProperty(TextVocab.pStopWords);
    RDFNode listHead = stopWordsStmt.getObject();
    if (listHead.isResource()) {
        CharArraySet stopWordSet = toCharArraySet(listHead.asResource());
        return new StandardAnalyzer(stopWordSet);
    }
    throw new TextIndexException("text:stopWords property takes a list as a value : " + listHead);
}
use of org.apache.jena.query.text.TextIndexException in project jena by apache.
the class StandardAnalyzerAssembler method toList.
/**
 * Reads an RDF list of literals into a list of their lexical forms.
 *
 * @param list head of the RDF list
 * @return the lexical form of every list member, in list order
 * @throws TextIndexException if a cell has no rdf:first or rdf:rest, a member
 *         is not a literal, or an rdf:rest value is not a resource
 */
private List<String> toList(Resource list) {
    List<String> words = new ArrayList<>();

    Resource cell = list;
    while (true) {
        // rdf:nil marks the end of the list.
        if (cell == null || cell.equals(RDF.nil)) {
            return words;
        }

        Statement firstStmt = cell.getProperty(RDF.first);
        if (firstStmt == null) {
            throw new TextIndexException("stop word list not well formed");
        }
        RDFNode word = firstStmt.getObject();
        if (!word.isLiteral()) {
            throw new TextIndexException("stop word is not a literal : " + word);
        }
        words.add(word.asLiteral().getLexicalForm());

        Statement restStmt = cell.getProperty(RDF.rest);
        if (restStmt == null) {
            throw new TextIndexException("stop word list not terminated by rdf:nil");
        }
        RDFNode tail = restStmt.getObject();
        if (!tail.isResource()) {
            throw new TextIndexException("stop word list node is not a resource : " + tail);
        }
        cell = tail.asResource();
    }
}
Aggregations