Search in sources :

Example 11 with TextIndexException

use of org.apache.jena.query.text.TextIndexException in project jena by apache.

the class EntityDefinitionAssembler method open.

/*
<#entMap> a text:EntityMap ;
    text:entityField      "uri" ;
    text:defaultField     "text" ;
    text:map (
         [ text:field "text" ; text:predicate rdfs:label ]
         [ text:field "type" ; text:predicate rdf:type  ]
         ) .
     */
/**
 * Builds an {@link EntityDefinition} from the entity-map description rooted at {@code root}.
 *
 * <p>The scalar settings (entity field, default field and the optional graph, language and
 * unique-id fields) are read with a SPARQL query over the model of {@code root}. The
 * {@code text:map} RDF list is then walked by hand; each entry contributes a field/predicate
 * pair and, optionally, a no-index flag and an analyzer for that field.
 *
 * @param a    assembler used to open any analyzer resource referenced by a map entry
 * @param root the resource describing the entity map
 * @param mode assembler mode; not used by this method
 * @return the populated entity definition
 * @throws TextIndexException if zero or more than one entity map matches {@code root}, if the
 *         map list or one of its entries is malformed, or if the default field has no
 *         field/predicate entry in the map
 */
@Override
public EntityDefinition open(Assembler a, Resource root, Mode mode) {
    String prologue = "PREFIX : <" + NS + ">   PREFIX list: <http://jena.apache.org/ARQ/list#> ";
    Model model = root.getModel();
    String qs1 = StrUtils.strjoinNL(prologue, "SELECT * {", "  ?eMap  :entityField  ?entityField ;", "         :map ?map ;", "         :defaultField ?dftField .", "  OPTIONAL {", "    ?eMap :graphField ?graphField", "  }", "  OPTIONAL {", "    ?eMap :langField ?langField", "  }", "  OPTIONAL {", "    ?eMap :uidField ?uidField", "  }", "}");
    ParameterizedSparqlString pss = new ParameterizedSparqlString(qs1);
    pss.setIri("eMap", root.getURI());
    Query query1 = QueryFactory.create(pss.toString());
    // Copy the solutions into a list inside the try block so the query execution
    // can be closed before the results are inspected.
    List<QuerySolution> results;
    try (QueryExecution qexec1 = QueryExecutionFactory.create(query1, model)) {
        ResultSet rs1 = qexec1.execSelect();
        results = ResultSetFormatter.toList(rs1);
    }
    if (results.isEmpty()) {
        Log.warn(this, "Failed to find a valid EntityMap for : " + root);
        throw new TextIndexException("Failed to find a valid EntityMap for : " + root);
    }
    if (results.size() != 1) {
        Log.warn(this, "Multiple matches for EntityMap for : " + root);
        throw new TextIndexException("Multiple matches for EntityMap for : " + root);
    }
    QuerySolution qsol1 = results.get(0);
    String entityField = qsol1.getLiteral("entityField").getLexicalForm();
    String graphField = qsol1.contains("graphField") ? qsol1.getLiteral("graphField").getLexicalForm() : null;
    String langField = qsol1.contains("langField") ? qsol1.getLiteral("langField").getLexicalForm() : null;
    String defaultField = qsol1.contains("dftField") ? qsol1.getLiteral("dftField").getLexicalForm() : null;
    String uniqueIdField = qsol1.contains("uidField") ? qsol1.getLiteral("uidField").getLexicalForm() : null;

    // Per-field settings collected while walking the text:map list.
    Multimap<String, Node> mapDefs = HashMultimap.create();
    Map<String, Analyzer> analyzerDefs = new HashMap<>();
    Map<String, Boolean> noIndexDefs = new HashMap<>();

    Statement listStmt = root.getProperty(TextVocab.pMap);
    while (listStmt != null) {
        RDFNode n = listStmt.getObject();
        if (!n.isResource()) {
            throw new TextIndexException("Text list node is not a resource : " + n);
        }
        Resource listResource = n.asResource();
        if (listResource.equals(RDF.nil)) {
            // end of the list
            break;
        }
        Statement listEntryStmt = listResource.getProperty(RDF.first);
        if (listEntryStmt == null) {
            throw new TextIndexException("Text map list is not well formed.  No rdf:first property");
        }
        n = listEntryStmt.getObject();
        if (!n.isResource()) {
            throw new TextIndexException("Text map list entry is not a resource : " + n);
        }
        Resource listEntry = n.asResource();

        // Mandatory: the index field name.
        Statement fieldStatement = listEntry.getProperty(TextVocab.pField);
        if (fieldStatement == null) {
            throw new TextIndexException("Text map entry has no field property");
        }
        n = fieldStatement.getObject();
        if (!n.isLiteral()) {
            throw new TextIndexException("Text map entry field property has no literal value : " + n);
        }
        String field = n.asLiteral().getLexicalForm();

        // Mandatory: the predicate mapped to that field.
        Statement predicateStatement = listEntry.getProperty(TextVocab.pPredicate);
        if (predicateStatement == null) {
            throw new TextIndexException("Text map entry has no predicate property");
        }
        n = predicateStatement.getObject();
        if (!n.isURIResource()) {
            throw new TextIndexException("Text map entry predicate property has non resource value : " + n);
        }
        mapDefs.put(field, n.asNode());

        // Optional: no-index flag for the field.
        Statement noIndexStatement = listEntry.getProperty(TextVocab.pNoIndex);
        if (noIndexStatement != null) {
            n = noIndexStatement.getObject();
            if (!n.isLiteral()) {
                throw new TextIndexException("Text map entry noIndex property must be a boolean : " + n);
            }
            boolean noInx = n.asLiteral().getBoolean();
            noIndexDefs.put(field, noInx);
        }

        // Optional: analyzer for the field, built by the supplied assembler.
        Statement analyzerStatement = listEntry.getProperty(TextVocab.pAnalyzer);
        if (analyzerStatement != null) {
            n = analyzerStatement.getObject();
            if (!n.isResource()) {
                throw new TextIndexException("Text map entry analyzer property is not a resource : " + n);
            }
            Resource analyzerResource = n.asResource();
            Analyzer analyzer = (Analyzer) a.open(analyzerResource);
            analyzerDefs.put(field, analyzer);
        }

        // move on to the next element in the list
        listStmt = listResource.getProperty(RDF.rest);
    }

    // Primary field/predicate
    if (defaultField != null) {
        Collection<Node> c = mapDefs.get(defaultField);
        if (c.isEmpty()) {
            throw new TextIndexException("No definition of primary field '" + defaultField + "'");
        }
    }

    EntityDefinition docDef = new EntityDefinition(entityField, defaultField);
    docDef.setGraphField(graphField);
    docDef.setLangField(langField);
    docDef.setUidField(uniqueIdField);
    // Iterate entries() rather than keys(): keys() repeats a key once per mapped value,
    // which would register every field/predicate pair several times.
    for (Map.Entry<String, Node> mapping : mapDefs.entries()) {
        docDef.set(mapping.getKey(), mapping.getValue());
    }
    for (Map.Entry<String, Boolean> noIndex : noIndexDefs.entrySet()) {
        docDef.setNoIndex(noIndex.getKey(), noIndex.getValue());
    }
    for (Map.Entry<String, Analyzer> fieldAnalyzer : analyzerDefs.entrySet()) {
        docDef.setAnalyzer(fieldAnalyzer.getKey(), fieldAnalyzer.getValue());
    }
    return docDef;
}
Also used : TextIndexException(org.apache.jena.query.text.TextIndexException) HashMap(java.util.HashMap) Node(org.apache.jena.graph.Node) Analyzer(org.apache.lucene.analysis.Analyzer) EntityDefinition(org.apache.jena.query.text.EntityDefinition)

Example 12 with TextIndexException

use of org.apache.jena.query.text.TextIndexException in project jena by apache.

the class GenericFilterAssembler method open.

/*
    <#indexLucene> a text:TextIndexLucene ;
        text:directory <file:Lucene> ;
        text:entityMap <#entMap> ;
        text:defineAnalyzers (
            [text:addLang "sa-x-iast" ;
             text:analyzer [ . . . ]]
            [text:defineAnalyzer <#foo> ;
             text:analyzer [ . . . ]]
            [text:defineFilter <#bar> ;
             text:filter [
               a text:GenericFilter ;
               text:class "org.apache.jena.query.text.filter.SelectiveFoldingFilter" ;
               text:params (
                    [ text:paramName "whitelisted" ;
                      text:paramType text:TypeSet ;
                      text:paramValue ("ç") ]
                    )
              ]
            ]
        )
     */
/**
 * Assembles a {@link FilterSpec} from a generic filter description.
 *
 * <p>The {@code text:class} property names the filter class, which must be loadable and a
 * subclass of {@link TokenFilter}. If {@code text:params} is present, its parameter specs
 * are appended after a leading {@link TokenStream} constructor parameter whose value is
 * left {@code null} here.
 *
 * @param a    assembler; not used by this method
 * @param root the resource describing the filter
 * @param mode assembler mode; not used by this method
 * @return the filter spec, or {@code null} (after logging an error) when the class cannot
 *         be loaded or is not a {@link TokenFilter}
 * @throws TextIndexException if {@code text:class} is missing or {@code text:params} is not
 *         a resource
 */
@Override
public FilterSpec open(Assembler a, Resource root, Mode mode) {
    if (!root.hasProperty(TextVocab.pClass)) {
        throw new TextIndexException("text:class property is required by GenericFilter: " + root);
    }

    // text:class is expected to be a string literal
    final String filterClassName = root.getProperty(TextVocab.pClass).getString();

    // Can the named class be loaded?
    final Class<?> filterClass;
    try {
        filterClass = Class.forName(filterClassName);
    } catch (ClassNotFoundException e) {
        Log.error(this, "Filter class " + filterClassName + " not found. " + e.getMessage(), e);
        return null;
    }

    // Is the class a TokenFilter?
    if (!TokenFilter.class.isAssignableFrom(filterClass)) {
        Log.error(this, filterClass.getName() + " has to be a subclass of " + TokenFilter.class.getName());
        return null;
    }

    if (!root.hasProperty(TextVocab.pParams)) {
        // No extra parameters: use the single-argument TokenStream constructor.
        return new FilterSpec(filterClass, new Class<?>[] { TokenStream.class }, new Object[] { null });
    }

    final RDFNode paramsNode = root.getProperty(TextVocab.pParams).getObject();
    if (!paramsNode.isResource()) {
        throw new TextIndexException("text:params must be a list of parameter resources: " + paramsNode);
    }
    final List<ParamSpec> paramSpecs = Params.getParamSpecs((Resource) paramsNode);

    // Split the specs into parallel arrays of types and values for constructor lookup.
    // Slot 0 is reserved for the TokenStream source; its value stays null at this point.
    final int arity = paramSpecs.size() + 1;
    final Class<?>[] ctorTypes = new Class<?>[arity];
    final Object[] ctorArgs = new Object[arity];
    ctorTypes[0] = TokenStream.class;
    ctorArgs[0] = null;
    int slot = 1;
    for (ParamSpec paramSpec : paramSpecs) {
        ctorTypes[slot] = paramSpec.getValueClass();
        ctorArgs[slot] = paramSpec.getValue();
        slot++;
    }

    // Create spec for new filter
    return new FilterSpec(filterClass, ctorTypes, ctorArgs);
}
Also used : ParamSpec(org.apache.jena.query.text.assembler.Params.ParamSpec) TextIndexException(org.apache.jena.query.text.TextIndexException) RDFNode(org.apache.jena.rdf.model.RDFNode) TokenFilter(org.apache.lucene.analysis.TokenFilter)

Example 13 with TextIndexException

use of org.apache.jena.query.text.TextIndexException in project jena by apache.

the class PropListsAssembler method getPropsList.

/**
 * Collects the members of the RDF list that is the object of {@code stmt}.
 *
 * @param stmt statement whose object is the head of the list
 * @return the list members, in list order
 * @throws TextIndexException if the object is not a resource, a cell lacks {@code rdf:first}
 *         or {@code rdf:rest}, a member is not a URI resource, or a rest node is not a resource
 */
private static List<Resource> getPropsList(Statement stmt) {
    final List<Resource> members = new ArrayList<>();
    final RDFNode head = stmt.getObject();
    if (!head.isResource()) {
        throw new TextIndexException("text:props is not a list : " + head);
    }

    Resource cell = (Resource) head;
    while (cell != null && !cell.equals(RDF.nil)) {
        // rdf:first carries the member held by this cell.
        final Statement memberStmt = cell.getProperty(RDF.first);
        if (memberStmt == null) {
            throw new TextIndexException("text:props list not well formed: " + cell);
        }
        final RDFNode member = memberStmt.getObject();
        if (!member.isURIResource()) {
            throw new TextIndexException("text:props list item is not a Resource : " + member);
        }
        members.add((Resource) member);

        // rdf:rest points to the next cell.
        final Statement tailStmt = cell.getProperty(RDF.rest);
        if (tailStmt == null) {
            throw new TextIndexException("text:props list not terminated by rdf:nil");
        }
        final RDFNode tail = tailStmt.getObject();
        if (!tail.isResource()) {
            throw new TextIndexException("text:props  list rest node is not a resource : " + tail);
        }
        cell = (Resource) tail;
    }
    return members;
}
Also used : TextIndexException(org.apache.jena.query.text.TextIndexException) Statement(org.apache.jena.rdf.model.Statement) Resource(org.apache.jena.rdf.model.Resource) ArrayList(java.util.ArrayList) RDFNode(org.apache.jena.rdf.model.RDFNode)

Example 14 with TextIndexException

use of org.apache.jena.query.text.TextIndexException in project jena by apache.

the class StandardAnalyzerAssembler method analyzerWithStopWords.

/**
 * Creates a {@link StandardAnalyzer} using the stop words listed under the
 * {@code text:stopWords} property of {@code root}.
 *
 * @param root resource carrying the stop-words property
 * @return a standard analyzer configured with those stop words
 * @throws TextIndexException if the property value is not a resource
 */
private Analyzer analyzerWithStopWords(Resource root) {
    final RDFNode stopWordsNode = root.getProperty(TextVocab.pStopWords).getObject();
    if (stopWordsNode.isResource()) {
        final CharArraySet stopWordSet = toCharArraySet((Resource) stopWordsNode);
        return new StandardAnalyzer(stopWordSet);
    }
    throw new TextIndexException("text:stopWords property takes a list as a value : " + stopWordsNode);
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet) TextIndexException(org.apache.jena.query.text.TextIndexException) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) RDFNode(org.apache.jena.rdf.model.RDFNode)

Example 15 with TextIndexException

use of org.apache.jena.query.text.TextIndexException in project jena by apache.

the class StandardAnalyzerAssembler method toList.

/**
 * Reads an RDF list of literals into a list of their lexical forms.
 *
 * @param list head of the RDF list
 * @return the lexical form of each member, in list order
 * @throws TextIndexException if a cell lacks {@code rdf:first} or {@code rdf:rest}, a member
 *         is not a literal, or a rest node is not a resource
 */
private List<String> toList(Resource list) {
    final List<String> words = new ArrayList<>();
    Resource cell = list;
    while (cell != null && !cell.equals(RDF.nil)) {
        // rdf:first carries the word held by this cell.
        final Statement wordStmt = cell.getProperty(RDF.first);
        if (wordStmt == null) {
            throw new TextIndexException("stop word list not well formed");
        }
        final RDFNode word = wordStmt.getObject();
        if (!word.isLiteral()) {
            throw new TextIndexException("stop word is not a literal : " + word);
        }
        words.add(((Literal) word).getLexicalForm());

        // rdf:rest points to the next cell.
        final Statement tailStmt = cell.getProperty(RDF.rest);
        if (tailStmt == null) {
            throw new TextIndexException("stop word list not terminated by rdf:nil");
        }
        final RDFNode tail = tailStmt.getObject();
        if (!tail.isResource()) {
            throw new TextIndexException("stop word list node is not a resource : " + tail);
        }
        cell = (Resource) tail;
    }
    return words;
}
Also used : TextIndexException(org.apache.jena.query.text.TextIndexException) Statement(org.apache.jena.rdf.model.Statement) ArrayList(java.util.ArrayList) Resource(org.apache.jena.rdf.model.Resource) RDFNode(org.apache.jena.rdf.model.RDFNode)

Aggregations

TextIndexException (org.apache.jena.query.text.TextIndexException)22 RDFNode (org.apache.jena.rdf.model.RDFNode)17 Resource (org.apache.jena.rdf.model.Resource)14 Statement (org.apache.jena.rdf.model.Statement)12 ArrayList (java.util.ArrayList)7 Analyzer (org.apache.lucene.analysis.Analyzer)4 HashMap (java.util.HashMap)2 EntityDefinition (org.apache.jena.query.text.EntityDefinition)2 FilterSpec (org.apache.jena.query.text.assembler.GenericFilterAssembler.FilterSpec)2 TokenizerSpec (org.apache.jena.query.text.assembler.GenericTokenizerAssembler.TokenizerSpec)2 ParamSpec (org.apache.jena.query.text.assembler.Params.ParamSpec)2 Literal (org.apache.jena.rdf.model.Literal)2 CharArraySet (org.apache.lucene.analysis.CharArraySet)2 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)2 Reader (java.io.Reader)1 List (java.util.List)1 RDFDatatype (org.apache.jena.datatypes.RDFDatatype)1 Node (org.apache.jena.graph.Node)1 TextIndexConfig (org.apache.jena.query.text.TextIndexConfig)1 ConfigurableAnalyzer (org.apache.jena.query.text.analyzer.ConfigurableAnalyzer)1