Search in sources :

Example 16 with TextIndexException

use of org.apache.jena.query.text.TextIndexException in project jena by apache.

the class ConfigurableAnalyzerAssembler method open.

/*
    text:map (
         [ text:field "text" ; 
           text:predicate rdfs:label;
           text:analyzer [
               a  text:ConfigurableAnalyzer ;
               text:tokenizer text:LetterTokenizer ;
               text:filters (text:LowerCaseFilter)
           ]
         ]
        ) .
    */
/**
 * Builds a {@link ConfigurableAnalyzer} from the assembler description rooted at {@code root}.
 *
 * <p>The tokenizer is identified by the URI of the {@code text:tokenizer} resource. The
 * optional {@code text:filters} value is converted to a list of filter names; when it is
 * absent an empty filter list is used.
 *
 * @param a    the assembler driving the construction (not used here)
 * @param root the resource describing the analyzer
 * @param mode the assembler mode (not used here)
 * @return the configured analyzer
 * @throws TextIndexException if {@code text:tokenizer} is missing or is not a resource
 */
@Override
public Analyzer open(Assembler a, Resource root, Mode mode) {
    if (!root.hasProperty(TextVocab.pTokenizer)) {
        throw new TextIndexException("text:tokenizer setting is required by ConfigurableAnalyzer");
    }
    // getPropertyResourceValue answers null when the property value is not a resource
    // (e.g. a string literal); report that as a configuration error rather than an NPE.
    Resource tokenizerResource = root.getPropertyResourceValue(TextVocab.pTokenizer);
    if (tokenizerResource == null) {
        throw new TextIndexException("text:tokenizer must be a resource identifying a tokenizer: " + root);
    }
    String tokenizer = tokenizerResource.getURI();
    List<String> filters;
    if (root.hasProperty(TextVocab.pFilters)) {
        Resource filtersResource = root.getPropertyResourceValue(TextVocab.pFilters);
        filters = toFilterList(filtersResource);
    } else {
        filters = new ArrayList<>();
    }
    return new ConfigurableAnalyzer(tokenizer, filters);
}
Also used : ConfigurableAnalyzer(org.apache.jena.query.text.analyzer.ConfigurableAnalyzer) TextIndexException(org.apache.jena.query.text.TextIndexException) Resource(org.apache.jena.rdf.model.Resource)

Example 17 with TextIndexException

use of org.apache.jena.query.text.TextIndexException in project jena by apache.

the class GenericAnalyzerAssembler method open.

/*
    text:map (
         [ text:field "text" ; 
           text:predicate rdfs:label;
           text:analyzer [
               a text:GenericAnalyzer ;
               text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
               text:params (
                    [ text:paramName "stopwords" ;
                      text:paramType text:TypeSet ;
                      text:paramValue ("the" "a" "an") ]
                    [ text:paramName "stemExclusionSet" ;
                      text:paramType text:TypeSet ;
                      text:paramValue ("ing" "ed") ]
                    )
           ]
         ]
        ) .
     */
/**
 * Instantiates the Lucene analyzer named by the {@code text:class} property of {@code root}.
 *
 * <p>If {@code text:params} is present, its parameter specs supply the constructor argument
 * types and values; otherwise the no-argument constructor is requested.
 *
 * @param a    the assembler driving the construction (not used here)
 * @param root the resource describing the analyzer
 * @param mode the assembler mode (not used here)
 * @return the new analyzer, or {@code null} (after logging an error) when the class cannot
 *         be loaded or is not an {@link Analyzer} subclass
 * @throws TextIndexException if {@code text:class} is missing or {@code text:params} is not
 *         a resource
 */
@Override
public Analyzer open(Assembler a, Resource root, Mode mode) {
    if (!root.hasProperty(TextVocab.pClass)) {
        throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
    }

    // text:class is read as a string literal holding the fully qualified class name
    final String className = root.getProperty(TextVocab.pClass).getString();

    final Class<?> analyzerClass;
    try {
        analyzerClass = Class.forName(className);
    } catch (ClassNotFoundException e) {
        Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
        return null;
    }

    // Only Analyzer subclasses are acceptable
    if (!Analyzer.class.isAssignableFrom(analyzerClass)) {
        Log.error(this, analyzerClass.getName() + " has to be a subclass of " + Analyzer.class.getName());
        return null;
    }

    // No parameters given: ask for the nullary constructor
    if (!root.hasProperty(TextVocab.pParams)) {
        return newAnalyzer(analyzerClass, new Class<?>[0], new Object[0]);
    }

    final RDFNode paramsNode = root.getProperty(TextVocab.pParams).getObject();
    if (!paramsNode.isResource()) {
        throw new TextIndexException("text:params must be a list of parameter resources: " + paramsNode);
    }

    // Split the specs into parallel type/value arrays for the constructor lookup
    final List<Params.ParamSpec> paramSpecs = Params.getParamSpecs((Resource) paramsNode);
    final int count = paramSpecs.size();
    final Class<?>[] argTypes = new Class<?>[count];
    final Object[] argValues = new Object[count];
    int idx = 0;
    for (Params.ParamSpec paramSpec : paramSpecs) {
        argTypes[idx] = paramSpec.getValueClass();
        argValues[idx] = paramSpec.getValue();
        idx++;
    }
    return newAnalyzer(analyzerClass, argTypes, argValues);
}
Also used : TextIndexException(org.apache.jena.query.text.TextIndexException) Analyzer(org.apache.lucene.analysis.Analyzer) RDFNode(org.apache.jena.rdf.model.RDFNode)

Example 18 with TextIndexException

use of org.apache.jena.query.text.TextIndexException in project jena by apache.

the class GenericTokenizerAssembler method open.

/*
    <#indexLucene> a text:TextIndexLucene ;
        text:directory <file:Lucene> ;
        text:entityMap <#entMap> ;
        text:defineAnalyzers (
            [text:addLang "sa-x-iast" ;
             text:analyzer [ . . . ]]
            [text:defineAnalyzer <#foo> ;
             text:analyzer [ . . . ]]
            [text:defineTokenizer <#bar> ;
             text:tokenizer [
               a text:GenericTokenizer ;
               text:class "org.apache.lucene.analysis.ngram.NGramTokenizer" ;
               text:params (
                    [ text:paramName "minGram" ;
                      text:paramType text:TypeInt ;
                      text:paramValue 3 ]
                    [ text:paramName "maxGram" ;
                      text:paramType text:TypeInt ;
                      text:paramValue 7 ]
                    )
              ]
            ]
        )
     */
/**
 * Builds a {@code TokenizerSpec} for the Lucene tokenizer class named by the
 * {@code text:class} property of {@code root}.
 *
 * <p>If {@code text:params} is present, its parameter specs supply the constructor argument
 * types and values recorded in the spec; otherwise empty type and value arrays are recorded.
 *
 * @param a    the assembler driving the construction (not used here)
 * @param root the resource describing the tokenizer
 * @param mode the assembler mode (not used here)
 * @return the tokenizer spec, or {@code null} (after logging an error) when the class cannot
 *         be loaded or is not a {@link Tokenizer} subclass
 * @throws TextIndexException if {@code text:class} is missing or {@code text:params} is not
 *         a resource
 */
@Override
public TokenizerSpec open(Assembler a, Resource root, Mode mode) {
    if (!root.hasProperty(TextVocab.pClass)) {
        throw new TextIndexException("text:class property is required by GenericTokenizer: " + root);
    }

    // text:class is read as a string literal holding the fully qualified class name
    final String className = root.getProperty(TextVocab.pClass).getString();

    final Class<?> tokenizerClass;
    try {
        tokenizerClass = Class.forName(className);
    } catch (ClassNotFoundException e) {
        Log.error(this, "Tokenizer class " + className + " not found. " + e.getMessage(), e);
        return null;
    }

    // Only Tokenizer subclasses are acceptable
    if (!Tokenizer.class.isAssignableFrom(tokenizerClass)) {
        Log.error(this, tokenizerClass.getName() + " has to be a subclass of " + Tokenizer.class.getName());
        return null;
    }

    // No parameters given: describe the nullary tokenizer constructor
    if (!root.hasProperty(TextVocab.pParams)) {
        return new TokenizerSpec(tokenizerClass, new Class<?>[0], new Object[0]);
    }

    final RDFNode paramsNode = root.getProperty(TextVocab.pParams).getObject();
    if (!paramsNode.isResource()) {
        throw new TextIndexException("text:params must be a list of parameter resources: " + paramsNode);
    }

    // Split the specs into parallel type/value arrays for the constructor lookup
    final List<ParamSpec> paramSpecs = Params.getParamSpecs((Resource) paramsNode);
    final int count = paramSpecs.size();
    final Class<?>[] argTypes = new Class<?>[count];
    final Object[] argValues = new Object[count];
    int idx = 0;
    for (ParamSpec paramSpec : paramSpecs) {
        argTypes[idx] = paramSpec.getValueClass();
        argValues[idx] = paramSpec.getValue();
        idx++;
    }
    return new TokenizerSpec(tokenizerClass, argTypes, argValues);
}
Also used : ParamSpec(org.apache.jena.query.text.assembler.Params.ParamSpec) TextIndexException(org.apache.jena.query.text.TextIndexException) Tokenizer(org.apache.lucene.analysis.Tokenizer) RDFNode(org.apache.jena.rdf.model.RDFNode)

Example 19 with TextIndexException

use of org.apache.jena.query.text.TextIndexException in project jena by apache.

the class LocalizedAnalyzerAssembler method open.

/*
    text:map (
         [ text:field "text" ; 
           text:predicate rdfs:label;
           text:analyzer [
               a  lucene:LocalizedAnalyzer ;
               text:language "en" ;
           ]
         ]
        ) .
     */
/**
 * Returns an analyzer for the language given by the {@code text:language} property of
 * {@code root}, or a {@link StandardAnalyzer} when no language is configured.
 *
 * @param a    the assembler driving the construction (not used here)
 * @param root the resource describing the analyzer
 * @param mode the assembler mode (not used here)
 * @return the analyzer obtained from {@code Util.getLocalizedAnalyzer} for the configured
 *         language, or a new {@link StandardAnalyzer} if {@code text:language} is absent
 * @throws TextIndexException if {@code text:language} is present but is not a literal
 */
@Override
public Analyzer open(Assembler a, Resource root, Mode mode) {
    if (!root.hasProperty(TextVocab.pLanguage)) {
        return new StandardAnalyzer();
    }
    RDFNode node = root.getProperty(TextVocab.pLanguage).getObject();
    if (!node.isLiteral()) {
        throw new TextIndexException("text:language property must be a string : " + node);
    }
    // Take the lexical form rather than toString(): the latter is a display form that may
    // carry an "@lang" or "^^datatype" suffix, which would not be a valid language code.
    String lang = node.asLiteral().getLexicalForm();
    return Util.getLocalizedAnalyzer(lang);
}
Also used : TextIndexException(org.apache.jena.query.text.TextIndexException) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) RDFNode(org.apache.jena.rdf.model.RDFNode)

Example 20 with TextIndexException

use of org.apache.jena.query.text.TextIndexException in project jena by apache.

the class Params method toStrings.

/**
 * Collects the lexical forms of the literals held in an RDF list.
 *
 * <p>The list is walked cell by cell through {@code rdf:first} / {@code rdf:rest} until
 * {@code rdf:nil} (or a {@code null} cell) is reached.
 *
 * @param list the head cell of the RDF list; {@code null} or {@code rdf:nil} yields an
 *             empty result
 * @return the lexical forms of the list items, in list order
 * @throws TextIndexException if a cell lacks {@code rdf:first} or {@code rdf:rest}, an item
 *         is not a literal, or a {@code rdf:rest} value is not a resource
 */
protected static List<String> toStrings(Resource list) {
    final List<String> strings = new ArrayList<>();
    for (Resource cell = list; cell != null && !cell.equals(RDF.nil); ) {
        // Head of the cell: must exist and be a literal
        final Statement headStmt = cell.getProperty(RDF.first);
        if (headStmt == null) {
            throw new TextIndexException("param spec of type set not well formed");
        }
        final RDFNode item = headStmt.getObject();
        if (!item.isLiteral()) {
            throw new TextIndexException("param spec of type set item is not a literal: " + item);
        }
        strings.add(item.asLiteral().getLexicalForm());

        // Tail of the cell: must exist and be a resource (next cell or rdf:nil)
        final Statement tailStmt = cell.getProperty(RDF.rest);
        if (tailStmt == null) {
            throw new TextIndexException("param spec of type set not terminated by rdf:nil");
        }
        final RDFNode tail = tailStmt.getObject();
        if (!tail.isResource()) {
            throw new TextIndexException("param spec of type set rest is not a resource: " + tail);
        }
        cell = tail.asResource();
    }
    return strings;
}
Also used : TextIndexException(org.apache.jena.query.text.TextIndexException) Statement(org.apache.jena.rdf.model.Statement) ArrayList(java.util.ArrayList) Resource(org.apache.jena.rdf.model.Resource) RDFNode(org.apache.jena.rdf.model.RDFNode)

Aggregations

TextIndexException (org.apache.jena.query.text.TextIndexException)22 RDFNode (org.apache.jena.rdf.model.RDFNode)17 Resource (org.apache.jena.rdf.model.Resource)14 Statement (org.apache.jena.rdf.model.Statement)12 ArrayList (java.util.ArrayList)7 Analyzer (org.apache.lucene.analysis.Analyzer)4 HashMap (java.util.HashMap)2 EntityDefinition (org.apache.jena.query.text.EntityDefinition)2 FilterSpec (org.apache.jena.query.text.assembler.GenericFilterAssembler.FilterSpec)2 TokenizerSpec (org.apache.jena.query.text.assembler.GenericTokenizerAssembler.TokenizerSpec)2 ParamSpec (org.apache.jena.query.text.assembler.Params.ParamSpec)2 Literal (org.apache.jena.rdf.model.Literal)2 CharArraySet (org.apache.lucene.analysis.CharArraySet)2 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)2 Reader (java.io.Reader)1 List (java.util.List)1 RDFDatatype (org.apache.jena.datatypes.RDFDatatype)1 Node (org.apache.jena.graph.Node)1 TextIndexConfig (org.apache.jena.query.text.TextIndexConfig)1 ConfigurableAnalyzer (org.apache.jena.query.text.analyzer.ConfigurableAnalyzer)1