
Example 1 with ParamSpec

Use of org.apache.jena.query.text.assembler.Params.ParamSpec in project jena by apache.

From the class GenericFilterAssembler, method open:

/*
    <#indexLucene> a text:TextIndexLucene ;
        text:directory <file:Lucene> ;
        text:entityMap <#entMap> ;
        text:defineAnalyzers (
            [text:addLang "sa-x-iast" ;
             text:analyzer [ . . . ]]
            [text:defineAnalyzer <#foo> ;
             text:analyzer [ . . . ]]
            [text:defineFilter <#bar> ;
             text:filter [
               a text:GenericFilter ;
               text:class "org.apache.jena.query.text.filter.SelectiveFoldingFilter" ;
               text:params (
                    [ text:paramName "whitelisted" ;
                      text:paramType text:TypeSet ;
                      text:paramValue ("รง") ]
                    )
              ]
            ]
        )
     */
@Override
public FilterSpec open(Assembler a, Resource root, Mode mode) {
    if (root.hasProperty(TextVocab.pClass)) {
        // text:class is expected to be a string literal
        String className = root.getProperty(TextVocab.pClass).getString();
        // is the class accessible?
        Class<?> clazz = null;
        try {
            clazz = Class.forName(className);
        } catch (ClassNotFoundException e) {
            Log.error(this, "Filter class " + className + " not found. " + e.getMessage(), e);
            return null;
        }
        // Is the class a TokenFilter?
        if (!TokenFilter.class.isAssignableFrom(clazz)) {
            Log.error(this, clazz.getName() + " has to be a subclass of " + TokenFilter.class.getName());
            return null;
        }
        if (root.hasProperty(TextVocab.pParams)) {
            RDFNode node = root.getProperty(TextVocab.pParams).getObject();
            if (!node.isResource()) {
                throw new TextIndexException("text:params must be a list of parameter resources: " + node);
            }
            List<ParamSpec> specs = Params.getParamSpecs((Resource) node);
            // split the param specs into classes and values for constructor lookup
            // add an initial param for the TokenStream source. The source value is
            // set to null here; the actual TokenStream is supplied by ConfigurableAnalyzer
            // when the filter is used.
            final Class<?>[] paramClasses = new Class<?>[specs.size() + 1];
            paramClasses[0] = TokenStream.class;
            final Object[] paramValues = new Object[specs.size() + 1];
            paramValues[0] = null;
            for (int i = 0; i < specs.size(); i++) {
                ParamSpec spec = specs.get(i);
                paramClasses[i + 1] = spec.getValueClass();
                paramValues[i + 1] = spec.getValue();
            }
            // Create spec for new filter
            return new FilterSpec(clazz, paramClasses, paramValues);
        } else {
            // use the TokenStream constructor for the new filter
            return new FilterSpec(clazz, new Class<?>[] { TokenStream.class }, new Object[] { null });
        }
    } else {
        throw new TextIndexException("text:class property is required by GenericFilter: " + root);
    }
}
Also used: ParamSpec (org.apache.jena.query.text.assembler.Params.ParamSpec), TextIndexException (org.apache.jena.query.text.TextIndexException), RDFNode (org.apache.jena.rdf.model.RDFNode), TokenFilter (org.apache.lucene.analysis.TokenFilter)
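The arrays built above reserve slot 0 for the TokenStream that will eventually feed the filter; the assembler leaves it null and the real stream is only supplied when the filter is wired into an analyzer chain. The sketch below is a hypothetical illustration (the class and method names are assumptions, not Jena's ConfigurableAnalyzer code) of how such a (class, parameter classes, parameter values) triple can be turned into a live filter with plain reflection:

import java.lang.reflect.Constructor;

import org.apache.lucene.analysis.TokenStream;

public class FilterSpecSketch {
    // Instantiate the configured filter class reflectively, filling the slot that
    // was reserved for the TokenStream with the real source stream.
    public static TokenStream wrap(Class<?> filterClass,
                                   Class<?>[] paramClasses,
                                   Object[] paramValues,
                                   TokenStream source) throws ReflectiveOperationException {
        Object[] args = paramValues.clone();
        // slot 0 was left null by the assembler; supply the actual TokenStream now
        args[0] = source;
        Constructor<?> ctor = filterClass.getConstructor(paramClasses);
        return (TokenStream) ctor.newInstance(args);
    }
}

Whether Jena does exactly this internally is an implementation detail; the point is that the parameter classes collected from the ParamSpecs must match the filter's declared constructor signature exactly for getConstructor to resolve.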

Example 2 with ParamSpec

Use of org.apache.jena.query.text.assembler.Params.ParamSpec in project jena by apache.

From the class GenericTokenizerAssembler, method open:

/*
    <#indexLucene> a text:TextIndexLucene ;
        text:directory <file:Lucene> ;
        text:entityMap <#entMap> ;
        text:defineAnalyzers (
            [text:addLang "sa-x-iast" ;
             text:analyzer [ . . . ]]
            [text:defineAnalyzer <#foo> ;
             text:analyzer [ . . . ]]
            [text:defineTokenizer <#bar> ;
             text:tokenizer [
               a text:GenericTokenizer ;
               text:class "org.apache.lucene.analysis.ngram.NGramTokenizer" ;
               text:params (
                    [ text:paramName "minGram" ;
                      text:paramType text:TypeInt ;
                      text:paramValue 3 ]
                    [ text:paramName "maxGram" ;
                      text:paramType text:TypeInt ;
                      text:paramValue 7 ]
                    )
              ]
            ]
        )
     */
@Override
public TokenizerSpec open(Assembler a, Resource root, Mode mode) {
    if (root.hasProperty(TextVocab.pClass)) {
        // text:class is expected to be a string literal
        String className = root.getProperty(TextVocab.pClass).getString();
        // is the class accessible?
        Class<?> clazz = null;
        try {
            clazz = Class.forName(className);
        } catch (ClassNotFoundException e) {
            Log.error(this, "Tokenizer class " + className + " not found. " + e.getMessage(), e);
            return null;
        }
        // Is the class a Tokenizer?
        if (!Tokenizer.class.isAssignableFrom(clazz)) {
            Log.error(this, clazz.getName() + " has to be a subclass of " + Tokenizer.class.getName());
            return null;
        }
        if (root.hasProperty(TextVocab.pParams)) {
            RDFNode node = root.getProperty(TextVocab.pParams).getObject();
            if (!node.isResource()) {
                throw new TextIndexException("text:params must be a list of parameter resources: " + node);
            }
            List<ParamSpec> specs = Params.getParamSpecs((Resource) node);
            // split the param specs into classes and values for constructor lookup
            final Class<?>[] paramClasses = new Class<?>[specs.size()];
            final Object[] paramValues = new Object[specs.size()];
            for (int i = 0; i < specs.size(); i++) {
                ParamSpec spec = specs.get(i);
                paramClasses[i] = spec.getValueClass();
                paramValues[i] = spec.getValue();
            }
            // Create spec for the new tokenizer
            return new TokenizerSpec(clazz, paramClasses, paramValues);
        } else {
            // use the nullary Tokenizer constructor
            return new TokenizerSpec(clazz, new Class<?>[0], new Object[0]);
        }
    } else {
        throw new TextIndexException("text:class property is required by GenericTokenizer: " + root);
    }
}
Also used: ParamSpec (org.apache.jena.query.text.assembler.Params.ParamSpec), TextIndexException (org.apache.jena.query.text.TextIndexException), Tokenizer (org.apache.lucene.analysis.Tokenizer), RDFNode (org.apache.jena.rdf.model.RDFNode)
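For the tokenizer case there is no reserved TokenStream slot, so the collected classes and values map directly onto a constructor such as NGramTokenizer(minGram, maxGram). The sketch below is a minimal, hypothetical illustration (the helper name is an assumption, and it presumes the spec records the parameter classes the target constructor actually declares, e.g. int.class for the NGram bounds):

import java.lang.reflect.Constructor;

import org.apache.lucene.analysis.Tokenizer;

public class TokenizerSpecSketch {
    // Instantiate the configured Tokenizer class reflectively from the collected
    // parameter classes and values, e.g. minGram = 3, maxGram = 7 for NGramTokenizer.
    public static Tokenizer create(Class<?> tokenizerClass,
                                   Class<?>[] paramClasses,
                                   Object[] paramValues) throws ReflectiveOperationException {
        Constructor<?> ctor = tokenizerClass.getConstructor(paramClasses);
        return (Tokenizer) ctor.newInstance(paramValues);
    }
}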

Aggregations

TextIndexException (org.apache.jena.query.text.TextIndexException): 2 uses
ParamSpec (org.apache.jena.query.text.assembler.Params.ParamSpec): 2 uses
RDFNode (org.apache.jena.rdf.model.RDFNode): 2 uses
TokenFilter (org.apache.lucene.analysis.TokenFilter): 1 use
Tokenizer (org.apache.lucene.analysis.Tokenizer): 1 use