Search in sources :

Example 1 with TokenizerSpec

use of org.apache.jena.query.text.assembler.GenericTokenizerAssembler.TokenizerSpec in project jena by apache.

From the class ConfigurableAnalyzer, the method getTokenizer.

/**
 * Creates a tokenizer from the specification registered under the given name.
 *
 * <p>The name is looked up in {@code tokenizerSpecs}; when a specification is found, its
 * class, parameter classes and parameter values are handed to {@code newTokenizer}.
 *
 * @param tokenizerName the key under which the tokenizer specification was registered
 * @return whatever {@code newTokenizer} produces for the registered specification
 * @throws TextIndexException if no specification is registered under {@code tokenizerName}
 */
private Tokenizer getTokenizer(String tokenizerName) {
    final TokenizerSpec registered = tokenizerSpecs.get(tokenizerName);
    if (registered != null) {
        final Class<?> tokenizerClass = registered.clazz;
        final Class<?>[] argTypes = registered.paramClasses;
        final Object[] argValues = registered.paramValues;
        return newTokenizer(tokenizerClass, argTypes, argValues);
    }
    // Nothing registered under this name: report it with the offending key.
    throw new TextIndexException("Unknown tokenizer : " + tokenizerName);
}
Also used : TextIndexException(org.apache.jena.query.text.TextIndexException) TokenizerSpec(org.apache.jena.query.text.assembler.GenericTokenizerAssembler.TokenizerSpec)

Example 2 with TokenizerSpec

use of org.apache.jena.query.text.assembler.GenericTokenizerAssembler.TokenizerSpec in project jena by apache.

From the class DefineTokenizersAssembler, the method open.

/*
    Example of the kind of assembler description this method walks
    (the list that is the object of text:defineAnalyzers):

    <#indexLucene> a text:TextIndexLucene ;
        text:directory <file:Lucene> ;
        text:entityMap <#entMap> ;
        text:defineAnalyzers (
            [text:addLang "sa-x-iast" ;
             text:analyzer [ . . . ]]
            [text:defineAnalyzer <#foo> ;
             text:analyzer [ . . . ]]
            [text:defineFilter <#bar> ;
             text:filter [ . . . ]]
            [text:defineTokenizer <#baz> ;
             text:tokenizer [ . . . ]]
        )
    */
/**
 * Walks an RDF list and registers every tokenizer definition found in it.
 *
 * <p>For each list element that carries {@code TextVocab.pTokenizer}, the tokenizer spec
 * resource is opened through {@code a}. If the element also carries
 * {@code TextVocab.pDefTokenizer} naming a URI resource, the opened spec is registered under
 * that URI with {@code ConfigurableAnalyzer.defineTokenizer}. List elements without
 * {@code TextVocab.pTokenizer} are skipped.
 *
 * @param a    the assembler used to open each tokenizer spec resource
 * @param list the head of the RDF list to process; {@code null} or {@code rdf:nil} means
 *             there is nothing to process
 * @return always {@code false}; nothing in this method enables multilingual support
 * @throws TextIndexException if the list is malformed (missing {@code rdf:first} or
 *         {@code rdf:rest}, or a non-resource element/tail), if {@code text:tokenizer} is not
 *         a resource, or if {@code text:defineTokenizer} is not a URI resource
 */
public static boolean open(Assembler a, Resource list) {
    Resource current = list;
    while (current != null && !current.equals(RDF.nil)) {
        Statement firstStmt = current.getProperty(RDF.first);
        if (firstStmt == null) {
            throw new TextIndexException("parameter list not well formed: " + current);
        }
        RDFNode first = firstStmt.getObject();
        if (!first.isResource()) {
            throw new TextIndexException("parameter specification must be an anon resource : " + first);
        }
        // Process the current list element; only elements with text:tokenizer matter here.
        Resource adding = (Resource) first;
        if (adding.hasProperty(TextVocab.pTokenizer)) {
            Statement tokenizerStmt = adding.getProperty(TextVocab.pTokenizer);
            RDFNode tokenizerNode = tokenizerStmt.getObject();
            if (!tokenizerNode.isResource()) {
                throw new TextIndexException("addTokenizers text:tokenizer must be an tokenizer spec resource: " + tokenizerNode);
            }
            TokenizerSpec spec = (TokenizerSpec) a.open((Resource) tokenizerNode);
            if (adding.hasProperty(TextVocab.pDefTokenizer)) {
                RDFNode idNode = adding.getProperty(TextVocab.pDefTokenizer).getObject();
                // Inspect the node before treating it as a resource: a literal id has no URI
                // either, and must be reported with the same descriptive error as a blank
                // node rather than failing inside Statement.getResource().
                if (!idNode.isURIResource()) {
                    throw new TextIndexException("addTokenizers text:defineTokenizer property must be a non-blank resource: " + adding);
                }
                ConfigurableAnalyzer.defineTokenizer(((Resource) idNode).getURI(), spec);
            }
        }
        // Advance to the tail of the list; a well-formed list always has rdf:rest.
        Statement restStmt = current.getProperty(RDF.rest);
        if (restStmt == null) {
            throw new TextIndexException("parameter list not terminated by rdf:nil");
        }
        RDFNode rest = restStmt.getObject();
        if (!rest.isResource()) {
            throw new TextIndexException("parameter list node is not a resource : " + rest);
        }
        current = (Resource) rest;
    }
    // No branch above ever turns multilingual support on, so the result is constant.
    return false;
}
Also used : TextIndexException(org.apache.jena.query.text.TextIndexException) TokenizerSpec(org.apache.jena.query.text.assembler.GenericTokenizerAssembler.TokenizerSpec) Statement(org.apache.jena.rdf.model.Statement) Resource(org.apache.jena.rdf.model.Resource) RDFNode(org.apache.jena.rdf.model.RDFNode)

Aggregations

TextIndexException (org.apache.jena.query.text.TextIndexException): 2, TokenizerSpec (org.apache.jena.query.text.assembler.GenericTokenizerAssembler.TokenizerSpec): 2, RDFNode (org.apache.jena.rdf.model.RDFNode): 1, Resource (org.apache.jena.rdf.model.Resource): 1, Statement (org.apache.jena.rdf.model.Statement): 1