Search in sources :

Example 6 with EntityDefinition

use of org.apache.jena.query.text.EntityDefinition in project jena by apache.

the class JenaTextExample1 method createCode.

/**
 * Assembles a text-indexed dataset purely in code, with both the base data
 * and the Lucene index held in memory.
 *
 * @return the base dataset combined with an in-memory Lucene text index
 */
public static Dataset createCode() {
    log.info("Construct an in-memory dataset with in-memory lucene index using code");
    // Base data: in-memory dataset.
    Dataset baseData = DatasetFactory.create();
    // Index mapping: entity field "uri", default field "text", with
    // rdfs:label as the primary predicate.
    EntityDefinition mapping = new EntityDefinition("uri", "text");
    mapping.setPrimaryPredicate(RDFS.label.asNode());
    // Lucene index: in-memory directory.
    Directory luceneDir = new RAMDirectory();
    TextIndexConfig indexConfig = new TextIndexConfig(mapping);
    // Combine base data and index into a single dataset.
    return TextDatasetFactory.createLucene(baseData, luceneDir, indexConfig);
}
Also used : EntityDefinition(org.apache.jena.query.text.EntityDefinition) TextIndexConfig(org.apache.jena.query.text.TextIndexConfig) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory)

Example 7 with EntityDefinition

use of org.apache.jena.query.text.EntityDefinition in project jena by apache.

the class TextIndexESAssembler method open.

/*
    <#index> a :TextIndexES ;
        text:serverList "127.0.0.1:9300,127.0.0.2:9400,127.0.0.3:9500" ; #Comma separated list of hosts:ports
        text:clusterName "elasticsearch" ;
        text:shards "1" ;
        text:replicas "1" ;
        text:entityMap <#entMap> ;
        .
    */
/**
 * Builds an Elasticsearch-backed {@link TextIndex} from an assembler description.
 *
 * <p>The server list is mandatory. Cluster name, shard count, replica count and
 * index name fall back to "elasticsearch", "1", "1" and "jena-text" respectively
 * (with a warning) when absent or empty. Server list entries that are not in
 * {@code host:port} form are logged and skipped.
 *
 * @param a    assembler used to open the nested entity map resource
 * @param root resource describing the index
 * @param mode assembler mode (not consulted by this method)
 * @return the text index created from the configuration
 * @throws TextIndexException if the server list is missing or empty, or if any
 *         other failure occurs while reading the configuration (the original
 *         failure is attached as the cause)
 */
@Override
public TextIndex open(Assembler a, Resource root, Mode mode) {
    try {
        String listOfHostsAndPorts = GraphUtils.getAsStringValue(root, pServerList);
        if (listOfHostsAndPorts == null || listOfHostsAndPorts.isEmpty()) {
            throw new TextIndexException("Mandatory property text:serverList (containing the comma-separated list of host:port) property is not specified. " + "An example value for the property: 127.0.0.1:9300");
        }
        String[] hosts = listOfHostsAndPorts.split(COMMA);
        Map<String, Integer> hostAndPortMapping = new HashMap<>();
        for (String host : hosts) {
            String[] hostAndPort = host.split(COLON);
            if (hostAndPort.length < 2) {
                LOGGER.error("Either the host or the port value is missing.Please specify the property in host:port format. " + "Both parts are mandatory. Ignoring this value. Moving to the next one.");
                continue;
            }
            // Trim both parts so that "h1:9300, h2:9400" (whitespace after the
            // comma) parses instead of failing in Integer.valueOf.
            hostAndPortMapping.put(hostAndPort[0].trim(), Integer.valueOf(hostAndPort[1].trim()));
        }
        String clusterName = GraphUtils.getAsStringValue(root, pClusterName);
        if (clusterName == null || clusterName.isEmpty()) {
            LOGGER.warn("ClusterName property is not specified. Defaulting to 'elasticsearch'");
            clusterName = "elasticsearch";
        }
        String numberOfShards = GraphUtils.getAsStringValue(root, pShards);
        if (numberOfShards == null || numberOfShards.isEmpty()) {
            LOGGER.warn("shards property is not specified. Defaulting to '1'");
            numberOfShards = "1";
        }
        String replicationFactor = GraphUtils.getAsStringValue(root, pReplicas);
        if (replicationFactor == null || replicationFactor.isEmpty()) {
            LOGGER.warn("replicas property is not specified. Defaulting to '1'");
            replicationFactor = "1";
        }
        String indexName = GraphUtils.getAsStringValue(root, pIndexName);
        if (indexName == null || indexName.isEmpty()) {
            LOGGER.warn("index Name property is not specified. Defaulting to 'jena-text'");
            indexName = "jena-text";
        }
        Resource r = GraphUtils.getResourceValue(root, pEntityMap);
        EntityDefinition docDef = (EntityDefinition) a.open(r);
        TextIndexConfig config = new TextIndexConfig(docDef);
        //We have to create an ES specific settings class in order to pass the Index Initialization specific properties.
        ESSettings settings = new ESSettings().builder().clusterName(clusterName).hostAndPortMap(hostAndPortMapping).shards(Integer.valueOf(numberOfShards)).replicas(Integer.valueOf(replicationFactor)).indexName(indexName).build();
        return TextESDatasetFactory.createESIndex(config, settings);
    } catch (TextIndexException e) {
        // Already carries a specific message (e.g. the missing serverList
        // hint); rethrow as-is rather than burying it under the generic one.
        throw e;
    } catch (Exception e) {
        throw new TextIndexException("An exception occurred while trying to open/load the Assembler configuration. ", e);
    }
}
Also used : EntityDefinition(org.apache.jena.query.text.EntityDefinition) TextIndexException(org.apache.jena.query.text.TextIndexException) HashMap(java.util.HashMap) TextIndexConfig(org.apache.jena.query.text.TextIndexConfig) Resource(org.apache.jena.rdf.model.Resource)

Example 8 with EntityDefinition

use of org.apache.jena.query.text.EntityDefinition in project jena by apache.

the class BaseESTest method config.

/**
 * Builds a simple text index configuration.
 *
 * @return a {@link TextIndexConfig} wrapping an entity definition created from
 *         {@code DOC_TYPE}, the "label" field and {@code RDFS.label}, with an
 *         additional "comment" field set to {@code RDFS.comment} and "lang"
 *         as the language field
 */
private static TextIndexConfig config() {
    EntityDefinition definition = new EntityDefinition(DOC_TYPE, "label", RDFS.label);
    definition.set("comment", RDFS.comment.asNode());
    definition.setLangField("lang");
    return new TextIndexConfig(definition);
}
Also used : EntityDefinition(org.apache.jena.query.text.EntityDefinition) TextIndexConfig(org.apache.jena.query.text.TextIndexConfig)

Example 9 with EntityDefinition

use of org.apache.jena.query.text.EntityDefinition in project jena by apache.

the class EntityDefinitionAssembler method open.

/*
<#entMap> a text:EntityMap ;
    text:entityField      "uri" ;
    text:defaultField     "text" ;
    text:map (
         [ text:field "text" ; text:predicate rdfs:label ]
         [ text:field "type" ; text:predicate rdf:type  ]
         ) .
     */
/**
 * Builds an {@link EntityDefinition} from an entity map description.
 *
 * <p>The entity field, default field and the optional graph, language and
 * unique-id fields are read with a SPARQL query over the model that contains
 * {@code root}. The field-to-predicate mappings, and any per-field analyzers,
 * are read by walking the RDF list attached to {@code root} through
 * {@code TextVocab.pMap}.
 *
 * @param a    assembler used to open analyzer resources referenced by map entries
 * @param root resource describing the entity map
 * @param mode assembler mode (not consulted by this method)
 * @return the populated entity definition
 * @throws TextIndexException if zero or several entity maps match {@code root},
 *         if the map list or one of its entries is malformed, or if the
 *         default field has no mapping entry
 */
@Override
public EntityDefinition open(Assembler a, Resource root, Mode mode) {
    String prologue = "PREFIX : <" + NS + ">   PREFIX list: <http://jena.apache.org/ARQ/list#> ";
    Model model = root.getModel();
    String qs1 = StrUtils.strjoinNL(prologue, "SELECT * {", "  ?eMap  :entityField  ?entityField ;", "         :map ?map ;", "         :defaultField ?dftField .", "  OPTIONAL {", "    ?eMap :graphField ?graphField", "  }", "  OPTIONAL {", "    ?eMap :langField ?langField", "  }", "  OPTIONAL {", "    ?eMap :uidField ?uidField", "  }", "}");
    ParameterizedSparqlString pss = new ParameterizedSparqlString(qs1);
    pss.setIri("eMap", root.getURI());
    Query query1 = QueryFactory.create(pss.toString());
    // Materialise the rows inside try-with-resources so the query execution
    // is always closed, including when execution itself fails.
    List<QuerySolution> results;
    try (QueryExecution qexec1 = QueryExecutionFactory.create(query1, model)) {
        ResultSet rs1 = qexec1.execSelect();
        results = ResultSetFormatter.toList(rs1);
    }
    if (results.isEmpty()) {
        Log.warn(this, "Failed to find a valid EntityMap for : " + root);
        throw new TextIndexException("Failed to find a valid EntityMap for : " + root);
    }
    if (results.size() != 1) {
        Log.warn(this, "Multiple matches for EntityMap for : " + root);
        throw new TextIndexException("Multiple matches for EntityMap for : " + root);
    }
    QuerySolution qsol1 = results.get(0);
    String entityField = qsol1.getLiteral("entityField").getLexicalForm();
    String graphField = qsol1.contains("graphField") ? qsol1.getLiteral("graphField").getLexicalForm() : null;
    String langField = qsol1.contains("langField") ? qsol1.getLiteral("langField").getLexicalForm() : null;
    String defaultField = qsol1.contains("dftField") ? qsol1.getLiteral("dftField").getLexicalForm() : null;
    String uniqueIdField = qsol1.contains("uidField") ? qsol1.getLiteral("uidField").getLexicalForm() : null;
    Multimap<String, Node> mapDefs = HashMultimap.create();
    Map<String, Analyzer> analyzerDefs = new HashMap<>();
    // Walk the RDF list: the first statement points at the list head, each
    // later one is the rdf:rest link to the next cell.
    Statement listStmt = root.getProperty(TextVocab.pMap);
    while (listStmt != null) {
        RDFNode n = listStmt.getObject();
        if (!n.isResource()) {
            throw new TextIndexException("Text list node is not a resource : " + n);
        }
        Resource listResource = n.asResource();
        if (listResource.equals(RDF.nil)) {
            // end of the list
            break;
        }
        Statement listEntryStmt = listResource.getProperty(RDF.first);
        if (listEntryStmt == null) {
            throw new TextIndexException("Text map list is not well formed.  No rdf:first property");
        }
        n = listEntryStmt.getObject();
        if (!n.isResource()) {
            throw new TextIndexException("Text map list entry is not a resource : " + n);
        }
        Resource listEntry = n.asResource();
        Statement fieldStatement = listEntry.getProperty(TextVocab.pField);
        if (fieldStatement == null) {
            throw new TextIndexException("Text map entry has no field property");
        }
        n = fieldStatement.getObject();
        if (!n.isLiteral()) {
            throw new TextIndexException("Text map entry field property has no literal value : " + n);
        }
        String field = n.asLiteral().getLexicalForm();
        Statement predicateStatement = listEntry.getProperty(TextVocab.pPredicate);
        if (predicateStatement == null) {
            throw new TextIndexException("Text map entry has no predicate property");
        }
        n = predicateStatement.getObject();
        if (!n.isURIResource()) {
            throw new TextIndexException("Text map entry predicate property has non resource value : " + n);
        }
        mapDefs.put(field, n.asNode());
        Statement analyzerStatement = listEntry.getProperty(TextVocab.pAnalyzer);
        if (analyzerStatement != null) {
            n = analyzerStatement.getObject();
            if (!n.isResource()) {
                throw new TextIndexException("Text map entry analyzer property is not a resource : " + n);
            }
            Resource analyzerResource = n.asResource();
            Analyzer analyzer = (Analyzer) a.open(analyzerResource);
            analyzerDefs.put(field, analyzer);
        }
        // move on to the next element in the list
        listStmt = listResource.getProperty(RDF.rest);
    }
    // Primary field/predicate: the default field must have at least one mapping.
    if (defaultField != null && mapDefs.get(defaultField).isEmpty()) {
        throw new TextIndexException("No definition of primary field '" + defaultField + "'");
    }
    EntityDefinition docDef = new EntityDefinition(entityField, defaultField);
    docDef.setGraphField(graphField);
    docDef.setLangField(langField);
    docDef.setUidField(uniqueIdField);
    // keySet() rather than keys(): keys() repeats a field once per mapped
    // predicate, which would register every (field, predicate) pair repeatedly.
    for (String f : mapDefs.keySet()) {
        for (Node p : mapDefs.get(f)) {
            docDef.set(f, p);
        }
    }
    for (Map.Entry<String, Analyzer> analyzerEntry : analyzerDefs.entrySet()) {
        docDef.setAnalyzer(analyzerEntry.getKey(), analyzerEntry.getValue());
    }
    return docDef;
}
Also used : TextIndexException(org.apache.jena.query.text.TextIndexException) HashMap(java.util.HashMap) Node(org.apache.jena.graph.Node) Analyzer(org.apache.lucene.analysis.Analyzer) EntityDefinition(org.apache.jena.query.text.EntityDefinition)

Example 10 with EntityDefinition

use of org.apache.jena.query.text.EntityDefinition in project jena by apache.

the class TestEntityMapAssembler method EntityHasMapEntryWithSimpleAnalyzer.

/**
 * Opens {@code spec3} with a fresh {@link EntityDefinitionAssembler} and checks
 * that the analyzer registered for {@code SPEC1_DEFAULT_FIELD} is a
 * {@code SimpleAnalyzer}.
 */
@Test
public void EntityHasMapEntryWithSimpleAnalyzer() {
    EntityDefinition opened = new EntityDefinitionAssembler().open(Assembler.general, spec3, null);
    Object fieldAnalyzer = opened.getAnalyzer(SPEC1_DEFAULT_FIELD);
    assertEquals(SimpleAnalyzer.class, fieldAnalyzer.getClass());
}
Also used : EntityDefinition(org.apache.jena.query.text.EntityDefinition) Test(org.junit.Test)

Aggregations

EntityDefinition (org.apache.jena.query.text.EntityDefinition)13 Test (org.junit.Test)9 TextIndexConfig (org.apache.jena.query.text.TextIndexConfig)3 HashMap (java.util.HashMap)2 TextIndexException (org.apache.jena.query.text.TextIndexException)2 Node (org.apache.jena.graph.Node)1 Resource (org.apache.jena.rdf.model.Resource)1 Analyzer (org.apache.lucene.analysis.Analyzer)1 Directory (org.apache.lucene.store.Directory)1 RAMDirectory (org.apache.lucene.store.RAMDirectory)1