use of org.apache.jena.query.text.EntityDefinition in project jena by apache.
the class JenaTextExample1 method createCode.
/**
 * Builds a text-indexed dataset entirely in code: an in-memory base dataset
 * combined with an in-memory Lucene index.
 *
 * @return the dataset produced by {@code TextDatasetFactory.createLucene}
 */
public static Dataset createCode() {
    log.info("Construct an in-memory dataset with in-memory lucene index using code");

    // Storage for the base data: a plain in-memory dataset.
    Dataset baseData = DatasetFactory.create();

    // Index mapping: entity field "uri", default field "text",
    // with rdfs:label registered as the primary predicate.
    EntityDefinition mapping = new EntityDefinition("uri", "text");
    mapping.setPrimaryPredicate(RDFS.label.asNode());

    // Storage for the Lucene index: an in-memory directory.
    Directory luceneDir = new RAMDirectory();

    // Join base data and index into a single text dataset.
    TextIndexConfig indexConfig = new TextIndexConfig(mapping);
    return TextDatasetFactory.createLucene(baseData, luceneDir, indexConfig);
}
use of org.apache.jena.query.text.EntityDefinition in project jena by apache.
the class TextIndexESAssembler method open.
/*
<#index> a :TextIndexES ;
text:serverList "127.0.0.1:9300,127.0.0.2:9400,127.0.0.3:9500" ; #Comma separated list of hosts:ports
text:clusterName "elasticsearch" ;
text:shards "1" ;
text:replicas "1" ;
text:entityMap <#entMap> ;
.
*/
/**
 * Builds an Elasticsearch-backed {@link TextIndex} from the assembler description rooted at {@code root}.
 *
 * <p>The server list is mandatory. The cluster name, shard count, replica count and index name
 * fall back to {@code "elasticsearch"}, {@code "1"}, {@code "1"} and {@code "jena-text"}
 * respectively (with a warning) when absent or empty.
 *
 * @param a assembler used to open the entity map resource
 * @param root the resource describing the index
 * @param mode not used by this method
 * @return the text index created by {@code TextESDatasetFactory.createESIndex}
 * @throws TextIndexException if the server list is missing, or if any other failure occurs while
 *         reading the configuration (the original failure is kept as the cause)
 */
@Override
public TextIndex open(Assembler a, Resource root, Mode mode) {
    try {
        String listOfHostsAndPorts = GraphUtils.getAsStringValue(root, pServerList);
        if (listOfHostsAndPorts == null || listOfHostsAndPorts.isEmpty()) {
            throw new TextIndexException("Mandatory property text:serverList (containing the comma-separated list of host:port) property is not specified. " + "An example value for the property: 127.0.0.1:9300");
        }
        Map<String, Integer> hostAndPortMapping = new HashMap<>();
        for (String host : listOfHostsAndPorts.split(COMMA)) {
            String[] hostAndPort = host.split(COLON);
            if (hostAndPort.length < 2) {
                LOGGER.error("Either the host or the port value is missing.Please specify the property in host:port format. " + "Both parts are mandatory. Ignoring this value. Moving to the next one.");
                continue;
            }
            // Trim both parts so that a list written as "h1:9300, h2:9300" is accepted:
            // Integer.valueOf rejects surrounding whitespace, and a host name must not carry it.
            hostAndPortMapping.put(hostAndPort[0].trim(), Integer.valueOf(hostAndPort[1].trim()));
        }
        String clusterName = GraphUtils.getAsStringValue(root, pClusterName);
        if (clusterName == null || clusterName.isEmpty()) {
            LOGGER.warn("ClusterName property is not specified. Defaulting to 'elasticsearch'");
            clusterName = "elasticsearch";
        }
        String numberOfShards = GraphUtils.getAsStringValue(root, pShards);
        if (numberOfShards == null || numberOfShards.isEmpty()) {
            LOGGER.warn("shards property is not specified. Defaulting to '1'");
            numberOfShards = "1";
        }
        String replicationFactor = GraphUtils.getAsStringValue(root, pReplicas);
        if (replicationFactor == null || replicationFactor.isEmpty()) {
            LOGGER.warn("replicas property is not specified. Defaulting to '1'");
            replicationFactor = "1";
        }
        String indexName = GraphUtils.getAsStringValue(root, pIndexName);
        if (indexName == null || indexName.isEmpty()) {
            LOGGER.warn("index Name property is not specified. Defaulting to 'jena-text'");
            indexName = "jena-text";
        }
        Resource r = GraphUtils.getResourceValue(root, pEntityMap);
        EntityDefinition docDef = (EntityDefinition) a.open(r);
        TextIndexConfig config = new TextIndexConfig(docDef);
        // We have to create an ES specific settings class in order to pass the index initialization specific properties.
        ESSettings settings = new ESSettings().builder().clusterName(clusterName).hostAndPortMap(hostAndPortMapping).shards(Integer.valueOf(numberOfShards)).replicas(Integer.valueOf(replicationFactor)).indexName(indexName).build();
        return TextESDatasetFactory.createESIndex(config, settings);
    } catch (TextIndexException e) {
        // Already a descriptive configuration error (e.g. missing text:serverList);
        // rethrow as-is instead of burying its message under the generic wrapper below.
        throw e;
    } catch (Exception e) {
        throw new TextIndexException("An exception occurred while trying to open/load the Assembler configuration. ", e);
    }
}
use of org.apache.jena.query.text.EntityDefinition in project jena by apache.
the class BaseESTest method config.
/**
 * Creates the simple text index configuration used for the text index.
 *
 * <p>The underlying entity definition is constructed from {@code DOC_TYPE}, the field
 * {@code "label"} and {@code RDFS.label}; it additionally registers {@code "comment"} for
 * {@code RDFS.comment} and uses {@code "lang"} as the language field.
 *
 * @return a new {@code TextIndexConfig} wrapping that entity definition
 */
private static TextIndexConfig config() {
    EntityDefinition definition = new EntityDefinition(DOC_TYPE, "label", RDFS.label);
    definition.set("comment", RDFS.comment.asNode());
    definition.setLangField("lang");
    return new TextIndexConfig(definition);
}
use of org.apache.jena.query.text.EntityDefinition in project jena by apache.
the class EntityDefinitionAssembler method open.
/*
<#entMap> a text:EntityMap ;
text:entityField "uri" ;
text:defaultField "text" ;
text:map (
[ text:field "text" ; text:predicate rdfs:label ]
[ text:field "type" ; text:predicate rdf:type ]
) .
*/
/**
 * Builds an {@link EntityDefinition} from the entity map description rooted at {@code root}.
 *
 * <p>The entity, default, graph, language and unique-id field names are read with a SPARQL
 * query over {@code root}'s model. The field-to-predicate mappings, and the optional per-field
 * analyzers, are read by walking the RDF list attached to {@code root} via {@code TextVocab.pMap}.
 *
 * @param a assembler used to open any analyzer resource referenced by a map entry
 * @param root the entity map resource; its URI is bound into the query
 *        (NOTE(review): a blank-node root has no URI - confirm callers always pass a URI resource)
 * @param mode not used by this method
 * @return the populated entity definition
 * @throws TextIndexException if no entity map or more than one matches, if the map list or one of
 *         its entries is malformed, or if the default field has no predicate mapping
 */
@Override
public EntityDefinition open(Assembler a, Resource root, Mode mode) {
    String prologue = "PREFIX : <" + NS + "> PREFIX list: <http://jena.apache.org/ARQ/list#> ";
    Model model = root.getModel();
    String qs1 = StrUtils.strjoinNL(prologue, "SELECT * {", " ?eMap :entityField ?entityField ;", " :map ?map ;", " :defaultField ?dftField .", " OPTIONAL {", " ?eMap :graphField ?graphField", " }", " OPTIONAL {", " ?eMap :langField ?langField", " }", " OPTIONAL {", " ?eMap :uidField ?uidField", " }", "}");
    ParameterizedSparqlString pss = new ParameterizedSparqlString(qs1);
    pss.setIri("eMap", root.getURI());
    Query query1 = QueryFactory.create(pss.toString());
    List<QuerySolution> results;
    // Copy all rows into a list while the execution is open, then release it.
    try (QueryExecution qexec1 = QueryExecutionFactory.create(query1, model)) {
        results = ResultSetFormatter.toList(qexec1.execSelect());
    }
    if (results.isEmpty()) {
        Log.warn(this, "Failed to find a valid EntityMap for : " + root);
        throw new TextIndexException("Failed to find a valid EntityMap for : " + root);
    }
    if (results.size() != 1) {
        Log.warn(this, "Multiple matches for EntityMap for : " + root);
        throw new TextIndexException("Multiple matches for EntityMap for : " + root);
    }
    QuerySolution qsol1 = results.get(0);
    String entityField = qsol1.getLiteral("entityField").getLexicalForm();
    String graphField = qsol1.contains("graphField") ? qsol1.getLiteral("graphField").getLexicalForm() : null;
    String langField = qsol1.contains("langField") ? qsol1.getLiteral("langField").getLexicalForm() : null;
    String defaultField = qsol1.contains("dftField") ? qsol1.getLiteral("dftField").getLexicalForm() : null;
    String uniqueIdField = qsol1.contains("uidField") ? qsol1.getLiteral("uidField").getLexicalForm() : null;
    Multimap<String, Node> mapDefs = HashMultimap.create();
    Map<String, Analyzer> analyzerDefs = new HashMap<>();
    // Walk the RDF list (rdf:first / rdf:rest) of map entries until rdf:nil.
    Statement listStmt = root.getProperty(TextVocab.pMap);
    while (listStmt != null) {
        RDFNode n = listStmt.getObject();
        if (!n.isResource()) {
            throw new TextIndexException("Text list node is not a resource : " + n);
        }
        Resource listResource = n.asResource();
        if (listResource.equals(RDF.nil)) {
            // end of the list
            break;
        }
        Statement listEntryStmt = listResource.getProperty(RDF.first);
        if (listEntryStmt == null) {
            throw new TextIndexException("Text map list is not well formed. No rdf:first property");
        }
        n = listEntryStmt.getObject();
        if (!n.isResource()) {
            throw new TextIndexException("Text map list entry is not a resource : " + n);
        }
        Resource listEntry = n.asResource();

        // Mandatory: the index field name, which must be a literal.
        Statement fieldStatement = listEntry.getProperty(TextVocab.pField);
        if (fieldStatement == null) {
            throw new TextIndexException("Text map entry has no field property");
        }
        n = fieldStatement.getObject();
        if (!n.isLiteral()) {
            throw new TextIndexException("Text map entry field property has no literal value : " + n);
        }
        String field = n.asLiteral().getLexicalForm();

        // Mandatory: the predicate mapped to that field, which must be a URI resource.
        Statement predicateStatement = listEntry.getProperty(TextVocab.pPredicate);
        if (predicateStatement == null) {
            throw new TextIndexException("Text map entry has no predicate property");
        }
        n = predicateStatement.getObject();
        if (!n.isURIResource()) {
            throw new TextIndexException("Text map entry predicate property has non resource value : " + n);
        }
        mapDefs.put(field, n.asNode());

        // Optional: an analyzer for the field, opened through the assembler.
        Statement analyzerStatement = listEntry.getProperty(TextVocab.pAnalyzer);
        if (analyzerStatement != null) {
            n = analyzerStatement.getObject();
            if (!n.isResource()) {
                throw new TextIndexException("Text map entry analyzer property is not a resource : " + n);
            }
            Resource analyzerResource = n.asResource();
            Analyzer analyzer = (Analyzer) a.open(analyzerResource);
            analyzerDefs.put(field, analyzer);
        }
        // move on to the next element in the list
        listStmt = listResource.getProperty(RDF.rest);
    }
    // Primary field/predicate: the default field must have at least one mapped predicate.
    if (defaultField != null) {
        Collection<Node> c = mapDefs.get(defaultField);
        if (c.isEmpty()) {
            throw new TextIndexException("No definition of primary field '" + defaultField + "'");
        }
    }
    EntityDefinition docDef = new EntityDefinition(entityField, defaultField);
    docDef.setGraphField(graphField);
    docDef.setLangField(langField);
    docDef.setUidField(uniqueIdField);
    // entries() yields each (field, predicate) pair exactly once; keys() would repeat a field
    // once per mapped predicate and re-register the same pairs.
    for (Map.Entry<String, Node> mapping : mapDefs.entries()) {
        docDef.set(mapping.getKey(), mapping.getValue());
    }
    for (Map.Entry<String, Analyzer> fieldAnalyzer : analyzerDefs.entrySet()) {
        docDef.setAnalyzer(fieldAnalyzer.getKey(), fieldAnalyzer.getValue());
    }
    return docDef;
}
use of org.apache.jena.query.text.EntityDefinition in project jena by apache.
the class TestEntityMapAssembler method EntityHasMapEntryWithSimpleAnalyzer.
/**
 * Checks that the entity definition assembled from {@code spec3} reports a
 * {@code SimpleAnalyzer} for the field named by {@code SPEC1_DEFAULT_FIELD}.
 */
@Test
public void EntityHasMapEntryWithSimpleAnalyzer() {
    EntityDefinition assembled = new EntityDefinitionAssembler().open(Assembler.general, spec3, null);
    Class<?> analyzerType = assembled.getAnalyzer(SPEC1_DEFAULT_FIELD).getClass();
    assertEquals(SimpleAnalyzer.class, analyzerType);
}
Aggregations