use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class DereferenceEngineConfig method parseEntityReferences.
/**
 * Parses the property URIs configured for
 * {@link DereferenceConstants#ENTITY_REFERENCE_PROPERTIES}.
 * <p>
 * The values are read from the {@code ENTITY_REFERENCES} key of the engine
 * configuration. Blank values are skipped; every other value is trimmed and
 * passed through {@code getConfiguredUri(..)} before being wrapped in an
 * {@link IRI}.
 *
 * @return {@code DEFAULT_ENTITY_REFERENCES} if nothing is configured (the
 * configuration value is missing or empty); otherwise a new set holding one
 * IRI per non-blank configured value. Note that this set is empty when all
 * configured values are blank.
 * @throws ConfigurationException propagated from the helper methods used to
 * read and resolve the configured values
 */
private Set<IRI> parseEntityReferences() throws ConfigurationException {
    Collection<String> configured = EnhancementEngineHelper.getConfigValues(config, ENTITY_REFERENCES, String.class);
    // nothing configured: use the defaults as-is
    if (configured == null || configured.isEmpty()) {
        return DEFAULT_ENTITY_REFERENCES;
    }
    Set<IRI> references = new HashSet<IRI>(configured.size());
    for (String value : configured) {
        if (StringUtils.isBlank(value)) {
            // ignore empty entries of a multi-valued configuration
            continue;
        }
        String uri = getConfiguredUri(nsPrefixService, ENTITY_REFERENCES, value.trim());
        references.add(new IRI(uri));
    }
    return references;
}
use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class DereferenceEngineTest method validateDereferencedEntities.
/**
 * Asserts that the parsed metadata contain exactly the dereferenced data
 * of the entities referenced by the given properties.
 * <p>
 * For every triple in {@code metadata} using one of the parsed properties
 * as predicate, the object is treated as entity and all triples of
 * {@code testData} having that entity as subject are expected to be
 * present in {@code metadata}. All other triples of {@code testData} must
 * NOT be present.
 *
 * @param metadata the enhancement metadata to validate
 * @param entityReferenceFields the properties referencing entities. The
 * objects of those triples are cast to {@link IRI}, so they must not be
 * literals or blank nodes.
 */
private void validateDereferencedEntities(Graph metadata, IRI... entityReferenceFields) {
    Graph shouldBePresent = new IndexedGraph();
    for (IRI field : entityReferenceFields) {
        for (Iterator<Triple> refs = metadata.filter(null, field, null); refs.hasNext();) {
            IRI referencedEntity = (IRI) refs.next().getObject();
            // collect everything the test data knows about the referenced entity
            for (Iterator<Triple> data = testData.filter(referencedEntity, null, null); data.hasNext();) {
                shouldBePresent.add(data.next());
            }
        }
    }
    // the rest of the test data belongs to entities that were not referenced
    Graph shouldBeAbsent = new IndexedGraph(testData);
    shouldBeAbsent.removeAll(shouldBePresent);
    Assert.assertTrue(metadata.containsAll(shouldBePresent));
    Assert.assertTrue(Collections.disjoint(metadata, shouldBeAbsent));
}
use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class SpotlightEngineUtils method createEntityAnnotation.
/**
 * Creates a fise:EntityAnnotation for the parsed parameters and
 * adds it to the {@link ContentItem#getMetadata()}. <p>
 * The annotation is related (dc:relation) to the parsed text annotation and
 * describes the candidate by its label (always tagged with the language
 * "en"), its URI and its contextual score, percentage of second rank,
 * support, prior score and final score.<p>
 * This method assumes a write lock on the parsed content item.
 * @param resource the candidate resource
 * @param engine the engine
 * @param ci the content item
 * @param textAnnotation the fise:TextAnnotation to dc:relate the
 * created fise:EntityAnnotation
 * @return the URI of the created fise:EntityAnnotation
 */
public static IRI createEntityAnnotation(CandidateResource resource, EnhancementEngine engine, ContentItem ci, IRI textAnnotation) {
    IRI annotation = EnhancementEngineHelper.createEntityEnhancement(ci, engine);
    Graph metadata = ci.getMetadata();
    Literal entityLabel = new PlainLiteralImpl(resource.label, new Language("en"));
    // link to the text annotation and describe the suggested entity
    metadata.add(new TripleImpl(annotation, DC_RELATION, textAnnotation));
    metadata.add(new TripleImpl(annotation, ENHANCER_ENTITY_LABEL, entityLabel));
    metadata.add(new TripleImpl(annotation, ENHANCER_ENTITY_REFERENCE, resource.getUri()));
    // scores reported for the candidate
    metadata.add(new TripleImpl(
        annotation,
        PROPERTY_CONTEXTUAL_SCORE,
        literalFactory.createTypedLiteral(resource.contextualScore)));
    metadata.add(new TripleImpl(
        annotation,
        PROPERTY_PERCENTAGE_OF_SECOND_RANK,
        literalFactory.createTypedLiteral(resource.percentageOfSecondRank)));
    metadata.add(new TripleImpl(
        annotation,
        PROPERTY_SUPPORT,
        literalFactory.createTypedLiteral(resource.support)));
    metadata.add(new TripleImpl(
        annotation,
        PROPERTY_PRIOR_SCORE,
        literalFactory.createTypedLiteral(resource.priorScore)));
    metadata.add(new TripleImpl(
        annotation,
        PROPERTY_FINAL_SCORE,
        literalFactory.createTypedLiteral(resource.finalScore)));
    return annotation;
}
use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class ContentItemResource method getPlacesAsJSON.
/**
 * Serializes the geo coordinates of the extracted places as RDF/JSON.
 * <p>
 * For the best guess of every place occurrence the first latitude found in
 * the content item metadata is added. If a longitude is present, the first
 * one is added together with an rdfs:label holding the label of the best
 * guess. Place occurrences without a best guess are ignored.
 *
 * @return an RDF/JSON description of places for the map widget
 */
public String getPlacesAsJSON() throws ParseException, UnsupportedEncodingException {
    Graph places = new IndexedGraph();
    LiteralFactory literals = LiteralFactory.getInstance();
    Graph enhancements = contentItem.getMetadata();
    for (EntityExtractionSummary place : getPlaceOccurrences()) {
        EntitySuggestion suggestion = place.getBestGuess();
        if (suggestion != null) {
            IRI entity = new IRI(suggestion.getUri());
            Iterator<Triple> latitudes = enhancements.filter(entity, GEO_LAT, null);
            if (latitudes.hasNext()) {
                places.add(latitudes.next());
            }
            Iterator<Triple> longitudes = enhancements.filter(entity, GEO_LONG, null);
            if (longitudes.hasNext()) {
                places.add(longitudes.next());
                // the label is only added for entities that have a longitude
                places.add(new TripleImpl(entity, Properties.RDFS_LABEL, literals.createTypedLiteral(suggestion.getLabel())));
            }
        }
    }
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    serializer.serialize(buffer, places, SupportedFormat.RDF_JSON);
    return buffer.toString("utf-8");
}
use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class ContentItemReaderWriterTest method createTestContentItem.
/**
 * Initialises the fixtures shared by all tests of this class: the
 * {@code contentItem} (an HTML main content, an additional plain text
 * part, a single rdf:type triple, request properties and execution
 * metadata) as well as the {@code ciWriter} and {@code ciReader} under
 * test.
 *
 * @throws IOException declared for the content item / blob creation
 */
@BeforeClass
public static void createTestContentItem() throws IOException {
    IRI itemUri = new IRI("urn:test");
    StringSource htmlSource = new StringSource("<html>\n" + " <body>\n" + " This is a <b>ContentItem</b> to <i>Mime Multipart</i> test!\n" + " </body>\n" + "</html>", "text/html");
    contentItem = ciFactory.createContentItem(itemUri, htmlSource);
    RuntimeDelegate.setInstance(new RuntimeDelegateImpl());

    // additional content part holding a plain text version
    IRI plainTextPartUri = new IRI("run:text:text");
    StringSource plainTextSource = new StringSource("This is a ContentItem to Mime Multipart test!");
    contentItem.addPart(plainTextPartUri, ciFactory.createBlob(plainTextSource));

    Graph metadata = contentItem.getMetadata();
    metadata.add(new TripleImpl(new IRI("urn:test"), RDF.type, new IRI("urn:types:Document")));

    // mark the main content as parsed and also that all
    // contents and contentparts should be included
    Map<String, Object> requestProperties = initRequestPropertiesContentPart(contentItem);
    String mainPartUri = contentItem.getPartUri(0).getUnicodeString();
    requestProperties.put(PARSED_CONTENT_URIS, Collections.singleton(mainPartUri));
    requestProperties.put(OUTPUT_CONTENT, Collections.singleton("*/*"));
    requestProperties.put(OUTPUT_CONTENT_PART, Collections.singleton("*"));
    requestProperties.put(RDF_FORMAT, "application/rdf+xml");

    // execution metadata describing a chain with a single engine
    Graph executionMetadata = initExecutionMetadataContentPart(contentItem);
    BlankNodeOrIRI executionPlan = createExecutionPlan(executionMetadata, "testChain", null);
    writeExecutionNode(executionMetadata, executionPlan, "testEngine", true, null, null);
    initExecutionMetadata(executionMetadata, executionMetadata, contentItem.getUri(), "testChain", false);

    ciWriter = new ContentItemWriter(Serializer.getInstance());
    // the reader normally gets its dependencies injected; provide them directly
    ciReader = new ContentItemReader() {
        @Override
        protected ContentItemFactory getContentItemFactory() {
            return ciFactory;
        }

        @Override
        protected Parser getParser() {
            return Parser.getInstance();
        }
    };
}
Aggregations