use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class MultipartRequestTest method testUploadWithMetadata.
/**
* Stanbol also supports uploading pre-existing metadata together with the content.
* This unit test uses an example that sends TextAnnotations for free-text
* tags provided by users; those tags are then linked to Entities in DBpedia.
* @throws IOException
*/
@Test
public void testUploadWithMetadata() throws IOException {
//create the metadata
RDFTerm user = new PlainLiteralImpl("Rupert Westenthaler");
final IRI contentItemId = new IRI("http://www.example.com/test.html");
Graph metadata = new SimpleGraph();
addTagAsTextAnnotation(metadata, contentItemId, "Germany", DBPEDIA_PLACE, user);
addTagAsTextAnnotation(metadata, contentItemId, "Europe", DBPEDIA_PLACE, user);
addTagAsTextAnnotation(metadata, contentItemId, "NATO", DBPEDIA_ORGANISATION, user);
addTagAsTextAnnotation(metadata, contentItemId, "Silvio Berlusconi", DBPEDIA_PERSON, user);
String rdfContentType = SupportedFormat.RDF_XML;
ByteArrayOutputStream out = new ByteArrayOutputStream();
serializer.serialize(out, metadata, rdfContentType);
String rdfContent = new String(out.toByteArray(), UTF8);
MultipartEntityBuilder ciBuilder = MultipartEntityBuilder.create();
//add the metadata
/*
* NOTE: We need to override getFilename() here, because it MUST
* BE the URI of the ContentItem. This is important because the
* metadata contains triples about that ContentItem, and therefore
* it MUST BE assured that the URI of the ContentItem created by
* the Stanbol Enhancer is the same as the URI used in the
* metadata!
*/
ciBuilder.addPart("metadata", new StringBody(rdfContent, ContentType.create(rdfContentType).withCharset(UTF8)) {
@Override
public String getFilename() {
//uri of the ContentItem
return contentItemId.getUnicodeString();
}
});
//add the content
ciBuilder.addTextBody("content", HTML_CONTENT, ContentType.TEXT_HTML.withCharset(UTF8));
//send the request
String receivedContent = executor.execute(
    builder.buildPostRequest(getEndpoint())
        .withHeader("Accept", "text/rdf+nt")
        .withEntity(ciBuilder.build()))
    .assertStatus(200)
    .assertContentRegexp(
        //and the expected enhancements based on the parsed content
        "http://purl.org/dc/terms/creator.*LanguageDetectionEnhancementEngine",
        "http://purl.org/dc/terms/language.*en",
        "http://fise.iks-project.eu/ontology/entity-label.*Paris",
        "http://purl.org/dc/terms/creator.*org.apache.stanbol.enhancer.engines.opennlp.*NamedEntityExtractionEnhancementEngine",
        "http://fise.iks-project.eu/ontology/entity-label.*Bob Marley",
        //additional enhancements based on parsed metadata
        "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/Germany.*",
        "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/NATO.*",
        "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/Silvio_Berlusconi.*",
        "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/Europe.*")
    .getContent();
log.debug("Content:\n{}\n", receivedContent);
}
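The addTagAsTextAnnotation(..) helper called above is not part of this snippet. The following is a minimal sketch of what such a helper could look like; the ontology constants (RDF_TYPE, ENHANCER_TEXTANNOTATION, ENHANCER_EXTRACTED_FROM, ENHANCER_SELECTED_TEXT, DC_TYPE, DC_CREATOR) are the usual Stanbol/Clerezza ones, and the urn:user-annotation URI scheme is an assumption, not taken from the original test.
//Hedged sketch (assumed implementation, not the original helper of MultipartRequestTest)
private static IRI addTagAsTextAnnotation(Graph graph, IRI contentItem, String tag, IRI dcType, RDFTerm creator) {
    //mint a URI for the user-provided tag annotation (URI scheme is illustrative)
    IRI textAnnotation = new IRI("urn:user-annotation:" + UUID.randomUUID());
    graph.add(new TripleImpl(textAnnotation, RDF_TYPE, ENHANCER_TEXTANNOTATION));
    graph.add(new TripleImpl(textAnnotation, ENHANCER_EXTRACTED_FROM, contentItem));
    //the tag text is added as a plain literal
    graph.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(tag)));
    graph.add(new TripleImpl(textAnnotation, DC_TYPE, dcType));
    if (creator != null) {
        graph.add(new TripleImpl(textAnnotation, DC_CREATOR, creator));
    }
    return textAnnotation;
}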
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class EntityLinkingEngineTest method testEngine.
/**
* This tests if the Enhancements created by the Engine conform to the
* rules defined for the Stanbol Enhancement Structure.
* @throws IOException
* @throws EngineException
*/
@Test
public void testEngine() throws IOException, EngineException {
EntityLinkerConfig linkerConfig = new EntityLinkerConfig();
linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
//this is assumed by this test
linkerConfig.setMinFoundTokens(2);
EntityLinkingEngine engine = new EntityLinkingEngine("dummy", searcher, new TextProcessingConfig(), linkerConfig, labelTokenizer);
ContentItem ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
//tells the engine that this is an English text
ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("en")));
//and add the AnalysedText instance used for this test
ci.addPart(AnalysedText.ANALYSED_TEXT_URI, TEST_ANALYSED_TEXT);
//compute the enhancements
engine.computeEnhancements(ci);
//validate the enhancement results
Map<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
expectedValues.put(ENHANCER_EXTRACTED_FROM, ci.getUri());
expectedValues.put(DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(engine.getClass().getName()));
//adding null as expected for confidence makes it a required property
expectedValues.put(Properties.ENHANCER_CONFIDENCE, null);
//validate the created fise:TextAnnotations
int numTextAnnotations = validateAllTextAnnotations(ci.getMetadata(), TEST_TEXT, expectedValues);
assertEquals("Four fise:TextAnnotations are expected by this Test", 4, numTextAnnotations);
//validate the created fise:EntityAnnotations
int numEntityAnnotations = validateAllEntityAnnotations(ci, expectedValues);
assertEquals("Five fise:EntityAnnotations are expected by this Test", 5, numEntityAnnotations);
}
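For context, a short hedged sketch of how the dc:language triple set above is typically consumed: EnhancementEngineHelper.getLanguage(ContentItem) is the utility commonly used by Stanbol engines to read the language, and the assertion below is only an illustration of what the test setup implies.
//Hedged sketch: reading back the language that was set via PlainLiteralImpl above
String language = EnhancementEngineHelper.getLanguage(ci);
assertEquals("en", language);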
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class EnhancementEngineHelper method setOccurrence.
/**
* This method sets the fise:start, fise:end, fise:selection-prefix,
* fise:selected-text and fise:selection-suffix properties for the
* given fise:TextAnnotation instance according to the passed parameters.<p>
* While it is intended to be used for TextAnnotations, this method can also
* be used to add the mentioned properties to {@link IRI}s of a different
* type.<p>
* <b>NOTE</b> on <code>allowSelectionHeadTail</code>: this parameter allows
* deactivating the use of fise:selection-head and fise:selection-tail.
* Typically users should pass <code>false</code> for 'named entities'
* and <code>true</code> when sections of the text (e.g. phrases, sentences,
* chapters ...) are selected.
* @param metadata The RDF graph to add the information to
* @param textAnnotation the IRI of the fise:TextAnnotation
* @param content the plain text content as String
* @param start the start index of the occurrence
* @param end the end index of the occurrence
* @param lang the language of the content or <code>null</code> if not known
* @param prefixSuffixSize the size of the prefix/suffix. If the given
* value is < 3, the default of 10 is used.
* @param allowSelectionHeadTail if <code>true</code> the fise:selection-head
* and fise:selection-tail properties are used instead of fise:selected-text
* if the selected text is longer than <code>Math.max(30, prefixSuffixSize*5)</code>.
* If <code>false</code> the fise:selected-text is added regardless of the
* size of the selected area.
* @since 0.11.0
*/
public static void setOccurrence(Graph metadata, IRI textAnnotation, String content, Integer start, Integer end, Language lang, int prefixSuffixSize, boolean allowSelectionHeadTail) {
//set start, end
metadata.add(new TripleImpl(textAnnotation, ENHANCER_START, lf.createTypedLiteral(start)));
metadata.add(new TripleImpl(textAnnotation, ENHANCER_END, lf.createTypedLiteral(end)));
//set selection prefix and suffix (TextAnnotation new model)
prefixSuffixSize = prefixSuffixSize < MIN_PREFIX_SUFFIX_SIZE ? DEFAULT_PREFIX_SUFFIX_LENGTH : prefixSuffixSize;
metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_PREFIX, new PlainLiteralImpl(content.substring(Math.max(0, start - prefixSuffixSize), start), lang)));
metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_SUFFIX, new PlainLiteralImpl(content.substring(end, Math.min(content.length(), end + prefixSuffixSize)), lang)));
//set the selected text (or alternatively head and tail)
int maxSelectedTextSize = Math.max(MIN_SELECTEN_HEAD_TAIL_USAGE_LENGTH, prefixSuffixSize * 5);
if (!allowSelectionHeadTail || end - start <= maxSelectedTextSize) {
metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(content.substring(start, end), lang)));
} else {
//selected area too long for fise:selected-text
//use fise:selection-head and fise:selection-tail instead
metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_HEAD, new PlainLiteralImpl(content.substring(start, start + prefixSuffixSize), lang)));
metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_TAIL, new PlainLiteralImpl(content.substring(end - prefixSuffixSize, end), lang)));
}
}
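A hedged usage sketch for setOccurrence(..): the content string, the offsets and the urn:enhancement URI below are illustrative only; in a real engine the fise:TextAnnotation IRI would normally be created via the EnhancementEngineHelper text-enhancement factory methods.
//Hedged usage sketch (illustrative content, offsets and annotation URI)
String content = "The capital of France is Paris.";
int start = content.indexOf("Paris");
int end = start + "Paris".length();
Graph metadata = new SimpleGraph();
IRI textAnnotation = new IRI("urn:enhancement:example");
//prefixSuffixSize < 3 (here -1) falls back to the default of 10;
//allowSelectionHeadTail=false because a 'named entity' is selected
EnhancementEngineHelper.setOccurrence(metadata, textAnnotation, content, start, end, new Language("en"), -1, false);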
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class ExecutionPlanHelper method writeEnhancementProperty.
/**
* Writes the enhancement property value(s) for the given node and property to the
* execution plan graph.
* @param ep the RDF graph holding the execution plan
* @param epNode the execution node
* @param property the property
* @param value the value(s). {@link Collection} and <code>Object[]</code> are
* supported for multiple values.
* @throws NullPointerException if any of the given parameters is <code>null</code>
*/
@SuppressWarnings("unchecked")
private static void writeEnhancementProperty(Graph ep, BlankNodeOrIRI epNode, IRI property, Object value) {
Collection<Object> values;
if (value instanceof Collection<?>) {
values = (Collection<Object>) value;
} else if (value instanceof Object[]) {
values = Arrays.asList((Object[]) value);
} else {
values = Collections.singleton(value);
}
for (Object v : values) {
if (v != null) {
Literal literal;
if (v instanceof String) {
literal = new PlainLiteralImpl((String) v);
} else {
try {
literal = lf.createTypedLiteral(v);
} catch (NoConvertorException e) {
log.warn("Use toString() value '{}' for EnhancementProperty " + "'{}' as no TypedLiteral converter is registered for " + "class {}", new Object[] { v, property, v.getClass().getName() });
literal = new PlainLiteralImpl(v.toString());
}
}
ep.add(new TripleImpl(epNode, property, literal));
}
}
}
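To illustrate the three branches above, a hedged sketch of the literals that would be created for typical EnhancementProperty values; the sample values are illustrative, and the exact datatype chosen by the LiteralFactory depends on its registered converters.
//Hedged illustration of the literal creation used above
Literal fromString = new PlainLiteralImpl("5"); //String values become plain literals
Literal fromTyped = LiteralFactory.getInstance().createTypedLiteral(5); //e.g. Integer becomes a typed literal
//values without a registered converter fall back to toString()
Literal fromFallback = new PlainLiteralImpl(Locale.ENGLISH.toString());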
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class ExecutionPlanHelper method writeExecutionNode.
/**
* Writes all triples for an ep:ExecutionNode to the given {@link Graph}.
* A {@link BlankNode} is used to represent the execution node resource.
* @param graph the graph to write the triples to. MUST NOT be <code>null</code>
* @param epNode the BlankNodeOrIRI representing the ep:ExecutionPlan
* @param engineName the name of the engine. MUST NOT be <code>null</code> nor empty
* @param optional if the execution of this node is optional or required
* @param dependsOn other nodes that MUST BE executed before this one. Pass
* <code>null</code> or an empty set if none.
* @param enhProps the EnhancementProperties for this ExecutionNode or
* <code>null</code> if none
* @return the resource representing the added ep:ExecutionNode.
* @since 0.12.1
*/
public static BlankNodeOrIRI writeExecutionNode(Graph graph, BlankNodeOrIRI epNode, String engineName, boolean optional, Set<BlankNodeOrIRI> dependsOn, Map<String, Object> enhProps) {
if (graph == null) {
throw new IllegalArgumentException("The parsed Graph MUST NOT be NULL!");
}
if (engineName == null || engineName.isEmpty()) {
throw new IllegalArgumentException("The parsed Engine name MUST NOT be NULL nor empty!");
}
if (epNode == null) {
throw new IllegalArgumentException("The ep:ExecutionPlan instance MUST NOT be NULL!");
}
BlankNodeOrIRI node = new BlankNode();
graph.add(new TripleImpl(epNode, HAS_EXECUTION_NODE, node));
graph.add(new TripleImpl(node, RDF_TYPE, EXECUTION_NODE));
graph.add(new TripleImpl(node, ENGINE, new PlainLiteralImpl(engineName)));
if (dependsOn != null) {
for (BlankNodeOrIRI dependend : dependsOn) {
if (dependend != null) {
graph.add(new TripleImpl(node, DEPENDS_ON, dependend));
}
}
}
graph.add(new TripleImpl(node, OPTIONAL, lf.createTypedLiteral(optional)));
writeEnhancementProperties(graph, node, engineName, enhProps);
return node;
}
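A hedged sketch of how writeExecutionNode(..) might be used to build a small execution plan with two nodes, the second depending on the first. The engine names are illustrative, and the ep:ExecutionPlan resource would normally be created by the companion plan-creation helper of this class rather than instantiated directly as below.
//Hedged usage sketch (engine names and plan node creation are illustrative)
Graph ep = new SimpleGraph();
BlankNodeOrIRI plan = new BlankNode(); //normally created by the execution plan helper
BlankNodeOrIRI langDetectNode = writeExecutionNode(ep, plan, "langdetect", false, null, null);
//the linking node is optional and MUST BE executed after language detection
writeExecutionNode(ep, plan, "entityLinking", true, Collections.singleton(langDetectNode), null);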