Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in the Apache Stanbol project — class EntityLinker, method processRedirects.
/**
 * Processes {@link EntitySearcher#getRedirectField() redirect field} values for
 * the parsed suggestions based on the {@link RedirectProcessingMode}
 * as configured in the {@link #config}.<p>
 * The results of this method are stored within the parsed {@link Suggestion}s
 * @param suggestion The suggestion to process.
 * @throws EntitySearcherException if the lookup of a redirect target via the
 * {@link #entitySearcher} fails
 */
private void processRedirects(Suggestion suggestion) throws EntitySearcherException {
    //if mode is IGNORE -> nothing to do
    if (linkerConfig.getRedirectProcessingMode() == RedirectProcessingMode.IGNORE) {
        return;
    }
    //redirects are only processed once per suggestion;
    //therefore there is a small internal state that stores this information
    if (suggestion.isRedirectedProcessed()) {
        //Redirects for ResultMatch are already processed ... ignore
        return;
    }
    Entity result = suggestion.getResult();
    Iterator<IRI> redirects = result.getReferences(linkerConfig.getRedirectField());
    switch (linkerConfig.getRedirectProcessingMode()) {
        case ADD_VALUES:
            //copy all data of every redirect target over to the result entity
            Graph entityData = result.getData();
            IRI entityUri = result.getUri();
            while (redirects.hasNext()) {
                IRI redirect = redirects.next();
                if (redirect != null) {
                    Entity redirectedEntity = entitySearcher.get(redirect,
                        linkerConfig.getSelectedFields());
                    if (redirectedEntity != null) {
                        //re-subject the copied triples to the original entity URI
                        for (Iterator<Triple> data = redirectedEntity.getData().filter(
                                redirectedEntity.getUri(), null, null); data.hasNext();) {
                            Triple t = data.next();
                            entityData.add(new TripleImpl(entityUri, t.getPredicate(), t.getObject()));
                        }
                    }
                }
            }
            //set the flag unconditionally (the original only set it inside the
            //loop, so suggestions without redirects were re-processed on every call)
            suggestion.setRedirectProcessed(true);
            break; //BUGFIX: missing break caused fall-through into FOLLOW
        case FOLLOW:
            while (redirects.hasNext()) {
                IRI redirect = redirects.next();
                if (redirect != null) {
                    Entity redirectedEntity = entitySearcher.get(redirect,
                        linkerConfig.getSelectedFields());
                    if (redirectedEntity != null) {
                        suggestion.setRedirect(redirectedEntity);
                    }
                }
            }
            //mark as processed so the check at the top prevents duplicate
            //remote lookups (the original never set the flag in FOLLOW mode)
            suggestion.setRedirectProcessed(true);
            break;
        default:
            //nothing to do
            break;
    }
}
Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in the Apache Stanbol project — class EntityLinkingEngineTest, method testEngine.
/**
 * Checks that the enhancements produced by the {@link EntityLinkingEngine}
 * conform to the rules defined for the Stanbol Enhancement Structure.
 * @throws IOException on ContentItem creation problems
 * @throws EngineException on enhancement computation problems
 */
@Test
public void testEngine() throws IOException, EngineException {
    EntityLinkerConfig config = new EntityLinkerConfig();
    config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
    config.setMinFoundTokens(2); //this is assumed by this test
    EntityLinkingEngine engine = new EntityLinkingEngine("dummy", searcher,
        new TextProcessingConfig(), config, labelTokenizer);
    ContentItem contentItem = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    //declare the content to be English text
    contentItem.getMetadata().add(new TripleImpl(contentItem.getUri(),
        DC_LANGUAGE, new PlainLiteralImpl("en")));
    //register the pre-computed AnalysedText used by this test
    contentItem.addPart(AnalysedText.ANALYSED_TEXT_URI, TEST_ANALYSED_TEXT);
    //run the engine
    engine.computeEnhancements(contentItem);
    //properties every created enhancement is expected to carry
    Map<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(ENHANCER_EXTRACTED_FROM, contentItem.getUri());
    expected.put(DC_CREATOR, LiteralFactory.getInstance()
        .createTypedLiteral(engine.getClass().getName()));
    //a null expected value makes fise:confidence a required property
    expected.put(Properties.ENHANCER_CONFIDENCE, null);
    //check the created fise:TextAnnotations
    int numTextAnnotations = validateAllTextAnnotations(
        contentItem.getMetadata(), TEST_TEXT, expected);
    assertEquals("Four fise:TextAnnotations are expected by this Test", 4, numTextAnnotations);
    //check the created fise:EntityAnnotations
    int numEntityAnnotations = validateAllEntityAnnotations(contentItem, expected);
    assertEquals("Five fise:EntityAnnotations are expected by this Test", 5, numEntityAnnotations);
}
Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in the Apache Stanbol project — class NIFHelper, method writePos.
/**
 * Writes the {@link NlpAnnotations#POS_ANNOTATION} as NIF 1.0 to the parsed
 * RDF graph, using the parsed segmentUri as subject.
 * @param graph the graph the triples are added to
 * @param annotated the annotated element (e.g. a {@link Token})
 * @param segmentUri the URI of the resource representing the parsed
 * annotated element in the graph
 */
public static void writePos(Graph graph, Annotated annotated, IRI segmentUri) {
    Value<PosTag> posAnnotation = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    if (posAnnotation == null) {
        return; //no POS annotation present -> nothing to write
    }
    PosTag tag = posAnnotation.value();
    if (tag.isMapped()) {
        //link all mapped OLiA POS concepts ...
        for (Pos pos : tag.getPos()) {
            graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), pos.getUri()));
        }
        //... as well as the mapped OLiA lexical categories
        for (LexicalCategory category : tag.getCategories()) {
            graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), category.getUri()));
        }
    }
    //always write the lexer tag string and the annotation confidence
    graph.add(new TripleImpl(segmentUri, SsoOntology.posTag.getUri(),
        lf.createTypedLiteral(tag.getTag())));
    graph.add(new TripleImpl(segmentUri, ENHANCER_CONFIDENCE,
        lf.createTypedLiteral(posAnnotation.probability())));
}
Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in the Apache Stanbol project — class EnhancementEngineHelper, method setOccurrence.
/**
 * Sets the fise:start, fise:end, fise:selection-prefix,
 * fise:selected-text and fise:selection-suffix properties for the
 * parsed fise:TextAnnotation instance according to the parsed parameters.<p>
 * While intended for TextAnnotations this method can also be used to add
 * the mentioned properties to {@link IRI}s with a different type.<p>
 * <b>NOTE</b> on <code>allowSelectionHeadTail</code>: this parameter allows
 * to deactivate the usage of fise:selection-head and fise:selection-tail.
 * Typically users should parse <code>false</code> in case of 'named entities'
 * and <code>true</code> in case sections of the text (e.g. phrases, sentences,
 * chapters ...) are selected.
 * @param metadata the RDF graph the information is added to
 * @param textAnnotation the IRI of the fise:TextAnnotation
 * @param content the plain text content as String
 * @param start the start index of the occurrence
 * @param end the end index of the occurrence
 * @param lang the language of the content or <code>null</code> if not known
 * @param prefixSuffixSize the size of the prefix/suffix; if the parsed
 * value is smaller than the allowed minimum the default size is used instead
 * @param allowSelectionHeadTail if <code>true</code> the fise:selection-head
 * and fise:selection-tail properties are used instead of fise:selected-text
 * if the selected text is longer as <code>Math.max(30, prefixSuffixSize*5);</code>.
 * If <code>false</code> the fise:selected-text is added regardless of the
 * size of the selected area.
 * @since 0.11.0
 */
public static void setOccurrence(Graph metadata, IRI textAnnotation, String content, Integer start, Integer end, Language lang, int prefixSuffixSize, boolean allowSelectionHeadTail) {
    //write fise:start and fise:end
    metadata.add(new TripleImpl(textAnnotation, ENHANCER_START, lf.createTypedLiteral(start)));
    metadata.add(new TripleImpl(textAnnotation, ENHANCER_END, lf.createTypedLiteral(end)));
    //fall back to the default context size if the parsed one is too small
    int contextSize = prefixSuffixSize < MIN_PREFIX_SUFFIX_SIZE
        ? DEFAULT_PREFIX_SUFFIX_LENGTH : prefixSuffixSize;
    //write fise:selection-prefix and fise:selection-suffix (new TextAnnotation model),
    //clipped to the bounds of the content
    String prefix = content.substring(Math.max(0, start - contextSize), start);
    metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_PREFIX,
        new PlainLiteralImpl(prefix, lang)));
    String suffix = content.substring(end, Math.min(content.length(), end + contextSize));
    metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_SUFFIX,
        new PlainLiteralImpl(suffix, lang)));
    //write the selected text - or, for long selections, head and tail
    int maxSelectedTextSize = Math.max(MIN_SELECTEN_HEAD_TAIL_USAGE_LENGTH, contextSize * 5);
    if (allowSelectionHeadTail && end - start > maxSelectedTextSize) {
        //selected area too long for fise:selected-text:
        //use fise:selection-head and fise:selection-tail instead
        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_HEAD,
            new PlainLiteralImpl(content.substring(start, start + contextSize), lang)));
        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_TAIL,
            new PlainLiteralImpl(content.substring(end - contextSize, end), lang)));
    } else {
        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
            new PlainLiteralImpl(content.substring(start, end), lang)));
    }
}
Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in the Apache Stanbol project — class EnhancementEngineHelper, method set.
/**
 * Replaces all current values of the property for the resource
 * with the parsed values.
 * @param graph the graph
 * @param resource the resource
 * @param property the property
 * @param values the new values. Each element that is an instance of
 * {@link RDFTerm} is directly added to the graph. For every other non-null
 * element the parsed {@link LiteralFactory} is used to create a typed
 * literal. <code>null</code> elements are skipped; a <code>null</code>
 * collection only removes the current values.
 * @param literalFactory the {@link LiteralFactory} used in case a parsed
 * value is not an {@link RDFTerm}
 */
public static void set(Graph graph, BlankNodeOrIRI resource, IRI property, Collection<?> values, LiteralFactory literalFactory) {
    //remove all existing values via Iterator.remove (safe removal while iterating)
    Iterator<Triple> currentValues = graph.filter(resource, property, null);
    while (currentValues.hasNext()) {
    currentValues.next();
    currentValues.remove();
    }
    if (values != null) {
    for (Object value : values) {
    if (value instanceof RDFTerm) {
    //RDFTerm values are added as-is
    graph.add(new TripleImpl(resource, property, (RDFTerm) value));
    } else if (value != null) {
    //everything else is converted to a typed literal
    graph.add(new TripleImpl(resource, property, literalFactory.createTypedLiteral(value)));
    }
    }
    }
}
Aggregations