use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.
the class ExecutionPlanHelper method validateExecutionPlan.
/**
 * Utility that checks if the parsed graph contains a valid execution
 * plan. This method is intended to be used by components that need to
 * ensure that a parsed graph contains a valid execution plan.<p>
 * For every {@link ExecutionPlan#EXECUTION_NODE} this especially checks: <ul>
 * <li> that it defines exactly one, non-empty value for the
 * {@link ExecutionPlan#ENGINE} property and
 * <li> that all {@link ExecutionPlan#DEPENDS_ON} values do actually point
 * to another execution node in the parsed graph
 * </ul><p>
 * This method does not modify the parsed graph. Therefore it is safe
 * to pass an {@link ImmutableGraph} object.<p>
 * TODO: There is no check for cycles implemented yet.
 * @param executionPlan the graph to check
 * @return the engine names referenced by the validated execution plan
 * @throws ChainException if an execution node defines no, more than one or
 * an empty {@link ExecutionPlan#ENGINE} value, if a
 * {@link ExecutionPlan#DEPENDS_ON} value is not a {@link BlankNodeOrIRI},
 * or if a dependency does not refer to an execution node of the parsed graph
 */
public static Set<String> validateExecutionPlan(Graph executionPlan) throws ChainException {
    Iterator<Triple> executionNodeIt = executionPlan.filter(null, RDF_TYPE, EXECUTION_NODE);
    Set<String> engineNames = new HashSet<String>();
    Map<BlankNodeOrIRI, Collection<BlankNodeOrIRI>> nodeDependencies = new HashMap<BlankNodeOrIRI, Collection<BlankNodeOrIRI>>();
    // 1. check the ExecutionNodes
    while (executionNodeIt.hasNext()) {
        BlankNodeOrIRI node = executionNodeIt.next().getSubject();
        Iterator<String> engines = EnhancementEngineHelper.getStrings(executionPlan, node, ENGINE);
        if (!engines.hasNext()) {
            throw new ChainException("Execution Node " + node + " does not define the required property " + ENGINE + "!");
        }
        String engine = engines.next();
        if (engines.hasNext()) {
            // the engine property must be unary
            throw new ChainException("Execution Node " + node + " defines multiple values for the property " + ENGINE + "!");
        }
        if (engine.isEmpty()) {
            throw new ChainException("Execution Node " + node + " defines an empty String as engine name (property " + ENGINE + ")!");
        }
        engineNames.add(engine);
        // collect the dependencies; they can only be resolved after all
        // execution nodes are known (see step 2)
        Collection<BlankNodeOrIRI> dependsOn = new HashSet<BlankNodeOrIRI>();
        for (Iterator<Triple> t = executionPlan.filter(node, DEPENDS_ON, null); t.hasNext(); ) {
            RDFTerm o = t.next().getObject();
            if (!(o instanceof BlankNodeOrIRI)) {
                throw new ChainException("Execution Node " + node + " defines the literal '" + o + "' as value for the " + DEPENDS_ON + " property. However this property requires values to be bNodes or URIs.");
            }
            dependsOn.add((BlankNodeOrIRI) o);
        }
        nodeDependencies.put(node, dependsOn);
    }
    // 2. now check the dependency graph: every dependency must itself be
    // an execution node collected in step 1. The stored collections are
    // never null (an empty set means "no dependencies").
    for (Entry<BlankNodeOrIRI, Collection<BlankNodeOrIRI>> entry : nodeDependencies.entrySet()) {
        for (BlankNodeOrIRI dependent : entry.getValue()) {
            if (!nodeDependencies.containsKey(dependent)) {
                throw new ChainException("Execution Node " + entry.getKey() + " defines a dependency to a non-existent ex:ExecutionNode " + dependent + "!");
            }
            // else the dependency is valid
        }
    }
    // done ... the parsed graph survived all consistency checks :)
    return engineNames;
}
use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.
the class EnhancementStructureHelper method validateLanguageAnnotations.
/**
 * Checks the conventions for fise:TextAnnotations that annotate the language
 * of the text, as defined by
 * <a href="https://issues.apache.org/jira/browse/STANBOL-613">STANBOL-613</a>.<p>
 * If the annotation has a dc:language value, that value must be a literal
 * of at least two characters and must be the only one, the annotation must
 * have dc:LinguisticSystem as its single dc:type, and it must be returned by
 * {@code EnhancementEngineHelper.getLanguageAnnotations(..)}. If it has no
 * dc:language value, none of its dc:type values may be dc:LinguisticSystem.<p>
 * Called by {@link #validateTextAnnotation(Graph, IRI, String, Map)}
 * @param enhancements the graph containing the enhancements
 * @param textAnnotation the fise:TextAnnotation to validate
 */
private static void validateLanguageAnnotations(Graph enhancements, IRI textAnnotation) {
    Iterator<Triple> languages = enhancements.filter(textAnnotation, DC_LANGUAGE, null);
    if (!languages.hasNext()) {
        // not a language annotation: dc:LinguisticSystem must not be used as dc:type
        for (Iterator<Triple> types = enhancements.filter(textAnnotation, DC_TYPE, null); types.hasNext(); ) {
            RDFTerm typeValue = types.next().getObject();
            assertFalse("Only fise:TextAnnotations without a dc:language value MUST NOT use the " + "dc:type value dc:LinguisticSystem (uri " + textAnnotation + ")", DCTERMS_LINGUISTIC_SYSTEM.equals(typeValue));
        }
        return;
    }

    // a language annotation: validate the dc:language value ...
    RDFTerm languageValue = languages.next().getObject();
    assertTrue("The dc:language value MUST BE a PlainLiteral", languageValue instanceof Literal);
    int languageLength = ((Literal) languageValue).getLexicalForm().length();
    assertTrue("The dc:language value '" + languageValue + "'MUST BE at least two chars long", languageLength >= 2);
    assertFalse("TextAnnotations with the dc:language property MUST only have a single dc:language value (uri " + textAnnotation + ")", languages.hasNext());

    // ... and require exactly one dc:type with the value dc:LinguisticSystem
    Iterator<Triple> types = enhancements.filter(textAnnotation, DC_TYPE, null);
    assertTrue("TextAnnotations with the dc:language property MUST use dc:type dc:LinguisticSystem (uri " + textAnnotation + ")", types.hasNext());
    RDFTerm typeValue = types.next().getObject();
    assertEquals("TextAnnotations with the dc:language property MUST use dc:type dc:LinguisticSystem (uri " + textAnnotation + ")", DCTERMS_LINGUISTIC_SYSTEM, typeValue);
    assertFalse("TextAnnotations with the dc:language property MUST only have a single dc:type value (uri " + textAnnotation + ")", types.hasNext());

    // finally the annotation has to be found by the EnhancementEngineHelper
    List<BlankNodeOrIRI> knownLanguageAnnotations = EnhancementEngineHelper.getLanguageAnnotations(enhancements);
    assertTrue("Language annotation " + textAnnotation + " was not returned by " + "EnhancementEngineHelper.getLanguageAnnotations(..)!", knownLanguageAnnotations.contains(textAnnotation));
}
use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.
the class EnhancementStructureHelper method validateNERAnnotations.
/**
 * Validates that fise:TextAnnotations with the dc:type dbp-ont:Person,
 * dbp-ont:Organisation or dbp-ont:Place do have a
 * fise:selected-text value (this implicitly also checks that
 * fise:selection-context, fise:start and fise:end are defined!)<p>
 * Called by {@link #validateTextAnnotation(Graph, IRI, String, Map)}
 * @param enhancements the graph containing the enhancements
 * @param textAnnotation the fise:TextAnnotation to validate
 * @param selectedTextResource the fise:selected-text value; must not be
 * <code>null</code> if the annotation uses one of the dc:type values above
 */
private static void validateNERAnnotations(Graph enhancements, IRI textAnnotation, RDFTerm selectedTextResource) {
    Iterator<Triple> dcTypeIterator = enhancements.filter(textAnnotation, DC_TYPE, null);
    boolean isNERAnnotation = false;
    // stop at the first dc:type value that marks a named entity
    while (dcTypeIterator.hasNext() && !isNERAnnotation) {
        RDFTerm dcTypeValue = dcTypeIterator.next().getObject();
        isNERAnnotation = DBPEDIA_PERSON.equals(dcTypeValue) || DBPEDIA_ORGANISATION.equals(dcTypeValue) || DBPEDIA_PLACE.equals(dcTypeValue);
    }
    if (isNERAnnotation) {
        assertNotNull("fise:TextAnnotations with a dc:type of dbp-ont:Person, " + "dbp-ont:Organisation or dbp-ont:Place MUST have a fise:selected-text value (uri " + textAnnotation + ")", selectedTextResource);
    }
}
use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.
the class EnhancementStructureHelper method validateAllTopicAnnotations.
/**
 * Validates all fise:TopicAnnotations contained by the parsed enhancements
 * graph.<p>
 * The subject of every matching triple is cast to {@link IRI}, so a
 * TopicAnnotation identified by a blank node makes this method fail with
 * a {@link ClassCastException}.
 * @param enhancements the enhancement graph
 * @param expectedValues the expected values of all validated TopicAnnotations.
 * Properties are used as keys. Typical example would be fise:extracted-from
 * with the id of the ContentItem as value; dc-terms:creator with the
 * {@link Class#getName()} as value. If <code>null</code> an empty map is used.
 * @return the number of found and validated TopicAnnotations.
 */
public static int validateAllTopicAnnotations(Graph enhancements, Map<IRI, RDFTerm> expectedValues) {
    if (expectedValues == null) {
        // typed empty map instead of the raw Collections.EMPTY_MAP, so no
        // unchecked warning needs to be suppressed
        expectedValues = Collections.emptyMap();
    }
    Iterator<Triple> topicAnnotationIterator = enhancements.filter(null, RDF_TYPE, ENHANCER_TOPICANNOTATION);
    int topicAnnotationCount = 0;
    while (topicAnnotationIterator.hasNext()) {
        IRI topicAnnotation = (IRI) topicAnnotationIterator.next().getSubject();
        // validate this TopicAnnotation against the expected values
        validateTopicAnnotation(enhancements, topicAnnotation, expectedValues);
        topicAnnotationCount++;
    }
    return topicAnnotationCount;
}
use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.
the class EnhancementStructureHelper method validateAllEntityAnnotations.
/**
 * Validates all fise:EntityAnnotations contained by the parsed enhancements
 * graph.<p>
 * The subject of every matching triple is cast to {@link IRI}, so an
 * EntityAnnotation identified by a blank node makes this method fail with
 * a {@link ClassCastException}.
 * @param enhancements the enhancement graph
 * @param expectedValues the expected values of all validated EntityAnnotations.
 * Properties are used as keys. Typical example would be fise:extracted-from
 * with the id of the ContentItem as value; dc-terms:creator with the
 * {@link Class#getName()} as value. If <code>null</code> an empty map is used.
 * @return the number of found and validated EntityAnnotations.
 */
public static int validateAllEntityAnnotations(Graph enhancements, Map<IRI, RDFTerm> expectedValues) {
    if (expectedValues == null) {
        // typed empty map instead of the raw Collections.EMPTY_MAP, so no
        // unchecked warning needs to be suppressed
        expectedValues = Collections.emptyMap();
    }
    Iterator<Triple> entityAnnotationIterator = enhancements.filter(null, RDF_TYPE, ENHANCER_ENTITYANNOTATION);
    int entityAnnotationCount = 0;
    while (entityAnnotationIterator.hasNext()) {
        IRI entityAnnotation = (IRI) entityAnnotationIterator.next().getSubject();
        // validate this EntityAnnotation against the expected values
        validateEntityAnnotation(enhancements, entityAnnotation, expectedValues);
        entityAnnotationCount++;
    }
    return entityAnnotationCount;
}
Aggregations