Usage of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in the Apache Stanbol project.
Class RepresentationReader, method parseFromContent:
/**
 * Parses {@link Representation}s from the RDF payload of the request.
 * <p>
 * JSON payloads are not yet supported; any other media type reported as
 * supported by {@link #isSupported(MediaType)} is parsed with the Clerezza
 * {@code parser}. Every IRI that occurs as a triple subject in the parsed
 * graph becomes one {@link Representation} backed by that graph.
 *
 * @param content the request data providing the media type and entity stream
 * @param acceptedMediaType the media type to report back in the
 *        {@code Accept} header of error responses
 * @return a map from subject IRI (unicode string) to its Representation
 * @throws WebApplicationException with status 400 on unparsable RDF or an
 *         unsupported Content-Type, and 500 when no parser is available for
 *         a format advertised as supported (a server configuration error)
 */
public Map<String, Representation> parseFromContent(RequestData content, MediaType acceptedMediaType) {
    // (3) Parse the Representation(s) from the entity stream
    if (content.getMediaType().isCompatible(MediaType.APPLICATION_JSON_TYPE)) {
        //parse from json
        throw new UnsupportedOperationException("Parsing of JSON not yet implemented :(");
    } else if (isSupported(content.getMediaType())) {
        //from RDF serialisation
        RdfValueFactory valueFactory = RdfValueFactory.getInstance();
        Map<String, Representation> representations = new HashMap<String, Representation>();
        Set<BlankNodeOrIRI> processed = new HashSet<BlankNodeOrIRI>();
        Graph graph = new IndexedGraph();
        try {
            parser.parse(graph, content.getEntityStream(), content.getMediaType().toString());
        } catch (UnsupportedParsingFormatException e) {
            //throw an internal server error, because isReadable(..) already
            //checked for supported types; getting an unsupported format here
            //indicates a configuration error on the server (e.g. a missing
            //bundle providing the required parser)
            String message = "Unable to create the Parser for the supported format "
                    + content.getMediaType() + " (" + e + ")";
            log.error(message, e);
            throw new WebApplicationException(Response.status(Status.INTERNAL_SERVER_ERROR)
                    .entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        } catch (RuntimeException e) {
            //NOTE: Clerezza seems not to provide specific exceptions on
            // parsing errors. Hence the catch-all for RuntimeException
            String message = "Unable to parse the provided RDF data (format: "
                    + content.getMediaType() + ", message: " + e.getMessage() + ")";
            log.error(message, e);
            throw new WebApplicationException(Response.status(Status.BAD_REQUEST)
                    .entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        }
        //build one Representation per distinct IRI subject; 'processed'
        //guards against creating duplicates for subjects of multiple triples
        for (Iterator<Triple> st = graph.iterator(); st.hasNext(); ) {
            BlankNodeOrIRI resource = st.next().getSubject();
            if (resource instanceof IRI && processed.add(resource)) {
                //build a new representation
                representations.put(((IRI) resource).getUnicodeString(),
                        valueFactory.createRdfRepresentation((IRI) resource, graph));
            }
        }
        return representations;
    } else {
        //unsupported media type
        String message = String.format("Parsed Content-Type '%s' is not one of the supported %s",
                content.getMediaType(), supportedMediaTypes);
        log.info("Bad Request: {}", message);
        throw new WebApplicationException(Response.status(Status.BAD_REQUEST)
                .entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
    }
}
Usage of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in the Apache Stanbol project.
Class CeliLemmatizerEnhancementEngineTest, method validateMorphoFeatureProperty:
/**
 * Validates the morphological feature properties of a TextAnnotation:
 * the rdf:type (expected OLIA lexical category), gender, number and the
 * lemma form — [1..*] values in the form {key=value}.
 * @param enhancements The graph with the enhancements
 * @param textAnnotation the TextAnnotation to check
 */
private void validateMorphoFeatureProperty(Graph enhancements, BlankNodeOrIRI textAnnotation) {
    //This test checks for known morpho features of a given input (constant TERM)
    Iterator<Triple> morphoFeatureIterator = enhancements.filter(textAnnotation, RDF_TYPE, null);
    assertTrue("No POS Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    while (morphoFeatureIterator.hasNext()) {
        RDFTerm morphoFeature = morphoFeatureIterator.next().getObject();
        assertTrue("Morpho Feature value are expected of typed literal", morphoFeature instanceof IRI);
        String feature = ((IRI) morphoFeature).getUnicodeString();
        assertFalse("Morpho Feature MUST NOT be empty", feature.isEmpty());
        if (feature.startsWith(OLIA_NAMESPACE)) {
            //the local name after the OLIA namespace is the enum constant
            String key = feature.substring(OLIA_NAMESPACE.length());
            LexicalCategory cat = LexicalCategory.valueOf(key);
            assertTrue("Part of Speech of " + TERM + " should be " + LexicalCategory.Noun, (cat == LexicalCategory.Noun));
        }
    }
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_GENDER, null);
    assertTrue("No Gender Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    if (morphoFeatureIterator.hasNext()) {
        RDFTerm morphoFeature = morphoFeatureIterator.next().getObject();
        assertTrue("Morpho Feature value are expected of typed literal", morphoFeature instanceof IRI);
        String feature = ((IRI) morphoFeature).getUnicodeString();
        assertFalse("Morpho Feature MUST NOT be empty", feature.isEmpty());
        if (feature.startsWith(OLIA_NAMESPACE)) {
            String key = feature.substring(OLIA_NAMESPACE.length());
            Gender cat = Gender.valueOf(key);
            assertTrue("Gender of " + TERM + " should be " + Gender.Feminine, (cat == Gender.Feminine));
        }
    }
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_NUMBER, null);
    assertTrue("No Number Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    if (morphoFeatureIterator.hasNext()) {
        RDFTerm morphoFeature = morphoFeatureIterator.next().getObject();
        assertTrue("Morpho Feature value are expected of typed literal", morphoFeature instanceof IRI);
        String feature = ((IRI) morphoFeature).getUnicodeString();
        assertFalse("Morpho Feature MUST NOT be empty", feature.isEmpty());
        if (feature.startsWith(OLIA_NAMESPACE)) {
            String key = feature.substring(OLIA_NAMESPACE.length());
            NumberFeature cat = NumberFeature.valueOf(key);
            //message fixed: the expected value is NumberFeature.Singular (was
            //a copy-paste of the Gender check mentioning Gender.Feminine)
            assertTrue("Number of " + TERM + " should be " + NumberFeature.Singular, (cat == NumberFeature.Singular));
        }
    }
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliLemmatizerEnhancementEngine.hasLemmaForm, null);
    //message fixed: this checks the lemma form, not the number feature
    assertTrue("No Lemma Form value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    if (morphoFeatureIterator.hasNext()) {
        RDFTerm morphoFeature = morphoFeatureIterator.next().getObject();
        assertTrue("Lemma Forms value are expected of type Literal", morphoFeature instanceof Literal);
        assertFalse("Lemma forms MUST NOT be empty", ((Literal) morphoFeature).getLexicalForm().isEmpty());
        String feature = ((Literal) morphoFeature).getLexicalForm();
        assertTrue("Lemma of " + TERM + " should be " + TERM, (feature.equals(TERM)));
    }
}
Usage of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in the Apache Stanbol project.
Class CeliLemmatizerEnhancementEngineTest, method testEngine:
@Test
public void testEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    //add a simple triple to statically define the language of the test
    //content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    //unit tests should not depend on each other (if possible)
    //CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(false);
    try { //ensure the engine is shut down even when an assertion fails
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            //skip the test when the remote CELI service is not available
            RemoteServiceHelper.checkServiceUnavailable(e);
            return;
        }
        TestUtils.logEnhancements(ci);
        //validate enhancement
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR,
                LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
        Iterator<Triple> lemmaTextAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        assertTrue("A TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        BlankNodeOrIRI lemmaTextAnnotation = lemmaTextAnnotationIterator.next().getSubject();
        assertTrue("TextAnnoations MUST BE IRIs!", lemmaTextAnnotation instanceof IRI);
        assertFalse("Only a single TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        //validate the enhancement metadata
        validateEnhancement(ci.getMetadata(), (IRI) lemmaTextAnnotation, expectedValues);
        //validate the lemma form TextAnnotation
        int lemmaForms = validateLemmaFormProperty(ci.getMetadata(), lemmaTextAnnotation, "it");
        assertTrue("Only a single LemmaForm property is expected if '" + MORPHOLOGICAL_ANALYSIS + "=false'", lemmaForms == 1);
    } finally {
        //previously only reached on success; a failing assertion (or the
        //service-unavailable return) would leak the engine instance
        shutdownEngine(morphoAnalysisEngine);
    }
}
Usage of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in the Apache Stanbol project.
Class LocationEnhancementEngine, method writeEntityEnhancement:
/**
 * Writes an entity enhancement for the content item in the parsed graph
 * based on the parsed toponym.
 *
 * @param contentItemId The id of the contentItem
 * @param graph The graph used to write the triples
 * @param literalFactory the literal factory used to create literals
 * @param toponym the toponym
 * @param relatedEnhancements related enhancements (dc:relation links);
 *        may be <code>null</code>
 * @param requiresEnhancements required enhancements (dc:requires and, per
 *        STANBOL-767, also dc:relation links); may be <code>null</code>
 * @param score the confidence score, or <code>null</code> to omit the
 *        confidence triple. This is used to parse the score of the Toponym
 *        if this method is used to add a parent Toponym.
 *
 * @return The IRI of the created entity enhancement
 */
private IRI writeEntityEnhancement(IRI contentItemId, Graph graph, LiteralFactory literalFactory, Toponym toponym, Collection<BlankNodeOrIRI> relatedEnhancements, Collection<BlankNodeOrIRI> requiresEnhancements, Double score) {
    IRI entityRef = new IRI("http://sws.geonames.org/" + toponym.getGeoNameId() + '/');
    FeatureClass featureClass = toponym.getFeatureClass();
    //parameterized logging avoids string concatenation when debug is disabled
    log.debug(" > featureClass {}", featureClass);
    IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(graph, this, contentItemId);
    // first relate this entity annotation to the text annotation(s)
    if (relatedEnhancements != null) {
        for (BlankNodeOrIRI related : relatedEnhancements) {
            graph.add(new TripleImpl(entityAnnotation, DC_RELATION, related));
        }
    }
    if (requiresEnhancements != null) {
        for (BlankNodeOrIRI requires : requiresEnhancements) {
            graph.add(new TripleImpl(entityAnnotation, DC_REQUIRES, requires));
            //STANBOL-767: also add dc:relation link
            graph.add(new TripleImpl(entityAnnotation, DC_RELATION, requires));
        }
    }
    graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, entityRef));
    log.debug(" > name {}", toponym.getName());
    graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(toponym.getName())));
    if (score != null) {
        graph.add(new TripleImpl(entityAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(score)));
    }
    //now get all the entity types for the results
    Set<IRI> entityTypes = new HashSet<IRI>();
    //first based on the feature class
    Collection<IRI> featureClassTypes = FEATURE_CLASS_CONCEPT_MAPPINGS.get(featureClass);
    if (featureClassTypes != null) {
        entityTypes.addAll(featureClassTypes);
    }
    //second for the feature Code
    String featureCode = toponym.getFeatureCode();
    Collection<IRI> featureCodeTypes = FEATURE_TYPE_CONCEPT_MAPPINGS.get(featureCode);
    if (featureCodeTypes != null) {
        entityTypes.addAll(featureCodeTypes);
    }
    //third add the feature Code as additional type
    entityTypes.add(new IRI(NamespaceEnum.geonames + featureClass.name() + '.' + featureCode));
    //finally add the type triples to the enhancement
    for (IRI entityType : entityTypes) {
        graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, entityType));
    }
    return entityAnnotation;
}
Usage of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in the Apache Stanbol project.
Class ClerezzaRDFUtils, method findRoots:
/**
 * Collects the root resources of the parsed graph by delegating every
 * triple subject to {@code findRoot}, which accumulates results in the
 * returned set while tracking already-handled nodes.
 *
 * @param model the graph to scan
 * @return the set of root nodes found in the graph
 */
public static Set<BlankNodeOrIRI> findRoots(Graph model) {
    final Set<BlankNodeOrIRI> roots = new HashSet<BlankNodeOrIRI>();
    final Set<BlankNodeOrIRI> seen = new HashSet<BlankNodeOrIRI>();
    for (Triple triple : model) {
        findRoot(model, triple.getSubject(), roots, seen);
    }
    return roots;
}
Aggregations