use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class ContentItemBackendTest method testSuggestedEntityWithoutParsedContext.
/**
 * Exercises the two ways {@code fn:suggestedEntity} is called in this test:
 * chained after {@code fn:textAnnotation()} with only a limit argument, and
 * with the annotation selector passed as its first argument.
 *
 * @throws LDPathParseException if one of the LDPath expressions cannot be parsed
 */
@Test
public void testSuggestedEntityWithoutParsedContext() throws LDPathParseException {
    // The suggestedEntity function can be used for two use cases:
    // (1) get the {limit} top rated linked Entities per context node.
    //     In this example the function is chained after fn:textAnnotation(),
    //     so no context selector is passed as an argument - only the limit.
    String path = "fn:textAnnotation()/fn:suggestedEntity(\"1\")";
    Collection<RDFTerm> result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    // assertEquals reports expected vs. actual size when the check fails
    assertEquals(2, result.size());
    Set<IRI> expectedValues = new HashSet<IRI>(Arrays.asList(
            new IRI("http://dbpedia.org/resource/Paris"),
            new IRI("http://dbpedia.org/resource/Bob_Marley")));
    for (RDFTerm r : result) {
        assertTrue(r instanceof IRI);
        log.info("Entity: {}", r);
        // remove(..) returns false for unexpected as well as duplicated results
        assertTrue(expectedValues.remove(r));
    }
    // every expected entity must have been returned
    assertTrue(expectedValues.isEmpty());

    // (2) get the {limit} top rated Entities for all Annotations parsed
    //     as the first argument.
    // NOTE: the selector parsing all Annotations MUST BE used as first
    //     argument
    path = "fn:suggestedEntity(fn:textAnnotation(),\"1\")";
    result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    assertEquals(1, result.size());
    assertEquals(new IRI("http://dbpedia.org/resource/Paris"), result.iterator().next());
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class UsageExamples method exampleExtractedPlaces.
/**
 * This provides some examples on how to select places extracted from
 * a contentItem. The LDPath program defines four fields
 * ({@code locationMentions}, {@code locationNames}, {@code linkedPlaces}
 * and {@code linkedCountries}), is executed, and its results are logged.
 *
 * @throws LDPathParseException if the LDPath program cannot be parsed
 */
@Test
public void exampleExtractedPlaces() throws LDPathParseException {
    StringBuilder program = new StringBuilder();
    //the selected text of all TextAnnotations typed as dbpedia-ont:Place
    program.append("locationMentions = fn:textAnnotation()"
            + "[dc:type is dbpedia-ont:Place]/fise:selected-text :: xsd:string;");
    //this uses the labels of suggested places with the highest confidence
    //but also the selected-text as fallback if no entity is suggested.
    program.append("locationNames = fn:textAnnotation()"
            + "[dc:type is dbpedia-ont:Place]/fn:first(fn:suggestion(\"1\")/fise:entity-label,fise:selected-text) :: xsd:string;");
    program.append("linkedPlaces = fn:textAnnotation()"
            + "[dc:type is dbpedia-ont:Place]/fn:suggestedEntity(\"1\") :: xsd:anyURI;");
    //this selects only linked Countries
    program.append("linkedCountries = fn:textAnnotation()"
            + "[dc:type is dbpedia-ont:Place]/fn:suggestion()"
            + "[fise:entity-type is dbpedia-ont:Country]/fise:entity-reference :: xsd:anyURI;");
    Program<RDFTerm> placesProgram = ldpath.parseProgram(new StringReader(program.toString()));
    log.info("- - - - - - - - - - - - -");
    log.info("Places Indexing Examples");
    Map<String, Collection<?>> result = execute(placesProgram);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    logResults(result);
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class TikaEngineTest method verifyBlankNodeOrIRI.
/**
 * Asserts that the metadata of the parsed content item holds exactly one
 * triple for the given subject and property, and that the object of this
 * triple is a {@link BlankNodeOrIRI}.
 *
 * @param ci the content item whose metadata are checked
 * @param subject the subject to filter for
 * @param property the property to filter for
 * @return the single object value, cast to {@link BlankNodeOrIRI}
 */
private static BlankNodeOrIRI verifyBlankNodeOrIRI(ContentItem ci, IRI subject, IRI property) {
    final Iterator<Triple> matches = ci.getMetadata().filter(subject, property, null);
    //at least one value is required ...
    assertTrue(matches.hasNext());
    final Triple first = matches.next();
    final RDFTerm value = first.getObject();
    //... but not more than one
    assertFalse(matches.hasNext());
    assertTrue(value instanceof BlankNodeOrIRI);
    return (BlankNodeOrIRI) value;
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class TestNamedEntityExtractionEnhancementEngine method testCustomModel.
/**
 * Runs the NER engine with the default model types cleared and a single
 * custom name finder model registered for "en", then validates the created
 * TextAnnotations against the expected property values.
 */
@Test
public void testCustomModel() throws EngineException, IOException {
    final ContentItem ci = wrapAsContentItem("urn:test:content-item:single:sentence", EHEALTH, "en");
    //the type the "DNA" names are mapped to; used for the engine
    //configuration as well as for the expected dc:type value
    final IRI dnaType = new IRI("http://www.bootstrep.eu/ontology/GRO#DNA");

    //no default models for this test ...
    nerEngine.config.getDefaultModelTypes().clear();
    //... only the custom model that is part of the test data
    nerEngine.config.addCustomNameFinderModel("en", "bionlp2004-DNA-en.bin");
    nerEngine.config.setMappedType("DNA", dnaType);
    nerEngine.computeEnhancements(ci);

    final String engineName = nerEngine.getClass().getName();
    final Map<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR,
            LiteralFactory.getInstance().createTypedLiteral(engineName));
    //a null value marks the confidence as a required property
    expectedValues.put(Properties.ENHANCER_CONFIDENCE, null);
    //dc:type values MUST be the URI configured as mapped type
    expectedValues.put(Properties.DC_TYPE, dnaType);

    final Graph metadata = ci.getMetadata();
    final int textAnnotationCount = validateAllTextAnnotations(metadata, EHEALTH, expectedValues);
    assertEquals(7, textAnnotationCount);
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class TestMetaxaCore method printTriples.
/**
 * Logs (at debug level) the Stanbol Enhancer triples that would be created
 * for the metadata contained in the given model. The triples are only
 * built and logged; they are not added to any graph.
 *
 * @param m a {@link Model}
 *
 * @return an {@code int} with the number of statements for which a triple
 *         could be built
 */
private int printTriples(Model m) {
    int tripleCounter = 0;
    // shared over all statements and handed to every conversion call
    HashMap<BlankNode, BlankNode> blankNodeMap = new HashMap<BlankNode, BlankNode>();
    ClosableIterator<Statement> it = m.iterator();
    try {
        while (it.hasNext()) {
            Statement oneStmt = it.next();
            BlankNodeOrIRI subject = (BlankNodeOrIRI) MetaxaEngine.asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
            IRI predicate = (IRI) MetaxaEngine.asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
            RDFTerm object = MetaxaEngine.asClerezzaResource(oneStmt.getObject(), blankNodeMap);
            // statements with a node that was converted to null are skipped
            if (null != subject && null != predicate && null != object) {
                Triple t = new TripleImpl(subject, predicate, object);
                LOG.debug("adding " + t);
                tripleCounter++;
            } else {
                LOG.debug("skipped " + oneStmt.toString());
            }
        }
    } finally {
        // release the iterator even if a conversion or cast above fails
        it.close();
    }
    return tripleCounter;
}
Aggregations