use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class ContentItemBackendTest method readTestData.
/**
 * Loads the fixtures shared by all tests of this class: the RDF metadata
 * (<code>metadata.rdf.zip</code>), the HTML content (<code>content.html</code>)
 * and the plain text content (<code>content.txt</code>).
 * <p>
 * The ID of the created ContentItem is taken from the object of the
 * {@link Properties#ENHANCER_EXTRACTED_FROM} triples contained in the RDF
 * data. All those triples are expected to reference the same ContentItem.
 * The plain text is added as a content part with the URI
 * <code>{contentItemUri}_text</code> and the parsed RDF data are copied to
 * the metadata of the ContentItem.
 *
 * @throws IOException if reading one of the test resources fails
 */
@BeforeClass
public static void readTestData() throws IOException {
    ParsingProvider parser = new JenaParserProvider();
    //parse the metadata first, as they provide the ID of the ContentItem
    Graph rdfData = parseRdfData(parser, "metadata.rdf.zip");
    IRI contentItemId = null;
    Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
    while (it.hasNext()) {
        RDFTerm r = it.next().getObject();
        if (contentItemId == null) {
            //the first IRI found is used as ID of the ContentItem
            if (r instanceof IRI) {
                contentItemId = (IRI) r;
            }
        } else {
            //all further values need to refer to the same ContentItem
            assertEquals("multiple ContentItems IDs contained in the RDF test data", contentItemId, r);
        }
    }
    assertNotNull("RDF data doe not contain an Enhancement extracted form " + "the content item", contentItemId);
    //create the content Item with the HTML content
    InputStream in = getTestResource("content.html");
    assertNotNull("HTML content not found", in);
    byte[] htmlData;
    try {
        htmlData = IOUtils.toByteArray(in);
    } finally {
        //also release the stream if reading fails
        IOUtils.closeQuietly(in);
    }
    ci = ciFactory.createContentItem(contentItemId, new ByteArraySource(htmlData, "text/html; charset=UTF-8"));
    htmlContent = new String(htmlData, UTF8);
    //create a Blob with the text content
    in = getTestResource("content.txt");
    //check for the resource BEFORE reading it, so that a missing file is
    //reported with the assertion message instead of a failure while reading
    assertNotNull("Plain text content not found", in);
    byte[] textData;
    try {
        textData = IOUtils.toByteArray(in);
    } finally {
        IOUtils.closeQuietly(in);
    }
    ci.addPart(new IRI(ci.getUri().getUnicodeString() + "_text"), ciFactory.createBlob(new ByteArraySource(textData, "text/plain; charset=UTF-8")));
    textContent = new String(textData, UTF8);
    //add the metadata
    ci.getMetadata().addAll(rdfData);
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class ContentItemBackendTest method testSuggestedEntity.
/**
 * Tests the two ways the <code>fn:suggestedEntity</code> LDPath function
 * can be used: (1) applied to each text annotation and (2) applied to the
 * set of all text annotations parsed as first argument.
 *
 * @throws LDPathParseException if one of the tested paths can not be parsed
 */
@Test
public void testSuggestedEntity() throws LDPathParseException {
    //The suggestedEntity function can be used for two use cases
    //(1) get the {limit} top rated linked Entities per parsed context
    //    In this example we parse all TextAnnotations
    //NOTE: '.' MUST BE used as first argument in this case
    String path = "fn:textAnnotation(.)/fn:suggestedEntity(.,\"1\")";
    Collection<RDFTerm> result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    //assertEquals reports the actual size if the expectation is not met
    assertEquals(2, result.size());
    Set<IRI> expectedValues = new HashSet<IRI>(Arrays.asList(new IRI("http://dbpedia.org/resource/Paris"), new IRI("http://dbpedia.org/resource/Bob_Marley")));
    for (RDFTerm r : result) {
        assertTrue(r instanceof IRI);
        log.info("Entity: {}", r);
        //every result must be one of the (not yet seen) expected values
        assertTrue(expectedValues.remove(r));
    }
    assertTrue(expectedValues.isEmpty());
    //(2) get the {limit} top rated Entities for all Annotations parsed
    //    as the first argument
    //NOTE: the selector parsing all Annotations MUST BE used as first
    //    argument
    path = "fn:suggestedEntity(fn:textAnnotation(.),\"1\")";
    result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    assertEquals(1, result.size());
    assertEquals(new IRI("http://dbpedia.org/resource/Paris"), result.iterator().next());
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class UsageExamples method exampleExtractedPersons.
/**
 * Example LDPath program selecting information about the persons
 * extracted from a ContentItem. The program defines four fields:
 * <code>personMentions</code>, <code>personNames</code>,
 * <code>linkedPersons</code> and <code>linkedArtists</code>. The program is
 * parsed, executed and its results are logged.
 * @throws LDPathParseException if the example program can not be parsed
 */
@Test
public void exampleExtractedPersons() throws LDPathParseException {
    StringBuilder ldpathSource = new StringBuilder()
        //the selected text of all text annotations typed as Person
        .append("personMentions = fn:textAnnotation()")
        .append("[dc:type is dbpedia-ont:Person]/fise:selected-text :: xsd:string;")
        //the label of the suggested person with the highest confidence;
        //the selected-text is the fallback if no entity is suggested
        .append("personNames = fn:textAnnotation()")
        .append("[dc:type is dbpedia-ont:Person]/fn:first(fn:suggestion(\"1\")/fise:entity-label,fise:selected-text) :: xsd:string;")
        .append("linkedPersons = fn:textAnnotation()")
        .append("[dc:type is dbpedia-ont:Person]/fn:suggestedEntity(\"1\") :: xsd:anyURI;")
        //only the linked Artists
        .append("linkedArtists = fn:textAnnotation()")
        .append("[dc:type is dbpedia-ont:Person]/fn:suggestion()")
        .append("[fise:entity-type is dbpedia-ont:Artist]/fise:entity-reference :: xsd:anyURI;");
    StringReader programReader = new StringReader(ldpathSource.toString());
    Program<RDFTerm> parsedProgram = ldpath.parseProgram(programReader);
    log.info("- - - - - - - - - - - - - ");
    log.info("Person Indexing Examples");
    Map<String, Collection<?>> fieldValues = execute(parsedProgram);
    assertNotNull(fieldValues);
    assertFalse(fieldValues.isEmpty());
    logResults(fieldValues);
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class UsageExamples method exampleExtractedOrganization.
/**
 * This provides some example on how to select organisations extracted from
 * a contentItem. The program defines the fields <code>orgMentions</code>,
 * <code>orgNames</code>, <code>linkedOrgs</code> and
 * <code>linkedEducationOrg</code>; it is parsed, executed and its results
 * are logged.
 * @throws LDPathParseException if the example program can not be parsed
 */
@Test
public void exampleExtractedOrganization() throws LDPathParseException {
    StringBuilder program = new StringBuilder();
    program.append("orgMentions = fn:textAnnotation()" + "[dc:type is dbpedia-ont:Organisation]/fise:selected-text :: xsd:string;");
    //this uses the labels of suggested organisations with the highest confidence
    //but also the selected-text as fallback if no entity is suggested.
    program.append("orgNames = fn:textAnnotation()" + "[dc:type is dbpedia-ont:Organisation]/fn:first(fn:suggestion(\"1\")/fise:entity-label,fise:selected-text) :: xsd:string;");
    program.append("linkedOrgs = fn:textAnnotation()" + "[dc:type is dbpedia-ont:Organisation]/fn:suggestedEntity(\"1\") :: xsd:anyURI;");
    //this selects only linked education organisations
    //NOTE: this does not use a limit on suggestion(.)!
    program.append("linkedEducationOrg = fn:textAnnotation()" + "[dc:type is dbpedia-ont:Organisation]/fn:suggestion()" + "[fise:entity-type is dbpedia-ont:EducationalInstitution]/fise:entity-reference :: xsd:anyURI;");
    Program<RDFTerm> orgProgram = ldpath.parseProgram(new StringReader(program.toString()));
    log.info("- - - - - - - - - - - - -");
    //label the logged results with what this example actually indexes
    log.info("Organization Indexing Examples");
    Map<String, Collection<?>> result = execute(orgProgram);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    logResults(result);
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class UsageExamples method exampleExtractedConcepts.
/**
 * This provides some example on how to select concepts extracted from
 * a contentItem. The program defines the fields <code>conceptNames</code>
 * (labels of the entity annotations typed as <code>skos:Concept</code>)
 * and <code>linkedConcepts</code> (the referenced entities); it is parsed,
 * executed and its results are logged.
 * @throws LDPathParseException if the example program can not be parsed
 */
@Test
public void exampleExtractedConcepts() throws LDPathParseException {
    StringBuilder program = new StringBuilder();
    //the labels of the concepts: labels are textual values and therefore
    //selected as xsd:string (like the fise:entity-label fields of the other
    //examples) and not as xsd:anyURI
    program.append("conceptNames = fn:entityAnnotation()" + "[fise:entity-type is skos:Concept]/fise:entity-label :: xsd:string;");
    //the references to the linked concepts
    program.append("linkedConcepts = fn:entityAnnotation()" + "[fise:entity-type is skos:Concept]/fise:entity-reference :: xsd:anyURI;");
    Program<RDFTerm> conceptProgram = ldpath.parseProgram(new StringReader(program.toString()));
    log.info("- - - - - - - - - - - - -");
    log.info("Concept Indexing Examples");
    Map<String, Collection<?>> result = execute(conceptProgram);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    logResults(result);
}
Aggregations