use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class TestHtmlExtractor method testRootExtraction.
/**
 * Tests the merging of disconnected graphs under a single root.
 * <p>
 * The model extracted from {@code test-MultiRoot.html} is expected to have
 * more than one root. After {@code ClerezzaRDFUtils.makeConnected} is called
 * with the document IRI and the {@code NIE_NS + "contains"} property, exactly
 * one root must remain.
 *
 * @throws Exception if loading the resource or running the extraction fails
 */
@Test
public void testRootExtraction() throws Exception {
    HtmlExtractor extractor = new HtmlExtractor(registry, parser);
    Graph model = new SimpleGraph();
    String testFile = "test-MultiRoot.html";
    // run the extractor over the test document; try-with-resources makes sure
    // the stream is closed even when the assertion or the extraction fails
    try (InputStream in = getResourceAsStream(testFile)) {
        assertNotNull("failed to load resource " + testFile, in);
        extractor.extract("file://" + testFile, in, null, "text/html", model);
    }
    // show triples
    int tripleCounter = model.size();
    LOG.debug("Triples: {}", tripleCounter);
    printTriples(model);
    // before connecting, the extracted graph must consist of several roots
    Set<BlankNodeOrIRI> roots = ClerezzaRDFUtils.findRoots(model);
    assertTrue(roots.size() > 1);
    ClerezzaRDFUtils.makeConnected(model, new IRI("file://" + testFile), new IRI(NIE_NS + "contains"));
    // after connecting, a single root must be left
    roots = ClerezzaRDFUtils.findRoots(model);
    assertEquals(1, roots.size());
}
use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class TestHtmlExtractor method testMFExtraction.
/**
 * Tests some Microformat extraction.
 * <p>
 * Runs the extractor over {@code test-MF.html} and checks that the resulting
 * model contains exactly 127 triples.
 *
 * @throws Exception if loading the resource or running the extraction fails
 */
@Test
public void testMFExtraction() throws Exception {
    HtmlExtractor extractor = new HtmlExtractor(registry, parser);
    Graph model = new SimpleGraph();
    String testFile = "test-MF.html";
    // run the extractor over the Microformat test document; try-with-resources
    // makes sure the stream is closed even when the assertion or the extraction fails
    try (InputStream in = getResourceAsStream(testFile)) {
        assertNotNull("failed to load resource " + testFile, in);
        extractor.extract("file://" + testFile, in, null, "text/html", model);
    }
    // show triples
    int tripleCounter = model.size();
    LOG.debug("Microformat triples: {}", tripleCounter);
    printTriples(model);
    assertEquals(127, tripleCounter);
    // no assertion follows this call: it is only exercised to see that it completes
    ClerezzaRDFUtils.makeConnected(model, new IRI("file://" + testFile), new IRI(NIE_NS + "contains"));
}
use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class TestHtmlExtractor method testMicrodataExtraction.
/**
 * Tests some extraction of microdata from an HTML-5 document.
 * <p>
 * Runs the extractor over {@code test-microdata.html} and checks that the
 * resulting model contains exactly 91 triples.
 *
 * @throws Exception if loading the resource or running the extraction fails
 */
@Test
public void testMicrodataExtraction() throws Exception {
    HtmlExtractor extractor = new HtmlExtractor(registry, parser);
    Graph model = new SimpleGraph();
    String testFile = "test-microdata.html";
    // run the extractor over the microdata test document; try-with-resources
    // makes sure the stream is closed even when the assertion or the extraction fails
    try (InputStream in = getResourceAsStream(testFile)) {
        assertNotNull("failed to load resource " + testFile, in);
        extractor.extract("file://" + testFile, in, null, "text/html", model);
    }
    // show triples
    int tripleCounter = model.size();
    LOG.debug("Microdata triples: {}", tripleCounter);
    printTriples(model);
    assertEquals(91, tripleCounter);
    // no assertion follows this call: it is only exercised to see that it completes
    ClerezzaRDFUtils.makeConnected(model, new IRI("file://" + testFile), new IRI(NIE_NS + "contains"));
}
use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class TestEntityLinkingEnhancementEngine method initContentItem.
/**
 * Creates and initialises a new content item using {@link #CONTEXT} as
 * content.
 * <p>
 * Three text annotations (person, organisation and place) are added through
 * {@code getTextAnnotation}, and a {@code Properties.DC_LANGUAGE} triple with
 * the plain literal {@code "en"} is added to the item's metadata.
 *
 * @return the initialised content item
 * @throws IOException if one of the calls made while building the content
 *         item fails with an I/O error
 */
private ContentItem initContentItem() throws IOException {
    IRI itemId = new IRI("urn:iks-project:enhancer:text:content-item:person");
    ContentItem item = ciFactory.createContentItem(itemId, new StringSource(CONTEXT));

    // the three text annotations this test consumes
    getTextAnnotation(item, PERSON, CONTEXT, DBPEDIA_PERSON);
    getTextAnnotation(item, ORGANISATION, CONTEXT, DBPEDIA_ORGANISATION);
    getTextAnnotation(item, PLACE, CONTEXT, DBPEDIA_PLACE);

    // mark the content as English
    item.getMetadata().add(
            new TripleImpl(item.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
    return item;
}
use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class SparqlDereferencer method dereference.
/*
 * Builds a SPARQL CONSTRUCT query for all "?p ?o" statements of the given
 * URI and sends it to the endpoint returned by getAccessUri(), requesting
 * the given content type. Returns null when the URI is null.
 *
 * TODO: Supports only Triple serialisations as content types.
 * To support other types one would need to create a select query and
 * format the output accordingly.
 * However it is not clear if such a functionality is needed.
 *
 * NOTE(review): the URI is placed into the query text via the IRI's string
 * form without any escaping visible here - confirm that callers only pass
 * trusted / well-formed URIs.
 */
@Override
public final InputStream dereference(String uri, String contentType) throws IOException {
    if (uri == null) {
        return null;
    }
    IRI reference = new IRI(uri);
    // the same resource is used as subject in both the template and the pattern
    String query = "CONSTRUCT { " + reference + " ?p ?o } WHERE { " + reference + " ?p ?o }";
    return SparqlEndpointUtils.sendSparqlRequest(getAccessUri(), query, contentType);
}
Aggregations