Search in sources :

Example 11 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TestHtmlExtractor method testRootExtraction.

/**
 * Checks that the disconnected sub-graphs extracted from a multi-root
 * document can be merged under a single root.
 * <p>
 * The test extracts {@code test-MultiRoot.html}, asserts that
 * {@code ClerezzaRDFUtils.findRoots} reports more than one root, calls
 * {@code ClerezzaRDFUtils.makeConnected} with the document IRI and the
 * {@code NIE_NS + "contains"} property, and then asserts that exactly one
 * root remains.
 *
 * @throws Exception if loading the test resource or the extraction fails
 */
@Test
public void testRootExtraction() throws Exception {
    HtmlExtractor extractor = new HtmlExtractor(registry, parser);
    Graph model = new SimpleGraph();
    String testFile = "test-MultiRoot.html";
    String documentUri = "file://" + testFile;
    // run the extractor over the test document
    InputStream in = getResourceAsStream(testFile);
    assertNotNull("failed to load resource " + testFile, in);
    try {
        extractor.extract(documentUri, in, null, "text/html", model);
    } finally {
        // release the resource stream even when the extraction throws
        in.close();
    }
    // show triples
    int tripleCounter = model.size();
    LOG.debug("Triples: {}", tripleCounter);
    printTriples(model);
    // precondition: the extracted graph must start out with several roots
    Set<BlankNodeOrIRI> roots = ClerezzaRDFUtils.findRoots(model);
    assertTrue("expected more than one root before connecting the graph", roots.size() > 1);
    ClerezzaRDFUtils.makeConnected(model, new IRI(documentUri), new IRI(NIE_NS + "contains"));
    // after connecting, only a single root may be left
    roots = ClerezzaRDFUtils.findRoots(model);
    assertEquals(1, roots.size());
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) InputStream(java.io.InputStream) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HtmlExtractor(org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor) Test(org.junit.Test)

Example 12 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TestHtmlExtractor method testMFExtraction.

/**
 * Tests Microformat extraction from {@code test-MF.html}.
 * <p>
 * The test asserts that the extraction yields exactly 127 triples and then
 * calls {@code ClerezzaRDFUtils.makeConnected} on the result; the outcome of
 * that call is not asserted, so it only has to complete without throwing.
 *
 * @throws Exception if loading the test resource or the extraction fails
 */
@Test
public void testMFExtraction() throws Exception {
    HtmlExtractor extractor = new HtmlExtractor(registry, parser);
    Graph model = new SimpleGraph();
    String testFile = "test-MF.html";
    String documentUri = "file://" + testFile;
    // run the extractor over the Microformat test document
    InputStream in = getResourceAsStream(testFile);
    assertNotNull("failed to load resource " + testFile, in);
    try {
        extractor.extract(documentUri, in, null, "text/html", model);
    } finally {
        // release the resource stream even when the extraction throws
        in.close();
    }
    // show triples
    int tripleCounter = model.size();
    LOG.debug("Microformat triples: {}", tripleCounter);
    printTriples(model);
    assertEquals(127, tripleCounter);
    ClerezzaRDFUtils.makeConnected(model, new IRI(documentUri), new IRI(NIE_NS + "contains"));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) InputStream(java.io.InputStream) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) HtmlExtractor(org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor) Test(org.junit.Test)

Example 13 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TestHtmlExtractor method testMicrodataExtraction.

/**
 * Tests the extraction of microdata from the HTML5 document
 * {@code test-microdata.html}.
 * <p>
 * The test asserts that the extraction yields exactly 91 triples and then
 * calls {@code ClerezzaRDFUtils.makeConnected} on the result; the outcome of
 * that call is not asserted, so it only has to complete without throwing.
 *
 * @throws Exception if loading the test resource or the extraction fails
 */
@Test
public void testMicrodataExtraction() throws Exception {
    HtmlExtractor extractor = new HtmlExtractor(registry, parser);
    Graph model = new SimpleGraph();
    String testFile = "test-microdata.html";
    String documentUri = "file://" + testFile;
    // run the extractor over the microdata test document
    InputStream in = getResourceAsStream(testFile);
    assertNotNull("failed to load resource " + testFile, in);
    try {
        extractor.extract(documentUri, in, null, "text/html", model);
    } finally {
        // release the resource stream even when the extraction throws
        in.close();
    }
    // show triples
    int tripleCounter = model.size();
    LOG.debug("Microdata triples: {}", tripleCounter);
    printTriples(model);
    assertEquals(91, tripleCounter);
    ClerezzaRDFUtils.makeConnected(model, new IRI(documentUri), new IRI(NIE_NS + "contains"));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) InputStream(java.io.InputStream) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) HtmlExtractor(org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor) Test(org.junit.Test)

Example 14 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TestEntityLinkingEnhancementEngine method initContentItem.

/**
 * Builds the content item used by this test.
 * <p>
 * The item is created through {@code ciFactory} with {@link #CONTEXT} as its
 * content. {@code getTextAnnotation} is then called once each for
 * {@code PERSON}, {@code ORGANISATION} and {@code PLACE}, and finally an
 * {@code "en"} language triple is added to the item's metadata.
 *
 * @return the initialised content item
 * @throws IOException declared by the signature; presumably raised while
 *         creating the content item (confirm against the factory)
 */
private ContentItem initContentItem() throws IOException {
    IRI contentItemId = new IRI("urn:iks-project:enhancer:text:content-item:person");
    StringSource content = new StringSource(CONTEXT);
    ContentItem ci = ciFactory.createContentItem(contentItemId, content);

    // three text annotations to be consumed by this test
    getTextAnnotation(ci, PERSON, CONTEXT, DBPEDIA_PERSON);
    getTextAnnotation(ci, ORGANISATION, CONTEXT, DBPEDIA_ORGANISATION);
    getTextAnnotation(ci, PLACE, CONTEXT, DBPEDIA_PLACE);

    // mark the content as English
    PlainLiteralImpl english = new PlainLiteralImpl("en");
    ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, english));
    return ci;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Example 15 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class SparqlDereferencer method dereference.

/**
 * Dereferences the given URI by sending a SPARQL {@code CONSTRUCT} query for
 * all {@code ?p ?o} statements of that subject to the endpoint returned by
 * {@code getAccessUri()}.
 * <p>
 * TODO: Supports only Triple serialisations as content types.
 * To support other types one would need to create a select query and
 * format the output accordingly.
 * However it is not clear if such a functionality is needed.
 *
 * @param uri the URI to dereference; {@code null} yields {@code null}
 * @param contentType the content type passed on to the SPARQL request
 * @return the response stream of the SPARQL request, or {@code null} if
 *         {@code uri} is {@code null}
 * @throws IOException if {@code uri} contains a character that is not
 *         allowed inside a SPARQL IRI reference, or if declared by the
 *         underlying SPARQL request
 */
@Override
public final InputStream dereference(String uri, String contentType) throws IOException {
    if (uri == null) {
        return null;
    }
    // The URI is spliced into the query text, so refuse anything that could
    // terminate the IRI reference early and change the query. The rejected
    // set follows the SPARQL 1.1 IRIREF production: control characters,
    // space, and < > " { } | ^ ` \
    for (int i = 0; i < uri.length(); i++) {
        char c = uri.charAt(i);
        if (c <= 0x20 || "<>\"{}|^`\\".indexOf(c) >= 0) {
            throw new IOException("Unable to dereference '" + uri + "': character at index " + i
                    + " is not allowed in a SPARQL IRI reference");
        }
    }
    // Appending the IRI object relies on its toString() producing the form
    // needed in the query (presumably the angle-bracketed <uri>; confirm
    // against the Clerezza IRI class).
    IRI reference = new IRI(uri);
    StringBuilder query = new StringBuilder();
    query.append("CONSTRUCT { ");
    query.append(reference);
    query.append(" ?p ?o } WHERE { ");
    query.append(reference);
    query.append(" ?p ?o }");
    return SparqlEndpointUtils.sendSparqlRequest(getAccessUri(), query.toString(), contentType);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI)

Aggregations

IRI (org.apache.clerezza.commons.rdf.IRI): 346
BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI): 113
Graph (org.apache.clerezza.commons.rdf.Graph): 109
TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 104
Triple (org.apache.clerezza.commons.rdf.Triple): 88
RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 84
Test (org.junit.Test): 78
PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 58
HashSet (java.util.HashSet): 50
ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem): 46
EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException): 39
HashMap (java.util.HashMap): 38
IOException (java.io.IOException): 37
ArrayList (java.util.ArrayList): 37
Blob (org.apache.stanbol.enhancer.servicesapi.Blob): 36
Literal (org.apache.clerezza.commons.rdf.Literal): 35
SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph): 31
IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph): 29
Recipe (org.apache.stanbol.rules.base.api.Recipe): 29
Language (org.apache.clerezza.commons.rdf.Language): 24