Search in sources :

Example 1 with SimpleGraph

use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.

the class TestHtmlExtractor method testMicrodataExtraction.

/**
 * Tests the extraction of microdata from an HTML5 document.
 *
 * @throws Exception on any failure while loading or extracting the test document
 */
@Test
public void testMicrodataExtraction() throws Exception {
    HtmlExtractor extractor = new HtmlExtractor(registry, parser);
    Graph model = new SimpleGraph();
    String testFile = "test-microdata.html";
    InputStream in = getResourceAsStream(testFile);
    assertNotNull("failed to load resource " + testFile, in);
    try {
        // extract the microdata annotations of the html document into the model
        extractor.extract("file://" + testFile, in, null, "text/html", model);
    } finally {
        // the stream was opened by this test, so release it even if the extraction throws
        in.close();
    }
    // show triples
    int tripleCounter = model.size();
    LOG.debug("Microdata triples: {}", tripleCounter);
    printTriples(model);
    assertEquals(91, tripleCounter);
    ClerezzaRDFUtils.makeConnected(model, new IRI("file://" + testFile), new IRI(NIE_NS + "contains"));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) InputStream(java.io.InputStream) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) HtmlExtractor(org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor) Test(org.junit.Test)

Example 2 with SimpleGraph

use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.

the class TestHtmlExtractor method testMFExtraction.

/**
 * Tests the extraction of Microformats from an HTML document.
 *
 * @throws ExtractorException if there is an error during extraction
 * @throws IOException if there is an error when reading the document
 */
@Test
public void testMFExtraction() throws Exception {
    HtmlExtractor extractor = new HtmlExtractor(registry, parser);
    Graph model = new SimpleGraph();
    String testFile = "test-MF.html";
    InputStream in = getResourceAsStream(testFile);
    assertNotNull("failed to load resource " + testFile, in);
    try {
        // extract the Microformat annotations of the html document into the model
        extractor.extract("file://" + testFile, in, null, "text/html", model);
    } finally {
        // the stream was opened by this test, so release it even if the extraction throws
        in.close();
    }
    // show triples
    int tripleCounter = model.size();
    LOG.debug("Microformat triples: {}", tripleCounter);
    printTriples(model);
    assertEquals(127, tripleCounter);
    ClerezzaRDFUtils.makeConnected(model, new IRI("file://" + testFile), new IRI(NIE_NS + "contains"));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) InputStream(java.io.InputStream) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) HtmlExtractor(org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor) Test(org.junit.Test)

Example 3 with SimpleGraph

use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.

the class TestHtmlExtractor method testRootExtraction.

/**
 * Tests the merging of disconnected graphs under a single root.
 * <p>
 * The extracted model is expected to have more than one root before
 * {@code ClerezzaRDFUtils.makeConnected} is called and exactly one afterwards.
 *
 * @throws Exception on any failure while loading or extracting the test document
 */
@Test
public void testRootExtraction() throws Exception {
    HtmlExtractor extractor = new HtmlExtractor(registry, parser);
    Graph model = new SimpleGraph();
    String testFile = "test-MultiRoot.html";
    InputStream in = getResourceAsStream(testFile);
    assertNotNull("failed to load resource " + testFile, in);
    try {
        // extract the annotations of the html document into the model
        extractor.extract("file://" + testFile, in, null, "text/html", model);
    } finally {
        // the stream was opened by this test, so release it even if the extraction throws
        in.close();
    }
    // show triples
    int tripleCounter = model.size();
    LOG.debug("Triples: {}", tripleCounter);
    printTriples(model);
    // before connecting, the extracted graph must consist of several disconnected parts
    Set<BlankNodeOrIRI> roots = ClerezzaRDFUtils.findRoots(model);
    assertTrue(roots.size() > 1);
    ClerezzaRDFUtils.makeConnected(model, new IRI("file://" + testFile), new IRI(NIE_NS + "contains"));
    // after connecting, only a single root may remain
    roots = ClerezzaRDFUtils.findRoots(model);
    assertEquals(1, roots.size());
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) InputStream(java.io.InputStream) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HtmlExtractor(org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor) Test(org.junit.Test)

Example 4 with SimpleGraph

use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.

the class ClerezzaRDFUtils method urifyBlankNodes.

/**
 * Replaces the blank nodes used as subject or object of the triples in the
 * given graph by IRIs obtained from {@code createRandomUri()}.
 * <p>
 * Each distinct blank node is assigned one IRI, which is reused for every
 * triple that refers to that blank node. The affected triples are collected
 * while iterating and only swapped once the iteration has finished, so the
 * graph is not modified while it is being traversed.
 *
 * @param model the graph to modify in place
 */
public static void urifyBlankNodes(Graph model) {
    HashMap<BlankNode, IRI> iriByBlankNode = new HashMap<BlankNode, IRI>();
    Graph obsolete = new SimpleGraph();
    Graph replacements = new SimpleGraph();
    for (Triple triple : model) {
        BlankNodeOrIRI subject = triple.getSubject();
        RDFTerm object = triple.getObject();
        boolean blankSubject = subject instanceof BlankNode;
        boolean blankObject = object instanceof BlankNode;
        if (!blankSubject && !blankObject) {
            // nothing to rewrite in this triple
            continue;
        }
        if (blankSubject) {
            IRI iri = iriByBlankNode.get(subject);
            if (iri == null) {
                iri = createRandomUri();
                iriByBlankNode.put((BlankNode) subject, iri);
            }
            subject = iri;
        }
        if (blankObject) {
            IRI iri = iriByBlankNode.get(object);
            if (iri == null) {
                iri = createRandomUri();
                iriByBlankNode.put((BlankNode) object, iri);
            }
            object = iri;
        }
        obsolete.add(triple);
        replacements.add(new TripleImpl(subject, triple.getPredicate(), object));
    }
    // apply the collected changes only after the iteration is complete
    model.removeAll(obsolete);
    model.addAll(replacements);
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 5 with SimpleGraph

use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.

the class ResourceMapping method apply.

/**
 * Applies this mapping by creating a new blank node and applying the
 * required, optional and additional mappings to it.
 * <p>
 * Required mappings are applied to a temporary graph first; if any of them
 * fails, this method returns {@code false} without copying their triples into
 * {@code graph}. Optional mappings are applied directly to {@code graph}.
 * Additional mappings are applied, and the blank node is linked to
 * {@code subject} via {@code ontProperty}, only if there were required
 * mappings or at least one optional mapping succeeded.
 *
 * @param graph the graph the triples are added to
 * @param subject the node the newly created blank node is attached to
 * @param metadata the metadata handed on to the nested mappings
 * @return {@code true} if the blank node was linked to {@code subject},
 *         {@code false} otherwise
 */
@Override
public boolean apply(Graph graph, BlankNodeOrIRI subject, Metadata metadata) {
    final BlankNodeOrIRI resource = new BlankNode();
    mappingLogger.log(subject, ontProperty, null, resource);
    boolean anyApplied = !required.isEmpty();
    if (anyApplied) {
        // stage the required mappings so that a failure leaves the target graph untouched
        Graph staged = new SimpleGraph();
        for (Mapping mapping : required) {
            boolean applied = mapping.apply(staged, resource, metadata);
            if (!applied) {
                return false;
            }
        }
        graph.addAll(staged);
    }
    for (Mapping mapping : optional) {
        // non short-circuit: every optional mapping is applied
        anyApplied |= mapping.apply(graph, resource, metadata);
    }
    if (!anyApplied) {
        return false;
    }
    for (Mapping mapping : additional) {
        mapping.apply(graph, resource, metadata);
    }
    graph.add(new TripleImpl(subject, ontProperty, resource));
    return true;
}
Also used : SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)46 Graph (org.apache.clerezza.commons.rdf.Graph)34 IRI (org.apache.clerezza.commons.rdf.IRI)24 Test (org.junit.Test)17 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)15 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)12 ImmutableGraph (org.apache.clerezza.commons.rdf.ImmutableGraph)11 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)10 Triple (org.apache.clerezza.commons.rdf.Triple)10 HashSet (java.util.HashSet)9 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 InputStream (java.io.InputStream)5 HtmlExtractor (org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor)5 RdfEntityFactory (org.apache.stanbol.enhancer.rdfentities.RdfEntityFactory)5 BlankNode (org.apache.clerezza.commons.rdf.BlankNode)4 ByteArrayInputStream (java.io.ByteArrayInputStream)3 IOException (java.io.IOException)3 ResponseBuilder (javax.ws.rs.core.Response.ResponseBuilder)3 JenaParserProvider (org.apache.clerezza.rdf.jena.parser.JenaParserProvider)3