Search in sources :

Example 6 with JenaParserProvider

use of org.apache.clerezza.rdf.jena.parser.JenaParserProvider in project stanbol by apache.

the class UsageExamples method readTestData.

/**
 * Builds the content item shared by the tests of this class.
 *
 * <p>The RDF metadata is parsed from {@code example.rdf.zip}; the content
 * item id is taken from the object of the
 * {@code Properties.ENHANCER_EXTRACTED_FROM} triples, all of which must refer
 * to the same resource. The content of {@code example.txt} becomes the main
 * content of the item and the parsed RDF data is copied into its metadata.
 *
 * @throws IOException if reading the example content fails (the helper
 *         {@code parseRdfData} may also be a source; not visible here)
 */
@BeforeClass
public static void readTestData() throws IOException {
    // parse the RDF metadata used by the examples
    ParsingProvider parser = new JenaParserProvider();
    Graph rdfData = parseRdfData(parser, "example.rdf.zip");
    // look up the id of the content item the enhancements were extracted from
    IRI contentItemId = null;
    Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
    while (it.hasNext()) {
        RDFTerm r = it.next().getObject();
        if (contentItemId == null) {
            // only an IRI can serve as content item id; other terms are skipped
            if (r instanceof IRI) {
                contentItemId = (IRI) r;
            }
        } else {
            // every further triple has to reference the id found first
            assertEquals("multiple ContentItems IDs contained in the RDF test data", contentItemId, r);
        }
    }
    assertNotNull("RDF data doe not contain an Enhancement extracted form " + "the content item", contentItemId);
    // create the content item with the example text content
    InputStream in = getTestResource("example.txt");
    assertNotNull("Example Plain text content not found", in);
    byte[] textData;
    try {
        textData = IOUtils.toByteArray(in);
    } finally {
        // release the stream even if reading fails
        IOUtils.closeQuietly(in);
    }
    // NOTE(review): example.txt is registered as "text/html" - confirm this is intended
    ci = ciFactory.createContentItem(contentItemId, new ByteArraySource(textData, "text/html; charset=UTF-8"));
    // add the metadata
    ci.getMetadata().addAll(rdfData);
}
Also used : JenaParserProvider(org.apache.clerezza.rdf.jena.parser.JenaParserProvider) Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) ParsingProvider(org.apache.clerezza.rdf.core.serializedform.ParsingProvider) Graph(org.apache.clerezza.commons.rdf.Graph) InputStream(java.io.InputStream) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ByteArraySource(org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource) BeforeClass(org.junit.BeforeClass)

Example 7 with JenaParserProvider

use of org.apache.clerezza.rdf.jena.parser.JenaParserProvider in project stanbol by apache.

the class ContentItemBackendTest method readTestData.

/**
 * Builds the content item shared by the tests of this class.
 *
 * <p>The RDF metadata is parsed from {@code metadata.rdf.zip}; the content
 * item id is taken from the object of the
 * {@code Properties.ENHANCER_EXTRACTED_FROM} triples, all of which must refer
 * to the same resource. {@code content.html} becomes the main content of the
 * item, {@code content.txt} is added as an additional part whose IRI is the
 * content item URI suffixed with {@code "_text"}, and the parsed RDF data is
 * copied into the item's metadata. The decoded HTML and text are also kept in
 * {@code htmlContent} and {@code textContent}.
 *
 * @throws IOException if reading one of the content resources fails (the
 *         helper {@code parseRdfData} may also be a source; not visible here)
 */
@BeforeClass
public static void readTestData() throws IOException {
    // parse the RDF metadata of the content item
    ParsingProvider parser = new JenaParserProvider();
    Graph rdfData = parseRdfData(parser, "metadata.rdf.zip");
    // look up the id of the content item the enhancements were extracted from
    IRI contentItemId = null;
    Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
    while (it.hasNext()) {
        RDFTerm r = it.next().getObject();
        if (contentItemId == null) {
            // only an IRI can serve as content item id; other terms are skipped
            if (r instanceof IRI) {
                contentItemId = (IRI) r;
            }
        } else {
            // every further triple has to reference the id found first
            assertEquals("multiple ContentItems IDs contained in the RDF test data", contentItemId, r);
        }
    }
    assertNotNull("RDF data doe not contain an Enhancement extracted form " + "the content item", contentItemId);
    // create the content item with the HTML content
    InputStream in = getTestResource("content.html");
    assertNotNull("HTML content not found", in);
    byte[] htmlData;
    try {
        htmlData = IOUtils.toByteArray(in);
    } finally {
        // release the stream even if reading fails
        IOUtils.closeQuietly(in);
    }
    ci = ciFactory.createContentItem(contentItemId, new ByteArraySource(htmlData, "text/html; charset=UTF-8"));
    htmlContent = new String(htmlData, UTF8);
    //create a Blob with the text content
    in = getTestResource("content.txt");
    // check the resource BEFORE reading it, so a missing file is reported
    // with this message instead of a NullPointerException
    assertNotNull("Plain text content not found", in);
    byte[] textData;
    try {
        textData = IOUtils.toByteArray(in);
    } finally {
        IOUtils.closeQuietly(in);
    }
    ci.addPart(new IRI(ci.getUri().getUnicodeString() + "_text"), ciFactory.createBlob(new ByteArraySource(textData, "text/plain; charset=UTF-8")));
    textContent = new String(textData, UTF8);
    //add the metadata
    ci.getMetadata().addAll(rdfData);
}
Also used : JenaParserProvider(org.apache.clerezza.rdf.jena.parser.JenaParserProvider) Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) ParsingProvider(org.apache.clerezza.rdf.core.serializedform.ParsingProvider) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) BufferedInputStream(java.io.BufferedInputStream) ZipInputStream(java.util.zip.ZipInputStream) FilterInputStream(java.io.FilterInputStream) InputStream(java.io.InputStream) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ByteArraySource(org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource) BeforeClass(org.junit.BeforeClass)

Aggregations

JenaParserProvider (org.apache.clerezza.rdf.jena.parser.JenaParserProvider)7 ParsingProvider (org.apache.clerezza.rdf.core.serializedform.ParsingProvider)5 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)4 ByteArrayInputStream (java.io.ByteArrayInputStream)3 InputStream (java.io.InputStream)3 Graph (org.apache.clerezza.commons.rdf.Graph)3 BeforeClass (org.junit.BeforeClass)3 BufferedInputStream (java.io.BufferedInputStream)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 FilterInputStream (java.io.FilterInputStream)2 ZipInputStream (java.util.zip.ZipInputStream)2 IRI (org.apache.clerezza.commons.rdf.IRI)2 ImmutableGraph (org.apache.clerezza.commons.rdf.ImmutableGraph)2 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)2 Triple (org.apache.clerezza.commons.rdf.Triple)2 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)2 ByteArraySource (org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource)2 ZipEntry (java.util.zip.ZipEntry)1 Parser (org.apache.clerezza.rdf.core.serializedform.Parser)1 Test (org.junit.Test)1