Search in sources :

Example 6 with ByteArraySource

use of org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource in project stanbol by apache.

the class UsageExamples method readTestData.

/**
 * Builds the shared test fixture: parses the RDF metadata from
 * {@code example.rdf.zip}, takes the content item id from the objects of the
 * {@code Properties.ENHANCER_EXTRACTED_FROM} triples, creates the content
 * item from the bytes of {@code example.txt} and adds the parsed metadata.
 *
 * @throws IOException if the test resources cannot be read
 */
@BeforeClass
public static void readTestData() throws IOException {
    //parse the RDF metadata used by the tests
    ParsingProvider parser = new JenaParserProvider();
    Graph rdfData = parseRdfData(parser, "example.rdf.zip");
    //the id of the content item is the (single) IRI object of the
    //"extracted from" triples; every further object must equal it
    IRI contentItemId = null;
    Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
    while (it.hasNext()) {
        RDFTerm r = it.next().getObject();
        if (contentItemId == null) {
            if (r instanceof IRI) {
                contentItemId = (IRI) r;
            }
        } else {
            assertEquals("multiple ContentItems IDs contained in the RDF test data", contentItemId, r);
        }
    }
    assertNotNull("RDF data doe not contain an Enhancement extracted form the content item", contentItemId);
    //read the content; the stream is closed even if reading fails
    InputStream in = getTestResource("example.txt");
    assertNotNull("Example Plain text content not found", in);
    byte[] textData;
    try {
        textData = IOUtils.toByteArray(in);
    } finally {
        IOUtils.closeQuietly(in);
    }
    //NOTE(review): example.txt is registered with media type "text/html" - confirm this is intended
    ci = ciFactory.createContentItem(contentItemId, new ByteArraySource(textData, "text/html; charset=UTF-8"));
    //add the metadata
    ci.getMetadata().addAll(rdfData);
}
Also used : JenaParserProvider(org.apache.clerezza.rdf.jena.parser.JenaParserProvider) Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) ParsingProvider(org.apache.clerezza.rdf.core.serializedform.ParsingProvider) Graph(org.apache.clerezza.commons.rdf.Graph) InputStream(java.io.InputStream) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ByteArraySource(org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource) BeforeClass(org.junit.BeforeClass)

Example 7 with ByteArraySource

use of org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource in project stanbol by apache.

the class ContentItemBackendTest method readTestData.

/**
 * Builds the shared test fixture: parses the RDF metadata from
 * {@code metadata.rdf.zip}, takes the content item id from the objects of the
 * {@code Properties.ENHANCER_EXTRACTED_FROM} triples, creates the content
 * item from {@code content.html}, adds the bytes of {@code content.txt} as an
 * additional part with the URI suffix {@code _text} and finally adds the
 * parsed metadata to the content item.
 *
 * @throws IOException if the test resources cannot be read
 */
@BeforeClass
public static void readTestData() throws IOException {
    //parse the RDF metadata used by the tests
    ParsingProvider parser = new JenaParserProvider();
    Graph rdfData = parseRdfData(parser, "metadata.rdf.zip");
    //the id of the content item is the (single) IRI object of the
    //"extracted from" triples; every further object must equal it
    IRI contentItemId = null;
    Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
    while (it.hasNext()) {
        RDFTerm r = it.next().getObject();
        if (contentItemId == null) {
            if (r instanceof IRI) {
                contentItemId = (IRI) r;
            }
        } else {
            assertEquals("multiple ContentItems IDs contained in the RDF test data", contentItemId, r);
        }
    }
    assertNotNull("RDF data doe not contain an Enhancement extracted form the content item", contentItemId);
    //create the content Item with the HTML content
    InputStream htmlIn = getTestResource("content.html");
    assertNotNull("HTML content not found", htmlIn);
    byte[] htmlData;
    try {
        htmlData = IOUtils.toByteArray(htmlIn);
    } finally {
        IOUtils.closeQuietly(htmlIn);
    }
    ci = ciFactory.createContentItem(contentItemId, new ByteArraySource(htmlData, "text/html; charset=UTF-8"));
    htmlContent = new String(htmlData, UTF8);
    //create a Blob with the text content
    InputStream textIn = getTestResource("content.txt");
    //check for the resource BEFORE reading it, so that a missing file is
    //reported by this assertion and not by a NullPointerException
    assertNotNull("Plain text content not found", textIn);
    byte[] textData;
    try {
        textData = IOUtils.toByteArray(textIn);
    } finally {
        IOUtils.closeQuietly(textIn);
    }
    ci.addPart(new IRI(ci.getUri().getUnicodeString() + "_text"), ciFactory.createBlob(new ByteArraySource(textData, "text/plain; charset=UTF-8")));
    textContent = new String(textData, UTF8);
    //add the metadata
    ci.getMetadata().addAll(rdfData);
}
Also used : JenaParserProvider(org.apache.clerezza.rdf.jena.parser.JenaParserProvider) Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) ParsingProvider(org.apache.clerezza.rdf.core.serializedform.ParsingProvider) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) BufferedInputStream(java.io.BufferedInputStream) ZipInputStream(java.util.zip.ZipInputStream) FilterInputStream(java.io.FilterInputStream) InputStream(java.io.InputStream) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ByteArraySource(org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource) BeforeClass(org.junit.BeforeClass)

Example 8 with ByteArraySource

use of org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource in project stanbol by apache.

the class BlobTest method testDefaultBinaryMimeType.

/**
 * Checks that blobs created from sources without an explicit media type
 * report "application/octet-stream" as mime type and expose no parameters.
 * Both a {@code ByteArraySource} and a {@code StreamSource} are checked.
 * @throws IOException
 */
@Test
public void testDefaultBinaryMimeType() throws IOException {
    final String expectedMimeType = "application/octet-stream";
    final byte[] payload = "dummy".getBytes(UTF8);

    //content provided as byte array
    Blob fromBytes = createBlob(new ByteArraySource(payload));
    Assert.assertEquals(expectedMimeType, fromBytes.getMimeType());
    Assert.assertTrue(fromBytes.getParameter().isEmpty());

    //content provided as stream
    ByteArrayInputStream payloadStream = new ByteArrayInputStream(payload);
    Blob fromStream = createBlob(new StreamSource(payloadStream));
    Assert.assertEquals(expectedMimeType, fromStream.getMimeType());
    Assert.assertTrue(fromStream.getParameter().isEmpty());
}
Also used : Blob(org.apache.stanbol.enhancer.servicesapi.Blob) ByteArrayInputStream(java.io.ByteArrayInputStream) StreamSource(org.apache.stanbol.enhancer.servicesapi.impl.StreamSource) ByteArraySource(org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource) Test(org.junit.Test)

Example 9 with ByteArraySource

use of org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource in project stanbol by apache.

the class BlobTest method testStringWithCustomCharset.

/**
 * Tests that the charset supplied together with text content is reported as
 * the "charset" parameter of the created blob, while the mime type is
 * reported without parameters. The same content is provided via a
 * {@code StringSource}, a {@code ByteArraySource} and a {@code StreamSource}.
 * @throws IOException
 */
@Test
public void testStringWithCustomCharset() throws IOException {
    String test = "Exámplê";
    Charset customCharset = Charset.forName("ISO-8859-4");
    String mediaType = "text/plain; charset=" + customCharset.name();
    //first via a StringSource
    ContentSource cs = new StringSource(test, customCharset, "text/plain");
    assertPlainTextWithCharset(createBlob(cs), customCharset);
    //2nd via a ByteArray
    byte[] data = test.getBytes(customCharset);
    cs = new ByteArraySource(data, mediaType);
    assertPlainTextWithCharset(createBlob(cs), customCharset);
    //3rd as Stream
    cs = new StreamSource(new ByteArrayInputStream(data), mediaType);
    assertPlainTextWithCharset(createBlob(cs), customCharset);
}

/**
 * Asserts that the blob reports "text/plain" as mime type and the name of
 * the expected charset as value of its "charset" parameter.
 * @param blob the blob to check
 * @param expected the charset expected in the "charset" parameter
 */
private static void assertPlainTextWithCharset(Blob blob, Charset expected) {
    Assert.assertEquals("text/plain", blob.getMimeType());
    Assert.assertTrue(blob.getParameter().containsKey("charset"));
    Assert.assertEquals(expected.name(), blob.getParameter().get("charset"));
}
Also used : ContentSource(org.apache.stanbol.enhancer.servicesapi.ContentSource) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) ByteArrayInputStream(java.io.ByteArrayInputStream) StreamSource(org.apache.stanbol.enhancer.servicesapi.impl.StreamSource) Charset(java.nio.charset.Charset) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) ByteArraySource(org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource) Test(org.junit.Test)

Example 10 with ByteArraySource

use of org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource in project stanbol by apache.

the class ContentSourceTest method checkDataFromStringSource.

/**
 * Checks that {@code getData()} of a {@code ByteArraySource} created from
 * {@code DATA} returns those bytes, also when called more than once.
 * NOTE(review): the method name mentions StringSource but the source under
 * test is a ByteArraySource - confirm which one is intended.
 * @throws IOException
 */
@Test
public void checkDataFromStringSource() throws IOException {
    ContentSource source = new ByteArraySource(DATA);
    //assertArrayEquals reports the first differing index on failure
    Assert.assertArrayEquals(DATA, source.getData());
    //multiple calls must work and return the same data
    Assert.assertArrayEquals(DATA, source.getData());
}
Also used : ContentSource(org.apache.stanbol.enhancer.servicesapi.ContentSource) ByteArraySource(org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource) Test(org.junit.Test)

Aggregations

ByteArraySource (org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource)11 Test (org.junit.Test)9 ContentSource (org.apache.stanbol.enhancer.servicesapi.ContentSource)7 ByteArrayInputStream (java.io.ByteArrayInputStream)4 StreamSource (org.apache.stanbol.enhancer.servicesapi.impl.StreamSource)4 Graph (org.apache.clerezza.commons.rdf.Graph)3 IRI (org.apache.clerezza.commons.rdf.IRI)3 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)3 InputStream (java.io.InputStream)2 Triple (org.apache.clerezza.commons.rdf.Triple)2 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)2 ParsingProvider (org.apache.clerezza.rdf.core.serializedform.ParsingProvider)2 JenaParserProvider (org.apache.clerezza.rdf.jena.parser.JenaParserProvider)2 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)2 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)2 BeforeClass (org.junit.BeforeClass)2 BufferedInputStream (java.io.BufferedInputStream)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 FilterInputStream (java.io.FilterInputStream)1 Charset (java.nio.charset.Charset)1