Use of org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource in project stanbol by apache.
Class UsageExamples, method readTestData.
/**
 * Builds the content item used by the tests of this class.
 * <p>
 * The RDF metadata is parsed from {@code example.rdf.zip}; the content item id
 * is taken from the objects of the {@code ENHANCER_EXTRACTED_FROM} triples, all
 * of which must refer to the same resource. The content is read from
 * {@code example.txt} and the parsed metadata is added to the created item.
 *
 * @throws IOException if reading the content resource fails
 *         (presumably also on failures while parsing the RDF data - confirm
 *         against {@code parseRdfData})
 */
@BeforeClass
public static void readTestData() throws IOException {
    // parse the metadata first: the id of the content item is taken from it
    ParsingProvider parser = new JenaParserProvider();
    Graph rdfData = parseRdfData(parser, "example.rdf.zip");
    IRI contentItemId = null;
    Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
    while (it.hasNext()) {
        RDFTerm r = it.next().getObject();
        if (contentItemId == null) {
            // the first IRI found becomes the content item id
            if (r instanceof IRI) {
                contentItemId = (IRI) r;
            }
        } else {
            // every further value must refer to that very same content item
            assertEquals("multiple ContentItems IDs contained in the RDF test data", contentItemId, r);
        }
    }
    assertNotNull("RDF data doe not contain an Enhancement extracted form " + "the content item", contentItemId);
    InputStream in = getTestResource("example.txt");
    assertNotNull("Example Plain text content not found", in);
    byte[] textData;
    try {
        textData = IOUtils.toByteArray(in);
    } finally {
        // release the stream even when reading fails
        IOUtils.closeQuietly(in);
    }
    // NOTE(review): the resource is called example.txt and the assertion above
    // speaks of plain text, yet the media type passed here is text/html - confirm
    ci = ciFactory.createContentItem(contentItemId, new ByteArraySource(textData, "text/html; charset=UTF-8"));
    ci.getMetadata().addAll(rdfData);
}
Use of org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource in project stanbol by apache.
Class ContentItemBackendTest, method readTestData.
/**
 * Builds the content item used by the tests of this class.
 * <p>
 * The RDF metadata is parsed from {@code metadata.rdf.zip}; the content item id
 * is taken from the objects of the {@code ENHANCER_EXTRACTED_FROM} triples, all
 * of which must refer to the same resource. The item is created from
 * {@code content.html}, a second part holding {@code content.txt} is added under
 * the item URI suffixed with {@code "_text"}, and finally the parsed metadata is
 * added. The decoded HTML and text are stored in {@code htmlContent} and
 * {@code textContent}.
 *
 * @throws IOException if reading one of the content resources fails
 *         (presumably also on failures while parsing the RDF data - confirm
 *         against {@code parseRdfData})
 */
@BeforeClass
public static void readTestData() throws IOException {
    // parse the metadata first: the id of the content item is taken from it
    ParsingProvider parser = new JenaParserProvider();
    Graph rdfData = parseRdfData(parser, "metadata.rdf.zip");
    IRI contentItemId = null;
    Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
    while (it.hasNext()) {
        RDFTerm r = it.next().getObject();
        if (contentItemId == null) {
            // the first IRI found becomes the content item id
            if (r instanceof IRI) {
                contentItemId = (IRI) r;
            }
        } else {
            // every further value must refer to that very same content item
            assertEquals("multiple ContentItems IDs contained in the RDF test data", contentItemId, r);
        }
    }
    assertNotNull("RDF data doe not contain an Enhancement extracted form " + "the content item", contentItemId);
    //create the content Item with the HTML content
    InputStream htmlIn = getTestResource("content.html");
    assertNotNull("HTML content not found", htmlIn);
    byte[] htmlData;
    try {
        htmlData = IOUtils.toByteArray(htmlIn);
    } finally {
        // release the stream even when reading fails
        IOUtils.closeQuietly(htmlIn);
    }
    ci = ciFactory.createContentItem(contentItemId, new ByteArraySource(htmlData, "text/html; charset=UTF-8"));
    htmlContent = new String(htmlData, UTF8);
    //create a Blob with the text content
    InputStream textIn = getTestResource("content.txt");
    // check for the resource BEFORE reading it, so that a missing file is
    // reported with this message
    assertNotNull("Plain text content not found", textIn);
    byte[] textData;
    try {
        textData = IOUtils.toByteArray(textIn);
    } finally {
        IOUtils.closeQuietly(textIn);
    }
    ci.addPart(new IRI(ci.getUri().getUnicodeString() + "_text"), ciFactory.createBlob(new ByteArraySource(textData, "text/plain; charset=UTF-8")));
    textContent = new String(textData, UTF8);
    //add the metadata
    ci.getMetadata().addAll(rdfData);
}
Use of org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource in project stanbol by apache.
Class BlobTest, method testDefaultBinaryMimeType.
/**
 * Verifies that a blob created from content without an explicit media type
 * reports "application/octet-stream" and carries no parameters. The check is
 * done once for a {@link ByteArraySource} and once for a {@link StreamSource}.
 * @throws IOException
 */
@Test
public void testDefaultBinaryMimeType() throws IOException {
    final String expectedMimeType = "application/octet-stream";

    // binary data parsed as byte array
    final Blob fromBytes = createBlob(new ByteArraySource("dummy".getBytes(UTF8)));
    Assert.assertEquals(expectedMimeType, fromBytes.getMimeType());
    Assert.assertTrue(fromBytes.getParameter().isEmpty());

    // the same data parsed as stream
    final ByteArrayInputStream dummyStream = new ByteArrayInputStream("dummy".getBytes(UTF8));
    final Blob fromStream = createBlob(new StreamSource(dummyStream));
    Assert.assertEquals(expectedMimeType, fromStream.getMimeType());
    Assert.assertTrue(fromStream.getParameter().isEmpty());
}
Use of org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource in project stanbol by apache.
Class BlobTest, method testStringWithCustomCharset.
/**
 * Tests that a custom charset given for textual content is reported by the
 * created blob: the mime type must be "text/plain" and the "charset" parameter
 * must hold the name of the charset that was used. The same check is done for
 * content parsed as {@link StringSource}, {@link ByteArraySource} and
 * {@link StreamSource}.
 * @throws IOException
 */
@Test
public void testStringWithCustomCharset() throws IOException {
    String test = "Exámplê";
    Charset ISO8859_4 = Charset.forName("ISO-8859-4");
    //first via a StringSource
    ContentSource cs = new StringSource(test, ISO8859_4, "text/plain");
    Blob blob = createBlob(cs);
    Assert.assertEquals("text/plain", blob.getMimeType());
    Assert.assertTrue(blob.getParameter().containsKey("charset"));
    Assert.assertEquals(ISO8859_4.name(), blob.getParameter().get("charset"));
    //2nd via a ByteArray
    byte[] data = test.getBytes(ISO8859_4);
    cs = new ByteArraySource(data, "text/plain; charset=" + ISO8859_4.name());
    blob = createBlob(cs);
    Assert.assertEquals("text/plain", blob.getMimeType());
    Assert.assertTrue(blob.getParameter().containsKey("charset"));
    Assert.assertEquals(ISO8859_4.name(), blob.getParameter().get("charset"));
    //3rd as Stream
    cs = new StreamSource(new ByteArrayInputStream(data), "text/plain; charset=" + ISO8859_4.name());
    blob = createBlob(cs);
    Assert.assertEquals("text/plain", blob.getMimeType());
    Assert.assertTrue(blob.getParameter().containsKey("charset"));
    Assert.assertEquals(ISO8859_4.name(), blob.getParameter().get("charset"));
    // TODO(review): no case covers a media type whose parameter lacks the
    // "charset=" key (e.g. "text/plain; " + ISO8859_4.name()); add one together
    // with assertions if that input is meant to be supported
}
Use of org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource in project stanbol by apache.
Class ContentSourceTest, method checkDataFromStringSource.
/**
 * Checks that the data returned by a source equal the bytes it was created
 * with and that the data can be requested more than once. Despite its name,
 * the source created by this method is a {@link ByteArraySource}.
 * @throws IOException
 */
@Test
public void checkDataFromStringSource() throws IOException {
    final ContentSource byteSource = new ByteArraySource(DATA);
    final boolean sameBytes = Arrays.equals(DATA, byteSource.getData());
    Assert.assertTrue(sameBytes);
    // requesting the data a second time must not fail
    byteSource.getData();
}
Aggregations