use of org.apache.tika.io.TikaInputStream in project tika by apache.
the class FictionBookParserTest method testEmbedded.
/**
 * Checks that the container extractor accepts the FictionBook sample
 * document and that extraction hands two filenames to the tracking handler.
 */
@Test
public void testEmbedded() throws Exception {
    // The TikaInputStream wrapper is declared as a resource of its own so it
    // is closed together with the underlying stream; previously only the raw
    // resource stream was closed and the wrapper was left open.
    try (InputStream input = FictionBookParserTest.class.getResourceAsStream("/test-documents/test.fb2");
         TikaInputStream stream = TikaInputStream.get(input)) {
        ContainerExtractor extractor = new ParserContainerExtractor();
        assertEquals(true, extractor.isSupported(stream));
        // Process it
        TrackingHandler handler = new TrackingHandler();
        extractor.extract(stream, null, handler);
        assertEquals(2, handler.filenames.size());
    }
}
use of org.apache.tika.io.TikaInputStream in project tika by apache.
the class SQLite3ParserTest method testInputStreamReset.
// Regression check: consuming the stream a second time must not
// multiply the number of attachments that get collected.
@Test
public void testInputStreamReset() throws Exception {
    // Expected total is 8 embedded documents:
    //  - 4 word files (two docs and two docxs)
    //  - 4 png files (the same image embedded in each doc and docx)
    ParserContainerExtractor extractor = new ParserContainerExtractor();
    InputStreamResettingHandler resettingHandler = new InputStreamResettingHandler();
    Metadata resourceMetadata = new Metadata();
    resourceMetadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);

    // Resources are closed in reverse declaration order: the Tika wrapper
    // first, then the raw resource stream.
    try (InputStream rawStream = getResourceAsStream(TEST_FILE1);
         TikaInputStream wrapped = TikaInputStream.get(rawStream)) {
        extractor.extract(wrapped, extractor, resettingHandler);
        rawStream.reset();
    }

    assertEquals(8, resettingHandler.bytes.size());
}
use of org.apache.tika.io.TikaInputStream in project alliance by codice.
the class GetRelatedFilesRequestImpl method storeThumbnail.
/**
 * Uploads the metacard's thumbnail with an HTTP PUT and returns the file
 * name it was stored under.
 *
 * <p>The media type is detected from the thumbnail bytes; the file name is
 * the metacard id followed by {@code -THUMBNAIL} and the extension reported
 * for the detected MIME type.
 *
 * @param metacard the metacard supplying the id, title and thumbnail bytes
 * @return the stored file name, or {@code null} when the response status is
 *         not one of SC_OK, SC_CREATED, SC_ACCEPTED or SC_NO_CONTENT
 * @throws IOException if detection or the HTTP request fails
 * @throws MimeTypeException if the detected media type cannot be resolved
 *         in the MIME repository
 */
private String storeThumbnail(Metacard metacard) throws IOException, MimeTypeException {
    String id = metacard.getId();
    byte[] thumbnailBytes = metacard.getThumbnail();

    // The Tika stream is needed only for type detection; close it as soon as
    // detection is done instead of leaving it open for the rest of the method.
    MediaType mediaType;
    try (TikaInputStream tis = TikaInputStream.get(thumbnailBytes)) {
        mediaType = DETECTOR.detect(tis, new Metadata());
    }

    MimeType mimeType = TikaConfig.getDefaultConfig().getMimeRepository().forName(mediaType.toString());
    String fileName = id + "-THUMBNAIL" + mimeType.getExtension();
    String urlStr = DEFAULT_PROTOCOL + "://" + location.host_name + (port == null ? "" : ":" + port) + location.path_name + "/" + fileName;
    LOGGER.debug("Storing thumbnail for {} at location: {}", metacard.getTitle(), urlStr);

    HttpPut httpPut = new HttpPut(urlStr);
    HttpEntity entity = new ByteArrayEntity(thumbnailBytes);
    httpPut.setEntity(entity);
    Header contentTypeHeader = new BasicHeader("Content-Type", mediaType.toString());
    httpPut.addHeader(contentTypeHeader);

    HttpResponse response = httpClient.execute(httpPut);
    int statusCode = response.getStatusLine().getStatusCode();
    if (!(statusCode == HttpStatus.SC_OK || statusCode == HttpStatus.SC_CREATED || statusCode == HttpStatus.SC_ACCEPTED || statusCode == HttpStatus.SC_NO_CONTENT)) {
        // Signal the failed upload to the caller with a null file name.
        fileName = null;
        LOGGER.debug("Unable to PUT file: code: {}, status: {}", statusCode, response.getStatusLine().getReasonPhrase());
    }
    return fileName;
}
use of org.apache.tika.io.TikaInputStream in project spring-boot-quick by vector4wang.
the class TikaUtil method handleStreamMetaDate.
/**
 * Parses the given document bytes and returns the metadata collected by the
 * parser as a name-to-value map.
 *
 * <p>For each metadata name the value returned by {@code Metadata.get(name)}
 * is stored.
 *
 * @param file the raw document content
 * @return a mutable map of metadata names to values
 * @throws Exception if parsing fails
 */
public static Map<String, String> handleStreamMetaDate(byte[] file) throws Exception {
    Map<String, String> meta = new HashMap<>();
    Metadata md = new Metadata();

    // Close the Tika stream once parsing is finished; it was previously
    // left open after the parse call.
    try (TikaInputStream input = TikaInputStream.get(file, md)) {
        StringWriter textBuffer = new StringWriter();
        ContentHandler handler = new TeeContentHandler(getTextContentHandler(textBuffer));
        parser.parse(input, handler, md, context);
    }

    String[] names = md.names();
    // NOTE(review): the sort only fixes insertion order; a HashMap does not
    // preserve it, so the returned map is not ordered by name.
    Arrays.sort(names);
    for (String name : names) {
        meta.put(name, md.get(name));
    }
    return meta;
}
use of org.apache.tika.io.TikaInputStream in project sling by apache.
the class ContentAwareMimeTypeServiceImpl method getMimeType.
/**
 * Determines the MIME type for a resource, using its content when available.
 *
 * <p>With no content, the lookup is delegated to the name-based MIME type
 * service. Otherwise the content is wrapped in a Tika stream and passed to
 * the detector along with the file name as the resource name hint. The
 * wrapper is not closed here.
 *
 * @param filename the resource name, used as the lookup key or detection hint
 * @param content the resource content, or {@code null} for name-only lookup
 * @return the detected media type as a string, or the name-based result when
 *         {@code content} is {@code null}
 * @throws IOException if detection fails while reading the content
 * @throws IllegalArgumentException if {@code content} does not support mark/reset
 */
public String getMimeType(String filename, InputStream content) throws IOException, IllegalArgumentException {
    if (content == null) {
        return mimeTypeService.getMimeType(filename);
    } else if (!content.markSupported()) {
        throw new IllegalArgumentException("Supplied InputStream does not support mark/reset");
    }

    final Metadata hints = new Metadata();
    hints.set(Metadata.RESOURCE_NAME_KEY, filename);
    return detector.detect(TikaInputStream.get(content), hints).toString();
}
Aggregations