use of org.apache.tika.mime.MediaType in project tika by apache.
the class PackageParserTest method testCoverage.
@Test
public void testCoverage() throws Exception {
    // Guards against commons-compress upgrades: every input-stream archive name
    // reported by ArchiveStreamFactory must resolve to a specific media type, and
    // that media type must be among the types PackageParser says it supports.
    ArchiveStreamFactory streamFactory = new ArchiveStreamFactory(StandardCharsets.UTF_8.name());
    PackageParser parser = new PackageParser();
    ParseContext context = new ParseContext();
    for (String archiveName : streamFactory.getInputStreamArchiveNames()) {
        MediaType mediaType = PackageParser.getMediaType(archiveName);
        // This test treats octet-stream as "no specific type mapped for this name".
        boolean unmapped = mediaType.equals(MediaType.OCTET_STREAM);
        if (unmapped) {
            fail("getting octet-stream for: " + archiveName);
        }
        boolean supported = parser.getSupportedTypes(context).contains(mediaType);
        if (!supported) {
            fail("PackageParser should support: " + mediaType.toString());
        }
    }
}
use of org.apache.tika.mime.MediaType in project tika by apache.
the class SourceCodeParser method getRenderer.
/**
 * Resolves the renderer associated with a MIME type string.
 *
 * The string is parsed into a MediaType, which is used as the key into
 * TYPES_TO_RENDERER; the mapped value is handed to XhtmlRendererFactory.
 *
 * @param mimeType the MIME type string to look up
 * @return the renderer obtained from XhtmlRendererFactory for the mapped type
 * @throws RuntimeException if TYPES_TO_RENDERER has no entry for the parsed type
 */
private Renderer getRenderer(String mimeType) {
    String rendererType = TYPES_TO_RENDERER.get(MediaType.parse(mimeType));
    if (rendererType != null) {
        return XhtmlRendererFactory.getRenderer(rendererType);
    }
    throw new RuntimeException("unparseable content type " + mimeType);
}
use of org.apache.tika.mime.MediaType in project tika by apache.
the class TikaMimeTypes method getMimeTypesHTML.
/**
 * Renders the list returned by getMediaTypes() as an HTML page: an index with
 * one link per top-level type, followed by one section per media type listing
 * its aliases, super type and parser where present.
 *
 * @return the complete HTML document as a string
 */
@GET
@Produces("text/html")
public String getMimeTypesHTML() {
    StringBuffer page = new StringBuffer();
    html.generateHeader(page, "Apache Tika Supported Mime Types");

    List<MediaTypeDetails> allTypes = getMediaTypes();

    // For each top-level type, remember the first full type encountered in
    // list order; it serves as the anchor target for that index entry.
    SortedMap<String, String> anchorBySection = new TreeMap<String, String>();
    for (MediaTypeDetails details : allTypes) {
        String section = details.type.getType();
        if (anchorBySection.containsKey(section)) {
            continue;
        }
        anchorBySection.put(section, details.type.toString());
    }

    // Index of sections, in the sorted key order of the map.
    page.append("<ul>");
    for (String section : anchorBySection.keySet()) {
        String anchor = anchorBySection.get(section);
        page.append("<li><a href=\"#").append(anchor).append("\">");
        page.append(section).append("</a></li>\n");
    }
    page.append("</ul>");

    // One anchored section per media type.
    for (MediaTypeDetails details : allTypes) {
        page.append("<a name=\"").append(details.type).append("\"></a>\n");
        page.append("<h2>").append(details.type).append("</h2>\n");
        for (MediaType alias : details.aliases) {
            page.append("<div>Alias: ").append(alias).append("</div>\n");
        }
        if (details.supertype != null) {
            page.append("<div>Super Type: <a href=\"#").append(details.supertype).append("\">");
            page.append(details.supertype).append("</a></div>\n");
        }
        if (details.parser != null) {
            page.append("<div>Parser: ").append(details.parser).append("</div>\n");
        }
    }

    html.generateFooter(page);
    return page.toString();
}
use of org.apache.tika.mime.MediaType in project tika by apache.
the class RFC822ParserTest method testExtractAttachments.
@Test
public void testExtractAttachments() throws Exception {
    ContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();
    Parser parser = new RFC822Parser();
    ParseContext context = new ParseContext();

    // First pass: plain parse, no embedded-document extractor registered.
    try (InputStream email = getStream("test-documents/testEmailWithPNGAtt.eml")) {
        parser.parse(email, handler, metadata, context);
    }
    // Check we got the metadata
    assertEquals("Tika Test <XXXX@apache.org>", metadata.get(Metadata.MESSAGE_FROM));
    assertEquals("Test Attachment Email", metadata.get(TikaCoreProperties.TITLE));

    // Second pass: register an extractor that records the detected type and
    // the extracted text of every embedded part it is handed.
    final Detector detector = new DefaultDetector();
    final Parser extParser = new AutoDetectParser();
    final List<MediaType> seenTypes = new ArrayList<>();
    final List<String> seenText = new ArrayList<>();
    EmbeddedDocumentExtractor extractor = new EmbeddedDocumentExtractor() {
        @Override
        public boolean shouldParseEmbedded(Metadata embeddedMetadata) {
            // Accept every embedded document.
            return true;
        }

        @Override
        public void parseEmbedded(InputStream embeddedStream, ContentHandler parentHandler,
                Metadata embeddedMetadata, boolean outputHtml) throws SAXException, IOException {
            MediaType detected = detector.detect(embeddedStream, embeddedMetadata);
            seenTypes.add(detected);
            // Each part is parsed into its own handler; the parent handler is not used.
            ContentHandler partHandler = new BodyContentHandler();
            try {
                extParser.parse(embeddedStream, partHandler, embeddedMetadata, new ParseContext());
            } catch (TikaException e) {
                throw new RuntimeException(e);
            }
            seenText.add(partHandler.toString());
        }
    };
    context.set(EmbeddedDocumentExtractor.class, extractor);
    try (InputStream email = getStream("test-documents/testEmailWithPNGAtt.eml")) {
        parser.parse(email, handler, metadata, context);
    }
    // Check we got the metadata
    assertEquals("Tika Test <XXXX@apache.org>", metadata.get(Metadata.MESSAGE_FROM));
    assertEquals("Test Attachment Email", metadata.get(TikaCoreProperties.TITLE));

    // Check attachments: two parts expected, a text body and a PNG image.
    assertEquals(2, seenTypes.size());
    assertEquals(2, seenText.size());
    assertEquals("text/plain", seenTypes.get(0).toString());
    assertEquals("image/png", seenTypes.get(1).toString());
    assertEquals("This email has a PNG attachment included in it\n\n", seenText.get(0));
}
use of org.apache.tika.mime.MediaType in project winery by eclipse.
the class BackendUtils method getMimeType.
/**
 * Detects the media type of a stream, passing the file name to the detector
 * as the resource-name metadata entry.
 *
 * This method does not call mark/reset on the stream itself; restoring the
 * stream position is left to the detector obtained from AutoDetectParser
 * (NOTE(review): the Tika Detector contract documents mark/reset behaviour —
 * confirm for the Tika version in use).
 *
 * @param bis the stream to inspect
 * @param fn the fileName of the file belonging to the stream
 * @return the media type reported by the detector
 * @throws IOException if the detector fails while reading the stream
 */
public static MediaType getMimeType(BufferedInputStream bis, String fn) throws IOException {
    Detector detector = new AutoDetectParser().getDetector();
    Metadata metadata = new Metadata();
    metadata.add(Metadata.RESOURCE_NAME_KEY, fn);
    return detector.detect(bis, metadata);
}
Aggregations