use of org.apache.tika.detect.CompositeDetector in project tika by apache.
the class TikaDetectorConfigTest method testPSTDetectionWithoutZipDetector.
/**
 * TIKA-1708 - If the Zip detector is disabled, either explicitly,
 * or via giving a list of detectors that it isn't part of, ensure
 * that detection of PST files still works.
 *
 * @throws Exception if a config cannot be loaded or detection fails
 */
@Test
public void testPSTDetectionWithoutZipDetector() throws Exception {
    // Check the one with an exclude
    TikaConfig configWX = getConfig("TIKA-1708-detector-default.xml");
    assertNotNull(configWX.getParser());
    assertNotNull(configWX.getDetector());
    CompositeDetector detectorWX = (CompositeDetector) configWX.getDetector();
    // Check it has the POIFS one, but not the zip one
    assertDetectors(detectorWX, true, false);

    // Check the one with an explicit list
    TikaConfig configCL = getConfig("TIKA-1708-detector-composite.xml");
    assertNotNull(configCL.getParser());
    assertNotNull(configCL.getDetector());
    CompositeDetector detectorCL = (CompositeDetector) configCL.getDetector();
    assertEquals(2, detectorCL.getDetectors().size());
    // Check it also has the POIFS one, but not the zip one
    assertDetectors(detectorCL, true, false);

    // Check that both configs have a media type registry with entries
    int typesWX = configWX.getMediaTypeRegistry().getTypes().size();
    int typesCL = configCL.getMediaTypeRegistry().getTypes().size();
    assertTrue("Not enough mime types: " + typesWX, typesWX > 100);
    assertTrue("Not enough mime types: " + typesCL, typesCL > 100);

    // Now check they detect PST files correctly. The stream is opened in
    // try-with-resources so it is closed even when an assertion fails;
    // the same stream instance is handed to both detectors in turn.
    try (TikaInputStream stream = TikaInputStream.get(getResourceAsFile("/test-documents/testPST.pst"))) {
        assertEquals(OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE, detectorWX.detect(stream, new Metadata()));
        assertEquals(OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE, detectorCL.detect(stream, new Metadata()));
    }
}
use of org.apache.tika.detect.CompositeDetector in project tika by apache.
the class TikaCLI method displayDetector.
/**
 * Prints the class name of the given detector to standard output,
 * prefixed by {@code indent(i)}. When the detector is a
 * {@link CompositeDetector}, the line is suffixed with
 * " (Composite Detector):" and each child detector is printed
 * recursively with the indent argument increased by two.
 *
 * @param d the detector to print
 * @param i the value passed to {@code indent} for this line
 */
private void displayDetector(Detector d, int i) {
    String className = d.getClass().getName();

    // Plain detector: a single line, nothing to recurse into.
    if (!(d instanceof CompositeDetector)) {
        System.out.println(indent(i) + className);
        return;
    }

    System.out.println(indent(i) + className + " (Composite Detector):");
    for (Detector child : ((CompositeDetector) d).getDetectors()) {
        displayDetector(child, i + 2);
    }
}
use of org.apache.tika.detect.CompositeDetector in project tika by apache.
the class DumpTikaConfigExampleTest method testDump.
/**
 * For every combination of the tested charsets and every
 * {@link TikaConfigSerializer.Mode}, serializes the default config to
 * {@code configFile}, reloads it, and checks that the reloaded config
 * exposes a composite parser and a composite detector with the
 * expected minimum number of children.
 *
 * @throws Exception if writing, reloading or parser construction fails
 */
@Test
public void testDump() throws Exception {
    // Instantiated only; the instance is not otherwise used by this test.
    DumpTikaConfigExample ex = new DumpTikaConfigExample();
    for (Charset charset : new Charset[] { UTF_8, UTF_16LE }) {
        for (TikaConfigSerializer.Mode mode : TikaConfigSerializer.Mode.values()) {
            // try-with-resources guarantees the file handle is released even
            // if serialization throws; close() flushes the writer first.
            try (Writer writer = new OutputStreamWriter(new FileOutputStream(configFile), charset)) {
                TikaConfigSerializer.serialize(TikaConfig.getDefaultConfig(), mode, writer, charset);
            }

            TikaConfig c = new TikaConfig(configFile);
            assertTrue(c.getParser().toString(), c.getParser() instanceof CompositeParser);
            assertTrue(c.getDetector().toString(), c.getDetector() instanceof CompositeDetector);

            CompositeParser p = (CompositeParser) c.getParser();
            assertTrue("enough parsers?", p.getParsers().size() > 130);

            CompositeDetector d = (CompositeDetector) c.getDetector();
            assertTrue("enough detectors?", d.getDetectors().size() > 3);

            //just try to load it into autodetect to make sure no errors are thrown
            Parser auto = new AutoDetectParser(c);
            assertNotNull(auto);
        }
    }
}
use of org.apache.tika.detect.CompositeDetector in project tika by apache.
the class TikaDetectors method detectorAsHTML.
/**
 * Appends an HTML fragment describing the given detector to the buffer:
 * a heading at the given level holding the part of the class name after
 * the last '.', followed by a paragraph with the full class name. For a
 * {@link CompositeDetector}, a marker paragraph is added and every child
 * detector is rendered recursively one heading level deeper.
 *
 * @param d     the detector to describe
 * @param html  the buffer the markup is appended to
 * @param level the heading level number used in the {@code <hN>} tag
 */
private void detectorAsHTML(Detector d, StringBuffer html, int level) {
    // Opening heading tag, e.g. <h2>
    html.append("<h").append(level).append(">");

    String className = d.getClass().getName();
    int lastDot = className.lastIndexOf('.');
    // lastIndexOf yields -1 when there is no '.', so the whole name is kept.
    html.append(className.substring(lastDot + 1));

    html.append("</h").append(level).append(">");
    html.append("<p>Class: ").append(className).append("</p>");

    if (!(d instanceof CompositeDetector)) {
        return;
    }

    html.append("<p>Composite Detector</p>");
    CompositeDetector composite = (CompositeDetector) d;
    for (Detector child : composite.getDetectors()) {
        detectorAsHTML(child, html, level + 1);
    }
}
use of org.apache.tika.detect.CompositeDetector in project tika by apache.
the class TikaDetectors method renderDetector.
/**
 * Appends a plain-text line for the given detector to the buffer: one
 * padding string per indent step, then the detector's class name. A
 * {@link CompositeDetector} line is suffixed with
 * " (Composite Detector):" and followed by its child detectors, each
 * rendered recursively with the indent increased by one.
 *
 * @param d      the detector to render
 * @param text   the buffer the output is appended to
 * @param indent the number of padding strings written before the name
 */
private void renderDetector(Detector d, StringBuffer text, int indent) {
    String className = d.getClass().getName();

    // Left padding: one append per indent step.
    int remaining = indent;
    while (remaining > 0) {
        text.append(" ");
        remaining--;
    }
    text.append(className);

    // Leaf detector: terminate the line and stop.
    if (!(d instanceof CompositeDetector)) {
        text.append("\n");
        return;
    }

    text.append(" (Composite Detector):\n");
    for (Detector child : ((CompositeDetector) d).getDetectors()) {
        renderDetector(child, text, indent + 1);
    }
}
Aggregations