use of org.apache.tika.config.TikaConfig in project tika by apache.
the class DumpTikaConfigExampleTest method testDump.
/**
 * Serializes the default Tika configuration to {@code configFile} once for
 * every charset / serializer-mode combination, then reloads the dump and
 * checks that it yields a usable {@link TikaConfig}.
 *
 * @throws Exception if serializing, reloading or building a parser fails
 */
@Test
public void testDump() throws Exception {
    // NOTE(review): 'ex' is never used below; the instantiation is kept as-is
    // in case it is intentional -- confirm whether it can be removed.
    DumpTikaConfigExample ex = new DumpTikaConfigExample();
    for (Charset charset : new Charset[] { UTF_8, UTF_16LE }) {
        for (TikaConfigSerializer.Mode mode : TikaConfigSerializer.Mode.values()) {
            // try-with-resources releases the file handle even when serialize()
            // throws; close() flushes, so the file is complete before re-reading.
            try (Writer writer = new OutputStreamWriter(new FileOutputStream(configFile), charset)) {
                TikaConfigSerializer.serialize(TikaConfig.getDefaultConfig(), mode, writer, charset);
            }

            TikaConfig c = new TikaConfig(configFile);
            assertTrue(c.getParser().toString(), c.getParser() instanceof CompositeParser);
            assertTrue(c.getDetector().toString(), c.getDetector() instanceof CompositeDetector);

            CompositeParser p = (CompositeParser) c.getParser();
            assertTrue("enough parsers?", p.getParsers().size() > 130);

            CompositeDetector d = (CompositeDetector) c.getDetector();
            assertTrue("enough detectors?", d.getDetectors().size() > 3);

            // just try to load it into autodetect to make sure no errors are thrown
            Parser auto = new AutoDetectParser(c);
            assertNotNull(auto);
        }
    }
}
use of org.apache.tika.config.TikaConfig in project tika by apache.
the class TikaServerCli method main.
/**
 * Command-line entry point: parses the options, builds the Tika configuration
 * and the JAX-RS resources/providers, and starts the server.
 *
 * <p>Options read here: {@code help}, {@code host}, {@code port},
 * {@code includeStack}, {@code log}, {@code cors}, {@code config},
 * {@code digest}, {@code dml}, {@code enableFileUrl} and
 * {@code enableUnsecureFeatures}.
 *
 * <p>Any exception raised during start-up is logged and the JVM exits with
 * status {@code -1}. The same status is used after printing help and when
 * {@code enableFileUrl} is given without {@code enableUnsecureFeatures}.
 *
 * @param args raw command-line arguments
 */
public static void main(String[] args) {
    LOG.info("Starting {} server", new Tika());
    try {
        Options options = getOptions();
        CommandLineParser cliParser = new GnuParser();
        CommandLine line = cliParser.parse(options, args);

        if (line.hasOption("help")) {
            HelpFormatter helpFormatter = new HelpFormatter();
            helpFormatter.printHelp("tikaserver", options);
            System.exit(-1);
        }

        String host = DEFAULT_HOST;
        if (line.hasOption("host")) {
            host = line.getOptionValue("host");
            // "*" is shorthand for binding to every interface.
            if ("*".equals(host)) {
                host = "0.0.0.0";
            }
        }

        int port = DEFAULT_PORT;
        if (line.hasOption("port")) {
            // parseInt yields the primitive directly (no boxing round-trip).
            port = Integer.parseInt(line.getOptionValue("port"));
        }

        boolean returnStackTrace = line.hasOption("includeStack");

        TikaLoggingFilter logFilter = null;
        if (line.hasOption("log")) {
            String logLevel = line.getOptionValue("log");
            if (LOG_LEVELS.contains(logLevel)) {
                boolean isInfoLevel = "info".equals(logLevel);
                logFilter = new TikaLoggingFilter(isInfoLevel);
            } else {
                LOG.info("Unsupported request URI log level: {}", logLevel);
            }
        }

        CrossOriginResourceSharingFilter corsFilter = null;
        if (line.hasOption("cors")) {
            corsFilter = new CrossOriginResourceSharingFilter();
            String corsOrigin = line.getOptionValue("cors");
            List<String> origins = new ArrayList<>();
            // Empty list allows all origins.
            if (!corsOrigin.equals("*")) {
                origins.add(corsOrigin);
            }
            corsFilter.setAllowOrigins(origins);
        }

        // The Tika Configuration to use throughout
        TikaConfig tika;
        if (line.hasOption("config")) {
            String configFilePath = line.getOptionValue("config");
            LOG.info("Using custom config: {}", configFilePath);
            tika = new TikaConfig(configFilePath);
        } else {
            tika = TikaConfig.getDefaultConfig();
        }

        DigestingParser.Digester digester = null;
        if (line.hasOption("digest")) {
            int digestMarkLimit = DEFAULT_DIGEST_MARK_LIMIT;
            if (line.hasOption("dml")) {
                String dmlS = line.getOptionValue("dml");
                try {
                    digestMarkLimit = Integer.parseInt(dmlS);
                } catch (NumberFormatException e) {
                    // Keep the original exception as the cause so the logged
                    // stack trace shows where parsing failed.
                    throw new RuntimeException("Must have parseable int after digestMarkLimit(dml): " + dmlS, e);
                }
            }
            digester = new CommonsDigester(digestMarkLimit, CommonsDigester.parse(line.getOptionValue("digest")));
        }

        // File URLs are only honoured when the caller explicitly accepts the risk.
        if (line.hasOption("enableFileUrl") && !line.hasOption("enableUnsecureFeatures")) {
            System.err.println("If you want to enable fileUrl, you must also acknowledge the security risks\n" + "by including --enableUnsecureFeatures. See CVE-2015-3271.");
            System.exit(-1);
        }
        InputStreamFactory inputStreamFactory = null;
        if (line.hasOption("enableFileUrl") && line.hasOption("enableUnsecureFeatures")) {
            inputStreamFactory = new URLEnabledInputStreamFactory();
            System.out.println(FILE_URL_WARNING);
        } else {
            inputStreamFactory = new DefaultInputStreamFactory();
        }

        TikaResource.init(tika, digester, inputStreamFactory);
        JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();

        List<ResourceProvider> rCoreProviders = new ArrayList<>();
        rCoreProviders.add(new SingletonResourceProvider(new MetadataResource()));
        rCoreProviders.add(new SingletonResourceProvider(new RecursiveMetadataResource()));
        rCoreProviders.add(new SingletonResourceProvider(new DetectorResource()));
        rCoreProviders.add(new SingletonResourceProvider(new LanguageResource()));
        rCoreProviders.add(new SingletonResourceProvider(new TranslateResource()));
        rCoreProviders.add(new SingletonResourceProvider(new TikaResource()));
        rCoreProviders.add(new SingletonResourceProvider(new UnpackerResource()));
        rCoreProviders.add(new SingletonResourceProvider(new TikaMimeTypes()));
        rCoreProviders.add(new SingletonResourceProvider(new TikaDetectors()));
        rCoreProviders.add(new SingletonResourceProvider(new TikaParsers()));
        rCoreProviders.add(new SingletonResourceProvider(new TikaVersion()));

        // The welcome resource is handed the core list and registered in addition to it.
        List<ResourceProvider> rAllProviders = new ArrayList<>(rCoreProviders);
        rAllProviders.add(new SingletonResourceProvider(new TikaWelcome(rCoreProviders)));
        sf.setResourceProviders(rAllProviders);

        List<Object> providers = new ArrayList<>();
        providers.add(new TarWriter());
        providers.add(new ZipWriter());
        providers.add(new CSVMessageBodyWriter());
        providers.add(new MetadataListMessageBodyWriter());
        providers.add(new JSONMessageBodyWriter());
        providers.add(new XMPMessageBodyWriter());
        providers.add(new TextMessageBodyWriter());
        providers.add(new TikaServerParseExceptionMapper(returnStackTrace));
        if (logFilter != null) {
            providers.add(logFilter);
        }
        if (corsFilter != null) {
            providers.add(corsFilter);
        }
        sf.setProviders(providers);

        String url = "http://" + host + ":" + port + "/";
        sf.setAddress(url);

        BindingFactoryManager manager = sf.getBus().getExtension(BindingFactoryManager.class);
        JAXRSBindingFactory factory = new JAXRSBindingFactory();
        factory.setBus(sf.getBus());
        manager.registerBindingFactory(JAXRSBindingFactory.JAXRS_BINDING_ID, factory);

        sf.create();
        LOG.info("Started Apache Tika server at {}", url);
    } catch (Exception ex) {
        LOG.error("Can't start", ex);
        System.exit(-1);
    }
}
use of org.apache.tika.config.TikaConfig in project tika by apache.
the class RTFParserTest method testConfig.
/**
 * Loads the RTF-specific {@code tika-config.xml} from the classpath, parses
 * {@code testBinControlWord.rtf} with it, and expects the single resulting
 * metadata entry to record a {@code TikaMemoryLimitException} for the
 * embedded stream.
 *
 * @throws Exception if the config cannot be loaded or parsing fails
 */
@Test
public void testConfig() throws Exception {
    //test that memory allocation of the bin element is limited
    //via the config file. Unfortunately, this test file's bin embedding contains 10 bytes
    //so we had to set the config to 0.
    TikaConfig tikaConfig;
    // try-with-resources closes the config stream whether or not loading
    // succeeds; a null resource is tolerated and still fails the assertion.
    try (InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/rtf/tika-config.xml")) {
        assertNotNull(is);
        tikaConfig = new TikaConfig(is);
    }
    Parser p = new AutoDetectParser(tikaConfig);
    List<Metadata> metadataList = getRecursiveMetadata("testBinControlWord.rtf", p);
    assertEquals(1, metadataList.size());
    assertContains("TikaMemoryLimitException", metadataList.get(0).get(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM));
}
use of org.apache.tika.config.TikaConfig in project tika by apache.
the class ConcurrentUtilsTest method testExecuteExecutor.
/**
 * Puts the default config's {@link ExecutorService} into a
 * {@link ParseContext}, submits a no-op task through
 * {@code ConcurrentUtils.execute}, and checks that the returned future
 * completes with {@code null}.
 *
 * @throws Exception if waiting on the future fails
 */
@Test
public void testExecuteExecutor() throws Exception {
    TikaConfig config = TikaConfig.getDefaultConfig();
    ParseContext context = new ParseContext();
    context.set(ExecutorService.class, config.getExecutorService());
    // Wildcard instead of the raw type: the result type is irrelevant here,
    // only that get() returns null once the task has finished.
    Future<?> result = ConcurrentUtils.execute(context, new Runnable() {
        @Override
        public void run() {
            //Do nothing
        }
    });
    assertNull(result.get());
}
use of org.apache.tika.config.TikaConfig in project tika by apache.
the class DL4JInceptionV3NetTest method recognise.
/**
 * Builds a {@link Tika} facade from {@code dl4j-inception3-config.xml},
 * parses {@code cat.jpg}, and passes when at least one {@code OBJECT}
 * metadata value contains {@code "_cat"}.
 *
 * <p>The test returns silently when loading the config fails with a
 * "Connection refused" message; any other config failure is rethrown.
 *
 * @throws Exception if config loading or parsing fails for another reason
 */
@Test
@Ignore("until we can make this more robust across platforms")
public void recognise() throws Exception {
    TikaConfig config;
    try {
        config = new TikaConfig(getClass().getResourceAsStream("dl4j-inception3-config.xml"));
    } catch (TikaConfigException e) {
        String message = e.getMessage();
        boolean connectionRefused = message != null && message.contains("Connection refused");
        if (!connectionRefused) {
            throw e;
        }
        // NOTE(review): presumably the config needs a network resource -- confirm.
        return;
    }

    Tika facade = new Tika(config);
    Metadata metadata = new Metadata();
    facade.parse(getClass().getResourceAsStream("cat.jpg"), metadata);

    // Every label is inspected; the flag stays set once any of them matches.
    boolean catSeen = false;
    for (String label : metadata.getValues("OBJECT")) {
        catSeen |= label.contains("_cat");
    }
    assertTrue(catSeen);
}
Aggregations