Use of org.apache.tika.parser.utils.CommonsDigester in the Apache Tika project.
In class DigestingParserTest, method testReset.
/**
 * Parses test_recursive_embedded.docx through a {@link DigestingParser} whose
 * MD5 digester is built with a mark limit of 100, then checks the MD5 value
 * stored in the metadata.
 */
@Test
public void testReset() throws Exception {
    final Metadata metadata = new Metadata();
    final CommonsDigester md5Digester =
            new CommonsDigester(100, CommonsDigester.DigestAlgorithm.MD5);

    // The returned XML is not inspected; only the metadata handed to getXML is checked.
    getXML("test_recursive_embedded.docx", new DigestingParser(p, md5Digester), metadata);

    final String actualMD5 = metadata.get(P + "MD5");
    assertEquals("59f626e09a8c16ab6dbc2800c685f772", actualMD5);
}
Use of org.apache.tika.parser.utils.CommonsDigester in the Apache Tika project.
In class DigestingParserTest, method testBasic.
/**
 * Checks the digests recorded for test_recursive_embedded.docx in two ways:
 * first with a separate digester for every {@code DigestAlgorithm} value, then
 * with one digester configured from a comma-separated algorithm list.
 */
@Test
public void testBasic() throws Exception {
    final String testFile = "test_recursive_embedded.docx";

    // Expected hex digest of the test file, keyed by algorithm.
    Map<CommonsDigester.DigestAlgorithm, String> expectedDigests = new HashMap<>();
    expectedDigests.put(CommonsDigester.DigestAlgorithm.MD2,
            "d768c8e27b0b52c6eaabfaa7122d1d4f");
    expectedDigests.put(CommonsDigester.DigestAlgorithm.MD5,
            "59f626e09a8c16ab6dbc2800c685f772");
    expectedDigests.put(CommonsDigester.DigestAlgorithm.SHA1,
            "7a1f001d163ac90d8ea54c050faf5a38079788a6");
    expectedDigests.put(CommonsDigester.DigestAlgorithm.SHA256,
            "c4b7fab030a8b6a9d6691f6699ac8e6f"
                    + "82bc53764a0f1430d134ae3b70c32654");
    expectedDigests.put(CommonsDigester.DigestAlgorithm.SHA384,
            "ebe368b9326fef44408290724d187553"
                    + "8b8a6923fdf251ddab72c6e4b5d54160"
                    + "9db917ba4260d1767995a844d8d654df");
    expectedDigests.put(CommonsDigester.DigestAlgorithm.SHA512,
            "ee46d973ee1852c018580c242955974d"
                    + "da4c21f36b54d7acd06fcf68e974663b"
                    + "fed1d256875be58d22beacf178154cc3"
                    + "a1178cb73443deaa53aa0840324708bb");

    // One parse per algorithm, each with a fresh Metadata object.
    for (CommonsDigester.DigestAlgorithm algorithm : CommonsDigester.DigestAlgorithm.values()) {
        Metadata singleAlgoMetadata = new Metadata();
        DigestingParser singleAlgoParser =
                new DigestingParser(p, new CommonsDigester(UNLIMITED, algorithm));
        getXML(testFile, singleAlgoParser, singleAlgoMetadata);

        String label = algorithm.toString();
        assertEquals(label, expectedDigests.get(algorithm), singleAlgoMetadata.get(P + label));
    }

    // A single parse with the algorithms taken from a comma-separated string.
    CommonsDigester.DigestAlgorithm[] requested =
            CommonsDigester.parse("md5,sha256,sha384,sha512");
    Metadata combinedMetadata = new Metadata();
    getXML(testFile,
            new DigestingParser(p, new CommonsDigester(UNLIMITED, requested)),
            combinedMetadata);

    CommonsDigester.DigestAlgorithm[] shouldBePresent = {
            CommonsDigester.DigestAlgorithm.MD5,
            CommonsDigester.DigestAlgorithm.SHA256,
            CommonsDigester.DigestAlgorithm.SHA384,
            CommonsDigester.DigestAlgorithm.SHA512
    };
    for (CommonsDigester.DigestAlgorithm algorithm : shouldBePresent) {
        String label = algorithm.toString();
        assertEquals(label, expectedDigests.get(algorithm), combinedMetadata.get(P + label));
    }

    // Algorithms that were not in the list must have no metadata entry.
    assertNull(combinedMetadata.get(P + CommonsDigester.DigestAlgorithm.MD2.toString()));
    assertNull(combinedMetadata.get(P + CommonsDigester.DigestAlgorithm.SHA1.toString()));
}
Use of org.apache.tika.parser.utils.CommonsDigester in the Apache Tika project.
In class CXFTestBase, method setUp.
/**
 * Initialises {@link TikaResource} with the default Tika configuration and an
 * MD5 digester, then builds and starts a JAX-RS server at {@code endPoint + "/"}.
 */
@Before
public void setUp() {
    this.tika = TikaConfig.getDefaultConfig();

    final CommonsDigester md5Digester =
            new CommonsDigester(DIGESTER_READ_LIMIT, CommonsDigester.DigestAlgorithm.MD5);
    TikaResource.init(tika, md5Digester, new DefaultInputStreamFactory());

    final JAXRSServerFactoryBean serverFactory = new JAXRSServerFactoryBean();
    // Resources and providers are registered by the setUpResources/setUpProviders hooks.
    setUpResources(serverFactory);
    setUpProviders(serverFactory);
    serverFactory.setAddress(endPoint + "/");

    // Register a JAX-RS binding factory on the server factory's bus before creating the server.
    final BindingFactoryManager bindingManager =
            serverFactory.getBus().getExtension(BindingFactoryManager.class);
    final JAXRSBindingFactory bindingFactory = new JAXRSBindingFactory();
    bindingFactory.setBus(serverFactory.getBus());
    bindingManager.registerBindingFactory(JAXRSBindingFactory.JAXRS_BINDING_ID, bindingFactory);

    server = serverFactory.create();
}
Use of org.apache.tika.parser.utils.CommonsDigester in the Apache Tika project.
In class TikaServerCli, method main.
/**
 * Command-line entry point for the Tika server.
 *
 * <p>Parses the command-line options, builds the Tika configuration, the optional
 * digester, the logging/CORS filters and the input stream factory, then registers
 * the resources and providers on a JAX-RS server and starts it.
 *
 * <p>Any exception raised during start-up is logged and the JVM exits with
 * status -1. Requesting help, or passing {@code enableFileUrl} without
 * {@code enableUnsecureFeatures}, also exits with status -1.
 *
 * @param args the raw command-line arguments
 */
public static void main(String[] args) {
    LOG.info("Starting {} server", new Tika());
    try {
        Options options = getOptions();
        CommandLineParser cliParser = new GnuParser();
        CommandLine line = cliParser.parse(options, args);

        if (line.hasOption("help")) {
            HelpFormatter helpFormatter = new HelpFormatter();
            helpFormatter.printHelp("tikaserver", options);
            System.exit(-1);
        }

        String host = DEFAULT_HOST;
        if (line.hasOption("host")) {
            host = line.getOptionValue("host");
            // "*" is shorthand for binding to all interfaces.
            if ("*".equals(host)) {
                host = "0.0.0.0";
            }
        }

        int port = DEFAULT_PORT;
        if (line.hasOption("port")) {
            // parseInt yields the primitive directly; no boxing round-trip.
            port = Integer.parseInt(line.getOptionValue("port"));
        }

        boolean returnStackTrace = line.hasOption("includeStack");

        TikaLoggingFilter logFilter = null;
        if (line.hasOption("log")) {
            String logLevel = line.getOptionValue("log");
            if (LOG_LEVELS.contains(logLevel)) {
                boolean isInfoLevel = "info".equals(logLevel);
                logFilter = new TikaLoggingFilter(isInfoLevel);
            } else {
                LOG.info("Unsupported request URI log level: {}", logLevel);
            }
        }

        CrossOriginResourceSharingFilter corsFilter = null;
        if (line.hasOption("cors")) {
            corsFilter = new CrossOriginResourceSharingFilter();
            String url = line.getOptionValue("cors");
            List<String> origins = new ArrayList<>();
            // Empty list allows all origins.
            if (!url.equals("*")) {
                origins.add(url);
            }
            corsFilter.setAllowOrigins(origins);
        }

        // The Tika Configuration to use throughout
        TikaConfig tika;
        if (line.hasOption("config")) {
            String configFilePath = line.getOptionValue("config");
            LOG.info("Using custom config: {}", configFilePath);
            tika = new TikaConfig(configFilePath);
        } else {
            tika = TikaConfig.getDefaultConfig();
        }

        // The digester stays null unless digesting was requested on the command line.
        DigestingParser.Digester digester = null;
        if (line.hasOption("digest")) {
            int digestMarkLimit = DEFAULT_DIGEST_MARK_LIMIT;
            if (line.hasOption("dml")) {
                String dmlS = line.getOptionValue("dml");
                try {
                    digestMarkLimit = Integer.parseInt(dmlS);
                } catch (NumberFormatException e) {
                    // Keep the original exception as the cause so the logged stack trace is complete.
                    throw new RuntimeException("Must have parseable int after digestMarkLimit(dml): " + dmlS, e);
                }
            }
            digester = new CommonsDigester(digestMarkLimit, CommonsDigester.parse(line.getOptionValue("digest")));
        }

        // Fetching from file URLs is only allowed when the user explicitly accepts the risk.
        if (line.hasOption("enableFileUrl") && !line.hasOption("enableUnsecureFeatures")) {
            System.err.println("If you want to enable fileUrl, you must also acknowledge the security risks\n" + "by including --enableUnsecureFeatures. See CVE-2015-3271.");
            System.exit(-1);
        }
        InputStreamFactory inputStreamFactory;
        if (line.hasOption("enableFileUrl") && line.hasOption("enableUnsecureFeatures")) {
            inputStreamFactory = new URLEnabledInputStreamFactory();
            System.out.println(FILE_URL_WARNING);
        } else {
            inputStreamFactory = new DefaultInputStreamFactory();
        }

        TikaResource.init(tika, digester, inputStreamFactory);
        JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();

        List<ResourceProvider> rCoreProviders = new ArrayList<>();
        rCoreProviders.add(new SingletonResourceProvider(new MetadataResource()));
        rCoreProviders.add(new SingletonResourceProvider(new RecursiveMetadataResource()));
        rCoreProviders.add(new SingletonResourceProvider(new DetectorResource()));
        rCoreProviders.add(new SingletonResourceProvider(new LanguageResource()));
        rCoreProviders.add(new SingletonResourceProvider(new TranslateResource()));
        rCoreProviders.add(new SingletonResourceProvider(new TikaResource()));
        rCoreProviders.add(new SingletonResourceProvider(new UnpackerResource()));
        rCoreProviders.add(new SingletonResourceProvider(new TikaMimeTypes()));
        rCoreProviders.add(new SingletonResourceProvider(new TikaDetectors()));
        rCoreProviders.add(new SingletonResourceProvider(new TikaParsers()));
        rCoreProviders.add(new SingletonResourceProvider(new TikaVersion()));

        // The welcome resource is given the core providers and is itself added on top of them.
        List<ResourceProvider> rAllProviders = new ArrayList<>(rCoreProviders);
        rAllProviders.add(new SingletonResourceProvider(new TikaWelcome(rCoreProviders)));
        sf.setResourceProviders(rAllProviders);

        List<Object> providers = new ArrayList<>();
        providers.add(new TarWriter());
        providers.add(new ZipWriter());
        providers.add(new CSVMessageBodyWriter());
        providers.add(new MetadataListMessageBodyWriter());
        providers.add(new JSONMessageBodyWriter());
        providers.add(new XMPMessageBodyWriter());
        providers.add(new TextMessageBodyWriter());
        providers.add(new TikaServerParseExceptionMapper(returnStackTrace));
        // Optional filters are registered only when enabled on the command line.
        if (logFilter != null) {
            providers.add(logFilter);
        }
        if (corsFilter != null) {
            providers.add(corsFilter);
        }
        sf.setProviders(providers);

        String url = "http://" + host + ":" + port + "/";
        sf.setAddress(url);
        BindingFactoryManager manager = sf.getBus().getExtension(BindingFactoryManager.class);
        JAXRSBindingFactory factory = new JAXRSBindingFactory();
        factory.setBus(sf.getBus());
        manager.registerBindingFactory(JAXRSBindingFactory.JAXRS_BINDING_ID, factory);
        sf.create();
        LOG.info("Started Apache Tika server at {}", url);
    } catch (Exception ex) {
        LOG.error("Can't start", ex);
        System.exit(-1);
    }
}
Use of org.apache.tika.parser.utils.CommonsDigester in the Apache Tika project.
In class DigestingParserTest, method testNegativeMaxMarkLength.
/**
 * Expects an {@link IllegalArgumentException} when the digester is created
 * with a mark limit of -1 and used to parse the test document.
 */
@Test
public void testNegativeMaxMarkLength() throws Exception {
    final Metadata metadata = new Metadata();
    boolean rejected;
    try {
        getXML("test_recursive_embedded.docx",
                new DigestingParser(p, new CommonsDigester(-1, CommonsDigester.DigestAlgorithm.MD5)),
                metadata);
        // Reaching this line means nothing was thrown.
        rejected = false;
    } catch (IllegalArgumentException expected) {
        rejected = true;
    }
    assertTrue("Exception not thrown", rejected);
}
Aggregations