use of org.apache.tika.config.TikaConfig in project che by eclipse.
the class MediaTypeFilter method accept.
/**
 * Detects the media type of the given file's content with a Tika detector and
 * reports whether that type is one of the excluded ones.
 *
 * @param file the virtual file whose content is inspected
 * @return {@code true} when the detected media type is contained in
 *         {@code excludedMediaTypes}, when its {@code getType()} value is
 *         contained in {@code excludedTypes}, or when the content could not be
 *         read or the detector could not be created; {@code false} otherwise
 */
@Override
public boolean accept(VirtualFile file) {
    try (InputStream stream = file.getContent()) {
        final MediaType detected = new TikaConfig().getDetector().detect(stream, new Metadata());
        return excludedMediaTypes.contains(detected)
                || excludedTypes.contains(detected.getType());
    } catch (TikaException | ForbiddenException | ServerException | IOException e) {
        // Any failure to read or analyse the content yields the same answer as an excluded type.
        return true;
    }
}
use of org.apache.tika.config.TikaConfig in project lucene-solr by apache.
the class TikaEntityProcessor method firstInit.
/**
 * Reads the entity attributes from the context and initialises this
 * processor's Tika configuration and option fields.
 *
 * <p>Attributes read: {@code tikaConfig}, {@code extractEmbedded},
 * {@code format}, {@code htmlMapper}, {@code parser} and
 * {@code spatialMetadataField}.
 *
 * @param context the import context supplying the resolved entity attributes
 * @throws DataImportHandlerException if {@code format} or {@code htmlMapper}
 *         holds an unsupported value
 */
@Override
protected void firstInit(Context context) {
    super.firstInit(context);

    try {
        final String configLocation = context.getResolvedEntityAttribute("tikaConfig");
        if (configLocation != null) {
            // A relative location is resolved against the core's config directory.
            File location = new File(configLocation);
            if (!location.isAbsolute()) {
                location = new File(context.getSolrCore().getResourceLoader().getConfigDir(), configLocation);
            }
            tikaConfig = new TikaConfig(location);
        } else {
            // No explicit file: build the configuration from the core's class loader.
            ClassLoader coreClassLoader = context.getSolrCore().getResourceLoader().getClassLoader();
            tikaConfig = new TikaConfig(coreClassLoader);
        }
    } catch (Exception e) {
        wrapAndThrow(SEVERE, e, "Unable to load Tika Config");
    }

    // Only the exact string "true" switches embedded extraction on.
    if ("true".equals(context.getResolvedEntityAttribute("extractEmbedded"))) {
        extractEmbedded = true;
    }

    final String requestedFormat = context.getResolvedEntityAttribute("format");
    format = (requestedFormat == null) ? "text" : requestedFormat;
    final boolean formatSupported = "html".equals(format)
            || "xml".equals(format)
            || "text".equals(format)
            || "none".equals(format);
    if (!formatSupported) {
        throw new DataImportHandlerException(SEVERE, "'format' can be one of text|html|xml|none");
    }

    final String requestedMapper = context.getResolvedEntityAttribute("htmlMapper");
    htmlMapper = (requestedMapper == null) ? "default" : requestedMapper;
    final boolean mapperSupported = "default".equals(htmlMapper) || "identity".equals(htmlMapper);
    if (!mapperSupported) {
        throw new DataImportHandlerException(SEVERE, "'htmlMapper', if present, must be 'default' or 'identity'");
    }

    parser = context.getResolvedEntityAttribute("parser");
    if (parser == null) {
        parser = AUTO_PARSER;
    }

    spatialMetadataField = context.getResolvedEntityAttribute("spatialMetadataField");
}
use of org.apache.tika.config.TikaConfig in project lucene-solr by apache.
the class ExtractingRequestHandler method inform.
/**
 * Initialises the handler's Tika configuration, parse-context configuration,
 * date formats and factory once the core is available.
 *
 * <p>Values present in {@code initArgs} take precedence; a missing Tika
 * configuration falls back to {@code getDefaultConfig(...)} and a missing
 * parse-context configuration falls back to an empty {@code ParseContextConfig}.
 *
 * @param core the core whose resource loader is used to resolve locations
 * @throws SolrException with {@code SERVER_ERROR} if any configuration cannot be loaded
 */
@Override
public void inform(SolrCore core) {
    if (initArgs != null) {
        // A relative location is resolved against the config dir; an absolute one is used as given.
        final String tikaConfigLoc = (String) initArgs.get(CONFIG_LOCATION);
        if (tikaConfigLoc != null) {
            File tikaConfigFile = new File(tikaConfigLoc);
            if (!tikaConfigFile.isAbsolute()) {
                tikaConfigFile = new File(core.getResourceLoader().getConfigDir(), tikaConfigFile.getPath());
            }
            try {
                config = new TikaConfig(tikaConfigFile);
            } catch (Exception e) {
                throw new SolrException(ErrorCode.SERVER_ERROR, e);
            }
        }

        final String parseContextConfigLoc = (String) initArgs.get(PARSE_CONTEXT_CONFIG);
        if (parseContextConfigLoc != null) {
            try {
                parseContextConfig = new ParseContextConfig(core.getResourceLoader(), parseContextConfigLoc);
            } catch (Exception e) {
                throw new SolrException(ErrorCode.SERVER_ERROR, e);
            }
        }

        final NamedList<?> configuredFormats = (NamedList<?>) initArgs.get(DATE_FORMATS);
        if (configuredFormats != null && configuredFormats.size() > 0) {
            dateFormats = new HashSet<>();
            for (Map.Entry<String, ?> entry : configuredFormats) {
                final String format = (String) entry.getValue();
                log.info("Adding Date Format: " + format);
                dateFormats.add(format);
            }
        }
    }

    // Nothing configured explicitly: use the default Tika configuration.
    if (config == null) {
        try {
            config = getDefaultConfig(core.getResourceLoader().getClassLoader());
        } catch (MimeTypeException | IOException e) {
            throw new SolrException(ErrorCode.SERVER_ERROR, e);
        }
    }

    if (parseContextConfig == null) {
        parseContextConfig = new ParseContextConfig();
    }

    factory = createFactory();
}
use of org.apache.tika.config.TikaConfig in project tika by apache.
the class TikaConfigTest method testUnknownParser.
/**
 * Checks that the {@link LoadErrorHandler} of the supplied service loader
 * decides what happens when the configuration names a parser class that
 * cannot be found: IGNORE and WARN still produce a usable config with a
 * single component parser, while THROW makes construction fail.
 */
@Test
public void testUnknownParser() throws Exception {
    final ClassLoader testClassLoader = getClass().getClassLoader();
    final ServiceLoader ignoreLoader = new ServiceLoader(testClassLoader, LoadErrorHandler.IGNORE);
    final ServiceLoader warnLoader = new ServiceLoader(testClassLoader, LoadErrorHandler.WARN);
    final ServiceLoader throwLoader = new ServiceLoader(testClassLoader, LoadErrorHandler.THROW);
    final Path unknownParserConfig = Paths.get(new URI(getConfigPath("TIKA-1700-unknown-parser.xml")));

    // Both tolerant handlers must load the config and expose exactly one component parser.
    for (ServiceLoader tolerantLoader : new ServiceLoader[] {ignoreLoader, warnLoader}) {
        final TikaConfig loaded = new TikaConfig(unknownParserConfig, tolerantLoader);
        assertNotNull(loaded);
        assertNotNull(loaded.getParser());
        final CompositeParser composite = (CompositeParser) loaded.getParser();
        assertEquals(1, composite.getAllComponentParsers().size());
    }

    // The strict handler must reject the unknown parser class.
    try {
        new TikaConfig(unknownParserConfig, throwLoader);
        fail("Shouldn't get here, invalid parser class");
    } catch (TikaException expected) {
        // Expected: the unknown parser class is reported as an error.
    }
}
use of org.apache.tika.config.TikaConfig in project tika by apache.
the class TikaConfigTest method defaultParserWithExcludes.
/**
 * TIKA-1445 It should be possible to exclude DefaultParser from
 * certain types, so another parser explicitly listed will take them.
 *
 * <p>Loads {@code TIKA-1445-default-except.xml} and verifies that the
 * resulting composite parser holds three decorated parsers, in order:
 * a wrapped {@code DefaultParser}, an {@code EmptyParser} claiming
 * {@code hello/world} and an {@code ErrorParser} claiming {@code fail/world}.
 *
 * @throws Exception if the configuration cannot be loaded; the exception is
 *         propagated unchanged so the test report keeps its stack trace
 */
@Test
public void defaultParserWithExcludes() throws Exception {
    // No try/catch here: a TikaException is allowed to propagate, which fails the
    // test with the full stack trace and cause intact.
    TikaConfig config = getConfig("TIKA-1445-default-except.xml");
    CompositeParser cp = (CompositeParser) config.getParser();
    List<Parser> parsers = cp.getAllComponentParsers();
    Parser p;

    // Will be the three parsers defined in the xml
    assertEquals(3, parsers.size());

    // Should have a wrapped DefaultParser, not the main DefaultParser,
    // as it is excluded from handling certain classes
    p = parsers.get(0);
    assertTrue(p.toString(), p instanceof ParserDecorator);
    assertEquals(DefaultParser.class, ((ParserDecorator) p).getWrappedParser().getClass());

    // Should have two others which claim things, which they wouldn't
    // otherwise handle
    p = parsers.get(1);
    assertTrue(p.toString(), p instanceof ParserDecorator);
    assertEquals(EmptyParser.class, ((ParserDecorator) p).getWrappedParser().getClass());
    assertEquals("hello/world", p.getSupportedTypes(null).iterator().next().toString());

    p = parsers.get(2);
    assertTrue(p.toString(), p instanceof ParserDecorator);
    assertEquals(ErrorParser.class, ((ParserDecorator) p).getWrappedParser().getClass());
    assertEquals("fail/world", p.getSupportedTypes(null).iterator().next().toString());
}
Aggregations