use of org.apache.tika.mime.MimeTypes in project Asqatasun by Asqatasun.
the class UploadAuditSetUpFormValidator method validateFiles.
/**
 * Control whether the uploaded files are of HTML type and whether their
 * size is under the maxFileSize limit.
 * <p>
 * Every entry of the command's file input list is inspected in turn:
 * <ul>
 * <li>a file larger than {@code maxFileSize} is rejected with
 * {@code FILE_SIZE_EXCEEDED_MSG_BUNDLE_KEY};</li>
 * <li>a non-empty file whose detected mime type is not contained in
 * {@code authorizedMimeType} is rejected with
 * {@code NOT_HTML_MSG_BUNDLE_KEY};</li>
 * <li>a file whose content cannot be read is logged and rejected with
 * {@code NOT_HTML_MSG_BUNDLE_KEY}.</li>
 * </ul>
 * When no entry has a size greater than zero, a general
 * {@code NO_FILE_UPLOADED_MSG_BUNDLE_KEY} error is recorded.
 *
 * @param uploadAuditSetUpCommand the command holding the uploaded files
 * @param errors the object on which validation errors are recorded
 */
private void validateFiles(AuditSetUpCommand uploadAuditSetUpCommand, Errors errors) {
    boolean emptyFile = true;
    Metadata metadata = new Metadata();
    MimeTypes mimeTypes = TikaConfig.getDefaultConfig().getMimeRepository();
    String mime;
    for (int i = 0; i < uploadAuditSetUpCommand.getFileInputList().length; i++) {
        try {
            CommonsMultipartFile cmf = uploadAuditSetUpCommand.getFileInputList()[i];
            if (cmf.getSize() > maxFileSize) {
                Long maxFileSizeInMega = maxFileSize / 1000000;
                String[] arg = { maxFileSizeInMega.toString() };
                errors.rejectValue(ID_INPUT_FILE_PREFIX + "[" + i + "]", FILE_SIZE_EXCEEDED_MSG_BUNDLE_KEY, arg, "{0}");
            }
            if (cmf.getSize() > 0) {
                emptyFile = false;
                // Close the upload stream as soon as detection is done so that
                // the underlying resource is not leaked.
                try (BufferedInputStream in = new BufferedInputStream(cmf.getInputStream())) {
                    mime = mimeTypes.detect(in, metadata).toString();
                }
                LOGGER.debug("mime " + mime + " " + cmf.getOriginalFilename());
                if (!authorizedMimeType.contains(mime)) {
                    errors.rejectValue(ID_INPUT_FILE_PREFIX + "[" + i + "]", NOT_HTML_MSG_BUNDLE_KEY);
                }
            }
        } catch (IOException ex) {
            // An unreadable upload is reported the same way as a non-HTML one.
            LOGGER.warn(ex);
            errors.rejectValue(ID_INPUT_FILE_PREFIX + "[" + i + "]", NOT_HTML_MSG_BUNDLE_KEY);
        }
    }
    if (emptyFile) {
        // if no file is uploaded
        LOGGER.debug("emptyFiles");
        errors.rejectValue(GENERAL_ERROR_MSG_KEY, NO_FILE_UPLOADED_MSG_BUNDLE_KEY);
    }
}
use of org.apache.tika.mime.MimeTypes in project tika by apache.
the class MimeUtilTest method assertResult.
/**
 * Asserts that the default Tika mime repository associates the given
 * content type with the expected file extension.
 *
 * @param contentType the mime type name to look up
 * @param expected the extension the looked-up mime type must report
 * @throws MimeTypeException if the lookup of {@code contentType} fails
 */
private void assertResult(String contentType, String expected) throws MimeTypeException {
    final MimeTypes repository = TikaConfig.getDefaultConfig().getMimeRepository();
    final String actualExtension = repository.forName(contentType).getExtension();
    assertEquals(expected, actualExtension);
}
use of org.apache.tika.mime.MimeTypes in project tika by apache.
the class TikaCLI method compareFileMagic.
/**
 * Compares our mime types registry with the File(1) tool's
 * directory of (uncompiled) Magic entries.
 * (Well, those with mimetypes anyway)
 * <p>
 * Every regular file in the directory is scanned for lines starting with
 * {@code !:mime} or {@code #!:mime}; the mime types found are then looked
 * up in the default Tika configuration. The report printed to standard
 * output lists the types Tika does not know, and the types for which
 * neither the type itself, its children nor its parents have magic.
 *
 * @param magicDir Path to the magic directory
 * @throws IllegalArgumentException if the directory does not look like an
 *         uncompressed file magic directory or cannot be listed
 * @throws Exception if reading one of the magic files fails
 */
private void compareFileMagic(String magicDir) throws Exception {
    Set<String> tikaLacking = new TreeSet<String>();
    Set<String> tikaNoMagic = new TreeSet<String>();

    // Sanity check: a few well-known entries must be present
    File dir = new File(magicDir);
    boolean looksPlausible = (new File(dir, "elf")).exists()
            && (new File(dir, "mime")).exists()
            && (new File(dir, "vorbis")).exists();
    if (!looksPlausible) {
        throw new IllegalArgumentException(magicDir + " doesn't seem to hold uncompressed file magic entries");
    }
    File[] magicFiles = dir.listFiles();
    if (magicFiles == null) {
        // listFiles() returns null when the path cannot be listed
        throw new IllegalArgumentException(magicDir + " doesn't seem to hold uncompressed file magic entries");
    }

    // Find all the mimetypes in the directory
    Set<String> fileMimes = new HashSet<String>();
    for (File mf : magicFiles) {
        if (mf.isFile()) {
            // try-with-resources so the reader is closed even if reading fails
            try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(mf), UTF_8))) {
                String line;
                while ((line = r.readLine()) != null) {
                    // Strip exactly the matched prefix; a fixed offset of 7
                    // fails on a bare "!:mime" line.
                    int prefixLength;
                    if (line.startsWith("!:mime")) {
                        prefixLength = "!:mime".length();
                    } else if (line.startsWith("#!:mime")) {
                        prefixLength = "#!:mime".length();
                    } else {
                        continue;
                    }
                    String mime = line.substring(prefixLength).trim();
                    if (!mime.isEmpty()) {
                        fileMimes.add(mime);
                    }
                }
            }
        }
    }

    // See how those compare to the Tika ones
    TikaConfig config = TikaConfig.getDefaultConfig();
    MimeTypes mimeTypes = config.getMimeRepository();
    MediaTypeRegistry registry = config.getMediaTypeRegistry();
    for (String mime : fileMimes) {
        try {
            final MimeType type = mimeTypes.getRegisteredMimeType(mime);
            if (type == null) {
                // Tika doesn't know about this one
                tikaLacking.add(mime);
            } else {
                // Tika knows about this one!
                // Does Tika have magic for it?
                boolean hasMagic = type.hasMagic();
                // How about the children?
                if (!hasMagic) {
                    for (MediaType child : registry.getChildTypes(type.getType())) {
                        MimeType childType = mimeTypes.getRegisteredMimeType(child.toString());
                        if (childType != null && childType.hasMagic()) {
                            hasMagic = true;
                            break;
                        }
                    }
                }
                // How about the parents?
                MimeType parentType = type;
                while (parentType != null && !hasMagic) {
                    if (parentType.hasMagic()) {
                        // Has magic, fine
                        hasMagic = true;
                    } else {
                        // Check the parent next. The lookup must start from the
                        // type currently being examined, otherwise the walk
                        // never moves past the first parent.
                        MediaType parent = registry.getSupertype(parentType.getType());
                        if (MediaType.APPLICATION_XML.equals(parent)
                                || MediaType.TEXT_PLAIN.equals(parent)
                                || MediaType.OCTET_STREAM.equals(parent)) {
                            // Stop checking parents if we hit a top level type
                            parent = null;
                        }
                        if (parent != null) {
                            parentType = mimeTypes.getRegisteredMimeType(parent.toString());
                        } else {
                            parentType = null;
                        }
                    }
                }
                if (!hasMagic) {
                    tikaNoMagic.add(mime);
                }
            }
        } catch (MimeTypeException e) {
            // Broken entry in the file magic directory
            // Silently skip
        }
    }

    // Check how many tika knows about
    int tikaTypes = 0;
    int tikaAliases = 0;
    for (MediaType type : registry.getTypes()) {
        tikaTypes++;
        tikaAliases += registry.getAliases(type).size();
    }

    // Report
    System.out.println("Tika knows about " + tikaTypes + " unique mime types");
    System.out.println("Tika knows about " + (tikaTypes + tikaAliases) + " mime types including aliases");
    System.out.println("The File Magic directory knows about " + fileMimes.size() + " unique mime types");
    System.out.println();
    System.out.println("The following mime types are known to File but not Tika:");
    for (String mime : tikaLacking) {
        System.out.println(" " + mime);
    }
    System.out.println();
    System.out.println("The following mime types from File have no Tika magic (but their children might):");
    for (String mime : tikaNoMagic) {
        System.out.println(" " + mime);
    }
}
use of org.apache.tika.mime.MimeTypes in project ddf by codice.
the class TikaMimeTypeResolver method getFileExtensionForMimeType.
/**
 * Looks up the file extension registered for a mime type in the mime
 * repository of {@code config}.
 *
 * @param contentType the mime type name; may be null or empty
 * @return the extension reported for the mime type, or {@code null} when
 *         {@code contentType} is empty or the lookup throws
 */
@Override
public String getFileExtensionForMimeType(String contentType) {
    LOGGER.trace("ENTERING: getFileExtensionForMimeType()");

    final MimeTypes repository = config.getMimeRepository();
    String fileExtension = null;

    if (StringUtils.isNotEmpty(contentType)) {
        try {
            fileExtension = repository.forName(contentType).getExtension();
        } catch (Exception lookupFailure) {
            // Best effort: a failed lookup is logged and reported as "no extension".
            LOGGER.debug("Exception caught getting file extension for mime type {}", contentType, lookupFailure);
        }
    }

    LOGGER.debug("mimeType = {}, file extension = [{}]", contentType, fileExtension);
    LOGGER.trace("EXITING: getFileExtensionForMimeType()");
    return fileExtension;
}
use of org.apache.tika.mime.MimeTypes in project Asqatasun by Asqatasun.
the class AddScenarioFormValidator method checkScenarioFileTypeAndSize.
/**
 * Checks that a scenario file has been uploaded, that its size is greater
 * than zero and not above {@code maxFileSize}, and that its detected mime
 * type is contained in {@code authorizedMimeType}.
 * <p>
 * On every failure a general error and an error on the scenario file field
 * are recorded on {@code errors}.
 *
 * @param addScenarioCommand the command holding the uploaded scenario file
 * @param errors the object on which validation errors are recorded
 * @return whether the scenario handled by the current AddScenarioCommand
 * has a correct type and size
 */
public boolean checkScenarioFileTypeAndSize(AddScenarioCommand addScenarioCommand, Errors errors) {
    if (addScenarioCommand.getScenarioFile() == null) {
        // if no file uploaded
        LOGGER.debug("empty Scenario File");
        errors.rejectValue(GENERAL_ERROR_MSG_KEY, MANDATORY_FIELD_MSG_BUNDLE_KEY);
        errors.rejectValue(SCENARIO_FILE_KEY, NO_SCENARIO_UPLOADED_MSG_BUNDLE_KEY);
        return false;
    }
    Metadata metadata = new Metadata();
    MimeTypes mimeTypes = TikaConfig.getDefaultConfig().getMimeRepository();
    String mime;
    try {
        CommonsMultipartFile cmf = addScenarioCommand.getScenarioFile();
        if (cmf.getSize() > maxFileSize) {
            Long maxFileSizeInMega = maxFileSize / 1000000;
            String[] arg = { maxFileSizeInMega.toString() };
            errors.rejectValue(GENERAL_ERROR_MSG_KEY, MANDATORY_FIELD_MSG_BUNDLE_KEY);
            errors.rejectValue(SCENARIO_FILE_KEY, FILE_SIZE_EXCEEDED_MSG_BUNDLE_KEY, arg, "{0}");
            return false;
        } else if (cmf.getSize() > 0) {
            // Close the upload stream as soon as detection is done so that
            // the underlying resource is not leaked.
            try (BufferedInputStream in = new BufferedInputStream(cmf.getInputStream())) {
                mime = mimeTypes.detect(in, metadata).toString();
            }
            LOGGER.debug("mime " + mime + " " + cmf.getOriginalFilename());
            if (!authorizedMimeType.contains(mime)) {
                errors.rejectValue(GENERAL_ERROR_MSG_KEY, MANDATORY_FIELD_MSG_BUNDLE_KEY);
                errors.rejectValue(SCENARIO_FILE_KEY, NOT_SCENARIO_MSG_BUNDLE_KEY);
                return false;
            }
        } else {
            // A zero-sized upload is handled like a missing file.
            LOGGER.debug("File with size null");
            errors.rejectValue(GENERAL_ERROR_MSG_KEY, MANDATORY_FIELD_MSG_BUNDLE_KEY);
            errors.rejectValue(SCENARIO_FILE_KEY, NO_SCENARIO_UPLOADED_MSG_BUNDLE_KEY);
            return false;
        }
    } catch (IOException ex) {
        // An unreadable upload is reported the same way as a wrong file type.
        LOGGER.warn(ex);
        errors.rejectValue(SCENARIO_FILE_KEY, NOT_SCENARIO_MSG_BUNDLE_KEY);
        errors.rejectValue(GENERAL_ERROR_MSG_KEY, MANDATORY_FIELD_MSG_BUNDLE_KEY);
        return false;
    }
    return true;
}
Aggregations