Use of org.apache.tika.mime.MimeType in project cerberus-source by cerberustesting.
In the class SikuliService, method generatePostParameters.
/**
 * Builds the JSON payload describing an action to be performed by Sikuli.
 *
 * <p>When {@code locator} is non-empty it is treated as a URL: its content is
 * downloaded, its MIME type is guessed from the leading bytes to derive a
 * file extension, and the content is Base64 (URL-safe) encoded. When
 * {@code locator} is {@code null} or empty, the picture and extension are
 * sent as empty strings.
 *
 * @param action action expected to be done by Sikuli
 * @param locator URL of the picture to send; may be {@code null} or empty
 * @param text text to type
 * @param defaultWait timeout for the action
 * @return a JSON object with the keys {@code action}, {@code picture},
 *         {@code text}, {@code defaultWait} and {@code pictureExtension}
 * @throws JSONException if a value cannot be stored in the JSON object
 * @throws MalformedURLException if {@code locator} is not a valid URL
 * @throws IOException if the picture cannot be opened or read
 * @throws MimeTypeException if the guessed MIME type name is rejected by the
 *         MIME type registry
 */
private JSONObject generatePostParameters(String action, String locator, String text, long defaultWait) throws JSONException, IOException, MalformedURLException, MimeTypeException {
    JSONObject result = new JSONObject();
    String picture = "";
    String extension = "";
    /**
     * Get Picture from URL and convert to Base64
     */
    if (locator != null && !"".equals(locator)) {
        URL url = new URL(locator);
        URLConnection connection = url.openConnection();
        // try-with-resources guarantees the stream is closed even when the
        // MIME lookup or the read fails. The stream is buffered because
        // guessContentTypeFromStream needs mark/reset support to peek at the
        // leading bytes without consuming them.
        try (InputStream istream = new BufferedInputStream(connection.getInputStream())) {
            /**
             * Get the MimeType and the extension
             */
            // mimeType is null when the content type cannot be guessed; it is
            // passed to the registry lookup unchanged.
            String mimeType = URLConnection.guessContentTypeFromStream(istream);
            MimeTypes allTypes = MimeTypes.getDefaultMimeTypes();
            MimeType mt = allTypes.forName(mimeType);
            extension = mt.getExtension();
            /**
             * Encode in Base64
             */
            byte[] bytes = IOUtils.toByteArray(istream);
            picture = Base64.encodeBase64URLSafeString(bytes);
        }
    }
    /**
     * Build JSONObject with parameters:
     * action : Action expected to be done by Sikuli
     * picture : Picture in Base64 format
     * text : Text to type
     * defaultWait : Timeout for the action
     * pictureExtension : Extension for Base64 decoding
     */
    result.put("action", action);
    result.put("picture", picture);
    result.put("text", text);
    result.put("defaultWait", defaultWait);
    result.put("pictureExtension", extension);
    return result;
}
Use of org.apache.tika.mime.MimeType in project nutch by apache.
In the class MimeUtil, method autoResolveContentType.
/**
 * A facade interface to trying all the possible mime type resolution
 * strategies available within Tika. First, the mime type provided in
 * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}. Then
 * the cleaned mime type is looked up in the underlying Tika {@link MimeTypes}
 * registry, by its cleaned name. If the {@link MimeType} is found, then that
 * mime type is used, otherwise URL resolution is used to try and determine
 * the mime type. However, if <code>mime.type.magic</code> is enabled in
 * {@link NutchConfiguration}, then mime type magic resolution is used to try
 * and obtain a better-than-the-default approximation of the {@link MimeType}.
 *
 * @param typeName
 *          The original mime type, returned from a {@link ProtocolOutput}.
 * @param url
 *          The URL that Nutch was trying to crawl.
 * @param data
 *          The byte data, returned from the crawl, if any.
 * @return The automatically guessed {@link MimeType} name. The fallback to
 *         {@link MimeTypes#OCTET_STREAM} for an unresolved type is applied
 *         only when mime type magic is enabled.
 * @throws RuntimeException
 *           if URL-based detection fails; the original exception is kept as
 *           the cause.
 */
public String autoResolveContentType(String typeName, String url, byte[] data) {
    String retType = null;
    MimeType type = null;
    String cleanedMimeType = MimeUtil.cleanMimeType(typeName);
    // first try to get the type from the cleaned type name
    if (cleanedMimeType != null) {
        try {
            type = mimeTypes.forName(cleanedMimeType);
            cleanedMimeType = type.getName();
        } catch (MimeTypeException mte) {
            // Seems to be a malformed mime type name...
            cleanedMimeType = null;
        }
    }
    // if no type was resolved, or if it's the default type, then guess a
    // mime type from the url pattern
    if (type == null || type.getName().equals(MimeTypes.OCTET_STREAM)) {
        try {
            // single call: the result is used as-is, null included
            retType = tika.detect(url);
        } catch (Exception e) {
            String message = "Problem loading default Tika configuration";
            LOG.error(message, e);
            throw new RuntimeException(e);
        }
    } else {
        retType = type.getName();
    }
    // if magic is enabled, compare the current type with the one returned by
    // the magic
    if (this.mimeMagic) {
        String magicType = null;
        // pass URL (file name) and (cleansed) content type from protocol to Tika
        Metadata tikaMeta = new Metadata();
        tikaMeta.add(TikaCoreProperties.RESOURCE_NAME_KEY, url);
        tikaMeta.add(Metadata.CONTENT_TYPE, (cleanedMimeType != null ? cleanedMimeType : typeName));
        try (InputStream stream = TikaInputStream.get(data)) {
            magicType = mimeTypes.detect(stream, tikaMeta).toString();
        } catch (IOException ignored) {
            // Best effort: magic detection is optional, so on failure the type
            // resolved above is kept.
        }
        if (magicType != null && !magicType.equals(MimeTypes.OCTET_STREAM) && retType != null && !retType.equals(magicType)) {
            // If magic enabled and the current mime type differs from that of the
            // one returned from the magic, take the magic mimeType
            retType = magicType;
        }
        // default type
        if (retType == null) {
            retType = MimeTypes.OCTET_STREAM;
        }
    }
    return retType;
}
Aggregations