use of org.apache.tika.exception.TikaException in project tika by apache.
the class MicrosoftTranslator method translate.
/**
 * Use the Microsoft service to translate the given text from the given source language to the given target.
 * You must set the client keys in translator.microsoft.properties.
 *
 * @param text The text to translate.
 * @param sourceLanguage The input text language (for example, "en").
 * @param targetLanguage The desired language to translate to (for example, "fr").
 * @return The translated text. If translation is unavailable, returns the unchanged text.
 * @throws TikaException if the call to the translation client fails; the original
 *         exception is attached as the cause.
 * @throws IOException declared by the method signature.
 * @see org.apache.tika.language.translate.Translator
 * @since Tika 1.6
 */
public String translate(String text, String sourceLanguage, String targetLanguage) throws TikaException, IOException {
    if (!available) {
        return text;
    }
    Language source = Language.fromString(sourceLanguage);
    Language target = Language.fromString(targetLanguage);
    // The credentials are applied through the client's static setters before every call.
    Translate.setClientId(clientId);
    Translate.setClientSecret(clientSecret);
    try {
        return Translate.execute(text, source, target);
    } catch (Exception e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new TikaException("Error with Microsoft Translation: " + e.getMessage(), e);
    }
}
use of org.apache.tika.exception.TikaException in project tika by apache.
the class YandexTranslator method translate.
/**
 * Translates the given text by issuing a GET request to the Yandex translate endpoint.
 *
 * <p>If this translator is not available the input text is returned unchanged. When
 * {@code sourceLanguage} is {@code null} only the target language is sent, leaving
 * source-language identification to the service; otherwise the language pair is sent
 * as {@code source-target}.</p>
 *
 * @param text the text to translate
 * @param sourceLanguage the language of {@code text}, or {@code null} to let the service detect it
 * @param targetLanguage the language to translate to
 * @return the first translated text entry of the response, or {@code text} if the translator is unavailable
 * @throws TikaException if the service reports a code other than 200, or the response
 *         is not recognized or is not well-formed JSON
 * @throws IOException if reading the response fails
 */
@Override
public String translate(String text, String sourceLanguage, String targetLanguage) throws TikaException, IOException {
    if (!this.isAvailable()) {
        return text;
    }
    WebClient client = WebClient.create(YANDEX_TRANSLATE_URL_BASE);
    String langCode;
    if (sourceLanguage == null) {
        //Translate Service will identify source language
        langCode = targetLanguage;
    } else {
        //Source language is well known
        langCode = sourceLanguage + '-' + targetLanguage;
    }
    //TODO Add support for text over 10k characters
    Response response = client.accept(MediaType.APPLICATION_JSON).query("key", this.apiKey).query("lang", langCode).query("text", text).get();
    StringBuilder responseText = new StringBuilder();
    // try-with-resources closes the reader (and the wrapped entity stream) even if reading fails
    try (BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream) response.getEntity(), UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            responseText.append(line);
        }
    }
    try {
        ObjectMapper mapper = new ObjectMapper();
        JsonNode jsonResp = mapper.readTree(responseText.toString());
        if (jsonResp.findValuesAsText("code").isEmpty()) {
            throw new TikaException("Return message not recognized: " + responseText.substring(0, Math.min(responseText.length(), 100)));
        }
        String code = jsonResp.findValuesAsText("code").get(0);
        if (code.equals("200")) {
            return jsonResp.findValue("text").get(0).asText();
        }
        // The error message may be absent, a plain string, or an array; handle all three
        // instead of failing with a NullPointerException while reporting the real error.
        JsonNode messageNode = jsonResp.findValue("message");
        String message;
        if (messageNode == null) {
            message = "Yandex translation failed with code " + code;
        } else if (messageNode.isArray() && messageNode.size() > 0) {
            message = messageNode.get(0).asText();
        } else {
            message = messageNode.asText();
        }
        throw new TikaException(message);
    } catch (JsonParseException e) {
        throw new TikaException("Error requesting translation from '" + sourceLanguage + "' to '" + targetLanguage + "', JSON response from Yandex is not well formatted: " + responseText.toString(), e);
    }
}
use of org.apache.tika.exception.TikaException in project tika by apache.
the class JoshuaNetworkTranslator method translate.
/**
 * <p>Translates the given text by POSTing it to the Joshua REST server.</p>
 *
 * <p>The request URI is built from the configured network server and the
 * target language. If the translator is not available the input text is
 * returned unchanged. If no source language (a null value) has been
 * provided, the language reported by {@code detectLanguage(text)} is used.</p>
 *
 * <p>The input text is then turned into one sentence per line by replacing
 * every '.' with a newline before it is sent.</p>
 *
 * @param text the text to translate
 * @param sourceLanguage the language of {@code text}, or {@code null} to detect it
 * @param targetLanguage the language to translate to
 * @return the first {@code outputText} value of the response, or {@code text}
 *         if the translator is unavailable
 * @throws TikaException if the response carries no {@code outputText} or is
 *         not well-formed JSON
 * @throws IOException if reading the response fails
 * @see org.apache.tika.language.translate.Translator#translate
 * (java.lang.String, java.lang.String, java.lang.String)
 */
@Override
public String translate(String text, String sourceLanguage, String targetLanguage) throws TikaException, IOException {
    //create networkURI
    if (!networkServer.endsWith("/")) {
        networkURI = networkServer + "/" + targetLanguage;
    } else {
        networkURI = networkServer + targetLanguage;
    }
    if (!this.isAvailable()) {
        return text;
    }
    //make an attempt to guess language if one is not provided.
    if (sourceLanguage == null) {
        sourceLanguage = detectLanguage(text).getLanguage();
    }
    //process input text into sentences, one per line.
    //String.replace covers every '.', including one at index 0, which the
    //previous indexOf loop (starting its search at index 1) skipped.
    String inputText = text.replace('.', '\n');
    final List<Object> providers = new ArrayList<>();
    providers.add(new JacksonJsonProvider());
    WebClient client = WebClient.create(networkURI, providers);
    ObjectMapper requestMapper = new ObjectMapper();
    ObjectNode jsonNode = requestMapper.createObjectNode();
    jsonNode.put("inputLanguage", sourceLanguage);
    jsonNode.put("inputText", inputText);
    //make the request
    Response response = client.accept(MediaType.APPLICATION_JSON).type(MediaType.APPLICATION_JSON).post(jsonNode);
    StringBuilder responseText = new StringBuilder();
    // try-with-resources closes the reader (and the wrapped entity stream) even if reading fails
    try (BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream) response.getEntity(), UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            responseText.append(line);
        }
    }
    try {
        ObjectMapper responseMapper = new ObjectMapper();
        JsonNode jsonResp = responseMapper.readTree(responseText.toString());
        // findValuesAsText returns an empty list (never null) when the field is missing,
        // so emptiness is the condition that selects the error path.
        List<String> outputs = jsonResp.findValuesAsText("outputText");
        if (!outputs.isEmpty()) {
            return outputs.get(0);
        }
        // The error message may be absent, a plain string, or an array; handle all three.
        JsonNode messageNode = jsonResp.findValue("message");
        String message;
        if (messageNode == null) {
            message = "Joshua REST Server response contains neither 'outputText' nor 'message': " + responseText.toString();
        } else if (messageNode.isArray() && messageNode.size() > 0) {
            message = messageNode.get(0).asText();
        } else {
            message = messageNode.asText();
        }
        throw new TikaException(message);
    } catch (JsonParseException e) {
        throw new TikaException("Error requesting translation from '" + sourceLanguage + "' to '" + targetLanguage + "', JSON response " + "from Joshua REST Server is not well formatted: " + responseText.toString(), e);
    }
}
use of org.apache.tika.exception.TikaException in project tika by apache.
the class TikaToXMP method getConverter.
/**
 * Retrieve a specific converter according to the mimetype.
 *
 * <p>The mimetype string is parsed into a media type and looked up in the
 * converter map; a new converter instance is created for every call.</p>
 *
 * @param mimetype
 *            the Mimetype
 * @return the converter or null, if the mimetype cannot be parsed or no
 *         converter is registered for it
 * @throws TikaException
 *             if the registered converter class cannot be instantiated
 * @throws IllegalArgumentException
 *             if {@code mimetype} is null
 */
public static ITikaToXMPConverter getConverter(String mimetype) throws TikaException {
    if (mimetype == null) {
        throw new IllegalArgumentException("mimetype must not be null");
    }
    MediaType type = MediaType.parse(mimetype);
    if (type == null) {
        return null;
    }
    Class<? extends ITikaToXMPConverter> clazz = getConverterMap().get(type);
    if (clazz == null) {
        return null;
    }
    try {
        // Class.newInstance() is deprecated since Java 9; going through the no-arg
        // constructor is the documented replacement.
        return clazz.getDeclaredConstructor().newInstance();
    } catch (Exception e) {
        throw new TikaException("TikaToXMP converter class cannot be instantiated for mimetype: " + type.toString(), e);
    }
}
use of org.apache.tika.exception.TikaException in project tika by apache.
the class NetCDFParser method parse.
/**
 * Parses a NetCDF file: copies its global attributes into the metadata and
 * writes an XHTML listing of its dimensions and variables to the handler.
 *
 * <p>The NetCDF library is given a file path, so the stream is accessed as a
 * file through {@code TikaInputStream}. Temporary resources are created (and
 * disposed here) only when the incoming stream is not already a
 * {@code TikaInputStream}.</p>
 *
 * @param stream the document stream
 * @param handler receiver of the generated XHTML events
 * @param metadata receives the file type description and the global attributes
 * @param context the parse context (not read by this method)
 * @throws IOException if closing the NetCDF file fails
 * @throws SAXException if the handler rejects an XHTML event
 * @throws TikaException if an I/O error occurs while reading the file, or if
 *         disposing the temporary resources fails
 * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
 *      org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
 *      org.apache.tika.parser.ParseContext)
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    TemporaryResources tmp = TikaInputStream.isTikaInputStream(stream) ? null : new TemporaryResources();
    TikaInputStream tis = TikaInputStream.get(stream, tmp);
    NetcdfFile ncFile = null;
    try {
        ncFile = NetcdfFile.open(tis.getFile().getAbsolutePath());
        metadata.set("File-Type-Description", ncFile.getFileTypeDescription());
        // first parse out the set of global attributes
        for (Attribute attr : ncFile.getGlobalAttributes()) {
            Property property = resolveMetadataKey(attr.getFullName());
            if (attr.getDataType().isString()) {
                metadata.add(property, attr.getStringValue());
            } else if (attr.getDataType().isNumeric()) {
                // NOTE: numeric attributes are narrowed to int before being stored
                int value = attr.getNumericValue().intValue();
                metadata.add(property, String.valueOf(value));
            }
        }
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.newline();
        // section 1: one list item per dimension, "name = length"
        xhtml.element("h1", "dimensions");
        xhtml.startElement("ul");
        xhtml.newline();
        for (Dimension dim : ncFile.getDimensions()) {
            xhtml.element("li", dim.getFullName() + " = " + dim.getLength());
        }
        xhtml.endElement("ul");
        // section 2: one list item per variable, with a nested list of its attributes
        xhtml.element("h1", "variables");
        xhtml.startElement("ul");
        xhtml.newline();
        for (Variable var : ncFile.getVariables()) {
            xhtml.startElement("li");
            xhtml.characters(var.getDataType() + " " + var.getNameAndDimensions());
            xhtml.newline();
            List<Attribute> attributes = var.getAttributes();
            if (!attributes.isEmpty()) {
                xhtml.startElement("ul");
                for (Attribute element : attributes) {
                    xhtml.element("li", element.toString());
                }
                xhtml.endElement("ul");
            }
            xhtml.endElement("li");
        }
        xhtml.endElement("ul");
        xhtml.endDocument();
    } catch (IOException e) {
        throw new TikaException("NetCDF parse error", e);
    } finally {
        // Nested try/finally: a failure while closing the NetCDF file must not
        // prevent the temporary resources from being disposed.
        try {
            if (ncFile != null) {
                ncFile.close();
            }
        } finally {
            if (tmp != null) {
                tmp.dispose();
            }
        }
    }
}
Aggregations