use of com.ibm.icu.text.CharsetDetector in project zm-mailbox by Zimbra.
the class ZInternetHeader method detectCharset.
/**
 * Guesses the charset of the given raw bytes with ICU's {@code CharsetDetector}.
 *
 * @param content        bytes to inspect; handed to the detector as-is
 * @param defaultCharset charset to return when no match is found; when
 *                       {@code null}, the JVM default charset is used instead
 * @return the charset chosen by {@code findMatch}, or the fallback when
 *         {@code findMatch} yields {@code null}
 */
private Charset detectCharset(byte[] content, Charset defaultCharset) {
    // Resolve the fallback up front so the return path below never yields null.
    final Charset fallback = (defaultCharset == null) ? Charset.defaultCharset() : defaultCharset;

    CharsetDetector icuDetector = new CharsetDetector();
    icuDetector.setText(content);

    Charset detected = findMatch(icuDetector);
    if (detected == null) {
        return fallback;
    }
    return detected;
}
use of com.ibm.icu.text.CharsetDetector in project stanbol by apache.
the class CharsetRecognizer method detect.
/**
 * Determines the character encoding of a stream.
 *
 * <p>If a format is given, {@code checkFormat} is consulted first and its
 * non-null answer wins. Otherwise ICU's {@code CharsetDetector} inspects the
 * stream, using {@code encoding} as a declared-encoding hint when provided.
 *
 * @param in       stream to inspect; must support {@code mark}
 * @param format   optional format name passed to {@code checkFormat}; may be {@code null}
 * @param encoding optional declared encoding used as a detector hint; may be {@code null}
 * @return the detected charset name; if the detector finds no match, the
 *         declared {@code encoding} (which may be {@code null})
 * @throws IOException if the stream does not support marks or reading fails
 */
public static String detect(InputStream in, String format, String encoding) throws IOException {
    // the input stream must support marks
    if (!in.markSupported()) {
        throw new IOException("Mark not supported by input stream");
    }
    if (format != null) {
        String fromFormat = checkFormat(format, in);
        if (fromFormat != null) {
            return fromFormat;
        }
    }
    // in case of HTML or XML check whether there is a charset
    // specification; might be too fragile
    CharsetDetector detector = new CharsetDetector();
    if (encoding != null) {
        detector.setDeclaredEncoding(encoding);
    }
    detector.setText(in);
    // detect() returns null when no charset matches the input; fall back to
    // the caller's declared encoding rather than failing with an NPE.
    CharsetMatch found = detector.detect();
    String result = (found != null) ? found.getName() : encoding;
    LOG.debug("Encoding: " + result);
    return result;
}
use of com.ibm.icu.text.CharsetDetector in project zm-mailbox by Zimbra.
the class CsvFormatter method saveCallback.
/**
 * Imports contacts from an uploaded CSV file into the given folder.
 *
 * <p>The start of the request body is read ahead so ICU's
 * {@code CharsetDetector} can guess the upload's charset; the bytes are then
 * pushed back so the CSV parser sees the complete stream. UTF-8 is used when
 * the upload is empty, when no charset matches, or when the guessed charset
 * name is not supported by this JVM.
 *
 * @throws UserServletException with {@code SC_BAD_REQUEST} when the CSV cannot be parsed
 * @throws IOException          if reading the request body fails
 */
@Override
public void saveCallback(UserServletContext context, String contentType, Folder folder, String filename) throws UserServletException, ServiceException, IOException {
    // Disable the jetty timeout
    disableJettyTimeout(context);
    // Detect the charset of upload file.
    PushbackInputStream pis = new PushbackInputStream(context.getRequestInputStream(), READ_AHEAD_BUFFER_SIZE);
    byte[] buf = new byte[READ_AHEAD_BUFFER_SIZE];
    int bytesRead = pis.read(buf, 0, READ_AHEAD_BUFFER_SIZE);
    Charset charset = Charsets.UTF_8;
    if (bytesRead > 0) {
        // Only hand the detector the bytes actually read; the unused tail of
        // the buffer is zero padding and would skew the detection.
        byte[] sample = new byte[bytesRead];
        System.arraycopy(buf, 0, sample, 0, bytesRead);
        CharsetDetector detector = new CharsetDetector();
        detector.setText(sample);
        // detect() returns null when nothing matches; keep the UTF-8 default then.
        CharsetMatch match = detector.detect();
        String guess = (match != null) ? match.getName() : null;
        if (guess != null) {
            try {
                charset = Charset.forName(guess);
            } catch (IllegalArgumentException ignored) {
                // Guessed name is illegal or unsupported on this JVM; keep UTF-8.
            }
        }
        // Return the sampled bytes so the reader below starts at the beginning.
        pis.unread(buf, 0, bytesRead);
    }
    InputStreamReader isr = new InputStreamReader(pis, charset);
    BufferedReader reader = new BufferedReader(isr);
    try {
        String format = context.params.get(UserServlet.QP_CSVFORMAT);
        String locale = context.req.getParameter(UserServlet.QP_CSVLOCALE);
        if (locale == null) {
            locale = context.getLocale().toString();
        }
        List<Map<String, String>> contacts = ContactCSV.getContacts(reader, format, locale);
        ItemId iidFolder = new ItemId(folder);
        ImportContacts.ImportCsvContacts(context.opContext, context.targetMailbox, iidFolder, contacts);
    } catch (ContactCSV.ParseException e) {
        ZimbraLog.misc.debug("ContactCSV - ParseException thrown", e);
        throw new UserServletException(HttpServletResponse.SC_BAD_REQUEST, "Could not parse csv file - Reason : " + e.getMessage());
    } finally {
        reader.close();
    }
}
use of com.ibm.icu.text.CharsetDetector in project stanbol by apache.
the class CharsetRecognizer method detect.
/**
 * Determines the character encoding of a stream.
 *
 * <p>If a format is given, {@code checkFormat} is consulted first and its
 * non-null answer wins. Otherwise ICU's {@code CharsetDetector} inspects the
 * stream, using {@code encoding} as a declared-encoding hint when provided.
 *
 * @param in       stream to inspect; must support {@code mark}
 * @param format   optional format name passed to {@code checkFormat}; may be {@code null}
 * @param encoding optional declared encoding used as a detector hint; may be {@code null}
 * @return the detected charset name; if the detector finds no match, the
 *         declared {@code encoding} (which may be {@code null})
 * @throws IOException if the stream does not support marks or reading fails
 */
public static String detect(InputStream in, String format, String encoding) throws IOException {
    // the input stream must support marks
    if (!in.markSupported()) {
        throw new IOException("Mark not supported by input stream");
    }
    if (format != null) {
        String fromFormat = checkFormat(format, in);
        if (fromFormat != null) {
            return fromFormat;
        }
    }
    // in case of HTML or XML check whether there is a charset
    // specification; might be too fragile
    CharsetDetector detector = new CharsetDetector();
    if (encoding != null) {
        detector.setDeclaredEncoding(encoding);
    }
    detector.setText(in);
    // detect() returns null when no charset matches the input; fall back to
    // the caller's declared encoding rather than failing with an NPE.
    CharsetMatch found = detector.detect();
    String result = (found != null) ? found.getName() : encoding;
    LOG.debug("Encoding: " + result);
    return result;
}
Aggregations