Use of com.ibm.icu.text.CharsetMatch in project ultimate-cube by G3G4X5X6.
Class EncodeConversion, method readDir.
/**
 * Recursively walks {@code file} and runs charset detection on every regular file found.
 *
 * <p>For each file with a detection result, a row (file name, charset name, confidence) is
 * appended to {@code leftModel}, the file is added to {@code globalFile}, and the progress
 * bar is set to the current size of {@code globalFile}.
 *
 * <p>Checked exceptions (for example from opening or closing a file) are rethrown
 * unchecked through {@code @SneakyThrows}.
 *
 * @param file directory to scan; if it cannot be listed, it is logged and skipped
 */
@SneakyThrows
private void readDir(File file) {
    File[] fs = file.listFiles();
    if (fs == null) {
        // listFiles() yields null when the path is not a directory or an I/O error occurs.
        log.debug("Cannot list directory: " + file.getPath());
        return;
    }
    for (File f : fs) {
        if (f.isDirectory()) {
            // Recurse into subdirectories.
            readDir(f);
        }
        if (f.isFile()) {
            log.debug("File: " + f.getPath());
            CharsetMatch cm;
            // Close the stream once detection is done; only the match's name and
            // confidence are used afterwards.
            try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(f))) {
                cm = CommonUtil.checkCharset(in);
            }
            if (cm == null) {
                // No detection result: skip the file so table rows and globalFile stay
                // aligned. NOTE(review): confirm skipped files should not count towards
                // the progress bar.
                log.debug("CheckCharset: no match for " + f.getPath());
                continue;
            }
            log.debug("CheckCharset:" + cm.getName());
            leftModel.addRow(new String[] { f.getName(), cm.getName(), String.valueOf(cm.getConfidence()) });
            globalFile.add(f);
            progressBar.setValue(globalFile.size());
        }
    }
}
Use of com.ibm.icu.text.CharsetMatch in project ultimate-cube by G3G4X5X6.
Class CommonUtil, method checkCharset.
/**
 * Feeds {@code input} to a fresh {@link CharsetDetector} and returns the result of
 * {@code detect()}.
 *
 * <p>If handing the stream to the detector throws an {@link IOException}, the stream is
 * closed, both failures (if any) are printed to standard error, and detection is still
 * attempted. On the success path the stream is left open, so the caller owns closing it.
 *
 * @param input stream to analyse
 * @return the detector's result; presumably null when nothing matches, since other
 *         callers of {@code detect()} null-check it (confirm against the ICU docs)
 */
public static CharsetMatch checkCharset(InputStream input) {
    final CharsetDetector detector = new CharsetDetector();
    try {
        detector.setText(input);
    } catch (IOException readFailure) {
        // Release the stream first, then report the original failure.
        try {
            input.close();
        } catch (IOException closeFailure) {
            closeFailure.printStackTrace();
        }
        readFailure.printStackTrace();
    }
    return detector.detect();
}
Use of com.ibm.icu.text.CharsetMatch in project commafeed by Athou.
Class FeedUtils, method detectEncoding.
/**
 * Detect encoding by analyzing characters in the array.
 *
 * <p>Starts from UTF-8 and replaces it with the name reported by the charset detector
 * when there is a match. A detected ISO-8859-1 is mapped to windows-1252. If the
 * resulting name is illegal or not supported by this JVM, UTF-8 is returned instead of
 * throwing.
 *
 * @param bytes raw content to analyse; null or empty yields UTF-8
 * @return the detected charset, or UTF-8 when detection is impossible or unusable
 */
public static Charset detectEncoding(byte[] bytes) {
    final String fallback = "UTF-8";
    if (bytes == null || bytes.length == 0) {
        // Nothing to analyse; avoid handing a null array to the detector.
        return Charset.forName(fallback);
    }

    String encoding = fallback;
    CharsetDetector detector = new CharsetDetector();
    detector.setText(bytes);
    CharsetMatch match = detector.detect();
    if (match != null && match.getName() != null) {
        encoding = match.getName();
    }
    if (encoding.equalsIgnoreCase("ISO-8859-1")) {
        encoding = "windows-1252";
    }

    try {
        return Charset.forName(encoding);
    } catch (IllegalArgumentException e) {
        // Covers IllegalCharsetNameException and UnsupportedCharsetException: the
        // detector may report a name this JVM cannot map to a Charset.
        return Charset.forName(fallback);
    }
}
Use of com.ibm.icu.text.CharsetMatch in project document-management-system by openkm.
Class MailUtils, method getText.
/**
 * Get text from message.
 *
 * <p>The returned string is prefixed with a one-character marker: {@code "H"} for HTML
 * content and {@code "T"} for plain text.
 *
 * <ul>
 * <li>{@code multipart/alternative}: parts are visited in order, and the first
 * {@code text/html} part ends the search. Otherwise the last part visited is used.</li>
 * <li>Other {@code multipart/*}: the first body part yielding a non-null text is used.</li>
 * <li>{@code message/rfc822}: the nested part is processed recursively.</li>
 * <li>Anything else: the content is converted to a string. Stream content is decoded
 * using charset detection.</li>
 * </ul>
 *
 * @param p the part to extract text from
 * @return marker-prefixed text, or {@code "T" + NO_BODY} when nothing was found
 * @throws MessagingException if the part cannot be inspected
 * @throws IOException if the content cannot be read
 */
private static String getText(Part p) throws MessagingException, IOException {
    if (p.isMimeType("multipart/alternative")) {
        // prefer html over plain text
        Multipart mp = (Multipart) p.getContent();
        String text = "T" + NO_BODY;
        for (int i = 0; i < mp.getCount(); i++) {
            Part bp = mp.getBodyPart(i);
            if (bp.isMimeType("text/plain")) {
                text = getText(bp);
            } else if (bp.isMimeType("text/html")) {
                text = getText(bp);
                break;
            } else {
                text = getText(bp);
            }
        }
        return text;
    } else if (p.isMimeType("multipart/*")) {
        Multipart mp = (Multipart) p.getContent();
        for (int i = 0; i < mp.getCount(); i++) {
            String s = getText(mp.getBodyPart(i));
            if (s != null) {
                return s;
            }
        }
    } else if (p.isMimeType("message/rfc822")) {
        Part np = (Part) p.getContent();
        String s = getText(np);
        if (s != null) {
            return s;
        }
    } else {
        String str;
        try {
            Object obj = p.getContent();
            if (obj instanceof InputStream) {
                str = readTextWithDetectedCharset((InputStream) obj);
            } else if (obj instanceof String) {
                str = (String) obj;
            } else {
                str = obj.toString();
            }
        } catch (UnsupportedEncodingException e) {
            // The declared charset is unknown: read the part's stream and detect it.
            str = readTextWithDetectedCharset(p.getInputStream());
        }
        if (p.isMimeType("text/html")) {
            return "H" + str;
        } else if (p.isMimeType("text/plain")) {
            return "T" + str;
        } else if (StringUtils.containsIgnoreCase(str, "<html>")) {
            return "H" + str;
        } else {
            // Otherwise let's set as text/plain
            return "T" + str;
        }
    }
    return "T" + NO_BODY;
}

/**
 * Reads the whole stream as text, decoding with the charset reported by the detector.
 *
 * <p>When the detector has no match, or the match cannot supply a reader, the stream is
 * decoded with the platform default charset. The stream is always closed, including
 * when reading fails.
 *
 * @param is stream to read and close
 * @return the decoded content
 * @throws IOException if reading fails
 */
private static String readTextWithDetectedCharset(InputStream is) throws IOException {
    // Buffered so the detector can inspect the stream and a reader can still be opened on it.
    BufferedInputStream bis = new BufferedInputStream(is);
    Reader rd = null;
    try {
        CharsetDetector detector = new CharsetDetector();
        detector.setText(bis);
        CharsetMatch cm = detector.detect();
        if (cm != null) {
            rd = cm.getReader();
        }
        if (rd == null) {
            rd = new InputStreamReader(bis);
        }
        return IOUtils.toString(rd);
    } finally {
        IOUtils.closeQuietly(rd);
        IOUtils.closeQuietly(bis);
        IOUtils.closeQuietly(is);
    }
}
Use of com.ibm.icu.text.CharsetMatch in project document-management-system by openkm.
Class ConverterServlet, method service.
/**
 * Loads the document identified by the {@code uuid} request parameter into a temporary
 * file, optionally converts it to PDF or SWF, and sends the result back to the client.
 *
 * <p>Request parameters read: {@code uuid}, {@code inline}, {@code print}, {@code toPdf}
 * and {@code toSwf}. Progress and errors are reported through a {@link ConverterListener}
 * stored in the session under {@code FILE_CONVERTER_STATUS}. For {@code text/*}
 * documents the content is decoded with the detected charset before being written to
 * the temporary file. If no charset can be determined, the raw bytes are copied.
 *
 * <p>When conversion fails, a bundled "conversion problem" document is sent instead.
 * The temporary file is always deleted, and the listener is always marked as finished.
 *
 * @param request the HTTP request carrying the conversion parameters
 * @param response the HTTP response the document is written to
 * @throws ServletException wrapping an {@code OKMException} for any failure
 * @throws IOException declared by the servlet contract
 */
protected void service(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    log.debug("service({}, {})", request, response);
    request.setCharacterEncoding("UTF-8");
    String uuid = WebUtils.getString(request, "uuid");
    boolean inline = WebUtils.getBoolean(request, "inline");
    boolean print = WebUtils.getBoolean(request, "print");
    boolean toPdf = WebUtils.getBoolean(request, "toPdf");
    boolean toSwf = WebUtils.getBoolean(request, "toSwf");
    CharsetDetector detector = new CharsetDetector();
    File tmp = null;
    InputStream is = null;
    ConverterListener listener = new ConverterListener(ConverterListener.STATUS_LOADING);
    updateSessionManager(request);
    try {
        // Now an document can be located by UUID
        if (!uuid.equals("")) {
            // Saving listener to session
            request.getSession().setAttribute(FILE_CONVERTER_STATUS, listener);
            String path = OKMRepository.getInstance().getNodePath(null, uuid);
            Document doc = OKMDocument.getInstance().getProperties(null, path);
            String fileName = PathUtils.getName(doc.getPath());
            // Optional append version to download
            if (Config.VERSION_APPEND_DOWNLOAD) {
                String versionToAppend = OKMDocument.getInstance().getProperties(null, uuid).getActualVersion().getName();
                // Split on the last dot only: [base name, extension]
                String[] nameParts = fileName.split("\\.(?=[^\\.]+$)");
                if (nameParts.length > 1) {
                    fileName = nameParts[0] + " rev " + versionToAppend + "." + nameParts[1];
                } else {
                    // No extension: append the version at the end of the name.
                    fileName = fileName + " rev " + versionToAppend;
                }
            }
            // Save content to temporary file
            tmp = File.createTempFile("okm", "." + FileUtils.getFileExtension(fileName));
            // If is used to preview, it should workaround the DOWNLOAD extended permission.
            is = new DbDocumentModule().getContent(null, path, false, !toSwf);
            try {
                // Text files may need encoding conversion
                if (doc.getMimeType().startsWith("text/")) {
                    BufferedInputStream bis = new BufferedInputStream(is);
                    detector.setText(bis);
                    CharsetMatch cm = detector.detect();
                    Reader rd = (cm != null) ? cm.getReader() : null;
                    if (rd != null) {
                        try {
                            FileUtils.copy(rd, tmp);
                        } finally {
                            IOUtils.closeQuietly(rd);
                        }
                    } else {
                        // No usable charset match: copy the raw bytes unchanged.
                        FileUtils.copy(bis, tmp);
                    }
                } else {
                    FileUtils.copy(is, tmp);
                }
            } finally {
                // Release the content stream before conversion, even if copying failed.
                IOUtils.closeQuietly(is);
            }
            // Prepare conversion
            ConversionData cd = new ConversionData();
            cd.uuid = uuid;
            cd.fileName = fileName;
            cd.mimeType = doc.getMimeType();
            cd.file = tmp;
            if (toPdf && !cd.mimeType.equals(MimeTypeConfig.MIME_PDF)) {
                try {
                    listener.setStatus(ConverterListener.STATUS_CONVERTING_TO_PDF);
                    toPDF(cd);
                    listener.setStatus(ConverterListener.STATUS_CONVERTING_TO_PDF_FINISHED);
                } catch (ConversionException e) {
                    log.error(e.getMessage(), e);
                    listener.setError(e.getMessage());
                    // Send a bundled placeholder document instead of the failed conversion.
                    InputStream tis = ConverterServlet.class.getResourceAsStream("conversion_problem.pdf");
                    try {
                        FileUtils.copy(tis, cd.file);
                    } finally {
                        IOUtils.closeQuietly(tis);
                    }
                }
            } else if (toSwf && !cd.mimeType.equals(MimeTypeConfig.MIME_SWF)) {
                try {
                    listener.setStatus(ConverterListener.STATUS_CONVERTING_TO_SWF);
                    toSWF(cd);
                    listener.setStatus(ConverterListener.STATUS_CONVERTING_TO_SWF_FINISHED);
                } catch (ConversionException e) {
                    log.error(e.getMessage(), e);
                    listener.setError(e.getMessage());
                    // Send a bundled placeholder document instead of the failed conversion.
                    InputStream tis = ConverterServlet.class.getResourceAsStream("conversion_problem.swf");
                    try {
                        FileUtils.copy(tis, cd.file);
                    } finally {
                        IOUtils.closeQuietly(tis);
                    }
                }
            }
            if (toPdf && print) {
                cd.file = PDFUtils.markToPrint(cd.file);
            }
            // Send back converted document
            listener.setStatus(ConverterListener.STATUS_SENDING_FILE);
            WebUtils.sendFile(request, response, cd.fileName, cd.mimeType, inline, cd.file);
        } else {
            log.error("Missing Conversion Parameters");
            response.setContentType(MimeTypeConfig.MIME_TEXT);
            PrintWriter out = response.getWriter();
            out.print("Missing Conversion Parameters");
            out.flush();
            out.close();
        }
    } catch (PathNotFoundException e) {
        log.warn(e.getMessage(), e);
        listener.setError(e.getMessage());
        throw new ServletException(new OKMException(ErrorCode.get(ErrorCode.ORIGIN_OKMDownloadService, ErrorCode.CAUSE_PathNotFound), e.getMessage()));
    } catch (AccessDeniedException e) {
        log.warn(e.getMessage(), e);
        listener.setError(e.getMessage());
        throw new ServletException(new OKMException(ErrorCode.get(ErrorCode.ORIGIN_OKMDownloadService, ErrorCode.CAUSE_AccessDenied), e.getMessage()));
    } catch (RepositoryException e) {
        log.warn(e.getMessage(), e);
        listener.setError(e.getMessage());
        throw new ServletException(new OKMException(ErrorCode.get(ErrorCode.ORIGIN_OKMDownloadService, ErrorCode.CAUSE_Repository), e.getMessage()));
    } catch (IOException e) {
        log.error(e.getMessage(), e);
        listener.setError(e.getMessage());
        throw new ServletException(new OKMException(ErrorCode.get(ErrorCode.ORIGIN_OKMDownloadService, ErrorCode.CAUSE_IO), e.getMessage()));
    } catch (DatabaseException e) {
        log.error(e.getMessage(), e);
        listener.setError(e.getMessage());
        throw new ServletException(new OKMException(ErrorCode.get(ErrorCode.ORIGIN_OKMDownloadService, ErrorCode.CAUSE_Database), e.getMessage()));
    } catch (Exception e) {
        log.error(e.getMessage(), e);
        listener.setError(e.getMessage());
        throw new ServletException(new OKMException(ErrorCode.get(ErrorCode.ORIGIN_OKMDownloadService, ErrorCode.CAUSE_General), e.getMessage()));
    } finally {
        listener.setConversionFinish(true);
        // NOTE(review): only the initial temp file is removed here; if toPDF/toSWF/markToPrint
        // replace cd.file with a new file, confirm that file is cleaned up elsewhere.
        org.apache.commons.io.FileUtils.deleteQuietly(tmp);
    }
    log.debug("service: void");
}
Aggregations