Examples with CharsetMatch - com.ibm.icu.text.CharsetMatch

Example 36 with CharsetMatch

use of com.ibm.icu.text.CharsetMatch in project ultimate-cube by G3G4X5X6.

In class EncodeConversion, method readDir:

/**
 * Recursively walks {@code file} and, for every regular file found, detects its
 * charset, appends a row (file name, charset name, confidence) to
 * {@code leftModel}, records the file in {@code globalFile} and updates the
 * progress bar to the number of files collected so far.
 *
 * <p>Checked exceptions (e.g. from opening a file) are rethrown unchecked via
 * {@code @SneakyThrows}.
 *
 * @param file directory to scan; if it cannot be listed, nothing happens
 */
@SneakyThrows
private void readDir(File file) {
    File[] fs = file.listFiles();
    if (fs == null) {
        // listFiles() returns null when the path is not a directory or an I/O error occurs
        return;
    }
    for (File f : fs) {
        if (f.isDirectory()) {
            // If it is a directory, recurse into it
            readDir(f);
        }
        if (f.isFile()) {
            log.debug("File: " + f.getPath());
            CharsetMatch cm;
            // Close the stream once detection is done; only the name and the
            // confidence of the match are read afterwards
            try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(f))) {
                cm = CommonUtil.checkCharset(in);
            }
            if (cm == null) {
                // No charset could be detected: skip the file instead of failing with an NPE
                log.debug("CheckCharset: no match for " + f.getPath());
                continue;
            }
            log.debug("CheckCharset:" + cm.getName());
            leftModel.addRow(new String[] { f.getName(), cm.getName(), String.valueOf(cm.getConfidence()) });
            globalFile.add(f);
            progressBar.setValue(globalFile.size());
        }
    }
}

Also used : CharsetMatch(com.ibm.icu.text.CharsetMatch) BufferedInputStream(java.io.BufferedInputStream) File(java.io.File) FileInputStream(java.io.FileInputStream) SneakyThrows(lombok.SneakyThrows)

Example 37 with CharsetMatch

use of com.ibm.icu.text.CharsetMatch in project ultimate-cube by G3G4X5X6.

In class CommonUtil, method checkCharset:

/**
 * Detects the most likely charset of the data in {@code input}.
 *
 * <p>If reading the stream fails, the stream is closed, the error is printed
 * and detection is still attempted on whatever the detector holds
 * (best effort, as before).
 *
 * @param input stream to analyse; it is wrapped in a buffered stream when it
 *              does not support mark/reset
 * @return the match produced by {@code CharsetDetector.detect()}, which may be
 *         {@code null} when nothing matches
 */
public static CharsetMatch checkCharset(InputStream input) {
    // The detector needs a stream it can mark and reset (see the ICU4J
    // CharsetDetector.setText(InputStream) documentation), so wrap streams
    // that cannot do that.
    InputStream source = input.markSupported() ? input : new java.io.BufferedInputStream(input);
    CharsetDetector cd = new CharsetDetector();
    try {
        cd.setText(source);
    } catch (IOException e) {
        try {
            source.close();
        } catch (IOException e1) {
            // Keep the secondary failure attached to the primary one
            e.addSuppressed(e1);
        }
        e.printStackTrace();
    }
    return cd.detect();
}

Also used : CharsetMatch(com.ibm.icu.text.CharsetMatch) CharsetDetector(com.ibm.icu.text.CharsetDetector)

Example 38 with CharsetMatch

use of com.ibm.icu.text.CharsetMatch in project commafeed by Athou.

In class FeedUtils, method detectEncoding:

/**
 * Detect encoding by analyzing characters in the array.
 *
 * <p>Starts from UTF-8 and replaces it with the detector's best match when there
 * is one. A detected ISO-8859-1 is mapped to windows-1252. If the resulting
 * name is not a charset known to this JVM, UTF-8 is returned instead of
 * propagating the exception.
 *
 * @param bytes raw content to analyse
 * @return the charset to use for decoding {@code bytes}
 */
public static Charset detectEncoding(byte[] bytes) {
    String encoding = "UTF-8";
    CharsetDetector detector = new CharsetDetector();
    detector.setText(bytes);
    CharsetMatch match = detector.detect();
    if (match != null) {
        encoding = match.getName();
    }
    if ("ISO-8859-1".equalsIgnoreCase(encoding)) {
        encoding = "windows-1252";
    }
    try {
        return Charset.forName(encoding);
    } catch (IllegalArgumentException e) {
        // IllegalCharsetNameException and UnsupportedCharsetException both extend
        // IllegalArgumentException: the detector may report a name this JVM
        // does not provide, so fall back to the default.
        return Charset.forName("UTF-8");
    }
}

Also used : CharsetMatch(com.ibm.icu.text.CharsetMatch) CharsetDetector(com.ibm.icu.text.CharsetDetector)

Example 39 with CharsetMatch

use of com.ibm.icu.text.CharsetMatch in project document-management-system by openkm.

In class MailUtils, method getText:

/**
 * Get text from message.
 *
 * <p>The returned string is prefixed with a one-character marker: {@code "H"}
 * for HTML content and {@code "T"} for plain text. When no body is found the
 * result is {@code "T" + NO_BODY}.
 *
 * @param p message part to extract the text from
 * @return the marker-prefixed text of the part
 * @throws MessagingException if the part cannot be inspected
 * @throws IOException if the content cannot be read
 */
private static String getText(Part p) throws MessagingException, IOException {
    if (p.isMimeType("multipart/alternative")) {
        // prefer html over plain text
        Multipart mp = (Multipart) p.getContent();
        String text = "T" + NO_BODY;
        for (int i = 0; i < mp.getCount(); i++) {
            Part bp = mp.getBodyPart(i);
            if (bp.isMimeType("text/plain")) {
                text = getText(bp);
            } else if (bp.isMimeType("text/html")) {
                // An HTML alternative wins: stop looking at further parts
                text = getText(bp);
                break;
            } else {
                text = getText(bp);
            }
        }
        return text;
    } else if (p.isMimeType("multipart/*")) {
        Multipart mp = (Multipart) p.getContent();
        for (int i = 0; i < mp.getCount(); i++) {
            String s = getText(mp.getBodyPart(i));
            if (s != null) {
                return s;
            }
        }
    } else if (p.isMimeType("message/rfc822")) {
        Part np = (Part) p.getContent();
        String s = getText(np);
        if (s != null) {
            return s;
        }
    } else {
        String str;
        try {
            Object obj = p.getContent();
            if (obj instanceof InputStream) {
                str = readTextDetectingCharset((InputStream) obj);
            } else if (obj instanceof String) {
                str = (String) obj;
            } else {
                str = obj.toString();
            }
        } catch (UnsupportedEncodingException e) {
            // The declared charset is unusable: read the raw stream and detect the charset instead
            str = readTextDetectingCharset(p.getInputStream());
        }
        if (p.isMimeType("text/html")) {
            return "H" + str;
        } else if (p.isMimeType("text/plain")) {
            return "T" + str;
        } else if (StringUtils.containsIgnoreCase(str, "<html>")) {
            return "H" + str;
        } else {
            // Otherwise let's set as text/plain
            return "T" + str;
        }
    }
    return "T" + NO_BODY;
}

/**
 * Reads the whole stream as text using the charset detected from its content.
 * Falls back to an {@code InputStreamReader} without an explicit charset when
 * no charset match or reader is available. The stream is always closed,
 * including when reading fails.
 */
private static String readTextDetectingCharset(InputStream is) throws IOException {
    BufferedInputStream bis = new BufferedInputStream(is);
    Reader rd = null;
    try {
        CharsetDetector detector = new CharsetDetector();
        detector.setText(bis);
        CharsetMatch cm = detector.detect();
        if (cm != null) {
            rd = cm.getReader();
        }
        if (rd == null) {
            rd = new InputStreamReader(bis);
        }
        return IOUtils.toString(rd);
    } finally {
        IOUtils.closeQuietly(rd);
        IOUtils.closeQuietly(bis);
        IOUtils.closeQuietly(is);
    }
}

Also used : CharsetMatch(com.ibm.icu.text.CharsetMatch) CharsetDetector(com.ibm.icu.text.CharsetDetector) PortableRemoteObject(javax.rmi.PortableRemoteObject)

Example 40 with CharsetMatch

use of com.ibm.icu.text.CharsetMatch in project document-management-system by openkm.

In class ConverterServlet, method service:

/**
 * Serves a document identified by the {@code uuid} request parameter,
 * optionally converting it to PDF ({@code toPdf}) or SWF ({@code toSwf}) first.
 *
 * <p>The content is copied to a temporary file (text documents are read through
 * a charset-detecting reader), converted if requested, and sent back with
 * {@code WebUtils.sendFile}. Progress and errors are reported through a
 * {@code ConverterListener} stored in the session. Temporary files and the
 * content stream are released in the {@code finally} block.
 *
 * @throws ServletException wrapping an {@code OKMException} for any failure
 * @throws IOException as declared by the servlet contract
 */
protected void service(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    log.debug("service({}, {})", request, response);
    request.setCharacterEncoding("UTF-8");
    String uuid = WebUtils.getString(request, "uuid");
    boolean inline = WebUtils.getBoolean(request, "inline");
    boolean print = WebUtils.getBoolean(request, "print");
    boolean toPdf = WebUtils.getBoolean(request, "toPdf");
    boolean toSwf = WebUtils.getBoolean(request, "toSwf");
    CharsetDetector detector = new CharsetDetector();
    File tmp = null;
    File tmpDir = null;
    InputStream is = null;
    ConverterListener listener = new ConverterListener(ConverterListener.STATUS_LOADING);
    updateSessionManager(request);
    try {
        // Now a document can be located by UUID
        if (!uuid.equals("")) {
            // Saving listener to session
            request.getSession().setAttribute(FILE_CONVERTER_STATUS, listener);
            String path = OKMRepository.getInstance().getNodePath(null, uuid);
            Document doc = OKMDocument.getInstance().getProperties(null, path);
            String fileName = PathUtils.getName(doc.getPath());
            // Optional: append version to download
            if (Config.VERSION_APPEND_DOWNLOAD) {
                String versionToAppend = OKMDocument.getInstance().getProperties(null, uuid).getActualVersion().getName();
                // Split on the last dot only, so "a.b.txt" becomes ["a.b", "txt"]
                String[] nameParts = fileName.split("\\.(?=[^\\.]+$)");
                if (nameParts.length > 1) {
                    fileName = nameParts[0] + " rev " + versionToAppend + "." + nameParts[1];
                } else {
                    // No extension: the split yields a single element
                    fileName = nameParts[0] + " rev " + versionToAppend;
                }
            }
            // Save content to temporary file
            tmp = File.createTempFile("okm", "." + FileUtils.getFileExtension(fileName));
            // If is used to preview, it should workaround the DOWNLOAD extended permission.
            is = new DbDocumentModule().getContent(null, path, false, !toSwf);
            // Text files may need encoding conversion
            if (doc.getMimeType().startsWith("text/")) {
                BufferedInputStream bis = new BufferedInputStream(is);
                detector.setText(bis);
                CharsetMatch cm = detector.detect();
                Reader rd = (cm != null) ? cm.getReader() : null;
                try {
                    if (rd != null) {
                        FileUtils.copy(rd, tmp);
                    } else {
                        // No charset detected: copy the bytes unchanged.
                        // NOTE(review): relies on setText() leaving the stream
                        // reset to its start - confirm against the ICU4J docs.
                        FileUtils.copy(bis, tmp);
                    }
                } finally {
                    IOUtils.closeQuietly(rd);
                    IOUtils.closeQuietly(bis);
                    IOUtils.closeQuietly(is);
                }
            } else {
                FileUtils.copy(is, tmp);
                IOUtils.closeQuietly(is);
            }
            // Prepare conversion
            ConversionData cd = new ConversionData();
            cd.uuid = uuid;
            cd.fileName = fileName;
            cd.mimeType = doc.getMimeType();
            cd.file = tmp;
            if (toPdf && !cd.mimeType.equals(MimeTypeConfig.MIME_PDF)) {
                try {
                    listener.setStatus(ConverterListener.STATUS_CONVERTING_TO_PDF);
                    toPDF(cd);
                    listener.setStatus(ConverterListener.STATUS_CONVERTING_TO_PDF_FINISHED);
                } catch (ConversionException e) {
                    log.error(e.getMessage(), e);
                    listener.setError(e.getMessage());
                    // Serve a placeholder document instead of the failed conversion
                    InputStream tis = ConverterServlet.class.getResourceAsStream("conversion_problem.pdf");
                    try {
                        FileUtils.copy(tis, cd.file);
                    } finally {
                        IOUtils.closeQuietly(tis);
                    }
                }
            } else if (toSwf && !cd.mimeType.equals(MimeTypeConfig.MIME_SWF)) {
                try {
                    listener.setStatus(ConverterListener.STATUS_CONVERTING_TO_SWF);
                    toSWF(cd);
                    listener.setStatus(ConverterListener.STATUS_CONVERTING_TO_SWF_FINISHED);
                } catch (ConversionException e) {
                    log.error(e.getMessage(), e);
                    listener.setError(e.getMessage());
                    // Serve a placeholder document instead of the failed conversion
                    InputStream tis = ConverterServlet.class.getResourceAsStream("conversion_problem.swf");
                    try {
                        FileUtils.copy(tis, cd.file);
                    } finally {
                        IOUtils.closeQuietly(tis);
                    }
                }
            }
            if (toPdf && print) {
                cd.file = PDFUtils.markToPrint(cd.file);
            }
            // Send back converted document
            listener.setStatus(ConverterListener.STATUS_SENDING_FILE);
            WebUtils.sendFile(request, response, cd.fileName, cd.mimeType, inline, cd.file);
        } else {
            log.error("Missing Conversion Parameters");
            response.setContentType(MimeTypeConfig.MIME_TEXT);
            PrintWriter out = response.getWriter();
            out.print("Missing Conversion Parameters");
            out.flush();
            out.close();
        }
    } catch (PathNotFoundException e) {
        log.warn(e.getMessage(), e);
        listener.setError(e.getMessage());
        throw new ServletException(new OKMException(ErrorCode.get(ErrorCode.ORIGIN_OKMDownloadService, ErrorCode.CAUSE_PathNotFound), e.getMessage()));
    } catch (AccessDeniedException e) {
        log.warn(e.getMessage(), e);
        listener.setError(e.getMessage());
        throw new ServletException(new OKMException(ErrorCode.get(ErrorCode.ORIGIN_OKMDownloadService, ErrorCode.CAUSE_AccessDenied), e.getMessage()));
    } catch (RepositoryException e) {
        log.warn(e.getMessage(), e);
        listener.setError(e.getMessage());
        throw new ServletException(new OKMException(ErrorCode.get(ErrorCode.ORIGIN_OKMDownloadService, ErrorCode.CAUSE_Repository), e.getMessage()));
    } catch (IOException e) {
        log.error(e.getMessage(), e);
        listener.setError(e.getMessage());
        throw new ServletException(new OKMException(ErrorCode.get(ErrorCode.ORIGIN_OKMDownloadService, ErrorCode.CAUSE_IO), e.getMessage()));
    } catch (DatabaseException e) {
        log.error(e.getMessage(), e);
        listener.setError(e.getMessage());
        throw new ServletException(new OKMException(ErrorCode.get(ErrorCode.ORIGIN_OKMDownloadService, ErrorCode.CAUSE_Database), e.getMessage()));
    } catch (Exception e) {
        log.error(e.getMessage(), e);
        listener.setError(e.getMessage());
        throw new ServletException(new OKMException(ErrorCode.get(ErrorCode.ORIGIN_OKMDownloadService, ErrorCode.CAUSE_General), e.getMessage()));
    } finally {
        listener.setConversionFinish(true);
        // Also release the content stream when an exception interrupted the copy
        IOUtils.closeQuietly(is);
        org.apache.commons.io.FileUtils.deleteQuietly(tmp);
        org.apache.commons.io.FileUtils.deleteQuietly(tmpDir);
    }
    log.debug("service: void");
}

Also used : CharsetDetector(com.ibm.icu.text.CharsetDetector) OKMDocument(com.openkm.api.OKMDocument) Document(com.openkm.bean.Document) ServletException(javax.servlet.ServletException) OKMException(com.openkm.frontend.client.OKMException) AutomationException(com.openkm.automation.AutomationException) NotImplementedException(org.apache.commons.lang.NotImplementedException) ServletException(javax.servlet.ServletException) CharsetMatch(com.ibm.icu.text.CharsetMatch) OKMException(com.openkm.frontend.client.OKMException) DbDocumentModule(com.openkm.module.db.DbDocumentModule)

Aggregations

CharsetMatch (com.ibm.icu.text.CharsetMatch)43 CharsetDetector (com.ibm.icu.text.CharsetDetector)28 IOException (java.io.IOException)12 BufferedInputStream (java.io.BufferedInputStream)8 InputStream (java.io.InputStream)5 File (java.io.File)4 FileInputStream (java.io.FileInputStream)4 IllegalCharsetNameException (java.nio.charset.IllegalCharsetNameException)3 BufferedReader (java.io.BufferedReader)2 Charset (java.nio.charset.Charset)2 Nullable (javax.annotation.Nullable)2 ServletException (javax.servlet.ServletException)2 SneakyThrows (lombok.SneakyThrows)2 DocumentFile (androidx.documentfile.provider.DocumentFile)1 Getter (burp.Getter)1 IExtensionHelpers (burp.IExtensionHelpers)1 OKMDocument (com.openkm.api.OKMDocument)1 AutomationException (com.openkm.automation.AutomationException)1 Document (com.openkm.bean.Document)1 OKMException (com.openkm.frontend.client.OKMException)1