use of com.ibm.icu.text.CharsetMatch in project knife by bit4woo.
the class HttpMessageCharSet method getCharset.
/**
 * Determines the character set of a raw HTTP request or response.
 *
 * <p>Resolution order:
 * <ol>
 *   <li>the {@code charset=} parameter of the {@code Content-Type} header;</li>
 *   <li>ICU4J detection over the raw message bytes;</li>
 *   <li>{@code ISO-8859-1}, used here as the default encoding for HTTP POST.</li>
 * </ol>
 * The chosen name is lower-cased and trimmed. If it contains one of a small set of common
 * charset names it is collapsed to that name, and {@code utf8} is reported as {@code utf-8}.
 *
 * @param requestOrResponse raw bytes of the HTTP message; a message starting with
 *     {@code HTTP/} is treated as a response, anything else as a request
 * @return the lower-case charset name, never {@code null}
 */
public static String getCharset(byte[] requestOrResponse) {
    IExtensionHelpers helpers = BurpExtender.getCallbacks().getHelpers();
    Getter getter = new Getter(helpers);
    // Only a response begins with the protocol version, e.g. "HTTP/1.1 200 OK".
    boolean isRequest = !new String(requestOrResponse).startsWith("HTTP/");
    String contentType = getter.getHeaderValueOf(isRequest, requestOrResponse, "Content-Type");

    String tmpcharSet = null;

    // 1. Try the charset parameter of the Content-Type header.
    if (contentType != null) {
        // Limit -1 keeps a trailing empty token, so "...; charset=" yields "" instead of
        // a one-element array (which used to cause ArrayIndexOutOfBoundsException).
        String[] parts = contentType.toLowerCase(java.util.Locale.ROOT).split("charset=", -1);
        if (parts.length > 1) {
            String value = parts[1];
            // The parameter value ends at the next ';' and may be quoted.
            int semicolon = value.indexOf(';');
            if (semicolon >= 0) {
                value = value.substring(0, semicolon);
            }
            value = value.replace("\"", "").replace("'", "").trim();
            if (!value.isEmpty()) {
                tmpcharSet = value;
            }
        }
    }

    // 2. Fall back to ICU4J detection on the raw bytes.
    if (tmpcharSet == null) {
        CharsetDetector detector = new CharsetDetector();
        detector.setText(requestOrResponse);
        CharsetMatch cm = detector.detect();
        if (cm != null) {
            tmpcharSet = cm.getName();
        }
    }

    // 3. Nothing found: use the default encoding for HTTP POST.
    if (tmpcharSet == null) {
        tmpcharSet = "ISO-8859-1";
    }

    // Locale.ROOT keeps 'I' -> 'i' regardless of the platform locale (e.g. Turkish).
    tmpcharSet = tmpcharSet.toLowerCase(java.util.Locale.ROOT).trim();

    // Common encodings: ASCII, ANSI, GBK, GB2312, UTF-8, GB18030, UNICODE.
    List<String> commonCharSet =
            Arrays.asList("ascii", "ansi", "gbk", "gb2312", "utf-8", "gb18030", "unicode", "utf8");
    for (String item : commonCharSet) {
        if (tmpcharSet.contains(item)) {
            // No listed name contains another, so the first hit is also the only one.
            tmpcharSet = item;
            break;
        }
    }
    if (tmpcharSet.equals("utf8")) {
        tmpcharSet = "utf-8";
    }
    return tmpcharSet;
}
use of com.ibm.icu.text.CharsetMatch in project knife by bit4woo.
the class CharSetHelper method detectCharset.
/**
 * Detects the character set of a raw HTTP request or response.
 *
 * <p>Resolution order:
 * <ol>
 *   <li>the {@code charset=} parameter of the {@code Content-Type} header (returned
 *       lower-cased, without quotes or trailing parameters);</li>
 *   <li>the name reported by ICU4J detection over the raw bytes;</li>
 *   <li>{@code ISO-8859-1}, used here as the default encoding for HTTP POST.</li>
 * </ol>
 * Per the original author's note, both {@code utf8} and {@code utf-8} are acceptable results.
 *
 * @param requestOrResponse raw bytes of the HTTP message; a message starting with
 *     {@code HTTP/} is treated as a response, anything else as a request
 * @return the charset name, never {@code null}
 */
public static String detectCharset(byte[] requestOrResponse) {
    IExtensionHelpers helpers = BurpExtender.getCallbacks().getHelpers();
    Getter getter = new Getter(helpers);
    // Only a response begins with the protocol version, e.g. "HTTP/1.1 200 OK".
    boolean isRequest = !new String(requestOrResponse).startsWith("HTTP/");
    String contentType = getter.getHeaderValueOf(isRequest, requestOrResponse, "Content-Type");

    // 1. Try the charset parameter of the Content-Type header.
    if (contentType != null) {
        // Limit -1 keeps a trailing empty token, so "...; charset=" yields "" instead of
        // a one-element array (which used to cause ArrayIndexOutOfBoundsException).
        String[] parts = contentType.toLowerCase(java.util.Locale.ROOT).split("charset=", -1);
        if (parts.length > 1) {
            String tmpcharSet = parts[1];
            // The parameter value ends at the next ';' and may be quoted.
            int semicolon = tmpcharSet.indexOf(';');
            if (semicolon >= 0) {
                tmpcharSet = tmpcharSet.substring(0, semicolon);
            }
            tmpcharSet = tmpcharSet.replace("\"", "").replace("'", "").trim();
            if (!tmpcharSet.isEmpty()) {
                return tmpcharSet;
            }
        }
    }

    // 2. Fall back to ICU4J detection on the raw bytes.
    CharsetDetector detector = new CharsetDetector();
    detector.setText(requestOrResponse);
    CharsetMatch cm = detector.detect();
    if (cm != null) {
        return cm.getName();
    }

    // 3. Nothing found: use the default encoding for HTTP POST.
    return "ISO-8859-1";
}
use of com.ibm.icu.text.CharsetMatch in project tablesaw by jtablesaw.
the class Source method getCharSet.
/**
 * Returns the likely charset for the given byte[], if it can be determined. A confidence score
 * is calculated by ICU4J's detector. The system default charset is returned instead when no
 * match is found, when the score is less than 60 (on a 0 to 100 interval), or when the detected
 * charset name is not one this JVM can instantiate.
 *
 * @param buffer The byte array to evaluate
 * @return The likely charset, or the system default charset
 */
private static Charset getCharSet(byte[] buffer) {
    CharsetDetector detector = new CharsetDetector();
    detector.setText(buffer);
    CharsetMatch match = detector.detect();
    if (match == null || match.getConfidence() < 60) {
        return Charset.defaultCharset();
    }
    try {
        return Charset.forName(match.getName());
    } catch (IllegalArgumentException ignored) {
        // Charset.forName signals an illegal or unsupported name with subclasses of
        // IllegalArgumentException. The detector's name is only a guess, so honour the
        // documented contract and fall back to the default instead of failing the read.
        return Charset.defaultCharset();
    }
}
use of com.ibm.icu.text.CharsetMatch in project htmlparser by validator.
the class IcuDetectorSniffer method sniff.
/**
 * Guesses the document encoding by running ICU4J's {@code CharsetDetector} over this object
 * (passed to {@code setText}; presumably this class is an input stream -- confirm against the
 * class declaration).
 *
 * <p>The detected name is mapped to an {@code Encoding}; if that encoding reports a different
 * "actual HTML encoding", the latter is used. The result is only returned when it is not
 * {@code Encoding.WINDOWS1252} and is an ASCII superset.
 *
 * <p>This is a best-effort sniff: any exception raised during detection or lookup is turned
 * into a {@code null} result, so as written no exception propagates to the caller.
 *
 * @return the sniffed encoding, or {@code null} when nothing usable was detected
 * @throws IOException declared by the signature; not thrown by this implementation
 */
public Encoding sniff() throws IOException {
    try {
        CharsetDetector detector = new CharsetDetector();
        detector.setText(this);
        CharsetMatch match = detector.detect();
        if (match == null) {
            // No candidate at all: say so explicitly rather than via a caught NPE.
            return null;
        }
        Encoding enc = Encoding.forName(match.getName());
        Encoding actual = enc.getActualHtmlEncoding();
        if (actual != null) {
            enc = actual;
        }
        if (enc != Encoding.WINDOWS1252 && enc.isAsciiSuperset()) {
            return enc;
        }
        return null;
    } catch (Exception ignored) {
        // Deliberately best-effort: an unknown charset name or a read failure means
        // "could not sniff", not an error for the caller.
        return null;
    }
}
use of com.ibm.icu.text.CharsetMatch in project ultimate-cube by G3G4X5X6.
the class EncodeConversion method addToolBarActionListener.
/**
 * Registers the action listeners for the toolbar buttons.
 *
 * <ul>
 *   <li>import: lets the user pick files/directories, detects each file's charset in a
 *       background task and lists it in the left table;</li>
 *   <li>export: lets the user pick the output directory;</li>
 *   <li>conversion: copies every imported file into the output directory in a background
 *       task and lists the detected charset of each copy in the right table;</li>
 *   <li>clean: clears the imported file list, both tables and the progress bar.</li>
 * </ul>
 */
private void addToolBarActionListener() {
    importBtn.addActionListener(new AbstractAction() {
        @SneakyThrows
        @Override
        public void actionPerformed(ActionEvent e) {
            log.debug("导入待转换文件");
            // Create a default file chooser.
            JFileChooser fileChooser = new JFileChooser();
            // Allow multiple selection.
            fileChooser.setMultiSelectionEnabled(true);
            // Both files and directories may be selected.
            fileChooser.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
            // Show the dialog (blocks until the dialog is closed).
            int result = fileChooser.showOpenDialog(App.mainFrame);
            if (result == JFileChooser.APPROVE_OPTION) {
                File[] files = fileChooser.getSelectedFiles();
                // Show an indeterminate progress bar while the selection is scanned.
                progressBar.setVisible(true);
                progressBar.setStringPainted(false);
                progressBar.setIndeterminate(true);
                // Create the background task.
                SwingWorker<String, Object> task = new SwingWorker<String, Object>() {
                    @Override
                    protected String doInBackground() throws Exception {
                        // Runs on a SwingWorker background thread.
                        // NOTE(review): the table model and progress bar are updated from this
                        // thread rather than the event dispatch thread; consider moving those
                        // updates to publish()/process().
                        for (File file : files) {
                            if (file.isDirectory()) {
                                log.debug("Directory: " + file.getAbsolutePath());
                                readDir(file);
                            } else {
                                log.debug("File: " + file.getPath());
                                CharsetMatch cm;
                                // Close the stream as soon as detection is done so file
                                // handles are not leaked for every imported file.
                                try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(file))) {
                                    cm = CommonUtil.checkCharset(in);
                                }
                                // NOTE(review): assumes checkCharset never returns null -- confirm.
                                log.debug("CheckCharset:" + cm.getName());
                                leftModel.addRow(new String[] { file.getName(), cm.getName(), String.valueOf(cm.getConfidence()) });
                                globalFile.add(file);
                                progressBar.setValue(globalFile.size());
                            }
                        }
                        return "Hello";
                    }

                    @Override
                    protected void done() {
                        // Called back on the event dispatch thread once the task has finished.
                        progressBar.setIndeterminate(false);
                        progressBar.setMaximum(globalFile.size());
                        progressBar.setValue(globalFile.size());
                    }
                };
                // Start the task.
                task.execute();
            }
        }
    });
    exportBtn.addActionListener(new AbstractAction() {
        @Override
        public void actionPerformed(ActionEvent e) {
            // Create a default file chooser.
            JFileChooser fileChooser = new JFileChooser();
            // Single selection only.
            fileChooser.setMultiSelectionEnabled(false);
            // Only directories may be selected.
            fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
            // Show the dialog (blocks until the dialog is closed).
            int result = fileChooser.showOpenDialog(App.mainFrame);
            if (result == JFileChooser.APPROVE_OPTION) {
                outputDir = fileChooser.getSelectedFile();
            }
        }
    });
    conversionBtn.addActionListener(new AbstractAction() {
        @SneakyThrows
        @Override
        public void actionPerformed(ActionEvent e) {
            log.debug("开始转换文件编码");
            if (globalFile.size() > 0) {
                // Snapshot the output directory: the worker must not observe a later change,
                // and without a directory the worker would die with a NullPointerException
                // that SwingWorker swallows silently.
                final File targetDir = outputDir;
                if (targetDir == null) {
                    log.debug("No output directory selected; conversion skipped");
                    return;
                }
                // TODO consider caching between repeated conversions.
                rightModel.setRowCount(0);
                // Set up a determinate progress bar, one step per file.
                progressPane.add(progressBar);
                progressBar.setMaximum(globalFile.size());
                progressBar.setValue(0);
                progressBar.setVisible(true);
                progressBar.setStringPainted(true);
                // Create the background task.
                SwingWorker<String, Object> task = new SwingWorker<String, Object>() {
                    @Override
                    protected String doInBackground() throws Exception {
                        // Runs on a SwingWorker background thread.
                        // NOTE(review): the table model and progress bar are updated from this
                        // thread rather than the event dispatch thread; consider moving those
                        // updates to publish()/process().
                        Iterator<File> iterator = globalFile.iterator();
                        int i = 1;
                        while (iterator.hasNext()) {
                            File file = iterator.next();
                            log.debug(file.getPath());
                            CharsetMatch cm;
                            try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(file))) {
                                cm = CommonUtil.checkCharset(in);
                            }
                            log.debug("CheckCharset:" + cm.getName());
                            // TODO the actual charset conversion is not implemented yet: the file
                            // is only copied. The intended follow-up is to re-encode the copy from
                            // cm.getName() to the charset selected in dstComboBox (e.g. via
                            // FileUtil.convertCharset).
                            File copy = new File(targetDir.getAbsolutePath() + "/" + file.getName());
                            Files.copy(file, copy);
                            CharsetMatch tmp;
                            // Re-detect on the copy so the right table shows the resulting charset.
                            try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(copy))) {
                                tmp = CommonUtil.checkCharset(in);
                            }
                            rightModel.addRow(new String[] { file.getName(), tmp.getName(), String.valueOf(tmp.getConfidence()) });
                            progressBar.setValue(i);
                            i++;
                        }
                        return "Hello";
                    }

                    @Override
                    protected void done() {
                        // Called back on the event dispatch thread once the task has finished.
                        log.debug("文件编码转换完成");
                    }
                };
                // Start the task.
                task.execute();
            }
        }
    });
    cleanBtn.addActionListener(new AbstractAction() {
        @Override
        public void actionPerformed(ActionEvent e) {
            log.debug("清除缓存");
            globalFile.clear();
            leftModel.setRowCount(0);
            rightModel.setRowCount(0);
            progressBar.setValue(0);
            progressBar.setVisible(false);
        }
    });
}
Aggregations