Use of org.cyberneko.html.parsers.DOMFragmentParser in the project translationstudio8 by heartsome.
Taken from the class MessageParser, method htmlToText.
/**
 * Filters the tags out of HTML-formatted text.
 * <p>
 * The input is parsed as an HTML fragment and the resulting tree is passed to
 * {@code processNode}; the contents of the {@code textBuffer} field afterwards are
 * returned. The {@code textBuffer} field is replaced with a fresh buffer on every
 * successful parse.
 *
 * @param html
 *            HTML-formatted string; may be {@code null}
 * @return the text left after the HTML tags are filtered out; the empty string
 *         {@code ""} if {@code html} is {@code null} or cannot be parsed
 */
private String htmlToText(String html) {
    if (html == null) {
        return "";
    }
    DOMFragmentParser parser = new DOMFragmentParser();
    CoreDocumentImpl codeDoc = new CoreDocumentImpl();
    DocumentFragment doc = codeDoc.createDocumentFragment();
    try {
        // Encode with the same charset that is declared to the parser below. Using the
        // platform default here while announcing textCharset would garble non-ASCII
        // text whenever the two differ. With no charset configured, keep the old
        // platform-default behaviour.
        byte[] htmlBytes = (textCharset == null) ? html.getBytes() : html.getBytes(textCharset);
        InputSource inSource = new InputSource(new ByteArrayInputStream(htmlBytes));
        inSource.setEncoding(textCharset);
        parser.parse(inSource, doc);
    } catch (Exception e) {
        // Deliberate best effort: input that cannot be encoded or parsed yields no text
        // instead of failing the caller.
        return "";
    }
    // processNode presumably appends the text content of the tree to textBuffer
    // (defined elsewhere in this class) -- confirm against its implementation.
    textBuffer = new StringBuffer();
    processNode(doc);
    return textBuffer.toString();
}
Aggregations