Use of org.htmlparser.util.ParserException in the project lucida by claritylab.
Class HTMLConverter, method html2text.
/**
 * Converts an HTML document into plain text.
 *
 * <p>Links are left out of the result, non-breaking spaces are replaced and
 * sequences of whitespace are collapsed.
 *
 * @param html HTML document, may be <code>null</code>
 * @return plain text (the empty string if no text was extracted), or
 *         <code>null</code> if <code>html</code> is <code>null</code> or the
 *         conversion failed
 */
public static synchronized String html2text(String html) {
    // a missing document cannot be converted: report it the same way as a
    // failed conversion instead of handing null to the parser
    if (html == null) {
        return null;
    }

    // text extractor: no links, replace non-breaking spaces, collapse
    // sequences of whitespace
    StringBean sb = new StringBean();
    sb.setLinks(false);
    sb.setReplaceNonBreakingSpaces(true);
    sb.setCollapse(true);

    Parser parser = new Parser();
    try {
        parser.setInputHTML(html);
        parser.visitAllNodesWith(sb);
    } catch (ParserException e) {
        // a parse failure is signalled to the caller as null (see @return)
        return null;
    }

    // no content: normalize a null result to the empty string
    String docText = sb.getStrings();
    return (docText == null) ? "" : docText;
}
Aggregations