Use of org.htmlparser.lexer.Lexer in project jforum2 by rafaelsteil.
Class SafeHtml, method makeSafe.
/**
* Given an input, makes it safe for HTML displaying.
* Removes any not allowed HTML tag or attribute, as well
* unwanted Javascript statements inside the tags.
* @param contents the input to analyze
* @return the modified and safe string
*/
public String makeSafe(String contents) {
if (contents == null || contents.length() == 0) {
return contents;
}
StringBuffer sb = new StringBuffer(contents.length());
try {
Lexer lexer = new Lexer(contents);
Node node;
while ((node = lexer.nextNode()) != null) {
boolean isTextNode = node instanceof TextNode;
if (isTextNode) {
// Text nodes are raw data, so we just
// strip off all possible html content
String text = node.toHtml();
if (text.indexOf('>') > -1 || text.indexOf('<') > -1) {
StringBuffer tmp = new StringBuffer(text);
ViewCommon.replaceAll(tmp, "<", "<");
ViewCommon.replaceAll(tmp, ">", ">");
ViewCommon.replaceAll(tmp, "\"", """);
node.setText(tmp.toString());
}
}
if (isTextNode || (node instanceof Tag && this.isTagWelcome(node))) {
sb.append(node.toHtml());
} else {
StringBuffer tmp = new StringBuffer(node.toHtml());
ViewCommon.replaceAll(tmp, "<", "<");
ViewCommon.replaceAll(tmp, ">", ">");
sb.append(tmp.toString());
}
}
} catch (Exception e) {
throw new ForumException("Error while parsing HTML: " + e, e);
}
return sb.toString();
}
Use of org.htmlparser.lexer.Lexer in project portfolio by buchen.
Class DestatisCPIFeed, method getConsumerPriceIndices.
/**
 * Opens a connection to the hard-coded destatis.de page, hands it to a
 * {@link Lexer} and lets {@code Visitor} extract the consumer price indices.
 *
 * @return the extracted indices; never an empty list
 * @throws IOException if no index was extracted, or wrapping a
 *             {@link ParserException} raised while processing the page
 */
@Override
public List<ConsumerPriceIndex> getConsumerPriceIndices() throws IOException {
    try {
        // NOTE(review): presumably switches off TLS certificate checks for
        // this request - confirm its scope and whether it is still needed
        disableCertificateValidation();

        String address = "https://www.destatis.de/DE/ZahlenFakten/GesamtwirtschaftUmwelt/Preise/Verbraucherpreisindizes/Tabellen_/VerbraucherpreiseKategorien.html"; //$NON-NLS-1$
        Lexer pageLexer = new Lexer(new URL(address).openConnection());
        List<ConsumerPriceIndex> indices = new Visitor().visit(pageLexer);

        if (!indices.isEmpty()) {
            return indices;
        }

        // An empty result is reported as an error, not as "no data"
        throw new IOException(Messages.MsgResponseContainsNoIndices);
    } catch (ParserException parseFailure) {
        throw new IOException(parseFailure);
    }
}
Use of org.htmlparser.lexer.Lexer in project portfolio by buchen.
Class DestatisCPIFeedTest, method testParsingHtml.
/**
 * Feeds the bundled {@code response_destatis.txt} resource through
 * {@code DestatisCPIFeed.Visitor} and checks the number of extracted
 * entries as well as the year, month and index of the entry at position 5.
 */
@Test
public void testParsingHtml() throws IOException, ParserException {
    try (Scanner input = new Scanner(getClass().getResourceAsStream("response_destatis.txt"), "UTF-8")) {
        // "\\A" anchors at the start of the input, so next() yields the
        // whole resource as a single token
        String page = input.useDelimiter("\\A").next();

        List<ConsumerPriceIndex> indices = new DestatisCPIFeed.Visitor().visit(new Lexer(page));

        // 19 years in the file with 12 entries each, plus 6 more entries
        int expectedEntries = 19 * 12 + 6;
        assertThat(indices.size(), equalTo(expectedEntries));

        ConsumerPriceIndex sample = indices.get(5);
        assertThat(sample.getYear(), equalTo(2012));
        assertThat(sample.getMonth(), equalTo(Month.JANUARY.getValue()));
        assertThat(sample.getIndex(), equalTo(11150));
    }
}
Use of org.htmlparser.lexer.Lexer in project jforum2 by rafaelsteil.
Class SafeHtml, method ensureAllAttributesAreSafe.
/**
 * Given an input, analyze each HTML tag and remove unsecure attributes from them.
 * Every tag found by the lexer is passed to
 * {@link #checkAndValidateAttributes} before being written to the result;
 * all other nodes are copied to the result as they are.
 *
 * @param contents The content to verify; may be <code>null</code> or empty
 * @return the content, secure, or <code>contents</code> itself when it is
 * <code>null</code> or empty
 * @throws ForumException if any error happens while parsing the input
 */
public String ensureAllAttributesAreSafe(String contents) {
    // Same guard as makeSafe(): nothing to analyze, and it avoids a
    // NullPointerException on contents.length() below
    if (contents == null || contents.length() == 0) {
        return contents;
    }

    StringBuffer sb = new StringBuffer(contents.length());

    try {
        Lexer lexer = new Lexer(contents);
        Node node;

        while ((node = lexer.nextNode()) != null) {
            if (node instanceof Tag) {
                // Validation works on the tag instance itself, so the
                // toHtml() call below reflects whatever it changed
                this.checkAndValidateAttributes((Tag) node, false);
            }

            sb.append(node.toHtml());
        }
    } catch (Exception e) {
        throw new ForumException("Problems while parsing HTML: " + e, e);
    }

    return sb.toString();
}
Aggregations