Use of com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController in project yyl_example by Relucent.
Class HtmlUnitTest3, method main.
/**
 * Demo: loads the Baidu home page with HtmlUnit, puts "htmlunit" into the element with id
 * {@code kw}, sends an Enter key to it, and then prints the URL and the text of the nodes
 * matching {@code .result.c-container h3 a} on whatever page the window holds afterwards.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if loading the page, waiting, or simulating the key press fails
 */
public static void main(String[] args) throws Exception {
    // WebClient is AutoCloseable: try-with-resources closes it on every exit path.
    try (WebClient webClient = new WebClient(BrowserVersion.FIREFOX_68)) {
        webClient.setCssErrorHandler(new SilentCssErrorHandler());
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());
        webClient.getOptions().setCssEnabled(true);
        webClient.getOptions().setRedirectEnabled(false);
        webClient.getOptions().setAppletEnabled(false);
        webClient.getOptions().setJavaScriptEnabled(true);
        webClient.getOptions().setPopupBlockerEnabled(true);
        webClient.getOptions().setTimeout(10000);
        // Do not throw when a script on the page fails.
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        // NOTE(review): the fixed sleeps below could probably be replaced by
        // webClient.waitForBackgroundJavaScript(...) -- confirm before changing.

        HtmlPage page = webClient.getPage("https://www.baidu.com/");
        // Keep the window: after the search the result is read from this same window.
        WebWindow webWindow = page.getEnclosingWindow();

        System.out.println("# 等待页面加载");
        // waitFor is defined elsewhere in this class; presumably it blocks until the
        // condition holds -- confirm against its implementation.
        waitFor(() -> {
            DomElement input = page.getElementById("kw");
            return input instanceof HtmlInput;
        });

        System.out.println("# 文本框输入 htmlunit ");
        HtmlInput kw = (HtmlInput) page.getElementById("kw");
        kw.setAttribute("value", "htmlunit");

        System.out.println("# 触发回车事件");
        Thread.sleep(1000);
        // Key code 13 = Enter.
        kw.type(13);

        System.out.println("# 等待页面跳转");
        Thread.sleep(1000);
        // Read the page currently enclosed by the original window.
        HtmlPage page2 = (HtmlPage) webWindow.getEnclosedPage();
        System.out.println(page2.getUrl());

        DomNodeList<DomNode> nodes = page2.querySelectorAll(".result.c-container h3 a");
        System.out.println("# 输出结果");
        for (DomNode node : nodes) {
            System.out.println(node.asText());
        }
    }
}
Use of com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController in project wechat by dllwh.
Class HtmlUnitHandler, method getWebClient.
/**
* ----------------------------------------------------- Fields start
*/
/**
* ----------------------------------------------------- Fields end
*/
/**
* ----------------------------------------------- [私有方法]
*/
/**
 * Builds a {@link WebClient} emulating the browser named in the crawl parameters.
 *
 * <p>If the parameters carry a proxy, the client is created with that proxy's host and port;
 * otherwise it is created without a proxy. The client is then configured for crawling:
 * CSS off, lenient SSL, no exceptions on script errors or failing status codes.
 *
 * @param crawlPara crawl settings; supplies the browser version ({@code getBrowse()}), the
 *                  optional proxy ({@code getProxy()}) and the JavaScript switch
 *                  ({@code isUseJs()})
 * @return the configured client
 */
private WebClient getWebClient(CrawlParameter crawlPara) {
    final BrowserVersion version = crawlPara.getBrowse();
    final ProxyBean proxySettings = crawlPara.getProxy();

    // Only host and port are passed to HtmlUnit here; no proxy credentials are configured.
    final WebClient client = (proxySettings == null)
            ? new WebClient(version)
            : new WebClient(version, proxySettings.getProxyHost(), proxySettings.getProxyPort());

    // --- option-level settings ---
    // JavaScript interpreter on/off as requested by the caller (needed for dynamic pages).
    client.getOptions().setJavaScriptEnabled(crawlPara.isUseJs());
    // CSS support off.
    client.getOptions().setCssEnabled(false);
    // NOTE(review): accepts any SSL certificate -- only appropriate for trusted crawl targets.
    client.getOptions().setUseInsecureSSL(true);
    // Do not throw when a script on the page fails.
    client.getOptions().setThrowExceptionOnScriptError(false);
    // Do not throw when the server answers with a failing HTTP status code.
    client.getOptions().setThrowExceptionOnFailingStatusCode(false);
    // Connection timeout: 10 seconds (0 would mean wait indefinitely).
    client.getOptions().setTimeout(10 * 1000);
    client.getOptions().setActiveXNative(false);

    // --- client-level settings ---
    // Let HtmlUnit resynchronize AJAX calls via NicelyResynchronizingAjaxController.
    client.setAjaxController(new NicelyResynchronizingAjaxController());
    // JavaScript timeout of 600 * 1000 ms.
    client.setJavaScriptTimeout(600 * 1000);

    return client;
}
Aggregations