use of org.jsoup.nodes.Document in project jsoup by jhy.
the class UrlConnectTest method followsRelativeDotRedirect.
/**
 * Verifies that a redirect to the relative location "./ok.html" is resolved against the
 * directory of the requested URL and then followed.
 */
@Test
public void followsRelativeDotRedirect() throws IOException {
    // the endpoint redirects to "./ok.html", which should resolve to
    // http://direct.infohound.net/tools/ok.html
    Connection con = Jsoup.connect("http://direct.infohound.net/tools/302-rel-dot.pl");
    // the request is issued as a POST
    Document doc = con.post();
    assertTrue(doc.title().contains("OK"));
    // expected value first, actual second, so a failure message reads the right way round
    assertEquals("http://direct.infohound.net/tools/ok.html", doc.location());
}
use of org.jsoup.nodes.Document in project jsoup by jhy.
the class UrlConnectTest method multiCookieSet.
/**
 * Checks that the cookies reported on the response of the 302-cookie endpoint contain both
 * expected entries, and that passing that cookie map to a new request sends them all.
 */
@Test
public void multiCookieSet() throws IOException {
    Connection.Response response =
            Jsoup.connect("http://direct.infohound.net/tools/302-cookie.pl").execute();

    // both cookies set along the redirect must be visible on the response
    Map<String, String> cookieJar = response.cookies();
    assertEquals("asdfg123", cookieJar.get("token"));
    assertEquals("jhy", cookieJar.get("uid"));

    // replay the complete cookie map against the echo URL and inspect the echoed header
    Document echoed = Jsoup.connect(echoURL).cookies(cookieJar).get();
    String sentCookieHeader = ihVal("HTTP_COOKIE", echoed);
    assertEquals("token=asdfg123; uid=jhy", sentCookieHeader);
}
use of org.jsoup.nodes.Document in project webmagic by code4craft.
the class CharsetUtils method detectCharset.
/**
 * Works out which charset a fetched page is encoded in.
 * <p>
 * The charset extracted from the HTTP {@code Content-Type} header value is used when present.
 * Otherwise the bytes are decoded with the platform default charset and the {@code <meta>}
 * elements are scanned for either an HTML 4.01 style {@code content="...; charset=X"}
 * declaration or an HTML5 style {@code charset="X"} attribute; the first one found wins.
 *
 * @param contentType  the HTTP Content-Type header value, handed to {@code UrlUtils.getCharset}
 * @param contentBytes the raw bytes of the page
 * @return the detected charset name; when no meta declaration is found, whatever the header
 *         lookup produced (which may be null or blank)
 * @throws IOException declared by the signature and kept for caller compatibility
 */
public static String detectCharset(String contentType, byte[] contentBytes) throws IOException {
    // 1. encoding in http header Content-Type
    String charset = UrlUtils.getCharset(contentType);
    // Only short-circuit when a charset was actually extracted; a header such as "text/html"
    // carries no charset and must fall through to the meta scan below.
    if (StringUtils.isNotBlank(contentType) && StringUtils.isNotBlank(charset)) {
        logger.debug("Auto get charset: {}", charset);
        return charset;
    }
    // use default charset to decode first time
    Charset defaultCharset = Charset.defaultCharset();
    String content = new String(contentBytes, defaultCharset);
    // 2. charset in meta
    if (StringUtils.isNotEmpty(content)) {
        Document document = Jsoup.parse(content);
        for (Element meta : document.select("meta")) {
            String metaContent = meta.attr("content");
            String metaCharset = meta.attr("charset");
            int charsetAt = metaContent.indexOf("charset");
            if (charsetAt != -1) {
                // 2.1 html4.01 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
                String[] parts = metaContent.substring(charsetAt).split("=");
                // a "charset" token with no "=value" is malformed: ignore it and keep scanning
                // instead of failing with ArrayIndexOutOfBoundsException
                if (parts.length > 1) {
                    charset = parts[1];
                    break;
                }
            } else if (StringUtils.isNotEmpty(metaCharset)) {
                // 2.2 html5 <meta charset="UTF-8" />
                charset = metaCharset;
                break;
            }
        }
    }
    logger.debug("Auto get charset: {}", charset);
    // 3. todo use tools as cpdetector for content decode
    return charset;
}
use of org.jsoup.nodes.Document in project cucumber-jvm by cucumber.
the class HTMLFormatterTest method writes_index_html.
@Test
public void writes_index_html() throws IOException {
URL indexHtml = new URL(outputDir, "index.html");
Document document = Jsoup.parse(new File(indexHtml.getFile()), "UTF-8");
Element reportElement = document.body().getElementsByClass("cucumber-report").first();
assertEquals("", reportElement.text());
}
use of org.jsoup.nodes.Document in project opennms by OpenNMS.
the class HttpCollectionHandler method fillCollectionSet.
/**
 * Fills the collection set from the document obtained for the given URL.
 * <p>
 * For every XML group of the source, the group's resource selector is applied to the
 * document, a resource name and collection resource are resolved from the matched elements,
 * one attribute is added to the builder per XML object of the group, and finally
 * {@code processXmlResource} is invoked for that resource.
 *
 * @param urlString the URL string passed to {@code getJsoupDocument}
 * @param request   the request passed to {@code getJsoupDocument}
 * @param agent     the collection agent used to obtain the collection resource
 * @param builder   the builder that receives the collected attributes
 * @param source    the source whose XML groups drive the collection
 * @throws Exception propagated from any of the helpers called here
 */
@Override
protected void fillCollectionSet(String urlString, Request request, CollectionAgent agent, CollectionSetBuilder builder, XmlSource source) throws Exception {
    Document doc = getJsoupDocument(urlString, request);
    for (XmlGroup group : source.getXmlGroups()) {
        LOG.debug("fillCollectionSet: getting resources for XML group {} using selector {}", group.getName(), group.getResourceXpath());
        Date timestamp = getTimeStamp(doc, group);
        Elements elements = doc.select(group.getResourceXpath());
        LOG.debug("fillCollectionSet: {} => {}", group.getResourceXpath(), elements);
        String resourceName = getResourceName(elements, group);
        LOG.debug("fillCollectionSet: processing XML resource {}", resourceName);
        final Resource collectionResource = getCollectionResource(agent, resourceName, group.getResourceType(), timestamp);
        LOG.debug("fillCollectionSet: processing resource {}", collectionResource);
        for (XmlObject object : group.getXmlObjects()) {
            Elements el = elements.select(object.getXpath());
            if (el == null) {
                LOG.info("No value found for object named '{}'. Skipping.", object.getName());
                // actually skip, as the message says; falling through would dereference null below
                continue;
            }
            builder.withAttribute(collectionResource, group.getName(), object.getName(), el.html(), object.getDataType());
        }
        processXmlResource(builder, collectionResource, resourceName, group.getName());
    }
}
Aggregations