Usage of org.jsoup.Connection.Response in the project crawler-jsoup-maven by bluetata — class AccResHead2ParseHTML, method main.
/**
 * Demonstrates detecting a page's charset from the HTTP response headers
 * before parsing the page itself.
 *
 * Flow: (1) issue a header-only probe request via jsoup (body capped at 100
 * bytes by {@code maxBodySize(100)} since only {@code response.charset()} is
 * used), (2) re-open the URL and parse the full content with the detected
 * charset, falling back to UTF-8 when the server does not declare one.
 *
 * @param args unused
 * @throws Exception on any network or parse failure (demo code)
 */
public static void main(String[] args) throws Exception {
    String _url = "https://rate.taobao.com/feedRateList.htm?auctionNumId=552463737787&userNumId=1703495879¤tPageNum=1&pageSize=20&rateType=&orderType=sort_weight&attribute=&sku=&hasSku=false&folded=0&ua=098%23E1hvqvvRvPpvUpCkvvvvvjiPPLcyljlbRsqwsjnEPmPpsjt8RLMvtjiER2q9ljnvRsyCvvBvpvvv9phv2n1w3xqBzYswM20S79wCvvNwzHi4z0CNiQhvChCvCCptvpvhphvvvvyCvh1vVxOvITlz8eQEfaAK53n5WDKt5BwsWD6rfuVHR4hSoAZnD704deDHEcqhaXTAVAIanixreTt%2BCclWQRp4e0Q4b64B9CkaU6UsxI2hKphv8hCvvvvvvhCvphvZJ9vvpu1vpC9CvvC216CvHjIvvhPjphvZK9vvpYJivpvUphvh3cUYvR7EvpvVp6WUCEIXvphvCyCCvvvvvvGCvvpvvvvv3QhvChCCvvvtvpvhphvvv86CvvDvppWpJpCv7OQ%3D&_ksTS=1519956772500_2207&callback=jsonp_tbcrate_reviews_list";
    String defaultCharset = "UTF-8";
    // Probe request: only response headers matter here, so the body is capped
    // at 100 bytes. Headers mimic a Chrome 55 browser to avoid bot blocking.
    Connection connection = Jsoup.connect(_url).userAgent(// User-Agent of Chrome 55
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36").referrer("http://blog.csdn.net/").header("Content-Type", "application/json; charset=GBK").header("Accept", "text/plain, */*; q=0.01").header("Accept-Encoding", "gzip,deflate,sdch").header("Accept-Language", "es-ES,es;q=0.8").header("Connection", "keep-alive").header("X-Requested-With", "XMLHttpRequest").maxBodySize(100).timeout(1000 * 10).method(Connection.Method.POST);
    Response response = connection.execute();
    // Charset as declared by the server's response headers; may be null/empty
    // when the server sends no Content-Type charset.
    String charset = response.charset();
    System.out.println("charset:" + charset);
    if (null != charset && !charset.isEmpty()) {
        defaultCharset = charset;
    }
    // FIX: the stream from openStream() was never closed and leaked whenever
    // Jsoup.parse threw; try-with-resources closes it deterministically.
    try (java.io.InputStream in = new URL(_url).openStream()) {
        Document doc = Jsoup.parse(in, defaultCharset, _url);
        System.out.println(doc);
        System.out.println("----------:" + JsoupUtil.getStaticCharset(doc));
    }
}
Usage of org.jsoup.Connection.Response in the project portfolio by buchen — class YahooFinanceQuoteFeed, method requestData.
/**
 * Fetches historical quote data from Yahoo Finance for the given security.
 *
 * @param security     security whose ticker symbol is queried
 * @param startDate    first day of the requested range (inclusive)
 * @param requestCrumb crumb token and session cookies required by Yahoo
 * @return the raw response body
 * @throws IOException if the request fails or Yahoo answers with a non-200 status
 */
private String requestData(Security security, LocalDate startDate, Crumb requestCrumb) throws IOException {
    LocalDate stopDate = LocalDate.now();
    ZoneId zone = ZoneId.systemDefault();
    // Yahoo expects the date range as epoch seconds at local start-of-day.
    String wknUrl = MessageFormat.format(HISTORICAL_URL, //
            security.getTickerSymbol(), //
            String.valueOf(startDate.atStartOfDay(zone).toEpochSecond()), //
            String.valueOf(stopDate.atStartOfDay(zone).toEpochSecond()), //
            URLEncoder.encode(requestCrumb.getId(), StandardCharsets.UTF_8.name()));
    Response response = Jsoup.connect(wknUrl) //
            .userAgent(OnlineHelper.getUserAgent()) //
            .cookies(requestCrumb.getCookies()) //
            .timeout(30000) //
            .execute();
    int status = response.statusCode();
    if (status != HttpURLConnection.HTTP_OK)
        throw new IOException(MessageFormat.format(Messages.MsgErrorUnexpectedStatusCode, security.getTickerSymbol(), status, wknUrl));
    return response.body();
}
Usage of org.jsoup.Connection.Response in the project portfolio by buchen — class YahooFinanceQuoteFeed, method updateLatestQuotes.
/**
 * Scrapes the Yahoo Finance quote page and extracts the latest market data
 * from the embedded "QuoteSummaryStore" JSON blob. Quick-and-dirty string
 * extraction by design — this scraping code is not expected to last long.
 *
 * @param security security to update; its latest price is replaced on success
 * @param errors   collector for any failures encountered
 * @return true if a valid price (date present, value &gt; 0) was stored
 */
@Override
public final boolean updateLatestQuotes(Security security, List<Exception> errors) {
    String wknUrl = MessageFormat.format( //$NON-NLS-1$
            "https://de.finance.yahoo.com/quote/{0}?ltr=1", security.getTickerSymbol());
    try {
        Response response = Jsoup.connect(wknUrl) //
                .userAgent(OnlineHelper.getUserAgent()) //
                .timeout(30000) //
                .execute();
        String body = response.body();

        // Anchor all extractions at the JSON store inside the page markup.
        int anchor = body.indexOf("QuoteSummaryStore"); //$NON-NLS-1$
        if (anchor < 0)
            return false;

        LatestSecurityPrice price = new LatestSecurityPrice();

        // Trade timestamp: epoch seconds converted to a local date.
        String time = extract(body, anchor, "\"regularMarketTime\":", ",").orElse(null); //$NON-NLS-1$ //$NON-NLS-2$
        if (time != null)
            price.setDate(Instant.ofEpochSecond(Long.parseLong(time)).atZone(ZoneId.systemDefault()).toLocalDate());

        String value = extract(body, anchor, "\"regularMarketPrice\":{\"raw\":", ",").orElse(null); //$NON-NLS-1$ //$NON-NLS-2$
        if (value != null)
            price.setValue(asPrice(value));

        String previousClose = extract(body, anchor, "\"regularMarketPreviousClose\":{\"raw\":", ",").orElse(null); //$NON-NLS-1$ //$NON-NLS-2$
        if (previousClose != null)
            price.setPreviousClose(asPrice(previousClose));

        String high = extract(body, anchor, "\"regularMarketDayHigh\":{\"raw\":", ",").orElse(null); //$NON-NLS-1$ //$NON-NLS-2$
        if (high != null)
            price.setHigh(asPrice(high));

        String low = extract(body, anchor, "\"regularMarketDayLow\":{\"raw\":", ",").orElse(null); //$NON-NLS-1$ //$NON-NLS-2$
        if (low != null)
            price.setLow(asPrice(low));

        String volume = extract(body, anchor, "\"regularMarketVolume\":{\"raw\":", ",").orElse(null); //$NON-NLS-1$ //$NON-NLS-2$
        if (volume != null)
            price.setVolume(asNumber(volume));

        // Reject incomplete results; hand back the page body for diagnosis.
        if (price.getDate() == null || price.getValue() <= 0) {
            errors.add(new IOException(body));
            return false;
        }

        security.setLatest(price);
        return true;
    } catch (IOException | ParseException e) {
        errors.add(e);
        return false;
    }
}
Aggregations