Search in sources:

Example 26 with Response

use of org.jsoup.Connection.Response in project crawler-jsoup-maven by bluetata.

the class AccResHead2ParseHTML method main.

/**
 * Demo entry point: issues a POST request through jsoup to read the charset reported for the
 * response, then downloads the same URL a second time and parses it with that charset
 * (falling back to UTF-8 when the response reports none). The parsed document and the result
 * of {@code JsoupUtil.getStaticCharset(doc)} are printed to standard output.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if either request or the parsing fails
 */
public static void main(String[] args) throws Exception {
    // String _url = "http://61.161.221.4:8088/mainservlet?actionType=INDEX";
    String _url = "https://rate.taobao.com/feedRateList.htm?auctionNumId=552463737787&userNumId=1703495879&currentPageNum=1&pageSize=20&rateType=&orderType=sort_weight&attribute=&sku=&hasSku=false&folded=0&ua=098%23E1hvqvvRvPpvUpCkvvvvvjiPPLcyljlbRsqwsjnEPmPpsjt8RLMvtjiER2q9ljnvRsyCvvBvpvvv9phv2n1w3xqBzYswM20S79wCvvNwzHi4z0CNiQhvChCvCCptvpvhphvvvvyCvh1vVxOvITlz8eQEfaAK53n5WDKt5BwsWD6rfuVHR4hSoAZnD704deDHEcqhaXTAVAIanixreTt%2BCclWQRp4e0Q4b64B9CkaU6UsxI2hKphv8hCvvvvvvhCvphvZJ9vvpu1vpC9CvvC216CvHjIvvhPjphvZK9vvpYJivpvUphvh3cUYvR7EvpvVp6WUCEIXvphvCyCCvvvvvvGCvvpvvvvv3QhvChCCvvvtvpvhphvvv86CvvDvppWpJpCv7OQ%3D&_ksTS=1519956772500_2207&callback=jsonp_tbcrate_reviews_list";
    String defaultCharset = "UTF-8";
    // according to response header to get parsed page char-set.
    Connection connection = Jsoup.connect(_url)
            // User-Agent of Chrome 55
            .userAgent("Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36")
            .referrer("http://blog.csdn.net/")
            .header("Content-Type", "application/json; charset=GBK")
            .header("Accept", "text/plain, */*; q=0.01")
            .header("Accept-Encoding", "gzip,deflate,sdch")
            .header("Accept-Language", "es-ES,es;q=0.8")
            .header("Connection", "keep-alive")
            .header("X-Requested-With", "XMLHttpRequest")
            // the body is capped at 100 bytes; only the charset is read from this response
            .maxBodySize(100)
            .timeout(1000 * 10)
            .method(Connection.Method.POST);
    Response response = connection.execute();
    String charset = response.charset();
    System.out.println("charset:" + charset);
    if (null != charset && !charset.isEmpty()) {
        defaultCharset = charset;
    }
    // Document doc = Jsoup.connect("http://blog.csdn.net/")
    // .cookies(response.cookies())
    // .timeout(10 * 10000)
    // .get();
    Document doc;
    // This method opens the stream, so it also owns closing it: try-with-resources releases
    // the connection even when parsing throws.
    try (java.io.InputStream pageStream = new URL(_url).openStream()) {
        doc = Jsoup.parse(pageStream, defaultCharset, _url);
    }
    System.out.println(doc);
    System.out.println("----------:" + JsoupUtil.getStaticCharset(doc));
}
Also used : Response(org.jsoup.Connection.Response) Connection(org.jsoup.Connection) Document(org.jsoup.nodes.Document) URL(java.net.URL)

Example 27 with Response

use of org.jsoup.Connection.Response in project portfolio by buchen.

the class YahooFinanceQuoteFeed method requestData.

/**
 * Downloads the raw response body for the given security from the URL built out of
 * {@code HISTORICAL_URL}.
 *
 * <p>The URL template is filled with the ticker symbol, the start and stop of the requested
 * period (start of day in the system default time zone, as epoch seconds; the stop date is
 * today) and the URL-encoded crumb id. The request carries the crumb's cookies.
 *
 * @param security     security whose ticker symbol is requested
 * @param startDate    first day of the requested period
 * @param requestCrumb crumb providing the id and the cookies sent with the request
 * @return the response body as returned by the server
 * @throws IOException if the request fails or the server answers with a status other than 200
 */
private String requestData(Security security, LocalDate startDate, Crumb requestCrumb) throws IOException {
    LocalDate today = LocalDate.now();

    // Epoch values are passed as strings so MessageFormat does not apply number formatting.
    String symbol = security.getTickerSymbol();
    String periodStart = String.valueOf(startDate.atStartOfDay(ZoneId.systemDefault()).toEpochSecond());
    String periodEnd = String.valueOf(today.atStartOfDay(ZoneId.systemDefault()).toEpochSecond());
    String crumbId = URLEncoder.encode(requestCrumb.getId(), StandardCharsets.UTF_8.name());

    String requestUrl = MessageFormat.format(HISTORICAL_URL, symbol, periodStart, periodEnd, crumbId);

    Response reply = Jsoup.connect(requestUrl)
            .userAgent(OnlineHelper.getUserAgent())
            .cookies(requestCrumb.getCookies())
            .timeout(30000)
            .execute();

    if (reply.statusCode() == HttpURLConnection.HTTP_OK) {
        return reply.body();
    }

    throw new IOException(MessageFormat.format(Messages.MsgErrorUnexpectedStatusCode, symbol,
                    reply.statusCode(), requestUrl));
}
Also used : Response(org.jsoup.Connection.Response) IOException(java.io.IOException) LocalDate(java.time.LocalDate)

Example 28 with Response

use of org.jsoup.Connection.Response in project portfolio by buchen.

the class YahooFinanceQuoteFeed method updateLatestQuotes.

/**
 * Fetches the Yahoo Finance quote page for the security's ticker symbol and scrapes the
 * latest price from the text following the {@code QuoteSummaryStore} marker in the body.
 *
 * <p>Date, price, previous close, day high, day low and volume are each set only when the
 * corresponding field is found. The price is stored on the security only if a date was found
 * and the value is positive; otherwise the whole response body is recorded as an error.
 *
 * @param security security to update; its ticker symbol is used to build the request URL
 * @param errors   collector that receives any failure encountered while loading or parsing
 * @return {@code true} if a latest price was set on the security, {@code false} if the
 *         marker was not found, the extracted data was incomplete, or an error occurred
 */
@Override
public final boolean updateLatestQuotes(Security security, List<Exception> errors) {
    String wknUrl = MessageFormat.format("https://de.finance.yahoo.com/quote/{0}?ltr=1", //$NON-NLS-1$
                    security.getTickerSymbol());
    try {
        Response response = Jsoup.connect(wknUrl) //
                        .userAgent(OnlineHelper.getUserAgent()) //
                        .timeout(30000) //
                        .execute();
        String body = response.body();

        // some quick and dirty extraction. This scraping code will anyway
        // not last long.
        int startIndex = body.indexOf("QuoteSummaryStore"); //$NON-NLS-1$
        if (startIndex < 0) {
            return false;
        }

        LatestSecurityPrice price = new LatestSecurityPrice();

        Optional<String> time = extract(body, startIndex, "\"regularMarketTime\":", ","); //$NON-NLS-1$ //$NON-NLS-2$
        if (time.isPresent()) {
            // parseLong throws NumberFormatException on a malformed value; handled below
            long epoch = Long.parseLong(time.get());
            price.setDate(Instant.ofEpochSecond(epoch).atZone(ZoneId.systemDefault()).toLocalDate());
        }

        Optional<String> value = extract(body, startIndex, "\"regularMarketPrice\":{\"raw\":", ","); //$NON-NLS-1$ //$NON-NLS-2$
        if (value.isPresent()) {
            price.setValue(asPrice(value.get()));
        }

        Optional<String> previousClose = extract(body, startIndex, "\"regularMarketPreviousClose\":{\"raw\":", ","); //$NON-NLS-1$ //$NON-NLS-2$
        if (previousClose.isPresent()) {
            price.setPreviousClose(asPrice(previousClose.get()));
        }

        Optional<String> high = extract(body, startIndex, "\"regularMarketDayHigh\":{\"raw\":", ","); //$NON-NLS-1$ //$NON-NLS-2$
        if (high.isPresent()) {
            price.setHigh(asPrice(high.get()));
        }

        Optional<String> low = extract(body, startIndex, "\"regularMarketDayLow\":{\"raw\":", ","); //$NON-NLS-1$ //$NON-NLS-2$
        if (low.isPresent()) {
            price.setLow(asPrice(low.get()));
        }

        Optional<String> volume = extract(body, startIndex, "\"regularMarketVolume\":{\"raw\":", ","); //$NON-NLS-1$ //$NON-NLS-2$
        if (volume.isPresent()) {
            price.setVolume(asNumber(volume.get()));
        }

        // without a date or a positive value the scrape is unusable; keep the body for diagnosis
        if (price.getDate() == null || price.getValue() <= 0) {
            errors.add(new IOException(body));
            return false;
        }

        security.setLatest(price);
        return true;
    } catch (IOException | ParseException | NumberFormatException e) {
        // NumberFormatException is unchecked and would otherwise escape to the caller instead
        // of being reported through the errors list like every other parse failure.
        errors.add(e);
        return false;
    }
}
Also used : Response(org.jsoup.Connection.Response) LatestSecurityPrice(name.abuchen.portfolio.model.LatestSecurityPrice) IOException(java.io.IOException) ParseException(java.text.ParseException) DateTimeParseException(java.time.format.DateTimeParseException)

Aggregations

Response (org.jsoup.Connection.Response)28 Document (org.jsoup.nodes.Document)12 IOException (java.io.IOException)11 Element (org.jsoup.nodes.Element)10 HashMap (java.util.HashMap)8 URL (java.net.URL)4 Connection (org.jsoup.Connection)4 Elements (org.jsoup.select.Elements)4 ArrayList (java.util.ArrayList)3 BufferedWriter (java.io.BufferedWriter)1 File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 FileWriter (java.io.FileWriter)1 HttpURLConnection (java.net.HttpURLConnection)1 MalformedURLException (java.net.MalformedURLException)1 ParseException (java.text.ParseException)1 LocalDate (java.time.LocalDate)1 DateTimeParseException (java.time.format.DateTimeParseException)1 Map (java.util.Map)1 Matcher (java.util.regex.Matcher)1