Search in sources :

Example 6 with CookieSpecProvider

Use of org.apache.http.cookie.CookieSpecProvider in the fess-crawler project by codelibs.

From the class HcHttpClient, method init.

/**
 * Builds and configures the underlying Apache HttpClient.
 *
 * <p>The method is {@code synchronized} and returns immediately when {@code httpClient} has
 * already been assigned, so the configuration below runs at most once per successful call.
 *
 * <p>Configuration is applied in this order: parent initialization, robots.txt switch, connect
 * and socket timeouts, auth scheme providers, user agent, route planner, credentials, extra
 * request headers, redirect handling, cookie spec / store / registry, SSL socket factory,
 * connection monitor registration, client construction, optional bean properties on the built
 * client, and finally form-based logins. {@code httpClient} is assigned last, after the form
 * logins have been attempted.
 *
 * <p>Failures during a form login are logged at WARN level and do not abort initialization.
 */
@Override
public synchronized void init() {
    if (httpClient != null) {
        // Already initialized.
        return;
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Initializing " + HcHttpClient.class.getName());
    }
    super.init();
    // robots.txt parser
    final Boolean robotsTxtEnabled = getInitParameter(ROBOTS_TXT_ENABLED_PROPERTY, Boolean.TRUE, Boolean.class);
    if (robotsTxtHelper != null) {
        robotsTxtHelper.setEnabled(robotsTxtEnabled.booleanValue());
    }
    // httpclient
    final org.apache.http.client.config.RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
    final HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
    // Timeouts are only applied when a value is available; otherwise the builder defaults stay.
    final Integer connectionTimeoutParam = getInitParameter(CONNECTION_TIMEOUT_PROPERTY, connectionTimeout, Integer.class);
    if (connectionTimeoutParam != null) {
        requestConfigBuilder.setConnectTimeout(connectionTimeoutParam);
    }
    final Integer soTimeoutParam = getInitParameter(SO_TIMEOUT_PROPERTY, soTimeout, Integer.class);
    if (soTimeoutParam != null) {
        requestConfigBuilder.setSocketTimeout(soTimeoutParam);
    }
    // AuthSchemeFactory
    @SuppressWarnings("unchecked") final Map<String, AuthSchemeProvider> factoryMap = getInitParameter(AUTH_SCHEME_PROVIDERS_PROPERTY, authSchemeProviderMap, Map.class);
    if (factoryMap != null && !factoryMap.isEmpty()) {
        // Previously the providers were registered into a builder that was never handed to the
        // client, so this setting had no effect. Installing a registry replaces HttpClient's
        // built-in one, therefore the standard schemes are seeded first and the configured
        // providers are layered on top (a configured entry with the same name wins).
        final RegistryBuilder<AuthSchemeProvider> authSchemeProviderBuilder = RegistryBuilder.create();
        authSchemeProviderBuilder.register(org.apache.http.client.config.AuthSchemes.BASIC, new org.apache.http.impl.auth.BasicSchemeFactory());
        authSchemeProviderBuilder.register(org.apache.http.client.config.AuthSchemes.DIGEST, new org.apache.http.impl.auth.DigestSchemeFactory());
        authSchemeProviderBuilder.register(org.apache.http.client.config.AuthSchemes.NTLM, new org.apache.http.impl.auth.NTLMSchemeFactory());
        authSchemeProviderBuilder.register(org.apache.http.client.config.AuthSchemes.SPNEGO, new org.apache.http.impl.auth.SPNegoSchemeFactory());
        authSchemeProviderBuilder.register(org.apache.http.client.config.AuthSchemes.KERBEROS, new org.apache.http.impl.auth.KerberosSchemeFactory());
        for (final Map.Entry<String, AuthSchemeProvider> entry : factoryMap.entrySet()) {
            authSchemeProviderBuilder.register(entry.getKey(), entry.getValue());
        }
        httpClientBuilder.setDefaultAuthSchemeRegistry(authSchemeProviderBuilder.build());
    }
    // user agent
    userAgent = getInitParameter(USER_AGENT_PROPERTY, userAgent, String.class);
    if (StringUtil.isNotBlank(userAgent)) {
        httpClientBuilder.setUserAgent(userAgent);
    }
    final HttpRoutePlanner planner = buildRoutePlanner();
    if (planner != null) {
        httpClientBuilder.setRoutePlanner(planner);
    }
    // Authentication
    final Authentication[] siteCredentialList = getInitParameter(BASIC_AUTHENTICATIONS_PROPERTY, new Authentication[0], Authentication[].class);
    // Form-based schemes need a built client to send their login request, so they are
    // collected here and executed after the client has been constructed.
    final List<Pair<FormScheme, Credentials>> formSchemeList = new ArrayList<>();
    for (final Authentication authentication : siteCredentialList) {
        final AuthScheme authScheme = authentication.getAuthScheme();
        if (authScheme instanceof FormScheme) {
            formSchemeList.add(new Pair<>((FormScheme) authScheme, authentication.getCredentials()));
        } else {
            final AuthScope authScope = authentication.getAuthScope();
            credentialsProvider.setCredentials(authScope, authentication.getCredentials());
            // A scheme is cached per host only when both a concrete host and a scheme are given.
            if (authScope.getHost() != null && authScheme != null) {
                final HttpHost targetHost = new HttpHost(authScope.getHost(), authScope.getPort());
                authCache.put(targetHost, authScheme);
            }
        }
    }
    httpClientContext.setAuthCache(authCache);
    httpClientContext.setCredentialsProvider(credentialsProvider);
    // Request Header
    final RequestHeader[] requestHeaders = getInitParameter(REQUERT_HEADERS_PROPERTY, new RequestHeader[0], RequestHeader[].class);
    for (final RequestHeader requestHeader : requestHeaders) {
        if (requestHeader.isValid()) {
            requestHeaderList.add(new BasicHeader(requestHeader.getName(), requestHeader.getValue()));
        }
    }
    // do not redirect
    requestConfigBuilder.setRedirectsEnabled(getInitParameter(REDIRECTS_ENABLED, redirectsEnabled, Boolean.class));
    // cookie
    if (cookieSpec != null) {
        requestConfigBuilder.setCookieSpec(cookieSpec);
    }
    // cookie store
    httpClientBuilder.setDefaultCookieStore(cookieStore);
    if (cookieStore != null) {
        // Pre-populate the store with any cookies supplied through the init parameters.
        final Cookie[] cookies = getInitParameter(COOKIES_PROPERTY, new Cookie[0], Cookie[].class);
        for (final Cookie cookie : cookies) {
            cookieStore.addCookie(cookie);
        }
    }
    // cookie registry
    final Lookup<CookieSpecProvider> cookieSpecRegistry = buildCookieSpecRegistry();
    if (cookieSpecRegistry != null) {
        httpClientBuilder.setDefaultCookieSpecRegistry(cookieSpecRegistry);
    }
    // SSL
    final LayeredConnectionSocketFactory sslSocketFactory = buildSSLSocketFactory();
    if (sslSocketFactory != null) {
        httpClientBuilder.setSSLSocketFactory(sslSocketFactory);
    }
    // Register a monitor target for the connection manager with the TimeoutManager; the
    // returned task handle is kept in connectionMonitorTask.
    connectionMonitorTask = TimeoutManager.getInstance().addTimeoutTarget(new HcConnectionMonitorTarget(clientConnectionManager, idleConnectionTimeout), connectionCheckInterval, true);
    final CloseableHttpClient closeableHttpClient = httpClientBuilder.setDnsResolver(dnsResolver).setConnectionManager(clientConnectionManager).setDefaultRequestConfig(requestConfigBuilder.build()).build();
    // Apply additional bean properties to the built client; unknown property names are
    // reported with a warning and skipped.
    if (!httpClientPropertyMap.isEmpty()) {
        final BeanDesc beanDesc = BeanDescFactory.getBeanDesc(closeableHttpClient.getClass());
        for (final Map.Entry<String, Object> entry : httpClientPropertyMap.entrySet()) {
            final String propertyName = entry.getKey();
            if (beanDesc.hasPropertyDesc(propertyName)) {
                final PropertyDesc propertyDesc = beanDesc.getPropertyDesc(propertyName);
                propertyDesc.setValue(closeableHttpClient, entry.getValue());
            } else {
                logger.warn("DefaultHttpClient does not have " + propertyName + ".");
            }
        }
    }
    // Run the deferred form logins through the freshly built client.
    formSchemeList.forEach(p -> {
        final FormScheme scheme = p.getFirst();
        final Credentials credentials = p.getSecond();
        scheme.authenticate(credentials, (request, consumer) -> {
            // request header
            for (final Header header : requestHeaderList) {
                request.addHeader(header);
            }
            HttpEntity httpEntity = null;
            try {
                // Each request gets its own context layered over the shared httpClientContext.
                final HttpResponse response = closeableHttpClient.execute(request, new BasicHttpContext(httpClientContext));
                httpEntity = response.getEntity();
                consumer.accept(response, httpEntity);
            } catch (final Exception e) {
                // Best effort: a failed login is logged and must not break initialization.
                request.abort();
                logger.warn("Failed to authenticate on " + scheme, e);
            } finally {
                // Consume the entity whether or not the exchange succeeded.
                EntityUtils.consumeQuietly(httpEntity);
            }
        });
    });
    // Assigned last: the null check at the top of this method relies on this field.
    httpClient = closeableHttpClient;
}
Also used : HttpEntity(org.apache.http.HttpEntity) LayeredConnectionSocketFactory(org.apache.http.conn.socket.LayeredConnectionSocketFactory) BasicHttpContext(org.apache.http.protocol.BasicHttpContext) ArrayList(java.util.ArrayList) HttpClientBuilder(org.apache.http.impl.client.HttpClientBuilder) AuthScheme(org.apache.http.auth.AuthScheme) HttpRoutePlanner(org.apache.http.conn.routing.HttpRoutePlanner) HttpHost(org.apache.http.HttpHost) BeanDesc(org.codelibs.core.beans.BeanDesc) PropertyDesc(org.codelibs.core.beans.PropertyDesc) Pair(org.codelibs.core.misc.Pair) Cookie(org.apache.http.cookie.Cookie) RequestConfig(org.apache.http.client.config.RequestConfig) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) HttpResponse(org.apache.http.HttpResponse) CrawlingAccessException(org.codelibs.fess.crawler.exception.CrawlingAccessException) MaxLengthExceededException(org.codelibs.fess.crawler.exception.MaxLengthExceededException) CrawlerSystemException(org.codelibs.fess.crawler.exception.CrawlerSystemException) ParseException(java.text.ParseException) NoRouteToHostException(java.net.NoRouteToHostException) SocketException(java.net.SocketException) ConnectException(java.net.ConnectException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) Header(org.apache.http.Header) BasicHeader(org.apache.http.message.BasicHeader) AuthScope(org.apache.http.auth.AuthScope) CookieSpecProvider(org.apache.http.cookie.CookieSpecProvider) DefaultCookieSpecProvider(org.apache.http.impl.cookie.DefaultCookieSpecProvider) RFC6265CookieSpecProvider(org.apache.http.impl.cookie.RFC6265CookieSpecProvider) AuthSchemeProvider(org.apache.http.auth.AuthSchemeProvider) Map(java.util.Map) HashMap(java.util.HashMap) FormScheme(org.codelibs.fess.crawler.client.http.form.FormScheme) BasicHeader(org.apache.http.message.BasicHeader) Credentials(org.apache.http.auth.Credentials)

Aggregations

CookieSpecProvider (org.apache.http.cookie.CookieSpecProvider)6 AuthSchemeProvider (org.apache.http.auth.AuthSchemeProvider)3 HttpClientBuilder (org.apache.http.impl.client.HttpClientBuilder)3 HttpHost (org.apache.http.HttpHost)2 AuthScope (org.apache.http.auth.AuthScope)2 Credentials (org.apache.http.auth.Credentials)2 CredentialsProvider (org.apache.http.client.CredentialsProvider)2 RequestConfig (org.apache.http.client.config.RequestConfig)2 PublicSuffixMatcher (org.apache.http.conn.util.PublicSuffixMatcher)2 BasicCredentialsProvider (org.apache.http.impl.client.BasicCredentialsProvider)2 CloseableHttpClient (org.apache.http.impl.client.CloseableHttpClient)2 DefaultCookieSpecProvider (org.apache.http.impl.cookie.DefaultCookieSpecProvider)2 IgnoreSpecProvider (org.apache.http.impl.cookie.IgnoreSpecProvider)2 NetscapeDraftSpecProvider (org.apache.http.impl.cookie.NetscapeDraftSpecProvider)2 RFC6265CookieSpecProvider (org.apache.http.impl.cookie.RFC6265CookieSpecProvider)2 IOException (java.io.IOException)1 ConnectException (java.net.ConnectException)1 MalformedURLException (java.net.MalformedURLException)1 NoRouteToHostException (java.net.NoRouteToHostException)1 SocketException (java.net.SocketException)1