Search in sources :

Example 1 with PropertyDesc

use of org.codelibs.core.beans.PropertyDesc in project fess-crawler by codelibs.

the class AbstractCrawlerService method getUrl.

private String getUrl(final Object target) {
    final BeanDesc beanDesc = BeanDescFactory.getBeanDesc(target.getClass());
    final PropertyDesc sessionIdProp = beanDesc.getPropertyDesc(URL);
    final Object sessionId = sessionIdProp.getValue(target);
    return sessionId == null ? null : sessionId.toString();
}
Also used : BeanDesc(org.codelibs.core.beans.BeanDesc) PropertyDesc(org.codelibs.core.beans.PropertyDesc)

Example 2 with PropertyDesc

use of org.codelibs.core.beans.PropertyDesc in project fess-crawler by codelibs.

the class AbstractCrawlerService method setId.

protected void setId(final Object target, final String id) {
    final BeanDesc beanDesc = BeanDescFactory.getBeanDesc(target.getClass());
    final PropertyDesc idProp = beanDesc.getPropertyDesc(ID);
    idProp.setValue(target, id);
}
Also used : BeanDesc(org.codelibs.core.beans.BeanDesc) PropertyDesc(org.codelibs.core.beans.PropertyDesc)

Example 3 with PropertyDesc

use of org.codelibs.core.beans.PropertyDesc in project fess-crawler by codelibs.

the class AbstractCrawlerService method getSessionId.

private String getSessionId(final Object target) {
    final BeanDesc beanDesc = BeanDescFactory.getBeanDesc(target.getClass());
    final PropertyDesc sessionIdProp = beanDesc.getPropertyDesc(SESSION_ID);
    final Object sessionId = sessionIdProp.getValue(target);
    return sessionId == null ? null : sessionId.toString();
}
Also used : BeanDesc(org.codelibs.core.beans.BeanDesc) PropertyDesc(org.codelibs.core.beans.PropertyDesc)

Example 4 with PropertyDesc

use of org.codelibs.core.beans.PropertyDesc in project fess-crawler by codelibs.

the class HcHttpClient method init.

@Override
public synchronized void init() {
    if (httpClient != null) {
        return;
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Initializing " + HcHttpClient.class.getName());
    }
    super.init();
    // robots.txt parser
    final Boolean robotsTxtEnabled = getInitParameter(ROBOTS_TXT_ENABLED_PROPERTY, Boolean.TRUE, Boolean.class);
    if (robotsTxtHelper != null) {
        robotsTxtHelper.setEnabled(robotsTxtEnabled.booleanValue());
    }
    // httpclient
    final org.apache.http.client.config.RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
    final HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
    final Integer connectionTimeoutParam = getInitParameter(CONNECTION_TIMEOUT_PROPERTY, connectionTimeout, Integer.class);
    if (connectionTimeoutParam != null) {
        requestConfigBuilder.setConnectTimeout(connectionTimeoutParam);
    }
    final Integer soTimeoutParam = getInitParameter(SO_TIMEOUT_PROPERTY, soTimeout, Integer.class);
    if (soTimeoutParam != null) {
        requestConfigBuilder.setSocketTimeout(soTimeoutParam);
    }
    // AuthSchemeFactory
    final RegistryBuilder<AuthSchemeProvider> authSchemeProviderBuilder = RegistryBuilder.create();
    @SuppressWarnings("unchecked") final Map<String, AuthSchemeProvider> factoryMap = getInitParameter(AUTH_SCHEME_PROVIDERS_PROPERTY, authSchemeProviderMap, Map.class);
    if (factoryMap != null) {
        for (final Map.Entry<String, AuthSchemeProvider> entry : factoryMap.entrySet()) {
            authSchemeProviderBuilder.register(entry.getKey(), entry.getValue());
        }
    }
    // user agent
    userAgent = getInitParameter(USER_AGENT_PROPERTY, userAgent, String.class);
    if (StringUtil.isNotBlank(userAgent)) {
        httpClientBuilder.setUserAgent(userAgent);
    }
    final HttpRoutePlanner planner = buildRoutePlanner();
    if (planner != null) {
        httpClientBuilder.setRoutePlanner(planner);
    }
    // Authentication
    final Authentication[] siteCredentialList = getInitParameter(BASIC_AUTHENTICATIONS_PROPERTY, new Authentication[0], Authentication[].class);
    final List<Pair<FormScheme, Credentials>> formSchemeList = new ArrayList<>();
    for (final Authentication authentication : siteCredentialList) {
        final AuthScheme authScheme = authentication.getAuthScheme();
        if (authScheme instanceof FormScheme) {
            formSchemeList.add(new Pair<>((FormScheme) authScheme, authentication.getCredentials()));
        } else {
            final AuthScope authScope = authentication.getAuthScope();
            credentialsProvider.setCredentials(authScope, authentication.getCredentials());
            if (authScope.getHost() != null && authScheme != null) {
                final HttpHost targetHost = new HttpHost(authScope.getHost(), authScope.getPort());
                authCache.put(targetHost, authScheme);
            }
        }
    }
    httpClientContext.setAuthCache(authCache);
    httpClientContext.setCredentialsProvider(credentialsProvider);
    // Request Header
    final RequestHeader[] requestHeaders = getInitParameter(REQUERT_HEADERS_PROPERTY, new RequestHeader[0], RequestHeader[].class);
    for (final RequestHeader requestHeader : requestHeaders) {
        if (requestHeader.isValid()) {
            requestHeaderList.add(new BasicHeader(requestHeader.getName(), requestHeader.getValue()));
        }
    }
    // do not redirect
    requestConfigBuilder.setRedirectsEnabled(getInitParameter(REDIRECTS_ENABLED, redirectsEnabled, Boolean.class));
    // cookie
    if (cookieSpec != null) {
        requestConfigBuilder.setCookieSpec(cookieSpec);
    }
    // cookie store
    httpClientBuilder.setDefaultCookieStore(cookieStore);
    if (cookieStore != null) {
        final Cookie[] cookies = getInitParameter(COOKIES_PROPERTY, new Cookie[0], Cookie[].class);
        for (final Cookie cookie : cookies) {
            cookieStore.addCookie(cookie);
        }
    }
    // cookie registry
    final Lookup<CookieSpecProvider> cookieSpecRegistry = buildCookieSpecRegistry();
    if (cookieSpecRegistry != null) {
        httpClientBuilder.setDefaultCookieSpecRegistry(cookieSpecRegistry);
    }
    // SSL
    final LayeredConnectionSocketFactory sslSocketFactory = buildSSLSocketFactory();
    if (sslSocketFactory != null) {
        httpClientBuilder.setSSLSocketFactory(sslSocketFactory);
    }
    connectionMonitorTask = TimeoutManager.getInstance().addTimeoutTarget(new HcConnectionMonitorTarget(clientConnectionManager, idleConnectionTimeout), connectionCheckInterval, true);
    final CloseableHttpClient closeableHttpClient = httpClientBuilder.setDnsResolver(dnsResolver).setConnectionManager(clientConnectionManager).setDefaultRequestConfig(requestConfigBuilder.build()).build();
    if (!httpClientPropertyMap.isEmpty()) {
        final BeanDesc beanDesc = BeanDescFactory.getBeanDesc(closeableHttpClient.getClass());
        for (final Map.Entry<String, Object> entry : httpClientPropertyMap.entrySet()) {
            final String propertyName = entry.getKey();
            if (beanDesc.hasPropertyDesc(propertyName)) {
                final PropertyDesc propertyDesc = beanDesc.getPropertyDesc(propertyName);
                propertyDesc.setValue(closeableHttpClient, entry.getValue());
            } else {
                logger.warn("DefaultHttpClient does not have " + propertyName + ".");
            }
        }
    }
    formSchemeList.forEach(p -> {
        final FormScheme scheme = p.getFirst();
        final Credentials credentials = p.getSecond();
        scheme.authenticate(credentials, (request, consumer) -> {
            // request header
            for (final Header header : requestHeaderList) {
                request.addHeader(header);
            }
            HttpEntity httpEntity = null;
            try {
                final HttpResponse response = closeableHttpClient.execute(request, new BasicHttpContext(httpClientContext));
                httpEntity = response.getEntity();
                consumer.accept(response, httpEntity);
            } catch (final Exception e) {
                request.abort();
                logger.warn("Failed to authenticate on " + scheme, e);
            } finally {
                EntityUtils.consumeQuietly(httpEntity);
            }
        });
    });
    httpClient = closeableHttpClient;
}
Also used : HttpEntity(org.apache.http.HttpEntity) LayeredConnectionSocketFactory(org.apache.http.conn.socket.LayeredConnectionSocketFactory) BasicHttpContext(org.apache.http.protocol.BasicHttpContext) ArrayList(java.util.ArrayList) HttpClientBuilder(org.apache.http.impl.client.HttpClientBuilder) AuthScheme(org.apache.http.auth.AuthScheme) HttpRoutePlanner(org.apache.http.conn.routing.HttpRoutePlanner) HttpHost(org.apache.http.HttpHost) BeanDesc(org.codelibs.core.beans.BeanDesc) PropertyDesc(org.codelibs.core.beans.PropertyDesc) Pair(org.codelibs.core.misc.Pair) Cookie(org.apache.http.cookie.Cookie) RequestConfig(org.apache.http.client.config.RequestConfig) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) HttpResponse(org.apache.http.HttpResponse) CrawlingAccessException(org.codelibs.fess.crawler.exception.CrawlingAccessException) MaxLengthExceededException(org.codelibs.fess.crawler.exception.MaxLengthExceededException) CrawlerSystemException(org.codelibs.fess.crawler.exception.CrawlerSystemException) ParseException(java.text.ParseException) NoRouteToHostException(java.net.NoRouteToHostException) SocketException(java.net.SocketException) ConnectException(java.net.ConnectException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) Header(org.apache.http.Header) BasicHeader(org.apache.http.message.BasicHeader) AuthScope(org.apache.http.auth.AuthScope) CookieSpecProvider(org.apache.http.cookie.CookieSpecProvider) DefaultCookieSpecProvider(org.apache.http.impl.cookie.DefaultCookieSpecProvider) RFC6265CookieSpecProvider(org.apache.http.impl.cookie.RFC6265CookieSpecProvider) AuthSchemeProvider(org.apache.http.auth.AuthSchemeProvider) Map(java.util.Map) HashMap(java.util.HashMap) FormScheme(org.codelibs.fess.crawler.client.http.form.FormScheme) BasicHeader(org.apache.http.message.BasicHeader) Credentials(org.apache.http.auth.Credentials)

Example 5 with PropertyDesc

use of org.codelibs.core.beans.PropertyDesc in project fess-crawler by codelibs.

the class ApiExtractor method init.

@PostConstruct
public void init() {
    if (logger.isDebugEnabled()) {
        logger.debug("Initializing " + ApiExtractor.class.getName());
    }
    // httpclient
    final org.apache.http.client.config.RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
    final HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
    final Integer connectionTimeoutParam = connectionTimeout;
    if (connectionTimeoutParam != null) {
        requestConfigBuilder.setConnectTimeout(connectionTimeoutParam);
    }
    final Integer soTimeoutParam = soTimeout;
    if (soTimeoutParam != null) {
        requestConfigBuilder.setSocketTimeout(soTimeoutParam);
    }
    // AuthSchemeFactory
    final RegistryBuilder<AuthSchemeProvider> authSchemeProviderBuilder = RegistryBuilder.create();
    // @SuppressWarnings("unchecked")
    final Map<String, AuthSchemeProvider> factoryMap = authSchemeProviderMap;
    if (factoryMap != null) {
        for (final Map.Entry<String, AuthSchemeProvider> entry : factoryMap.entrySet()) {
            authSchemeProviderBuilder.register(entry.getKey(), entry.getValue());
        }
    }
    // user agent
    if (StringUtil.isNotBlank(userAgent)) {
        httpClientBuilder.setUserAgent(userAgent);
    }
    // Authentication
    final Authentication[] siteCredentialList = new Authentication[0];
    for (final Authentication authentication : siteCredentialList) {
        final AuthScope authScope = authentication.getAuthScope();
        credentialsProvider.setCredentials(authScope, authentication.getCredentials());
        final AuthScheme authScheme = authentication.getAuthScheme();
        if (authScope.getHost() != null && authScheme != null) {
            final HttpHost targetHost = new HttpHost(authScope.getHost(), authScope.getPort());
            authCache.put(targetHost, authScheme);
        }
    }
    httpClientContext.setAuthCache(authCache);
    httpClientContext.setCredentialsProvider(credentialsProvider);
    // Request Header
    final RequestHeader[] requestHeaders = { new RequestHeader("enctype", "multipart/form-data") };
    for (final RequestHeader requestHeader : requestHeaders) {
        if (requestHeader.isValid()) {
            requestHeaderList.add(new BasicHeader(requestHeader.getName(), requestHeader.getValue()));
        }
    }
    final CloseableHttpClient closeableHttpClient = httpClientBuilder.setDefaultRequestConfig(requestConfigBuilder.build()).build();
    if (!httpClientPropertyMap.isEmpty()) {
        final BeanDesc beanDesc = BeanDescFactory.getBeanDesc(closeableHttpClient.getClass());
        for (final Map.Entry<String, Object> entry : httpClientPropertyMap.entrySet()) {
            final String propertyName = entry.getKey();
            if (beanDesc.hasPropertyDesc(propertyName)) {
                final PropertyDesc propertyDesc = beanDesc.getPropertyDesc(propertyName);
                propertyDesc.setValue(closeableHttpClient, entry.getValue());
            } else {
                logger.warn("DefaultHttpClient does not have " + propertyName + ".");
            }
        }
    }
    httpClient = closeableHttpClient;
}
Also used : HttpClientBuilder(org.apache.http.impl.client.HttpClientBuilder) AuthScheme(org.apache.http.auth.AuthScheme) HttpHost(org.apache.http.HttpHost) BeanDesc(org.codelibs.core.beans.BeanDesc) PropertyDesc(org.codelibs.core.beans.PropertyDesc) RequestConfig(org.apache.http.client.config.RequestConfig) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) Authentication(org.codelibs.fess.crawler.client.http.Authentication) AuthScope(org.apache.http.auth.AuthScope) RequestHeader(org.codelibs.fess.crawler.client.http.RequestHeader) AuthSchemeProvider(org.apache.http.auth.AuthSchemeProvider) HashMap(java.util.HashMap) Map(java.util.Map) BasicHeader(org.apache.http.message.BasicHeader) PostConstruct(javax.annotation.PostConstruct)

Aggregations

BeanDesc (org.codelibs.core.beans.BeanDesc)5 PropertyDesc (org.codelibs.core.beans.PropertyDesc)5 HashMap (java.util.HashMap)2 Map (java.util.Map)2 HttpHost (org.apache.http.HttpHost)2 AuthScheme (org.apache.http.auth.AuthScheme)2 AuthSchemeProvider (org.apache.http.auth.AuthSchemeProvider)2 AuthScope (org.apache.http.auth.AuthScope)2 RequestConfig (org.apache.http.client.config.RequestConfig)2 CloseableHttpClient (org.apache.http.impl.client.CloseableHttpClient)2 HttpClientBuilder (org.apache.http.impl.client.HttpClientBuilder)2 BasicHeader (org.apache.http.message.BasicHeader)2 IOException (java.io.IOException)1 ConnectException (java.net.ConnectException)1 MalformedURLException (java.net.MalformedURLException)1 NoRouteToHostException (java.net.NoRouteToHostException)1 SocketException (java.net.SocketException)1 UnknownHostException (java.net.UnknownHostException)1 ParseException (java.text.ParseException)1 ArrayList (java.util.ArrayList)1