
Example 1 with CrawlingConfigWrapper

use of org.codelibs.fess.es.config.exentity.CrawlingConfigWrapper in project fess by codelibs.

From the class GitBucketDataStoreImpl, the storeData method: it crawls each GitBucket repository's file contents, issues, and wiki pages and hands every document to the IndexUpdateCallback.

@Override
protected void storeData(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap, final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap) {
    final String rootURL = getRootURL(paramMap);
    final String authToken = getAuthToken(paramMap);
    final long readInterval = getReadInterval(paramMap);
    // Non-emptiness Check for URL and Token
    if (rootURL.isEmpty() || authToken.isEmpty()) {
        logger.warn("parameter \"" + TOKEN_PARAM + "\" and \"" + GITBUCKET_URL_PARAM + "\" are required");
        return;
    }
    // Get List of Repositories
    final List<Map<String, Object>> repositoryList = getRepositoryList(rootURL, authToken);
    if (repositoryList.isEmpty()) {
        logger.warn("Token is invalid or no Repository");
        return;
    }
    // Get Labels
    final Map<String, String> pluginInfo = getFessPluginInfo(rootURL, authToken);
    final String sourceLabel = pluginInfo.get("source_label");
    final String issueLabel = pluginInfo.get("issue_label");
    final String wikiLabel = pluginInfo.get("wiki_label");
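    // Wrap the data config so every crawler HTTP request carries the GitBucket API token and the raw-content Accept header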
    final CrawlingConfig crawlingConfig = new CrawlingConfigWrapper(dataConfig) {

        @Override
        public Map<String, Object> initializeClientFactory(final CrawlerClientFactory crawlerClientFactory) {
            final Map<String, Object> paramMap = super.initializeClientFactory(crawlerClientFactory);
            final List<RequestHeader> headerList = new ArrayList<>();
            final RequestHeader[] headers = (RequestHeader[]) paramMap.get(HcHttpClient.REQUERT_HEADERS_PROPERTY);
            if (headers != null) {
                for (final RequestHeader header : headers) {
                    headerList.add(header);
                }
            }
            headerList.add(new RequestHeader("Authorization", "token " + authToken));
            headerList.add(new RequestHeader("Accept", "application/vnd.github.v3.raw"));
            paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, headerList.toArray(new RequestHeader[headerList.size()]));
            return paramMap;
        }
    };
    // Crawl each repository
    for (final Map<String, Object> repository : repositoryList) {
        try {
            final String owner = (String) repository.get("owner");
            final String name = (String) repository.get("name");
            final String refStr = getGitRef(rootURL, authToken, owner, name, "master");
            final int issueCount = (int) repository.get("issue_count");
            final int pullCount = (int) repository.get("pull_count");
            final List<String> roleList = createRoleList(owner, repository);
            logger.info("Crawl " + owner + "/" + name);
            // crawl and store file contents recursively
            crawlFileContents(rootURL, authToken, owner, name, refStr, StringUtil.EMPTY, 0, readInterval, path -> {
                storeFileContent(rootURL, authToken, sourceLabel, owner, name, refStr, roleList, path, crawlingConfig, callback, paramMap, scriptMap, defaultDataMap);
                if (readInterval > 0) {
                    sleep(readInterval);
                }
            });
            logger.info("Crawl issues in " + owner + "/" + name);
            // store issues
            for (int issueId = 1; issueId <= issueCount + pullCount; issueId++) {
                storeIssueById(rootURL, authToken, issueLabel, owner, name, issueId, roleList, crawlingConfig, callback, paramMap, scriptMap, defaultDataMap);
                if (readInterval > 0) {
                    sleep(readInterval);
                }
            }
            logger.info("Crawl Wiki in " + owner + "/" + name);
            // crawl Wiki
            storeWikiContents(rootURL, authToken, wikiLabel, owner, name, roleList, crawlingConfig, callback, paramMap, scriptMap, defaultDataMap, readInterval);
        } catch (final Exception e) {
            logger.warn("Failed to access to " + repository, e);
        }
    }
}
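
The interesting part of this example is the anonymous CrawlingConfigWrapper: instead of modifying the stored DataConfig, it overrides initializeClientFactory and merges the Authorization and Accept headers into whatever request headers are already registered under HcHttpClient.REQUERT_HEADERS_PROPERTY (the spelling is the crawler's own constant name). The sketch below isolates that merge step into a small helper; GitBucketHeaderUtil and mergeAuthHeaders are hypothetical names introduced only for illustration, not part of Fess.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.codelibs.fess.crawler.client.http.HcHttpClient;
import org.codelibs.fess.crawler.client.http.RequestHeader;

// Hypothetical helper that mirrors what the anonymous wrapper above does to paramMap.
public final class GitBucketHeaderUtil {

    private GitBucketHeaderUtil() {
    }

    // Appends the GitBucket token and raw-content Accept header to any request headers
    // already stored under HcHttpClient.REQUERT_HEADERS_PROPERTY.
    public static void mergeAuthHeaders(final Map<String, Object> paramMap, final String authToken) {
        final List<RequestHeader> headerList = new ArrayList<>();
        final RequestHeader[] headers = (RequestHeader[]) paramMap.get(HcHttpClient.REQUERT_HEADERS_PROPERTY);
        if (headers != null) {
            Collections.addAll(headerList, headers);
        }
        headerList.add(new RequestHeader("Authorization", "token " + authToken));
        headerList.add(new RequestHeader("Accept", "application/vnd.github.v3.raw"));
        paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, headerList.toArray(new RequestHeader[headerList.size()]));
    }
}

Wrapping the configuration this way keeps the persisted DataConfig untouched: the token exists only for the lifetime of the crawl, and the headers are applied whenever the crawler's HTTP client is initialized from these parameters.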
Also used:
CrawlingConfig (org.codelibs.fess.es.config.exentity.CrawlingConfig)
CrawlingConfigWrapper (org.codelibs.fess.es.config.exentity.CrawlingConfigWrapper)
CrawlerClientFactory (org.codelibs.fess.crawler.client.CrawlerClientFactory)
ArrayList (java.util.ArrayList)
URISyntaxException (java.net.URISyntaxException)
RequestHeader (org.codelibs.fess.crawler.client.http.RequestHeader)
HashMap (java.util.HashMap)
Map (java.util.Map)

Aggregations

URISyntaxException (java.net.URISyntaxException): 1
ArrayList (java.util.ArrayList): 1
HashMap (java.util.HashMap): 1
Map (java.util.Map): 1
CrawlerClientFactory (org.codelibs.fess.crawler.client.CrawlerClientFactory): 1
RequestHeader (org.codelibs.fess.crawler.client.http.RequestHeader): 1
CrawlingConfig (org.codelibs.fess.es.config.exentity.CrawlingConfig): 1
CrawlingConfigWrapper (org.codelibs.fess.es.config.exentity.CrawlingConfigWrapper): 1