Use of org.codelibs.fess.crawler.client.http.RequestHeader in project fess by codelibs: the storeData method of the GitBucketDataStoreImpl class.
@Override
protected void storeData(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap,
        final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap) {
    final String rootURL = getRootURL(paramMap);
    final String authToken = getAuthToken(paramMap);
    final long readInterval = getReadInterval(paramMap);

    // Non-emptiness check for the URL and token parameters
    if (rootURL.isEmpty() || authToken.isEmpty()) {
        logger.warn("parameters \"" + TOKEN_PARAM + "\" and \"" + GITBUCKET_URL_PARAM + "\" are required");
        return;
    }

    // Get the list of repositories
    final List<Map<String, Object>> repositoryList = getRepositoryList(rootURL, authToken);
    if (repositoryList.isEmpty()) {
        logger.warn("Token is invalid or there is no repository");
        return;
    }

    // Get labels
    final Map<String, String> pluginInfo = getFessPluginInfo(rootURL, authToken);
    final String sourceLabel = pluginInfo.get("source_label");
    final String issueLabel = pluginInfo.get("issue_label");
    final String wikiLabel = pluginInfo.get("wiki_label");

    final CrawlingConfig crawlingConfig = new CrawlingConfigWrapper(dataConfig) {
        @Override
        public Map<String, Object> initializeClientFactory(final CrawlerClientFactory crawlerClientFactory) {
            final Map<String, Object> paramMap = super.initializeClientFactory(crawlerClientFactory);
            final List<RequestHeader> headerList = new ArrayList<>();
            final RequestHeader[] headers = (RequestHeader[]) paramMap.get(HcHttpClient.REQUERT_HEADERS_PROPERTY);
            if (headers != null) {
                for (final RequestHeader header : headers) {
                    headerList.add(header);
                }
            }
            headerList.add(new RequestHeader("Authorization", "token " + authToken));
            headerList.add(new RequestHeader("Accept", "application/vnd.github.v3.raw"));
            paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, headerList.toArray(new RequestHeader[headerList.size()]));
            return paramMap;
        }
    };
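    // The anonymous CrawlingConfigWrapper above merges any request headers already
    // configured under HcHttpClient.REQUERT_HEADERS_PROPERTY with an
    // "Authorization: token <authToken>" header and an
    // "Accept: application/vnd.github.v3.raw" header, so every request made by the
    // crawler's HTTP client for this data config is authenticated against GitBucket.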

    // Crawl each repository
    for (final Map<String, Object> repository : repositoryList) {
        try {
            final String owner = (String) repository.get("owner");
            final String name = (String) repository.get("name");
            final String refStr = getGitRef(rootURL, authToken, owner, name, "master");
            final int issueCount = (int) repository.get("issue_count");
            final int pullCount = (int) repository.get("pull_count");
            final List<String> roleList = createRoleList(owner, repository);

            logger.info("Crawl " + owner + "/" + name);
            // Crawl and store file contents recursively
            crawlFileContents(rootURL, authToken, owner, name, refStr, StringUtil.EMPTY, 0, readInterval, path -> {
                storeFileContent(rootURL, authToken, sourceLabel, owner, name, refStr, roleList, path, crawlingConfig, callback,
                        paramMap, scriptMap, defaultDataMap);
                if (readInterval > 0) {
                    sleep(readInterval);
                }
            });

            logger.info("Crawl issues in " + owner + "/" + name);
            // Store issues and pull requests
            for (int issueId = 1; issueId <= issueCount + pullCount; issueId++) {
                storeIssueById(rootURL, authToken, issueLabel, owner, name, issueId, roleList, crawlingConfig, callback, paramMap,
                        scriptMap, defaultDataMap);
                if (readInterval > 0) {
                    sleep(readInterval);
                }
            }

            logger.info("Crawl Wiki in " + owner + "/" + name);
            // Crawl wiki pages
            storeWikiContents(rootURL, authToken, wikiLabel, owner, name, roleList, crawlingConfig, callback, paramMap, scriptMap,
                    defaultDataMap, readInterval);
        } catch (final Exception e) {
            logger.warn("Failed to access " + repository, e);
        }
    }
}
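
For reference, the following is a minimal, self-contained sketch (not part of the fess source) of the same header-merging pattern used in initializeClientFactory above. It relies only on what that method itself shows: the RequestHeader(String, String) constructor and the HcHttpClient.REQUERT_HEADERS_PROPERTY key; the class name, main method, and dummy token are hypothetical.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.codelibs.fess.crawler.client.http.HcHttpClient;
import org.codelibs.fess.crawler.client.http.RequestHeader;

public class RequestHeaderExample {

    public static void main(final String[] args) {
        // Hypothetical token; in GitBucketDataStoreImpl this comes from the data store parameters.
        final String authToken = "dummy-token";

        // Stand-in for the map returned by super.initializeClientFactory(...).
        final Map<String, Object> clientParams = new HashMap<>();

        // Merge any pre-configured headers with the GitBucket authentication headers,
        // mirroring initializeClientFactory above.
        final List<RequestHeader> headerList = new ArrayList<>();
        final RequestHeader[] existing = (RequestHeader[]) clientParams.get(HcHttpClient.REQUERT_HEADERS_PROPERTY);
        if (existing != null) {
            for (final RequestHeader header : existing) {
                headerList.add(header);
            }
        }
        headerList.add(new RequestHeader("Authorization", "token " + authToken));
        headerList.add(new RequestHeader("Accept", "application/vnd.github.v3.raw"));
        clientParams.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, headerList.toArray(new RequestHeader[headerList.size()]));

        final RequestHeader[] merged = (RequestHeader[]) clientParams.get(HcHttpClient.REQUERT_HEADERS_PROPERTY);
        System.out.println(merged.length + " request headers configured");
    }
}

In the data store itself this merging happens inside the CrawlingConfigWrapper, so it takes effect whenever the crawler client factory is initialized for the data config rather than in a standalone main method.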