use of org.codelibs.fess.ds.callback.IndexUpdateCallback in project fess by codelibs.
the class DataIndexHelper method doCrawl.
/**
 * Crawls every given data configuration on its own {@code DataCrawlingThread}.
 *
 * <p>One thread is prepared per configuration. Threads are then started one at a
 * time, keeping at most {@code getCrawlingThreadCount()} of them active, with a
 * pause of {@code crawlingExecutionInterval} between scheduling steps. Once all
 * threads have been started (or a force stop was requested) the method waits for
 * every started thread to finish, records the elapsed crawling time and the
 * callback's index statistics in the crawling info map, and removes the stored
 * per-thread configurations.</p>
 *
 * @param sessionId  the crawling session id, passed to each thread as {@code Constants.SESSION_ID}
 * @param configList the data configurations to crawl; one thread is created per element
 */
protected void doCrawl(final String sessionId, final List<DataConfig> configList) {
    final int multiprocessCrawlingCount = ComponentUtil.getFessConfig().getCrawlingThreadCount();
    final long startTime = System.currentTimeMillis();
    final IndexUpdateCallback indexUpdateCallback = ComponentUtil.getComponent(IndexUpdateCallback.class);
    final List<String> sessionIdList = new ArrayList<>();
    dataCrawlingThreadList.clear();
    // Status per thread, index-aligned with dataCrawlingThreadList: READY -> RUNNING -> DONE.
    final List<String> dataCrawlingThreadStatusList = new ArrayList<>();
    for (final DataConfig dataConfig : configList) {
        final Map<String, String> initParamMap = new HashMap<>();
        final String sid = ComponentUtil.getCrawlingConfigHelper().store(sessionId, dataConfig);
        sessionIdList.add(sid);
        initParamMap.put(Constants.SESSION_ID, sessionId);
        initParamMap.put(Constants.CRAWLING_INFO_ID, sid);
        final DataCrawlingThread dataCrawlingThread = new DataCrawlingThread(dataConfig, indexUpdateCallback, initParamMap);
        dataCrawlingThread.setPriority(crawlerPriority);
        dataCrawlingThread.setName(sid);
        dataCrawlingThread.setDaemon(true);
        dataCrawlingThreadList.add(dataCrawlingThread);
        dataCrawlingThreadStatusList.add(Constants.READY);
    }

    final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
    int startedCrawlerNum = 0;
    int activeCrawlerNum = 0;
    while (startedCrawlerNum < dataCrawlingThreadList.size()) {
        // Force to stop crawl
        if (systemHelper.isForceStop()) {
            for (final DataCrawlingThread crawlerThread : dataCrawlingThreadList) {
                crawlerThread.stopCrawling();
            }
            break;
        }

        if (activeCrawlerNum < multiprocessCrawlingCount) {
            // start crawling
            dataCrawlingThreadList.get(startedCrawlerNum).start();
            dataCrawlingThreadStatusList.set(startedCrawlerNum, Constants.RUNNING);
            startedCrawlerNum++;
            activeCrawlerNum++;
            ThreadUtil.sleep(crawlingExecutionInterval);
            continue;
        }

        // check status: free a slot for every started thread that is no longer running
        for (int i = 0; i < startedCrawlerNum; i++) {
            if (!dataCrawlingThreadList.get(i).isRunning() && Constants.RUNNING.equals(dataCrawlingThreadStatusList.get(i))) {
                dataCrawlingThreadList.get(i).awaitTermination();
                dataCrawlingThreadStatusList.set(i, Constants.DONE);
                activeCrawlerNum--;
            }
        }
        ThreadUtil.sleep(crawlingExecutionInterval);
    }

    // Wait for the started threads only. After a force stop, threads that were never
    // started remain READY and can never become DONE, so waiting on the whole list
    // would loop forever. Without a force stop startedCrawlerNum equals the list size.
    boolean finishedAll = false;
    while (!finishedAll) {
        finishedAll = true;
        for (int i = 0; i < startedCrawlerNum; i++) {
            dataCrawlingThreadList.get(i).awaitTermination(crawlingExecutionInterval);
            if (!dataCrawlingThreadList.get(i).isRunning() && Constants.RUNNING.equals(dataCrawlingThreadStatusList.get(i))) {
                dataCrawlingThreadStatusList.set(i, Constants.DONE);
            }
            if (!Constants.DONE.equals(dataCrawlingThreadStatusList.get(i))) {
                finishedAll = false;
            }
        }
    }
    dataCrawlingThreadList.clear();
    dataCrawlingThreadStatusList.clear();

    // put crawling info
    final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
    final long execTime = System.currentTimeMillis() - startTime;
    crawlingInfoHelper.putToInfoMap(Constants.DATA_CRAWLING_EXEC_TIME, Long.toString(execTime));
    if (logger.isInfoEnabled()) {
        logger.info("[EXEC TIME] crawling time: {}ms", execTime);
    }
    crawlingInfoHelper.putToInfoMap(Constants.DATA_INDEX_EXEC_TIME, Long.toString(indexUpdateCallback.getExecuteTime()));
    crawlingInfoHelper.putToInfoMap(Constants.DATA_INDEX_SIZE, Long.toString(indexUpdateCallback.getDocumentSize()));

    for (final String sid : sessionIdList) {
        // remove config
        ComponentUtil.getCrawlingConfigHelper().remove(sid);
    }
}
use of org.codelibs.fess.ds.callback.IndexUpdateCallback in project fess by codelibs.
the class AbstractDataStore method store.
/**
 * Builds the parameter map and the default document fields for the given data
 * configuration and hands them to {@code storeData}.
 *
 * <p>Each handler parameter value has every {@code ${NAME}} placeholder replaced
 * by the matching entry of the filtered environment map. The resolved parameters
 * are merged into {@code initParamMap} (the passed map itself is modified) before
 * it is wrapped in a {@code ParamMap}.</p>
 *
 * @param config       the data configuration being crawled
 * @param callback     the callback passed through to {@code storeData}
 * @param initParamMap initial parameters; receives the resolved handler parameters
 */
@Override
public void store(final DataConfig config, final IndexUpdateCallback callback, final Map<String, String> initParamMap) {
    final CrawlingInfoHelper infoHelper = ComponentUtil.getCrawlingInfoHelper();
    final SystemHelper sysHelper = ComponentUtil.getSystemHelper();
    final Date expires = infoHelper.getDocumentExpires(config);
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    final Map<String, String> envParams = sysHelper.getFilteredEnvMap(fessConfig.getCrawlerDataEnvParamKeyPattern());

    // Resolve ${ENV_KEY} placeholders in every handler parameter value.
    final Map<String, String> resolvedParams =
            config.getHandlerParameterMap().entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, param -> {
                String resolved = param.getValue();
                for (final Map.Entry<String, String> env : envParams.entrySet()) {
                    resolved = resolved.replace("${" + env.getKey() + "}", env.getValue());
                }
                return resolved;
            }));
    final Map<String, String> scriptMap = config.getHandlerScriptMap();
    initParamMap.putAll(resolvedParams);

    // Field values applied to every document by default.
    final Map<String, Object> defaults = new HashMap<>();

    // config id (only when present)
    final String configId = config.getConfigId();
    if (configId != null) {
        defaults.put(fessConfig.getIndexFieldConfigId(), configId);
    }

    // expiration time (only when present)
    if (expires != null) {
        defaults.put(fessConfig.getIndexFieldExpires(), expires);
    }

    // segment: the session id supplied through the init parameters
    defaults.put(fessConfig.getIndexFieldSegment(), initParamMap.get(Constants.SESSION_ID));

    // creation time
    defaults.put(fessConfig.getIndexFieldCreated(), sysHelper.getCurrentTime());

    // boost, stored as its string form
    defaults.put(fessConfig.getIndexFieldBoost(), config.getBoost().toString());

    // role: every permission of the configuration
    final List<String> roles = new ArrayList<>();
    stream(config.getPermissions()).of(permissions -> permissions.forEach(roles::add));
    defaults.put(fessConfig.getIndexFieldRole(), roles);

    // mimetype
    defaults.put(fessConfig.getIndexFieldMimetype(), mimeType);

    // virtual_host: the non-blank virtual hosts of the configuration
    defaults.put(fessConfig.getIndexFieldVirtualHost(),
            stream(config.getVirtualHosts()).get(hosts -> hosts.filter(StringUtil::isNotBlank).collect(Collectors.toList())));

    // Remaining fields (title, content, url, id, ...) are not given defaults here.
    storeData(config, callback, new ParamMap<>(initParamMap), scriptMap, defaults);
}
use of org.codelibs.fess.ds.callback.IndexUpdateCallback in project fess by codelibs.
the class AbstractDataStoreTest method setUp.
/**
 * Prepares the fixture: a no-op {@code AbstractDataStore} named "Test", a
 * {@code ScriptEngineFactory} registered as "scriptEngineFactory", and a
 * Groovy-backed script engine registered under {@code Constants.DEFAULT_SCRIPT}.
 */
@Override
public void setUp() throws Exception {
    super.setUp();

    // Minimal concrete data store; storing data is deliberately a no-op.
    dataStore = new AbstractDataStore() {
        @Override
        protected String getName() {
            return "Test";
        }

        @Override
        protected void storeData(DataConfig dataConfig, IndexUpdateCallback callback, Map<String, String> paramMap,
                Map<String, String> scriptMap, Map<String, Object> defaultDataMap) {
            // intentionally empty
        }
    };

    ComponentUtil.register(new ScriptEngineFactory(), "scriptEngineFactory");

    // Script engine that evaluates templates with a fresh GroovyShell per call.
    new AbstractScriptEngine() {
        @Override
        public Object evaluate(String template, Map<String, Object> paramMap) {
            // Copy the parameters so the caller's map is not modified, then expose the container.
            final Map<String, Object> bindings = new HashMap<>(paramMap);
            bindings.put("container", SingletonLaContainerFactory.getContainer());
            final GroovyShell shell = new GroovyShell(new Binding(bindings));
            try {
                return shell.evaluate(template);
            } catch (final JobProcessingException jobFailure) {
                // Job failures are propagated unchanged.
                throw jobFailure;
            } catch (final Exception ignored) {
                // Any other evaluation failure yields null.
                return null;
            } finally {
                // Always clear the shell's class loader cache after evaluation.
                shell.getClassLoader().clearCache();
            }
        }

        @Override
        protected String getName() {
            return Constants.DEFAULT_SCRIPT;
        }
    }.register();
}
Aggregations