Search in sources :

Example 1 with IndexUpdateCallback

use of org.codelibs.fess.ds.callback.IndexUpdateCallback in project fess by codelibs.

the class DataIndexHelper method doCrawl.

/**
 * Crawls the given data configs, each on its own {@link DataCrawlingThread}, and records
 * timing/size figures in the crawling info map once all started threads have finished.
 *
 * <p>At most {@code getCrawlingThreadCount()} threads (from the Fess config) are active at a time;
 * the method sleeps {@code crawlingExecutionInterval} between launches and status polls. If the
 * system helper reports a force stop, {@code stopCrawling()} is invoked on every thread and no
 * further threads are launched.</p>
 *
 * @param sessionId  session id placed into each thread's init parameters under {@code Constants.SESSION_ID}
 * @param configList data configs to crawl; one crawling thread is created per entry
 */
protected void doCrawl(final String sessionId, final List<DataConfig> configList) {
    final int multiprocessCrawlingCount = ComponentUtil.getFessConfig().getCrawlingThreadCount();
    final long startTime = System.currentTimeMillis();
    final IndexUpdateCallback indexUpdateCallback = ComponentUtil.getComponent(IndexUpdateCallback.class);
    final List<String> sessionIdList = new ArrayList<>();
    dataCrawlingThreadList.clear();
    // Parallel to dataCrawlingThreadList: READY -> RUNNING -> DONE for the thread at the same index.
    final List<String> dataCrawlingThreadStatusList = new ArrayList<>();
    for (final DataConfig dataConfig : configList) {
        final Map<String, String> initParamMap = new HashMap<>();
        // The id returned by store() names the thread and is used to remove the stored config at the end.
        final String sid = ComponentUtil.getCrawlingConfigHelper().store(sessionId, dataConfig);
        sessionIdList.add(sid);
        initParamMap.put(Constants.SESSION_ID, sessionId);
        initParamMap.put(Constants.CRAWLING_INFO_ID, sid);
        final DataCrawlingThread dataCrawlingThread = new DataCrawlingThread(dataConfig, indexUpdateCallback, initParamMap);
        dataCrawlingThread.setPriority(crawlerPriority);
        dataCrawlingThread.setName(sid);
        dataCrawlingThread.setDaemon(true);
        dataCrawlingThreadList.add(dataCrawlingThread);
        dataCrawlingThreadStatusList.add(Constants.READY);
    }
    final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
    int startedCrawlerNum = 0;
    int activeCrawlerNum = 0;
    // Launch phase: start threads in list order while keeping the active count under the limit.
    while (startedCrawlerNum < dataCrawlingThreadList.size()) {
        // Force to stop crawl
        if (systemHelper.isForceStop()) {
            for (final DataCrawlingThread crawlerThread : dataCrawlingThreadList) {
                crawlerThread.stopCrawling();
            }
            break;
        }
        if (activeCrawlerNum < multiprocessCrawlingCount) {
            // start crawling
            dataCrawlingThreadList.get(startedCrawlerNum).start();
            dataCrawlingThreadStatusList.set(startedCrawlerNum, Constants.RUNNING);
            startedCrawlerNum++;
            activeCrawlerNum++;
            ThreadUtil.sleep(crawlingExecutionInterval);
            continue;
        }
        // check status: free a slot for every started thread that has stopped running
        for (int i = 0; i < startedCrawlerNum; i++) {
            if (!dataCrawlingThreadList.get(i).isRunning() && Constants.RUNNING.equals(dataCrawlingThreadStatusList.get(i))) {
                dataCrawlingThreadList.get(i).awaitTermination();
                dataCrawlingThreadStatusList.set(i, Constants.DONE);
                activeCrawlerNum--;
            }
        }
        ThreadUtil.sleep(crawlingExecutionInterval);
    }
    // Wait phase: block until every started thread is DONE.
    // Only indexes below startedCrawlerNum are inspected. After a force stop, threads that were
    // never started stay READY and can never reach DONE, so waiting on them would loop forever.
    boolean finishedAll = false;
    while (!finishedAll) {
        finishedAll = true;
        for (int i = 0; i < startedCrawlerNum; i++) {
            dataCrawlingThreadList.get(i).awaitTermination(crawlingExecutionInterval);
            if (!dataCrawlingThreadList.get(i).isRunning() && Constants.RUNNING.equals(dataCrawlingThreadStatusList.get(i))) {
                dataCrawlingThreadStatusList.set(i, Constants.DONE);
            }
            if (!Constants.DONE.equals(dataCrawlingThreadStatusList.get(i))) {
                finishedAll = false;
            }
        }
    }
    dataCrawlingThreadList.clear();
    dataCrawlingThreadStatusList.clear();
    // put crawling info
    final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
    final long execTime = System.currentTimeMillis() - startTime;
    crawlingInfoHelper.putToInfoMap(Constants.DATA_CRAWLING_EXEC_TIME, Long.toString(execTime));
    if (logger.isInfoEnabled()) {
        logger.info("[EXEC TIME] crawling time: {}ms", execTime);
    }
    crawlingInfoHelper.putToInfoMap(Constants.DATA_INDEX_EXEC_TIME, Long.toString(indexUpdateCallback.getExecuteTime()));
    crawlingInfoHelper.putToInfoMap(Constants.DATA_INDEX_SIZE, Long.toString(indexUpdateCallback.getDocumentSize()));
    for (final String sid : sessionIdList) {
        // remove config
        ComponentUtil.getCrawlingConfigHelper().remove(sid);
    }
}
Also used : HashMap(java.util.HashMap) IndexUpdateCallback(org.codelibs.fess.ds.callback.IndexUpdateCallback) ArrayList(java.util.ArrayList) DataConfig(org.codelibs.fess.es.config.exentity.DataConfig)

Example 2 with IndexUpdateCallback

use of org.codelibs.fess.ds.callback.IndexUpdateCallback in project fess by codelibs.

the class AbstractDataStore method store.

/**
 * Prepares the crawl parameters and the default document fields for a data config,
 * then delegates the actual work to {@code storeData}.
 *
 * <p>Handler parameters from the config have every <code>${NAME}</code> placeholder replaced with
 * the matching entry of the filtered environment map and are then merged into
 * {@code initParamMap} (the passed-in map itself is updated).</p>
 *
 * @param config       data config being crawled
 * @param callback     index update callback forwarded to {@code storeData}
 * @param initParamMap initial parameters; receives the resolved handler parameters
 */
@Override
public void store(final DataConfig config, final IndexUpdateCallback callback, final Map<String, String> initParamMap) {
    final CrawlingInfoHelper infoHelper = ComponentUtil.getCrawlingInfoHelper();
    final SystemHelper sysHelper = ComponentUtil.getSystemHelper();
    final Date expires = infoHelper.getDocumentExpires(config);
    final FessConfig fessConfig = ComponentUtil.getFessConfig();

    // Resolve ${ENV_KEY} placeholders in each handler parameter value.
    final Map<String, String> envParams = sysHelper.getFilteredEnvMap(fessConfig.getCrawlerDataEnvParamKeyPattern());
    final Map<String, String> resolvedParams = config.getHandlerParameterMap().entrySet().stream().map(param -> {
        String resolved = param.getValue();
        for (final Map.Entry<String, String> env : envParams.entrySet()) {
            resolved = resolved.replace("${" + env.getKey() + "}", env.getValue());
        }
        return new Pair<>(param.getKey(), resolved);
    }).collect(Collectors.toMap(Pair<String, String>::getFirst, Pair<String, String>::getSecond));
    final Map<String, String> scripts = config.getHandlerScriptMap();
    initParamMap.putAll(resolvedParams);

    // Field values applied to every document produced for this config.
    final Map<String, Object> defaults = new HashMap<>();

    // config id (only when present)
    final String cid = config.getConfigId();
    if (cid != null) {
        defaults.put(fessConfig.getIndexFieldConfigId(), cid);
    }

    // expiry (only when present)
    if (expires != null) {
        defaults.put(fessConfig.getIndexFieldExpires(), expires);
    }

    // segment, created timestamp, boost
    defaults.put(fessConfig.getIndexFieldSegment(), initParamMap.get(Constants.SESSION_ID));
    defaults.put(fessConfig.getIndexFieldCreated(), sysHelper.getCurrentTime());
    defaults.put(fessConfig.getIndexFieldBoost(), config.getBoost().toString());

    // roles, copied from the config's permissions
    final List<String> roles = new ArrayList<>();
    stream(config.getPermissions()).of(permissions -> permissions.forEach(roles::add));
    defaults.put(fessConfig.getIndexFieldRole(), roles);

    // mimetype
    defaults.put(fessConfig.getIndexFieldMimetype(), mimeType);

    // virtual hosts, blank entries dropped
    final String virtualHostField = fessConfig.getIndexFieldVirtualHost();
    final List<String> virtualHosts =
            stream(config.getVirtualHosts()).get(hosts -> hosts.filter(StringUtil::isNotBlank).collect(Collectors.toList()));
    defaults.put(virtualHostField, virtualHosts);

    // Remaining fields (label, title, content, cache, digest, host, site, url, anchor,
    // content_length, last_modified, id) get no default here.
    storeData(config, callback, new ParamMap<>(initParamMap), scripts, defaults);
}
Also used : DataConfig(org.codelibs.fess.es.config.exentity.DataConfig) ThreadUtil(org.codelibs.core.lang.ThreadUtil) Constants(org.codelibs.fess.Constants) StreamUtil.stream(org.codelibs.core.stream.StreamUtil.stream) Date(java.util.Date) IndexUpdateCallback(org.codelibs.fess.ds.callback.IndexUpdateCallback) StringUtil(org.codelibs.core.lang.StringUtil) Pair(org.codelibs.core.misc.Pair) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) List(java.util.List) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) Logger(org.apache.logging.log4j.Logger) ComponentUtil(org.codelibs.fess.util.ComponentUtil) CrawlingInfoHelper(org.codelibs.fess.helper.CrawlingInfoHelper) SystemHelper(org.codelibs.fess.helper.SystemHelper) Map(java.util.Map) LogManager(org.apache.logging.log4j.LogManager) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) Date(java.util.Date) SystemHelper(org.codelibs.fess.helper.SystemHelper) CrawlingInfoHelper(org.codelibs.fess.helper.CrawlingInfoHelper)

Example 3 with IndexUpdateCallback

use of org.codelibs.fess.ds.callback.IndexUpdateCallback in project fess by codelibs.

the class AbstractDataStoreTest method setUp.

/**
 * Builds the test fixture: a minimal {@code AbstractDataStore} whose {@code storeData} does
 * nothing, a registered {@code ScriptEngineFactory}, and a Groovy-backed script engine
 * registered under {@code Constants.DEFAULT_SCRIPT}.
 */
@Override
public void setUp() throws Exception {
    super.setUp();

    // Data store stub: named "Test", storing is a no-op.
    dataStore = new AbstractDataStore() {

        @Override
        protected String getName() {
            return "Test";
        }

        @Override
        protected void storeData(DataConfig dataConfig, IndexUpdateCallback callback, Map<String, String> paramMap,
                Map<String, String> scriptMap, Map<String, Object> defaultDataMap) {
            // intentionally empty
        }
    };

    ScriptEngineFactory engineFactory = new ScriptEngineFactory();
    ComponentUtil.register(engineFactory, "scriptEngineFactory");

    final AbstractScriptEngine groovyEngine = new AbstractScriptEngine() {

        @Override
        public Object evaluate(String script, Map<String, Object> params) {
            // Copy the caller's parameters and expose the DI container to the script.
            final Map<String, Object> bindings = new HashMap<>(params);
            bindings.put("container", SingletonLaContainerFactory.getContainer());
            final GroovyShell shell = new GroovyShell(new Binding(bindings));
            try {
                return shell.evaluate(script);
            } catch (final JobProcessingException e) {
                // Job failures are passed through to the caller unchanged.
                throw e;
            } catch (final Exception ignored) {
                // Any other failure yields a null result.
                return null;
            } finally {
                shell.getClassLoader().clearCache();
            }
        }

        @Override
        protected String getName() {
            return Constants.DEFAULT_SCRIPT;
        }
    };
    groovyEngine.register();
}
Also used : Binding(groovy.lang.Binding) IndexUpdateCallback(org.codelibs.fess.ds.callback.IndexUpdateCallback) JobProcessingException(org.codelibs.fess.exception.JobProcessingException) GroovyShell(groovy.lang.GroovyShell) JobProcessingException(org.codelibs.fess.exception.JobProcessingException) GroovyClassLoader(groovy.lang.GroovyClassLoader) DataConfig(org.codelibs.fess.es.config.exentity.DataConfig) ScriptEngineFactory(org.codelibs.fess.script.ScriptEngineFactory) HashMap(java.util.HashMap) Map(java.util.Map) AbstractScriptEngine(org.codelibs.fess.script.AbstractScriptEngine)

Aggregations

HashMap (java.util.HashMap)3 IndexUpdateCallback (org.codelibs.fess.ds.callback.IndexUpdateCallback)3 DataConfig (org.codelibs.fess.es.config.exentity.DataConfig)3 ArrayList (java.util.ArrayList)2 Map (java.util.Map)2 Binding (groovy.lang.Binding)1 GroovyClassLoader (groovy.lang.GroovyClassLoader)1 GroovyShell (groovy.lang.GroovyShell)1 Date (java.util.Date)1 List (java.util.List)1 Collectors (java.util.stream.Collectors)1 LogManager (org.apache.logging.log4j.LogManager)1 Logger (org.apache.logging.log4j.Logger)1 StringUtil (org.codelibs.core.lang.StringUtil)1 ThreadUtil (org.codelibs.core.lang.ThreadUtil)1 Pair (org.codelibs.core.misc.Pair)1 StreamUtil.stream (org.codelibs.core.stream.StreamUtil.stream)1 Constants (org.codelibs.fess.Constants)1 JobProcessingException (org.codelibs.fess.exception.JobProcessingException)1 CrawlingInfoHelper (org.codelibs.fess.helper.CrawlingInfoHelper)1