Search in sources :

Example 1 with CsvReader

use of com.orangesignal.csv.CsvReader in project fess by codelibs.

From the class CsvDataStoreImpl, method processCsv.

/**
 * Reads a CSV file row by row, builds a document map for each non-empty row and
 * passes it to the index callback.
 *
 * <p>For every row, each cell is exposed to the scripts under
 * {@code CELL_PREFIX + <1-based column index>} and, when a header line was read and
 * the header cell is not blank, also under the header name. The entries of
 * {@code scriptMap} are then converted with {@code convertValue} against that row
 * context, and every non-null result is put into the document map, which starts as
 * a copy of {@code defaultDataMap}.</p>
 *
 * <p>Failures raised by {@code callback.store} are logged and recorded through
 * {@link FailureUrlService}; they do not stop the import unless a
 * {@link DataStoreCrawlingException} reports that it was aborted.</p>
 *
 * @param dataConfig      crawling configuration; exposed to scripts as {@code crawlingConfig}
 *                        and used when recording failures
 * @param callback        receives {@code paramMap} and the document map for each row
 * @param paramMap        parameters copied into each row's script context
 * @param scriptMap       document field name to script expression
 * @param defaultDataMap  initial content of every document map
 * @param csvConfig       CSV parser configuration
 * @param csvFile         file to read
 * @param readInterval    value passed to {@code sleep} after each stored row when positive
 * @param csvFileEncoding character encoding of the file
 * @param hasHeaderLine   whether the first row holds column names
 * @throws DataStoreException if opening or reading the file fails
 */
protected void processCsv(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap, final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap, final CsvConfig csvConfig, final File csvFile, final long readInterval, final String csvFileEncoding, final boolean hasHeaderLine) {
    logger.info("Loading " + csvFile.getAbsolutePath());
    // Tracked separately from csvReader so the file handle is released even when
    // wrapping it fails (e.g. unsupported encoding) before csvReader is assigned.
    FileInputStream csvStream = null;
    CsvReader csvReader = null;
    try {
        csvStream = new FileInputStream(csvFile);
        csvReader = new CsvReader(new BufferedReader(new InputStreamReader(csvStream, csvFileEncoding)), csvConfig);
        List<String> headerList = null;
        if (hasHeaderLine) {
            headerList = csvReader.readValues();
        }
        List<String> list;
        boolean loop = true;
        // Check the stop flags first so no further row is read once the crawl
        // was aborted or this data store is no longer alive.
        while (loop && alive && (list = csvReader.readValues()) != null) {
            // Document that will be indexed; starts from the configured defaults.
            final Map<String, Object> dataMap = new HashMap<>();
            dataMap.putAll(defaultDataMap);
            // Context made available to the scripts for this row.
            final Map<String, Object> resultMap = new LinkedHashMap<>();
            resultMap.putAll(paramMap);
            resultMap.put("csvfile", csvFile.getAbsolutePath());
            resultMap.put("csvfilename", csvFile.getName());
            resultMap.put("crawlingConfig", dataConfig);
            boolean foundValues = false;
            for (int i = 0; i < list.size(); i++) {
                String key = null;
                String value = list.get(i);
                if (value == null) {
                    value = StringUtil.EMPTY;
                }
                if (StringUtil.isNotBlank(value)) {
                    foundValues = true;
                }
                // Expose the cell under its header name when one is available ...
                if (headerList != null && headerList.size() > i) {
                    key = headerList.get(i);
                    if (StringUtil.isNotBlank(key)) {
                        resultMap.put(key, value);
                    }
                }
                // ... and always under its 1-based positional key.
                key = CELL_PREFIX + Integer.toString(i + 1);
                resultMap.put(key, value);
            }
            if (!foundValues) {
                // Every cell was blank: nothing to index for this row.
                logger.debug("No data in line: {}", resultMap);
                continue;
            }
            if (logger.isDebugEnabled()) {
                for (final Map.Entry<String, Object> entry : resultMap.entrySet()) {
                    logger.debug(entry.getKey() + "=" + entry.getValue());
                }
            }
            // Give the scripts access to the document being built.
            final Map<String, Object> crawlingContext = new HashMap<>();
            crawlingContext.put("doc", dataMap);
            resultMap.put("crawlingContext", crawlingContext);
            for (final Map.Entry<String, String> entry : scriptMap.entrySet()) {
                final Object convertValue = convertValue(entry.getValue(), resultMap);
                if (convertValue != null) {
                    dataMap.put(entry.getKey(), convertValue);
                }
            }
            if (logger.isDebugEnabled()) {
                for (final Map.Entry<String, Object> entry : dataMap.entrySet()) {
                    logger.debug(entry.getKey() + "=" + entry.getValue());
                }
            }
            try {
                callback.store(paramMap, dataMap);
            } catch (final CrawlingAccessException e) {
                logger.warn("Crawling Access Exception at : " + dataMap, e);
                // For a multi-cause exception, report the last cause.
                Throwable target = e;
                if (target instanceof MultipleCrawlingAccessException) {
                    final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses();
                    if (causes.length > 0) {
                        target = causes[causes.length - 1];
                    }
                }
                // Prefer the class name of the underlying cause as the error name.
                String errorName;
                final Throwable cause = target.getCause();
                if (cause != null) {
                    errorName = cause.getClass().getCanonicalName();
                } else {
                    errorName = target.getClass().getCanonicalName();
                }
                String url;
                if (target instanceof DataStoreCrawlingException) {
                    final DataStoreCrawlingException dce = (DataStoreCrawlingException) target;
                    url = dce.getUrl();
                    if (dce.aborted()) {
                        // Stop after this row; the loop condition checks the flag.
                        loop = false;
                    }
                } else {
                    // No URL available: identify the row by file path and line number.
                    url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber();
                }
                final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
                failureUrlService.store(dataConfig, errorName, url, target);
            } catch (final Throwable t) {
                logger.warn("Crawling Access Exception at : " + dataMap, t);
                final String url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber();
                final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
                failureUrlService.store(dataConfig, t.getClass().getCanonicalName(), url, t);
            }
            if (readInterval > 0) {
                sleep(readInterval);
            }
        }
    } catch (final Exception e) {
        throw new DataStoreException("Failed to crawl data when reading csv file.", e);
    } finally {
        IOUtils.closeQuietly(csvReader);
        // Normally already closed through csvReader; closing again is harmless and
        // covers the case where the reader chain was never fully constructed.
        IOUtils.closeQuietly(csvStream);
    }
}
Also used : MultipleCrawlingAccessException(org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException) DataStoreException(org.codelibs.fess.exception.DataStoreException) InputStreamReader(java.io.InputStreamReader) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) CrawlingAccessException(org.codelibs.fess.crawler.exception.CrawlingAccessException) MultipleCrawlingAccessException(org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException) FailureUrlService(org.codelibs.fess.app.service.FailureUrlService) FileInputStream(java.io.FileInputStream) CrawlingAccessException(org.codelibs.fess.crawler.exception.CrawlingAccessException) MultipleCrawlingAccessException(org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException) DataStoreCrawlingException(org.codelibs.fess.exception.DataStoreCrawlingException) DataStoreException(org.codelibs.fess.exception.DataStoreException) LinkedHashMap(java.util.LinkedHashMap) CsvReader(com.orangesignal.csv.CsvReader) DataStoreCrawlingException(org.codelibs.fess.exception.DataStoreCrawlingException) BufferedReader(java.io.BufferedReader) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 2 with CsvReader

use of com.orangesignal.csv.CsvReader in project fess by codelibs.

From the class CrawlingInfoService, method importCsv.

/**
 * Imports crawling info parameters from CSV.
 *
 * <p>The first row is skipped as a header. Each following row is read as:
 * column 0 = session id, column 1 = created time of the crawling info,
 * column 2 = parameter key, column 3 = parameter value, column 4 = created time of
 * the parameter. Times are parsed with
 * {@code CoreLibConstants.DATE_FORMAT_ISO_8601_EXTEND}.</p>
 *
 * <p>A crawling info is inserted when none exists for the session id; a parameter
 * entry is inserted for every row. A row that fails (too few columns, unparsable
 * date, storage error) is logged and skipped; the import continues.</p>
 *
 * @param reader CSV source; it is not closed by this method
 */
public void importCsv(final Reader reader) {
    @SuppressWarnings("resource") final CsvReader csvReader = new CsvReader(reader, new CsvConfig());
    final DateFormat formatter = new SimpleDateFormat(CoreLibConstants.DATE_FORMAT_ISO_8601_EXTEND);
    try {
        List<String> list;
        // ignore header
        csvReader.readValues();
        while ((list = csvReader.readValues()) != null) {
            try {
                final String sessionId = list.get(0);
                // TODO
                CrawlingInfo crawlingInfo = crawlingInfoBhv.selectEntity(cb -> {
                    cb.query().setSessionId_Equal(sessionId);
                    cb.specify().columnSessionId();
                }).orElse(null);
                if (crawlingInfo == null) {
                    // First row seen for this session: create the parent record.
                    crawlingInfo = new CrawlingInfo();
                    crawlingInfo.setSessionId(sessionId);
                    crawlingInfo.setCreatedTime(formatter.parse(list.get(1)).getTime());
                    crawlingInfoBhv.insert(crawlingInfo, op -> op.setRefreshPolicy(Constants.TRUE));
                }
                final CrawlingInfoParam entity = new CrawlingInfoParam();
                entity.setCrawlingInfoId(crawlingInfo.getId());
                entity.setKey(list.get(2));
                entity.setValue(list.get(3));
                entity.setCreatedTime(formatter.parse(list.get(4)).getTime());
                crawlingInfoParamBhv.insert(entity, op -> op.setRefreshPolicy(Constants.TRUE));
            } catch (final Exception e) {
                // Best effort: one bad row must not abort the whole import.
                logger.warn("Failed to read crawling info: {}", list, e);
            }
        }
    } catch (final IOException e) {
        logger.warn("Failed to read crawling info.", e);
    }
}
Also used : CsvReader(com.orangesignal.csv.CsvReader) CrawlingInfo(org.codelibs.fess.es.config.exentity.CrawlingInfo) BeanUtil(org.codelibs.core.beans.util.BeanUtil) EntityRowHandler(org.dbflute.bhv.readable.EntityRowHandler) Constants(org.codelibs.fess.Constants) FessSystemException(org.codelibs.fess.exception.FessSystemException) CrawlingInfoBhv(org.codelibs.fess.es.config.exbhv.CrawlingInfoBhv) ListResultBean(org.dbflute.cbean.result.ListResultBean) LocalDateTime(java.time.LocalDateTime) SimpleDateFormat(java.text.SimpleDateFormat) ArrayList(java.util.ArrayList) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) CrawlingInfoParam(org.codelibs.fess.es.config.exentity.CrawlingInfoParam) PagingResultBean(org.dbflute.cbean.result.PagingResultBean) DateFormat(java.text.DateFormat) CrawlingInfoPager(org.codelibs.fess.app.pager.CrawlingInfoPager) OptionalEntity(org.dbflute.optional.OptionalEntity) CrawlingInfoParamBhv(org.codelibs.fess.es.config.exbhv.CrawlingInfoParamBhv) Resource(javax.annotation.Resource) StringUtil(org.codelibs.core.lang.StringUtil) CsvWriter(com.orangesignal.csv.CsvWriter) Set(java.util.Set) IOException(java.io.IOException) Reader(java.io.Reader) CoreLibConstants(org.codelibs.core.CoreLibConstants) Collectors(java.util.stream.Collectors) CrawlingInfoCB(org.codelibs.fess.es.config.cbean.CrawlingInfoCB) CsvConfig(com.orangesignal.csv.CsvConfig) List(java.util.List) Logger(org.apache.logging.log4j.Logger) CrawlingInfo(org.codelibs.fess.es.config.exentity.CrawlingInfo) ComponentUtil(org.codelibs.fess.util.ComponentUtil) DateTimeFormatter(java.time.format.DateTimeFormatter) Writer(java.io.Writer) Collections(java.util.Collections) LogManager(org.apache.logging.log4j.LogManager) CsvReader(com.orangesignal.csv.CsvReader) SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat) CsvConfig(com.orangesignal.csv.CsvConfig) IOException(java.io.IOException) SimpleDateFormat(java.text.SimpleDateFormat) 
CrawlingInfoParam(org.codelibs.fess.es.config.exentity.CrawlingInfoParam) FessSystemException(org.codelibs.fess.exception.FessSystemException) IOException(java.io.IOException)

Example 3 with CsvReader

use of com.orangesignal.csv.CsvReader in project fess by codelibs.

From the class BadWordService, method importCsv.

/**
 * Imports bad words from CSV.
 *
 * <p>The first row is skipped as a header. Only the first column of each following
 * row is used. A word that starts with {@code DELETE_PREFIX} is deleted (the prefix
 * is stripped before the lookup); any other word is inserted when it does not exist
 * yet, otherwise its update user and time are refreshed. Rows with a blank first
 * column are skipped. A row that fails is logged and the import continues.</p>
 *
 * @param reader CSV source; it is not closed by this method
 */
public void importCsv(final Reader reader) {
    @SuppressWarnings("resource") final CsvReader csvReader = new CsvReader(reader, new CsvConfig());
    try {
        List<String> list;
        // ignore header
        csvReader.readValues();
        while ((list = csvReader.readValues()) != null) {
            String targetWord = getValue(list, 0);
            if (StringUtil.isBlank(targetWord)) {
                // skip
                continue;
            }
            try {
                boolean isDelete = false;
                if (targetWord.startsWith(DELETE_PREFIX)) {
                    isDelete = true;
                    // Strip exactly the prefix, whatever its length.
                    targetWord = targetWord.substring(DELETE_PREFIX.length());
                }
                final String target = targetWord;
                // TODO
                BadWord badWord = badWordBhv.selectEntity(cb -> cb.query().setSuggestWord_Equal(target)).orElse(null);
                final long now = ComponentUtil.getSystemHelper().getCurrentTimeAsLong();
                if (isDelete) {
                    // Deleting a word that does not exist is a no-op rather than an error.
                    if (badWord != null) {
                        badWordBhv.delete(badWord);
                    }
                } else if (badWord == null) {
                    badWord = new BadWord();
                    badWord.setSuggestWord(targetWord);
                    badWord.setCreatedBy(Constants.SYSTEM_USER);
                    badWord.setCreatedTime(now);
                    badWordBhv.insert(badWord);
                } else {
                    badWord.setUpdatedBy(Constants.SYSTEM_USER);
                    badWord.setUpdatedTime(now);
                    badWordBhv.update(badWord);
                }
            } catch (final Exception e) {
                // Best effort: one bad row must not abort the whole import.
                logger.warn("Failed to read a bad word: {}", list, e);
            }
        }
        // TODO replace _all
        searchEngineClient.refresh("_all");
    } catch (final IOException e) {
        logger.warn("Failed to read a bad word.", e);
    }
}
Also used : CsvReader(com.orangesignal.csv.CsvReader) BeanUtil(org.codelibs.core.beans.util.BeanUtil) EntityRowHandler(org.dbflute.bhv.readable.EntityRowHandler) Constants(org.codelibs.fess.Constants) OptionalEntity(org.dbflute.optional.OptionalEntity) Resource(javax.annotation.Resource) StringUtil(org.codelibs.core.lang.StringUtil) CsvWriter(com.orangesignal.csv.CsvWriter) IOException(java.io.IOException) SearchEngineClient(org.codelibs.fess.es.client.SearchEngineClient) BadWordBhv(org.codelibs.fess.es.config.exbhv.BadWordBhv) Reader(java.io.Reader) ArrayList(java.util.ArrayList) CsvConfig(com.orangesignal.csv.CsvConfig) List(java.util.List) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) Logger(org.apache.logging.log4j.Logger) PagingResultBean(org.dbflute.cbean.result.PagingResultBean) BadWord(org.codelibs.fess.es.config.exentity.BadWord) ComponentUtil(org.codelibs.fess.util.ComponentUtil) Writer(java.io.Writer) BadWordPager(org.codelibs.fess.app.pager.BadWordPager) LogManager(org.apache.logging.log4j.LogManager) CsvReader(com.orangesignal.csv.CsvReader) BadWordCB(org.codelibs.fess.es.config.cbean.BadWordCB) BadWord(org.codelibs.fess.es.config.exentity.BadWord) CsvConfig(com.orangesignal.csv.CsvConfig) IOException(java.io.IOException) IOException(java.io.IOException)

Example 4 with CsvReader

use of com.orangesignal.csv.CsvReader in project fess by codelibs.

From the class ElevateWordService, method importCsv.

/**
 * Imports elevate words from CSV.
 *
 * <p>The first row is skipped as a header. Each following row is read as:
 * column 0 = suggest word, column 1 = reading, column 2 = comma-separated
 * permissions, column 3 = comma-separated label values, column 4 = boost.
 * Rows with a blank suggest word are skipped.</p>
 *
 * <p>For each row the existing elevate word is looked up by suggest word (and by
 * permissions when any are given), then:</p>
 * <ul>
 *   <li>not found: a new elevate word is inserted together with its label mappings;</li>
 *   <li>found, and both reading and boost are blank: the elevate word and its label
 *       mappings are deleted;</li>
 *   <li>found otherwise: the elevate word is updated and label mappings that do not
 *       exist yet are added.</li>
 * </ul>
 *
 * <p>A blank boost is stored as {@code 1.0f}. A row that fails is logged and the
 * import continues.</p>
 *
 * @param reader CSV source; it is not closed by this method
 */
public void importCsv(final Reader reader) {
    final PermissionHelper permissionHelper = ComponentUtil.getPermissionHelper();
    final CsvConfig cfg = new CsvConfig(',', '"', '"');
    cfg.setEscapeDisabled(false);
    cfg.setQuoteDisabled(false);
    @SuppressWarnings("resource") final CsvReader csvReader = new CsvReader(reader, cfg);
    try {
        List<String> list;
        // ignore header
        csvReader.readValues();
        while ((list = csvReader.readValues()) != null) {
            final String suggestWord = getValue(list, 0);
            if (StringUtil.isBlank(suggestWord)) {
                // skip
                continue;
            }
            try {
                // Encoded, de-duplicated, non-blank permissions from column 2.
                final String[] permissions = split(getValue(list, 2), ",").get(stream -> stream.map(permissionHelper::encode).filter(StringUtil::isNotBlank).distinct().toArray(n -> new String[n]));
                // De-duplicated, non-blank label values from column 3.
                final String[] labels = split(getValue(list, 3), ",").get(stream -> stream.filter(StringUtil::isNotBlank).distinct().toArray(n -> new String[n]));
                ElevateWord elevateWord = elevateWordBhv.selectEntity(cb -> {
                    cb.query().setSuggestWord_Equal(suggestWord);
                    if (permissions.length > 0) {
                        cb.query().setPermissions_InScope(stream(permissions).get(stream -> stream.collect(Collectors.toList())));
                    }
                }).orElse(null);
                final String reading = getValue(list, 1);
                final String boost = getValue(list, 4);
                final long now = ComponentUtil.getSystemHelper().getCurrentTimeAsLong();
                if (elevateWord == null) {
                    // New word: insert it, then map it to every label type that exists.
                    elevateWord = new ElevateWord();
                    elevateWord.setSuggestWord(suggestWord);
                    elevateWord.setReading(reading);
                    elevateWord.setPermissions(permissions);
                    elevateWord.setBoost(StringUtil.isBlank(boost) ? 1.0f : Float.parseFloat(boost));
                    elevateWord.setCreatedBy(Constants.SYSTEM_USER);
                    elevateWord.setCreatedTime(now);
                    elevateWordBhv.insert(elevateWord);
                    final String id = elevateWord.getId();
                    // Label values with no matching label type are dropped.
                    final List<ElevateWordToLabel> mappingList = stream(labels).get(stream -> stream.map(l -> labelTypeBhv.selectEntity(cb -> cb.query().setValue_Equal(l)).map(e -> {
                        final ElevateWordToLabel m = new ElevateWordToLabel();
                        m.setElevateWordId(id);
                        m.setLabelTypeId(e.getId());
                        return m;
                    }).orElse(null)).filter(e -> e != null).collect(Collectors.toList()));
                    if (!mappingList.isEmpty()) {
                        elevateWordToLabelBhv.batchInsert(mappingList);
                    }
                } else if (StringUtil.isBlank(reading) && StringUtil.isBlank(boost)) {
                    // Existing word with neither reading nor boost: treat the row as a delete.
                    elevateWordBhv.delete(elevateWord);
                    final String id = elevateWord.getId();
                    elevateWordToLabelBhv.queryDelete(cb -> cb.query().setElevateWordId_Equal(id));
                } else {
                    // Existing word: update it and add only the label mappings that are missing.
                    elevateWord.setReading(reading);
                    elevateWord.setPermissions(permissions);
                    elevateWord.setBoost(StringUtil.isBlank(boost) ? 1.0f : Float.parseFloat(boost));
                    elevateWord.setUpdatedBy(Constants.SYSTEM_USER);
                    elevateWord.setUpdatedTime(now);
                    elevateWordBhv.update(elevateWord);
                    final String id = elevateWord.getId();
                    final List<ElevateWordToLabel> mappingList = stream(labels).get(stream -> stream.map(l -> labelTypeBhv.selectEntity(cb -> cb.query().setValue_Equal(l)).map(e -> {
                        final List<ElevateWordToLabel> mList = elevateWordToLabelBhv.selectList(cb -> {
                            cb.query().setElevateWordId_Equal(id);
                            cb.query().setLabelTypeId_Equal(e.getId());
                        });
                        if (!mList.isEmpty()) {
                            // Mapping already present; nothing to insert for this label.
                            return null;
                        }
                        final ElevateWordToLabel m = new ElevateWordToLabel();
                        m.setElevateWordId(id);
                        m.setLabelTypeId(e.getId());
                        return m;
                    }).orElse(null)).filter(e -> e != null).collect(Collectors.toList()));
                    if (!mappingList.isEmpty()) {
                        elevateWordToLabelBhv.batchInsert(mappingList);
                    }
                }
            } catch (final Exception e) {
                // Best effort: one bad row must not abort the whole import.
                logger.warn("Failed to read a suggest elevate word: {}", list, e);
            }
        }
        elevateWordBhv.refresh();
    } catch (final IOException e) {
        logger.warn("Failed to read a suggest elevate word.", e);
    }
}
Also used : BeanUtil(org.codelibs.core.beans.util.BeanUtil) EntityRowHandler(org.dbflute.bhv.readable.EntityRowHandler) Constants(org.codelibs.fess.Constants) ElevateWordPager(org.codelibs.fess.app.pager.ElevateWordPager) SearchEngineClient(org.codelibs.fess.es.client.SearchEngineClient) PermissionHelper(org.codelibs.fess.helper.PermissionHelper) LabelType(org.codelibs.fess.es.config.exentity.LabelType) ArrayList(java.util.ArrayList) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) StreamUtil.split(org.codelibs.core.stream.StreamUtil.split) PagingResultBean(org.dbflute.cbean.result.PagingResultBean) LabelTypeBhv(org.codelibs.fess.es.config.exbhv.LabelTypeBhv) ElevateWordBhv(org.codelibs.fess.es.config.exbhv.ElevateWordBhv) ElevateWordCB(org.codelibs.fess.es.config.cbean.ElevateWordCB) ElevateWordToLabel(org.codelibs.fess.es.config.exentity.ElevateWordToLabel) StreamUtil.stream(org.codelibs.core.stream.StreamUtil.stream) OptionalEntity(org.dbflute.optional.OptionalEntity) Resource(javax.annotation.Resource) StringUtil(org.codelibs.core.lang.StringUtil) CsvWriter(com.orangesignal.csv.CsvWriter) IOException(java.io.IOException) Reader(java.io.Reader) Collectors(java.util.stream.Collectors) CsvConfig(com.orangesignal.csv.CsvConfig) List(java.util.List) Logger(org.apache.logging.log4j.Logger) ComponentUtil(org.codelibs.fess.util.ComponentUtil) ElevateWordToLabelBhv(org.codelibs.fess.es.config.exbhv.ElevateWordToLabelBhv) Writer(java.io.Writer) ElevateWord(org.codelibs.fess.es.config.exentity.ElevateWord) LogManager(org.apache.logging.log4j.LogManager) CsvReader(com.orangesignal.csv.CsvReader) IOException(java.io.IOException) ElevateWordToLabel(org.codelibs.fess.es.config.exentity.ElevateWordToLabel) IOException(java.io.IOException) CsvReader(com.orangesignal.csv.CsvReader) ElevateWord(org.codelibs.fess.es.config.exentity.ElevateWord) PermissionHelper(org.codelibs.fess.helper.PermissionHelper) ArrayList(java.util.ArrayList) List(java.util.List) 
CsvConfig(com.orangesignal.csv.CsvConfig)

Aggregations

CsvReader (com.orangesignal.csv.CsvReader)4 CsvConfig (com.orangesignal.csv.CsvConfig)3 CsvWriter (com.orangesignal.csv.CsvWriter)3 IOException (java.io.IOException)3 Reader (java.io.Reader)3 Writer (java.io.Writer)3 ArrayList (java.util.ArrayList)3 List (java.util.List)3 Resource (javax.annotation.Resource)3 LogManager (org.apache.logging.log4j.LogManager)3 Logger (org.apache.logging.log4j.Logger)3 BeanUtil (org.codelibs.core.beans.util.BeanUtil)3 StringUtil (org.codelibs.core.lang.StringUtil)3 Constants (org.codelibs.fess.Constants)3 Collectors (java.util.stream.Collectors)2 SearchEngineClient (org.codelibs.fess.es.client.SearchEngineClient)2 FessConfig (org.codelibs.fess.mylasta.direction.FessConfig)2 ComponentUtil (org.codelibs.fess.util.ComponentUtil)2 EntityRowHandler (org.dbflute.bhv.readable.EntityRowHandler)2 PagingResultBean (org.dbflute.cbean.result.PagingResultBean)2