use of com.orangesignal.csv.CsvReader in project fess by codelibs.
the class CsvDataStoreImpl method processCsv.
/**
 * Reads one CSV file row by row and hands every non-empty row to the index callback.
 *
 * <p>For each row a parameter map is built from {@code paramMap}, the file path/name, the
 * data config and the cell values. Cells are registered under {@code CELL_PREFIX + n}
 * (1-based) and, when a header line was read and the header cell is not blank, under the
 * header name as well. Every entry of {@code scriptMap} is evaluated through
 * {@code convertValue(...)} against that parameter map; non-null results are put into the
 * document map, which starts as a copy of {@code defaultDataMap}.</p>
 *
 * <p>Failures raised by {@code callback.store(...)} are logged and recorded through
 * {@link FailureUrlService}; they do not stop the import unless a
 * {@link DataStoreCrawlingException} reports {@code aborted()}.</p>
 *
 * @param dataConfig      configuration exposed to scripts as {@code crawlingConfig} and used when storing failures
 * @param callback        receiver of the documents built from the rows
 * @param paramMap        base parameters copied into every row's parameter map and passed to the callback
 * @param scriptMap       document field name to script; the script is passed to {@code convertValue}
 * @param defaultDataMap  initial content of every document map
 * @param csvConfig       CSV parser configuration
 * @param csvFile         file to read
 * @param readInterval    when greater than 0, {@code sleep(readInterval)} is called after each stored row
 *                        (unit presumably milliseconds — confirm against {@code sleep})
 * @param csvFileEncoding charset name used to decode the file
 * @param hasHeaderLine   whether the first row holds column names
 * @throws DataStoreException if the file cannot be opened or read
 */
protected void processCsv(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap,
        final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap, final CsvConfig csvConfig, final File csvFile,
        final long readInterval, final String csvFileEncoding, final boolean hasHeaderLine) {
    logger.info("Loading " + csvFile.getAbsolutePath());
    // Keep a handle on the raw stream: if building the reader chain fails (for example
    // because of an unsupported encoding) csvReader stays null and could not close it.
    FileInputStream csvStream = null;
    CsvReader csvReader = null;
    try {
        csvStream = new FileInputStream(csvFile);
        csvReader = new CsvReader(new BufferedReader(new InputStreamReader(csvStream, csvFileEncoding)), csvConfig);
        List<String> headerList = null;
        if (hasHeaderLine) {
            headerList = csvReader.readValues();
        }
        List<String> list;
        boolean loop = true;
        // Check the stop flags before reading so that no row is consumed and then dropped
        // after an abort. `alive` is declared outside this method.
        while (loop && alive && (list = csvReader.readValues()) != null) {
            final Map<String, Object> dataMap = new HashMap<>();
            dataMap.putAll(defaultDataMap);
            final Map<String, Object> resultMap = new LinkedHashMap<>();
            resultMap.putAll(paramMap);
            resultMap.put("csvfile", csvFile.getAbsolutePath());
            resultMap.put("csvfilename", csvFile.getName());
            resultMap.put("crawlingConfig", dataConfig);
            boolean foundValues = false;
            for (int i = 0; i < list.size(); i++) {
                String key = null;
                String value = list.get(i);
                if (value == null) {
                    value = StringUtil.EMPTY;
                }
                if (StringUtil.isNotBlank(value)) {
                    foundValues = true;
                }
                // Register the cell under its header name when one is available ...
                if (headerList != null && headerList.size() > i) {
                    key = headerList.get(i);
                    if (StringUtil.isNotBlank(key)) {
                        resultMap.put(key, value);
                    }
                }
                // ... and always under its 1-based positional name.
                key = CELL_PREFIX + Integer.toString(i + 1);
                resultMap.put(key, value);
            }
            if (!foundValues) {
                logger.debug("No data in line: {}", resultMap);
                continue;
            }
            if (logger.isDebugEnabled()) {
                for (final Map.Entry<String, Object> entry : resultMap.entrySet()) {
                    logger.debug(entry.getKey() + "=" + entry.getValue());
                }
            }
            // Expose the document under construction to the scripts.
            final Map<String, Object> crawlingContext = new HashMap<>();
            crawlingContext.put("doc", dataMap);
            resultMap.put("crawlingContext", crawlingContext);
            for (final Map.Entry<String, String> entry : scriptMap.entrySet()) {
                final Object convertValue = convertValue(entry.getValue(), resultMap);
                if (convertValue != null) {
                    dataMap.put(entry.getKey(), convertValue);
                }
            }
            if (logger.isDebugEnabled()) {
                for (final Map.Entry<String, Object> entry : dataMap.entrySet()) {
                    logger.debug(entry.getKey() + "=" + entry.getValue());
                }
            }
            try {
                callback.store(paramMap, dataMap);
            } catch (final CrawlingAccessException e) {
                logger.warn("Crawling Access Exception at : " + dataMap, e);
                // For a multi-cause failure, report the last collected cause.
                Throwable target = e;
                if (target instanceof MultipleCrawlingAccessException) {
                    final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses();
                    if (causes.length > 0) {
                        target = causes[causes.length - 1];
                    }
                }
                String errorName;
                final Throwable cause = target.getCause();
                if (cause != null) {
                    errorName = cause.getClass().getCanonicalName();
                } else {
                    errorName = target.getClass().getCanonicalName();
                }
                String url;
                if (target instanceof DataStoreCrawlingException) {
                    final DataStoreCrawlingException dce = (DataStoreCrawlingException) target;
                    url = dce.getUrl();
                    if (dce.aborted()) {
                        // Stop after this row; the failure is still recorded below.
                        loop = false;
                    }
                } else {
                    url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber();
                }
                final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
                failureUrlService.store(dataConfig, errorName, url, target);
            } catch (final Throwable t) {
                logger.warn("Crawling Access Exception at : " + dataMap, t);
                final String url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber();
                final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
                failureUrlService.store(dataConfig, t.getClass().getCanonicalName(), url, t);
            }
            if (readInterval > 0) {
                sleep(readInterval);
            }
        }
    } catch (final Exception e) {
        throw new DataStoreException("Failed to crawl data when reading csv file.", e);
    } finally {
        IOUtils.closeQuietly(csvReader);
        // Normally already closed through csvReader; closing again is harmless and covers
        // the case where the reader chain was never completed.
        IOUtils.closeQuietly(csvStream);
    }
}
use of com.orangesignal.csv.CsvReader in project fess by codelibs.
the class CrawlingInfoService method importCsv.
/**
 * Imports crawling info parameters from CSV.
 *
 * <p>The first row is skipped as a header. Each following row is read as
 * {@code sessionId, infoCreatedTime, key, value, paramCreatedTime}; both timestamps are
 * parsed with {@code CoreLibConstants.DATE_FORMAT_ISO_8601_EXTEND}. When no
 * {@link CrawlingInfo} exists for the session id, one is inserted first; a
 * {@link CrawlingInfoParam} is then inserted for the row.</p>
 *
 * <p>A row that fails (missing column, unparsable date, storage error) is logged and
 * skipped; the import continues with the next row. The given reader is not closed here.</p>
 *
 * @param reader source of the CSV text
 */
public void importCsv(final Reader reader) {
    @SuppressWarnings("resource")
    final CsvReader csvReader = new CsvReader(reader, new CsvConfig());
    final DateFormat formatter = new SimpleDateFormat(CoreLibConstants.DATE_FORMAT_ISO_8601_EXTEND);
    try {
        List<String> list;
        // ignore header
        csvReader.readValues();
        while ((list = csvReader.readValues()) != null) {
            try {
                final String sessionId = list.get(0);
                // TODO
                CrawlingInfo crawlingInfo = crawlingInfoBhv.selectEntity(cb -> {
                    cb.query().setSessionId_Equal(sessionId);
                    cb.specify().columnSessionId();
                }).orElse(null);
                if (crawlingInfo == null) {
                    // First parameter seen for this session: create the parent record.
                    crawlingInfo = new CrawlingInfo();
                    crawlingInfo.setSessionId(sessionId);
                    crawlingInfo.setCreatedTime(formatter.parse(list.get(1)).getTime());
                    crawlingInfoBhv.insert(crawlingInfo, op -> op.setRefreshPolicy(Constants.TRUE));
                }
                final CrawlingInfoParam entity = new CrawlingInfoParam();
                entity.setCrawlingInfoId(crawlingInfo.getId());
                entity.setKey(list.get(2));
                entity.setValue(list.get(3));
                entity.setCreatedTime(formatter.parse(list.get(4)).getTime());
                crawlingInfoParamBhv.insert(entity, op -> op.setRefreshPolicy(Constants.TRUE));
            } catch (final Exception e) {
                // Best effort: a broken row must not abort the whole import.
                logger.warn("Failed to read a crawling info: {}", list, e);
            }
        }
    } catch (final IOException e) {
        logger.warn("Failed to read a crawling info.", e);
    }
}
use of com.orangesignal.csv.CsvReader in project fess by codelibs.
the class BadWordService method importCsv.
/**
 * Imports bad words from CSV.
 *
 * <p>The first row is skipped as a header and only the first column of each following row
 * is used. Rows with a blank first column are skipped. A word starting with
 * {@code DELETE_PREFIX} requests deletion of the word that follows the prefix; any other
 * word is inserted when unknown or has its update stamp refreshed when it already exists.</p>
 *
 * <p>A row that fails is logged and skipped. After all rows have been processed the search
 * engine is refreshed. The given reader is not closed here.</p>
 *
 * @param reader source of the CSV text
 */
public void importCsv(final Reader reader) {
    @SuppressWarnings("resource")
    final CsvReader csvReader = new CsvReader(reader, new CsvConfig());
    try {
        List<String> list;
        // ignore header
        csvReader.readValues();
        while ((list = csvReader.readValues()) != null) {
            String targetWord = getValue(list, 0);
            if (StringUtil.isBlank(targetWord)) {
                // skip
                continue;
            }
            try {
                boolean isDelete = false;
                if (targetWord.startsWith(DELETE_PREFIX)) {
                    isDelete = true;
                    // Strip exactly the prefix that was matched instead of a fixed width.
                    targetWord = targetWord.substring(DELETE_PREFIX.length());
                }
                final String target = targetWord;
                // TODO
                BadWord badWord = badWordBhv.selectEntity(cb -> cb.query().setSuggestWord_Equal(target)).orElse(null);
                final long now = ComponentUtil.getSystemHelper().getCurrentTimeAsLong();
                if (isDelete) {
                    // Nothing to delete when the word is unknown; do not hand null to the behavior.
                    if (badWord != null) {
                        badWordBhv.delete(badWord);
                    }
                } else if (badWord == null) {
                    badWord = new BadWord();
                    badWord.setSuggestWord(targetWord);
                    badWord.setCreatedBy(Constants.SYSTEM_USER);
                    badWord.setCreatedTime(now);
                    badWordBhv.insert(badWord);
                } else {
                    badWord.setUpdatedBy(Constants.SYSTEM_USER);
                    badWord.setUpdatedTime(now);
                    badWordBhv.update(badWord);
                }
            } catch (final Exception e) {
                // Best effort: a broken row must not abort the whole import.
                logger.warn("Failed to read a bad word: {}", list, e);
            }
        }
        // TODO replace _all
        searchEngineClient.refresh("_all");
    } catch (final IOException e) {
        logger.warn("Failed to read a bad word.", e);
    }
}
use of com.orangesignal.csv.CsvReader in project fess by codelibs.
the class ElevateWordService method importCsv.
/**
 * Imports elevate words from CSV.
 *
 * <p>The first row is skipped as a header. The following rows are read as
 * {@code suggestWord, reading, permissions, labels, boost}, where permissions and labels
 * are comma-separated lists. Rows with a blank suggest word are skipped.</p>
 *
 * <ul>
 * <li>No matching word stored: a new word is inserted together with its label mappings.</li>
 * <li>Matching word stored and both reading and boost are blank: the word and its label
 * mappings are deleted.</li>
 * <li>Otherwise: the stored word is updated and label mappings that do not exist yet are
 * added.</li>
 * </ul>
 *
 * <p>A blank boost counts as {@code 1.0}. A row that fails is logged and skipped. The
 * given reader is not closed here.</p>
 *
 * @param reader source of the CSV text
 */
public void importCsv(final Reader reader) {
    final PermissionHelper permissionHelper = ComponentUtil.getPermissionHelper();
    final CsvConfig csvConfig = new CsvConfig(',', '"', '"');
    csvConfig.setEscapeDisabled(false);
    csvConfig.setQuoteDisabled(false);
    @SuppressWarnings("resource")
    final CsvReader csvReader = new CsvReader(reader, csvConfig);
    try {
        // The first row is the header and carries no data.
        csvReader.readValues();
        for (List<String> row = csvReader.readValues(); row != null; row = csvReader.readValues()) {
            final String suggestWord = getValue(row, 0);
            if (StringUtil.isBlank(suggestWord)) {
                continue;
            }
            try {
                final String[] permissions = split(getValue(row, 2), ",")
                        .get(s -> s.map(permissionHelper::encode).filter(StringUtil::isNotBlank).distinct().toArray(String[]::new));
                final String[] labels =
                        split(getValue(row, 3), ",").get(s -> s.filter(StringUtil::isNotBlank).distinct().toArray(String[]::new));
                ElevateWord elevateWord = elevateWordBhv.selectEntity(cb -> {
                    cb.query().setSuggestWord_Equal(suggestWord);
                    if (permissions.length > 0) {
                        cb.query().setPermissions_InScope(stream(permissions).get(s -> s.collect(Collectors.toList())));
                    }
                }).orElse(null);
                final String reading = getValue(row, 1);
                final String boost = getValue(row, 4);
                final long now = ComponentUtil.getSystemHelper().getCurrentTimeAsLong();
                // Resolved once for both the insert and the update path. In the delete path
                // boost is blank, so nothing is parsed there.
                final float boostValue;
                if (StringUtil.isBlank(boost)) {
                    boostValue = 1.0f;
                } else {
                    boostValue = Float.parseFloat(boost);
                }
                if (elevateWord != null) {
                    if (StringUtil.isBlank(reading) && StringUtil.isBlank(boost)) {
                        // Neither reading nor boost given: the row requests removal.
                        elevateWordBhv.delete(elevateWord);
                        final String id = elevateWord.getId();
                        elevateWordToLabelBhv.queryDelete(cb -> cb.query().setElevateWordId_Equal(id));
                    } else {
                        elevateWord.setReading(reading);
                        elevateWord.setPermissions(permissions);
                        elevateWord.setBoost(boostValue);
                        elevateWord.setUpdatedBy(Constants.SYSTEM_USER);
                        elevateWord.setUpdatedTime(now);
                        elevateWordBhv.update(elevateWord);
                        final String id = elevateWord.getId();
                        // Only add mappings for labels that are not linked to the word yet.
                        final List<ElevateWordToLabel> mappingList = stream(labels).get(
                                s -> s.map(label -> labelTypeBhv.selectEntity(cb -> cb.query().setValue_Equal(label)).map(labelType -> {
                                    final List<ElevateWordToLabel> existing = elevateWordToLabelBhv.selectList(cb -> {
                                        cb.query().setElevateWordId_Equal(id);
                                        cb.query().setLabelTypeId_Equal(labelType.getId());
                                    });
                                    if (existing.isEmpty()) {
                                        final ElevateWordToLabel mapping = new ElevateWordToLabel();
                                        mapping.setElevateWordId(id);
                                        mapping.setLabelTypeId(labelType.getId());
                                        return mapping;
                                    }
                                    return null;
                                }).orElse(null)).filter(m -> m != null).collect(Collectors.toList()));
                        if (!mappingList.isEmpty()) {
                            elevateWordToLabelBhv.batchInsert(mappingList);
                        }
                    }
                } else {
                    elevateWord = new ElevateWord();
                    elevateWord.setSuggestWord(suggestWord);
                    elevateWord.setReading(reading);
                    elevateWord.setPermissions(permissions);
                    elevateWord.setBoost(boostValue);
                    elevateWord.setCreatedBy(Constants.SYSTEM_USER);
                    elevateWord.setCreatedTime(now);
                    elevateWordBhv.insert(elevateWord);
                    final String id = elevateWord.getId();
                    // Link the new word to every label that resolves to a stored label type.
                    final List<ElevateWordToLabel> mappingList = stream(labels).get(
                            s -> s.map(label -> labelTypeBhv.selectEntity(cb -> cb.query().setValue_Equal(label)).map(labelType -> {
                                final ElevateWordToLabel mapping = new ElevateWordToLabel();
                                mapping.setElevateWordId(id);
                                mapping.setLabelTypeId(labelType.getId());
                                return mapping;
                            }).orElse(null)).filter(m -> m != null).collect(Collectors.toList()));
                    if (!mappingList.isEmpty()) {
                        elevateWordToLabelBhv.batchInsert(mappingList);
                    }
                }
            } catch (final Exception e) {
                // Best effort: a broken row must not abort the whole import.
                logger.warn("Failed to read a sugget elevate word: {}", row, e);
            }
        }
        elevateWordBhv.refresh();
    } catch (final IOException e) {
        logger.warn("Failed to read a sugget elevate word.", e);
    }
}
Aggregations