Use of org.codelibs.fess.crawler.exception.CrawlerSystemException in the fess-crawler project by codelibs.
Class HtmlTransformer, method storeData.
/**
 * Reads the whole response body and stores its bytes, together with the
 * response character set, in the given result data.
 *
 * @param responseData source of the body stream and the character set
 * @param resultData destination receiving the bytes and the encoding
 * @throws CrawlerSystemException if any step fails; an exception that is
 *         already a CrawlerSystemException is rethrown unchanged
 */
protected void storeData(final ResponseData responseData, final ResultData resultData) {
    try (final InputStream bodyStream = responseData.getResponseBody()) {
        resultData.setData(InputStreamUtil.getBytes(bodyStream));
        resultData.setEncoding(responseData.getCharSet());
    } catch (final Exception cause) {
        // Crawler exceptions pass through untouched; everything else is wrapped.
        if (cause instanceof CrawlerSystemException) {
            throw (CrawlerSystemException) cause;
        }
        throw new CrawlerSystemException("Could not store data.", cause);
    }
}
Use of org.codelibs.fess.crawler.exception.CrawlerSystemException in the fess-crawler project by codelibs.
Class XmlTransformer, method getData.
/**
 * Returns the stored access result data in the form configured for this transformer.
 * <p>
 * When no data class is configured, the stored bytes are decoded into a String using the
 * stored encoding; UTF-8 is used when no encoding is stored or the stored one is not
 * supported. Otherwise the data map built by XmlUtil is returned directly when the data
 * class is Map, or copied into a newly created instance of the data class.
 *
 * @param accessResultData stored crawl result to convert
 * @return the XML content as a String ({@code null} when no bytes are stored), the data
 *         map, or a populated instance of the configured data class
 * @throws CrawlerSystemException if, in String mode, the data was stored under a different
 *         transformer name, or if the data class instance cannot be created or populated
 */
@Override
public Object getData(final AccessResultData<?> accessResultData) {
    if (dataClass == null) {
        // check transformer name
        if (!getName().equals(accessResultData.getTransformerName())) {
            throw new CrawlerSystemException("Transformer is invalid. Use " + accessResultData.getTransformerName() + ". This transformer is " + getName() + ".");
        }
        final byte[] data = accessResultData.getData();
        if (data == null) {
            return null;
        }
        final String encoding = accessResultData.getEncoding();
        try {
            return new String(data, encoding == null ? Constants.UTF_8 : encoding);
        } catch (final UnsupportedEncodingException e) {
            if (logger.isInfoEnabled()) {
                logger.info("Invalid charsetName: " + encoding + ". Changed to " + Constants.UTF_8, e);
            }
            // Fall back to UTF-8 rather than failing on an unknown stored charset name.
            return new String(data, Constants.UTF_8_CHARSET);
        }
    }
    final Map<String, Object> dataMap = XmlUtil.getDataMap(accessResultData);
    if (Map.class.equals(dataClass)) {
        return dataMap;
    }
    try {
        // Class#newInstance() is deprecated (it propagates constructor exceptions unchecked);
        // going through the no-arg constructor wraps them in InvocationTargetException instead.
        final Object obj = dataClass.getDeclaredConstructor().newInstance();
        BeanUtil.copyMapToBean(dataMap, obj);
        return obj;
    } catch (final Exception e) {
        throw new CrawlerSystemException("Could not create/copy a data map to " + dataClass, e);
    }
}
Use of org.codelibs.fess.crawler.exception.CrawlerSystemException in the fess-crawler project by codelibs.
Class HostIntervalController, method delayBeforeProcessing.
/*
 * (non-Javadoc)
 *
 * @see org.codelibs.fess.crawler.interval.impl.AbstractIntervalController#
 * delayBeforeProcessing()
 *
 * Waits until at least delayMillisBeforeProcessing milliseconds have passed
 * since the last recorded access time for the host of the current URL.
 * Nothing is done when there is no current URL queue, the URL is blank, it
 * starts with "file:", or it has no host. The first access to a host only
 * records the current time and returns immediately.
 *
 * Any failure (including a malformed URL) is rethrown as a
 * CrawlerSystemException. If the waiting thread is interrupted, its
 * interrupt status is restored before the exception is thrown.
 */
@Override
protected void delayBeforeProcessing() {
    final UrlQueue<?> urlQueue = CrawlingParameterUtil.getUrlQueue();
    if (urlQueue == null) {
        return;
    }
    final String url = urlQueue.getUrl();
    if (StringUtil.isBlank(url) || url.startsWith("file:")) {
        // not target
        return;
    }
    try {
        final URL u = new URL(url);
        final String host = u.getHost();
        if (host == null) {
            return;
        }
        // putIfAbsent returns null when this call registered the host for the first time.
        final AtomicLong lastTime = lastTimes.putIfAbsent(host, new AtomicLong(SystemUtil.currentTimeMillis()));
        if (lastTime == null) {
            return;
        }
        // The per-host AtomicLong doubles as the monitor, so callers for the same host queue up.
        synchronized (lastTime) {
            while (true) {
                final long currentTime = SystemUtil.currentTimeMillis();
                final long delayTime = lastTime.get() + delayMillisBeforeProcessing - currentTime;
                if (delayTime <= 0) {
                    lastTime.set(currentTime);
                    break;
                }
                // No notify is visible in this method, so this behaves as a timed pause;
                // the loop re-checks the remaining delay after every wake-up.
                lastTime.wait(delayTime);
            }
        }
    } catch (final InterruptedException e) {
        // Restore the interrupt status so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        throw new CrawlerSystemException(e);
    } catch (final Exception e) {
        throw new CrawlerSystemException(e);
    }
}
Use of org.codelibs.fess.crawler.exception.CrawlerSystemException in the fess-crawler project by codelibs.
Class BinaryTransformerTest, method test_getData_wrongName.
/**
 * Checks that getData throws a CrawlerSystemException for access result data
 * stored under the transformer name "transformer" (presumably not the name of
 * the transformer under test; verify against the test fixture).
 */
public void test_getData_wrongName() throws Exception {
    final AccessResultDataImpl storedResult = new AccessResultDataImpl();
    storedResult.setTransformerName("transformer");
    storedResult.setData("xyz".getBytes());
    try {
        binaryTransformer.getData(storedResult);
    } catch (final CrawlerSystemException expected) {
        // This is the outcome the test is looking for.
        return;
    }
    // Reached only when getData returned normally.
    fail();
}
Use of org.codelibs.fess.crawler.exception.CrawlerSystemException in the fess-crawler project by codelibs.
Class FileTransformerTest, method test_getData_wrongName.
/**
 * Checks that getData throws a CrawlerSystemException for access result data
 * stored under the transformer name "transformer" (presumably not the name of
 * the transformer under test; verify against the test fixture).
 */
public void test_getData_wrongName() throws Exception {
    final AccessResultDataImpl accessResultDataImpl = new AccessResultDataImpl();
    accessResultDataImpl.setData("hoge.txt".getBytes());
    accessResultDataImpl.setEncoding(Constants.UTF_8);
    accessResultDataImpl.setTransformerName("transformer");
    setBaseDir();
    try {
        // The return value is irrelevant: the call itself is expected to throw.
        fileTransformer.getData(accessResultDataImpl);
        fail();
    } catch (final CrawlerSystemException expected) {
        // Expected: the test passes when getData rejects the data.
    }
}
Aggregations