Use of org.codelibs.fess.crawler.exception.CrawlingAccessException in the fess-crawler project by codelibs.
Class HtmlTransformer, method transform.
/**
 * Builds a {@link ResultData} from the given response.
 *
 * <p>The response charset is updated first, then the response data is stored
 * into the result. Child URLs are collected only when the response is HTML and
 * is not flagged as no-follow. If the response metadata holds a String under
 * {@code LOCATION_HEADER}, that value is converted by the "urlConvertHelper"
 * component and appended to the result as an additional request URL.</p>
 *
 * @param responseData the crawled response; must be non-null and have a response body
 * @return the populated result data, tagged with this transformer's name
 * @throws CrawlingAccessException if {@code responseData} is null or has no response body
 */
@Override
public ResultData transform(final ResponseData responseData) {
    final boolean bodyMissing = responseData == null || !responseData.hasResponseBody();
    if (bodyMissing) {
        throw new CrawlingAccessException("No response body.");
    }

    // Settle the encoding before any content is read.
    updateCharset(responseData);

    final ResultData result = new ResultData();
    result.setTransformerName(getName());

    try {
        // Document data first ...
        storeData(responseData, result);

        // ... then outgoing links, but only for followable HTML.
        final boolean collectChildUrls = isHtml(responseData) && !responseData.isNoFollow();
        if (collectChildUrls) {
            storeChildUrls(responseData, result);
        }
    } finally {
        // Runs whether or not storing succeeded.
        // NOTE(review): presumably clears per-thread XPath state - confirm against the xpathAPI declaration.
        xpathAPI.remove();
    }

    final Object location = responseData.getMetaDataMap().get(LOCATION_HEADER);
    if (!(location instanceof String)) {
        // No usable redirect target recorded in the metadata.
        return result;
    }

    final UrlConvertHelper urlConvertHelper = crawlerContainer.getComponent("urlConvertHelper");
    result.addUrl(
            RequestDataBuilder.newRequestData()
                    .get()
                    .url(urlConvertHelper.convert(location.toString()))
                    .build());
    return result;
}
Use of org.codelibs.fess.crawler.exception.CrawlingAccessException in the fess-crawler project by codelibs.
Class SmbClientTest, method test_doGet_accessTimeoutTarget.
/**
 * Verifies that {@code doGet} fails with a {@link CrawlingAccessException}
 * whose cause is an {@link InterruptedException} when fetching the response
 * blocks for far longer than the configured access timeout.
 *
 * <p>The client under test overrides {@code getResponseData} to sleep for ten
 * seconds and wraps any interruption in a {@code CrawlingAccessException}.
 * NOTE(review): presumably the access-timeout mechanism interrupts the
 * sleeping call; the unit of {@code setAccessTimeout(1)} is not visible here.</p>
 */
public void test_doGet_accessTimeoutTarget() {
    final SmbClient slowClient = new SmbClient() {
        @Override
        protected ResponseData getResponseData(final String uri, final boolean includeContent) {
            try {
                // Block long enough that the access timeout must fire first.
                Thread.sleep(10000);
                return null;
            } catch (final InterruptedException interrupted) {
                throw new CrawlingAccessException(interrupted);
            }
        }
    };
    slowClient.setAccessTimeout(1);

    try {
        slowClient.doGet("smb://localhost/test.txt");
        fail();
    } catch (final CrawlingAccessException expected) {
        final Throwable cause = expected.getCause();
        assertTrue(cause instanceof InterruptedException);
    }
}
Use of org.codelibs.fess.crawler.exception.CrawlingAccessException in the fess-crawler project by codelibs.
Class FileSystemClientTest, method test_doGet_accessTimeoutTarget.
/**
 * Verifies that {@code doGet} on a file-system client fails with a
 * {@link CrawlingAccessException} caused by an {@link InterruptedException}
 * when {@code getResponseData} blocks past the configured access timeout.
 *
 * <p>The anonymous subclass sleeps for ten seconds inside
 * {@code getResponseData} and rethrows an interruption wrapped in a
 * {@code CrawlingAccessException}.
 * NOTE(review): the unit of {@code setAccessTimeout(1)} is not visible in
 * this snippet - confirm against the client implementation.</p>
 */
public void test_doGet_accessTimeoutTarget() {
    final FileSystemClient blockingClient = new FileSystemClient() {
        @Override
        protected ResponseData getResponseData(final String uri, final boolean includeContent) {
            try {
                // Deliberately much longer than the access timeout set below.
                Thread.sleep(10000);
                return null;
            } catch (final InterruptedException interrupted) {
                throw new CrawlingAccessException(interrupted);
            }
        }
    };
    blockingClient.setAccessTimeout(1);

    try {
        blockingClient.doGet("file:/tmp/test.txt");
        fail();
    } catch (final CrawlingAccessException expected) {
        final Throwable rootCause = expected.getCause();
        assertTrue(rootCause instanceof InterruptedException);
    }
}
Use of org.codelibs.fess.crawler.exception.CrawlingAccessException in the fess-crawler project by codelibs.
Class FileSystemClientTest, method test_doHead_accessTimeoutTarget.
/**
 * Verifies that {@code doHead} on a file-system client fails with a
 * {@link CrawlingAccessException} caused by an {@link InterruptedException}
 * when {@code getResponseData} blocks past the configured access timeout.
 *
 * <p>The anonymous subclass sleeps for ten seconds inside
 * {@code getResponseData} and rethrows an interruption wrapped in a
 * {@code CrawlingAccessException}.
 * NOTE(review): the unit of {@code setAccessTimeout(1)} is not visible in
 * this snippet - confirm against the client implementation.</p>
 */
public void test_doHead_accessTimeoutTarget() {
    final FileSystemClient blockingClient = new FileSystemClient() {
        @Override
        protected ResponseData getResponseData(final String uri, final boolean includeContent) {
            try {
                // Deliberately much longer than the access timeout set below.
                Thread.sleep(10000);
                return null;
            } catch (final InterruptedException interrupted) {
                throw new CrawlingAccessException(interrupted);
            }
        }
    };
    blockingClient.setAccessTimeout(1);

    try {
        blockingClient.doHead("file:/tmp/test.txt");
        fail();
    } catch (final CrawlingAccessException expected) {
        final Throwable rootCause = expected.getCause();
        assertTrue(rootCause instanceof InterruptedException);
    }
}
Use of org.codelibs.fess.crawler.exception.CrawlingAccessException in the fess-crawler project by codelibs.
Class FtpClientTest, method test_ftpInfo.
/**
 * Exercises {@code FtpClient.FtpInfo} parsing.
 *
 * <p>First checks that {@code null}, the empty string and a string without an
 * FTP scheme are each rejected with a {@link CrawlingAccessException}. Then,
 * for a series of FTP URLs, checks the values reported by {@code toUrl()},
 * {@code getCacheKey()}, {@code getHost()}, {@code getPort()},
 * {@code getParent()} and {@code getName()}, including an explicit port,
 * the port being 21 when none is given, {@code ..} path segments, doubled
 * slashes, a space in the file name and a non-ASCII file name.</p>
 */
public void test_ftpInfo() {
    String url;
    FtpInfo info;

    // Invalid inputs: construction must throw for every one of them.
    for (final String invalid : new String[] { null, "", "abc" }) {
        try {
            new FtpClient.FtpInfo(invalid);
            fail();
        } catch (final CrawlingAccessException expected) {
            // expected: the input is not a usable FTP URL
        }
    }

    // Explicit non-default port, root path.
    url = "ftp://123.123.123.123:9999/";
    info = new FtpClient.FtpInfo(url);
    assertEquals(url, info.toUrl());
    assertEquals("123.123.123.123:9999", info.getCacheKey());
    assertEquals("123.123.123.123", info.getHost());
    assertEquals(9999, info.getPort());
    assertEquals("/", info.getParent());
    assertNull(info.getName());

    // No port given, file directly under root.
    url = "ftp://123.123.123.123/test.txt";
    info = new FtpClient.FtpInfo(url);
    assertEquals(url, info.toUrl());
    assertEquals("123.123.123.123:21", info.getCacheKey());
    assertEquals("123.123.123.123", info.getHost());
    assertEquals(21, info.getPort());
    assertEquals("/", info.getParent());
    assertEquals("test.txt", info.getName());

    // Parent-directory segment in the path.
    url = "ftp://123.123.123.123/aaa/../test.txt";
    info = new FtpClient.FtpInfo(url);
    assertEquals("ftp://123.123.123.123/test.txt", info.toUrl());
    assertEquals("123.123.123.123:21", info.getCacheKey());
    assertEquals("123.123.123.123", info.getHost());
    assertEquals(21, info.getPort());
    assertEquals("/", info.getParent());
    assertEquals("test.txt", info.getName());
    assertEquals("ftp://123.123.123.123/", info.toUrl("/"));

    // Port 21 given explicitly, file in a sub-directory.
    url = "ftp://123.123.123.123:21/test1/test.txt";
    info = new FtpClient.FtpInfo(url);
    assertEquals("ftp://123.123.123.123/test1/test.txt", info.toUrl());
    assertEquals("123.123.123.123:21", info.getCacheKey());
    assertEquals("123.123.123.123", info.getHost());
    assertEquals(21, info.getPort());
    assertEquals("/test1", info.getParent());
    assertEquals("test.txt", info.getName());
    assertEquals("ftp://123.123.123.123/", info.toUrl("/"));
    assertEquals("ftp://123.123.123.123/aaa/bbb/ccc.txt", info.toUrl("/aaa//bbb/ccc.txt"));
    assertEquals("ftp://123.123.123.123/ccc.txt", info.toUrl("/aaa/../ccc.txt"));

    // File name containing a space.
    url = "ftp://123.123.123.123/test test.txt";
    info = new FtpClient.FtpInfo(url);
    assertEquals(url, info.toUrl());
    assertEquals("123.123.123.123:21", info.getCacheKey());
    assertEquals("123.123.123.123", info.getHost());
    assertEquals(21, info.getPort());
    assertEquals("/", info.getParent());
    assertEquals("test test.txt", info.getName());

    // Non-ASCII file name.
    url = "ftp://123.123.123.123/テスト.txt";
    info = new FtpClient.FtpInfo(url);
    assertEquals(url, info.toUrl());
    assertEquals("123.123.123.123:21", info.getCacheKey());
    assertEquals("123.123.123.123", info.getHost());
    assertEquals(21, info.getPort());
    assertEquals("/", info.getParent());
    assertEquals("テスト.txt", info.getName());

    // Root path, no port given.
    url = "ftp://123.123.123.123/";
    info = new FtpClient.FtpInfo(url);
    assertEquals(url, info.toUrl());
    assertEquals("123.123.123.123:21", info.getCacheKey());
    assertEquals("123.123.123.123", info.getHost());
    assertEquals(21, info.getPort());
    assertEquals("/", info.getParent());
    assertNull(info.getName());
}
Aggregations