Use of org.codelibs.fess.crawler.exception.ChildUrlsException in project fess-crawler by codelibs.
In the class SmbClient, method doHead.
/**
 * Requests {@code url} through {@code processRequest} with {@code false} as the second
 * argument (presumably "do not include content" - confirm against {@code processRequest})
 * and tags the resulting response with {@code Constants.HEAD_METHOD}.
 *
 * @param url the URL to inspect
 * @return the response returned by {@code processRequest}, or {@code null} when
 *         {@code processRequest} throws a {@link ChildUrlsException}
 * @see org.codelibs.fess.crawler.client.CrawlerClient#doHead(java.lang.String)
 */
@Override
public ResponseData doHead(final String url) {
    final ResponseData headResponse;
    try {
        headResponse = processRequest(url, false);
    } catch (final ChildUrlsException e) {
        // The child URLs carried by the exception are dropped; the caller only sees "no response".
        return null;
    }
    headResponse.setMethod(Constants.HEAD_METHOD);
    return headResponse;
}
Use of org.codelibs.fess.crawler.exception.ChildUrlsException in project fess-crawler by codelibs.
In the class SmbClient, method getResponseData.
/**
 * Builds the {@link ResponseData} for the SMB resource identified by {@code uri}.
 * <p>
 * The URI is passed through {@code preprocessUri} and opened as an {@code SmbFile}, using the
 * entry from {@code smbAuthenticationHolder} for that path when one exists. The outcome
 * depends on what the path resolves to:
 * <ul>
 * <li>the URL cannot be parsed, or the path is neither a file nor a directory: the response
 * gets {@code NOT_FOUND_STATUS_CODE}, the client's {@code charset} and content length 0;</li>
 * <li>a file: content length, status, charset, last-modified time, create time, owner, access
 * control entries and header fields are recorded; if the file is readable the MIME type is
 * detected (and the body attached when {@code includeContent} is true), otherwise the
 * response is marked {@code FORBIDDEN_STATUS_CODE} with {@code APPLICATION_OCTET_STREAM};</li>
 * <li>a directory: nothing is returned; a {@link ChildUrlsException} is thrown instead.</li>
 * </ul>
 *
 * @param uri the URI of the SMB resource
 * @param includeContent when {@code true}, a readable file's body is attached to the response
 *            and a directory's children are listed; when {@code false}, a readable file is
 *            opened only for MIME type detection and a directory yields an empty child set
 * @return the populated response for a file or a not-found path
 * @throws ChildUrlsException when the path is a directory; carries the child requests
 * @throws MaxLengthExceededException when {@code contentLengthHelper} is set and the content
 *             length exceeds the maximum it reports for the detected MIME type
 * @throws CrawlingAccessException when an {@code SmbException} is raised while inspecting the
 *             resource; the original exception is kept as the cause
 */
protected ResponseData getResponseData(final String uri, final boolean includeContent) {
final ResponseData responseData = new ResponseData();
responseData.setMethod(Constants.GET_METHOD);
final String filePath = preprocessUri(uri);
responseData.setUrl(filePath);
SmbFile file = null;
// Authentication registered for this path, if any; null selects the SmbFile constructor
// that takes no authentication argument.
final SmbAuthentication smbAuthentication = smbAuthenticationHolder.get(filePath);
if (logger.isDebugEnabled()) {
logger.debug("Creating SmbFile: " + filePath);
}
try {
if (smbAuthentication == null) {
file = new SmbFile(filePath);
} else {
file = new SmbFile(filePath, smbAuthentication.getAuthentication());
}
} catch (final MalformedURLException e) {
// Not fatal: file stays null and is reported as NOT_FOUND below.
logger.warn("Could not parse url: " + filePath, e);
}
if (logger.isDebugEnabled()) {
logger.debug("Processing SmbFile: " + filePath);
}
try {
if (file == null) {
// The URL could not be parsed into an SmbFile.
responseData.setHttpStatusCode(Constants.NOT_FOUND_STATUS_CODE);
responseData.setCharSet(charset);
responseData.setContentLength(0);
} else if (file.isFile()) {
if (logger.isDebugEnabled()) {
logger.debug("Checking SmbFile Size: " + filePath);
}
responseData.setContentLength(file.length());
// NOTE(review): presumably rejects over-long content before any further SMB calls
// are made - confirm against checkMaxContentLength.
checkMaxContentLength(responseData);
// Status starts as OK; it is overwritten below on read failure or when the file is not readable.
responseData.setHttpStatusCode(Constants.OK_STATUS_CODE);
responseData.setCharSet(geCharSet(file));
responseData.setLastModified(new Date(file.lastModified()));
responseData.addMetaData(SMB_CREATE_TIME, new Date(file.createTime()));
try {
if (logger.isDebugEnabled()) {
logger.debug("Parsing SmbFile Owner: " + filePath);
}
final SID ownerUser = file.getOwnerUser();
if (ownerUser != null) {
// Owner is stored as a two-element array: account name, then domain name.
final String[] ownerAttributes = { ownerUser.getAccountName(), ownerUser.getDomainName() };
responseData.addMetaData(SMB_OWNER_ATTRIBUTES, ownerAttributes);
}
} catch (final IOException e) {
// Owner lookup is best-effort: the failure is logged (without the stack trace) and
// processing continues without owner metadata.
logger.warn("Cannot get owner of the file: " + filePath);
}
if (logger.isDebugEnabled()) {
logger.debug("Parsing SmbFile ACL: " + filePath);
}
processAccessControlEntries(responseData, file);
// Copy every header field reported by the SmbFile into the response metadata.
final Map<String, List<String>> headerFieldMap = file.getHeaderFields();
if (headerFieldMap != null) {
for (final Map.Entry<String, List<String>> entry : headerFieldMap.entrySet()) {
responseData.addMetaData(entry.getKey(), entry.getValue());
}
}
if (file.canRead()) {
final MimeTypeHelper mimeTypeHelper = crawlerContainer.getComponent("mimeTypeHelper");
if (includeContent) {
if (logger.isDebugEnabled()) {
logger.debug("Parsing SmbFile Content: " + filePath);
}
// Files below maxCachedContentSize are read fully into a byte array; anything
// else goes through a temporary file instead.
if (file.getContentLength() < maxCachedContentSize) {
try (InputStream contentStream = new BufferedInputStream(new SmbFileInputStream(file))) {
responseData.setResponseBody(InputStreamUtil.getBytes(contentStream));
} catch (final Exception e) {
// A read failure is reported through the status code rather than thrown.
logger.warn("I/O Exception.", e);
responseData.setHttpStatusCode(Constants.SERVER_ERROR_STATUS_CODE);
}
} else {
File outputFile = null;
try {
outputFile = File.createTempFile("crawler-SmbClient-", ".out");
copy(file, outputFile);
// NOTE(review): the boolean presumably marks the file as temporary so it is
// removed when the response is closed - confirm against ResponseData.
responseData.setResponseBody(outputFile, true);
} catch (final Exception e) {
logger.warn("I/O Exception.", e);
responseData.setHttpStatusCode(Constants.SERVER_ERROR_STATUS_CODE);
// On failure the temp file is deleted here; a failed delete is only logged.
if (outputFile != null && !outputFile.delete()) {
logger.warn("Could not delete " + outputFile.getAbsolutePath());
}
}
}
if (logger.isDebugEnabled()) {
logger.debug("Parsing SmbFile MIME Type: " + filePath);
}
// Detect the MIME type from the attached body; on any failure fall back to
// detection with a null stream and the file name only.
try (final InputStream is = responseData.getResponseBody()) {
responseData.setMimeType(mimeTypeHelper.getContentType(is, file.getName()));
} catch (final Exception e) {
responseData.setMimeType(mimeTypeHelper.getContentType(null, file.getName()));
}
} else {
if (logger.isDebugEnabled()) {
logger.debug("Parsing SmbFile MIME Type: " + filePath);
}
// No body is attached: the file is opened only for MIME type detection, with the
// same null-stream fallback on failure.
try (final InputStream is = new SmbFileInputStream(file)) {
responseData.setMimeType(mimeTypeHelper.getContentType(is, file.getName()));
} catch (final Exception e) {
responseData.setMimeType(mimeTypeHelper.getContentType(null, file.getName()));
}
}
// Optional per-MIME-type length limit; skipped entirely when no helper is configured.
if (contentLengthHelper != null) {
final long maxLength = contentLengthHelper.getMaxLength(responseData.getMimeType());
if (responseData.getContentLength() > maxLength) {
throw new MaxLengthExceededException("The content length (" + responseData.getContentLength() + " byte) is over " + maxLength + " byte. The url is " + filePath);
}
}
} else {
// The file exists but cannot be read: report it as forbidden with a generic MIME type.
responseData.setHttpStatusCode(Constants.FORBIDDEN_STATUS_CODE);
responseData.setMimeType(APPLICATION_OCTET_STREAM);
}
} else if (file.isDirectory()) {
if (logger.isDebugEnabled()) {
logger.debug("Parsing SmbFile Directory: " + filePath);
}
final Set<RequestData> requestDataSet = new HashSet<>(100);
// Children are listed only when content was requested; otherwise the set stays empty.
if (includeContent) {
final SmbFile[] files = file.listFiles();
if (files != null) {
for (final SmbFile f : files) {
final String chileUri = f.toString();
requestDataSet.add(RequestDataBuilder.newRequestData().get().url(chileUri).build());
}
}
}
// A directory never produces a response; its children are handed to the caller via the exception.
throw new ChildUrlsException(requestDataSet, this.getClass().getName() + "#getResponseData");
} else {
// Neither a file nor a directory: handled the same way as an unparsable URL.
responseData.setHttpStatusCode(Constants.NOT_FOUND_STATUS_CODE);
responseData.setCharSet(charset);
responseData.setContentLength(0);
}
} catch (final CrawlerSystemException e) {
// Close the partially built response before rethrowing unchanged.
// NOTE(review): presumably this also covers the ChildUrlsException and
// MaxLengthExceededException thrown above - confirm the exception hierarchy.
CloseableUtil.closeQuietly(responseData);
throw e;
} catch (final SmbException e) {
// Close the response and wrap the SMB failure, keeping the original as the cause.
CloseableUtil.closeQuietly(responseData);
throw new CrawlingAccessException("Could not access " + uri, e);
}
return responseData;
}
Use of org.codelibs.fess.crawler.exception.ChildUrlsException in project fess-crawler by codelibs.
In the class SitemapsResponseProcessor, method process.
/**
 * Parses the response body with the {@code sitemapsHelper} component and turns the location
 * of every non-null sitemap entry into a GET request.
 * <p>
 * This method does not return normally: once the entries are collected it throws a
 * {@link ChildUrlsException} carrying them, in the order they were parsed.
 *
 * @param responseData the response whose body is parsed
 * @throws ChildUrlsException always, after the body has been parsed successfully
 * @throws IORuntimeException when an {@link IOException} escapes the try-with-resources block
 */
@Override
public void process(final ResponseData responseData) {
    final SitemapsHelper helper = crawlerContainer.getComponent("sitemapsHelper");
    try (final InputStream body = responseData.getResponseBody()) {
        final SitemapSet parsed = helper.parse(body);
        // Insertion-ordered so the child requests keep the order of the parsed entries.
        final Set<RequestData> childRequests = new LinkedHashSet<>();
        for (final Sitemap entry : parsed.getSitemaps()) {
            if (entry == null) {
                continue;
            }
            final RequestData childRequest = RequestDataBuilder.newRequestData().get().url(entry.getLoc()).build();
            childRequests.add(childRequest);
        }
        // Thrown inside the try block so the body stream is still closed on the way out.
        throw new ChildUrlsException(childRequests, this.getClass().getName() + "#process");
    } catch (final IOException e) {
        throw new IORuntimeException(e);
    }
}
Use of org.codelibs.fess.crawler.exception.ChildUrlsException in project fess-crawler by codelibs.
In the class FileSystemClientTest, method test_doGet_dir.
/**
 * Checks that a GET on the {@code test} resource directory throws a
 * {@link ChildUrlsException} instead of returning, and that every child URL outside
 * {@code .svn} refers to one of the expected entries of that directory.
 */
public void test_doGet_dir() {
    final File testDir = ResourceUtil.getResourceAsFile("test");
    final String absolutePath = testDir.getAbsolutePath();
    // Paths that do not start with '/' get a leading slash and have backslashes
    // replaced with forward slashes before being embedded in the file URL.
    final String urlPath = absolutePath.startsWith("/") ? absolutePath : "/" + absolutePath.replace('\\', '/');
    try {
        fsClient.doGet("file://" + urlPath);
        // Reaching this line means no exception was thrown.
        fail();
    } catch (final ChildUrlsException e) {
        for (final RequestData child : e.getChildUrlList()) {
            final String childUrl = child.getUrl();
            if (childUrl.contains(".svn")) {
                // Entries whose URL contains ".svn" are not checked.
                continue;
            }
            final boolean expected = childUrl.contains("test/dir1")
                    || childUrl.contains("test/dir2")
                    || childUrl.contains("test/text1.txt")
                    || childUrl.contains("test/text2.txt")
                    || childUrl.contains("test/text%203.txt");
            assertTrue(expected);
        }
    }
}
Use of org.codelibs.fess.crawler.exception.ChildUrlsException in project fess-crawler by codelibs.
In the class FtpClientTest, method test_doGet_root_dir.
/**
 * Starts an FTP server via {@code startFtpServer}, configures the client with matching
 * credentials and checks that a GET on the server root throws a {@link ChildUrlsException}
 * carrying exactly five child URLs, each naming one of the expected entries.
 *
 * @throws FtpException declared for the FTP server calls made by this test
 */
public void test_doGet_root_dir() throws FtpException {
    final String username = "testuser";
    final String password = "testpass";
    FtpServer server = null;
    try {
        server = startFtpServer(FTP_PORT, username, password);

        final FtpAuthentication credentials = new FtpAuthentication();
        credentials.setUsername(username);
        credentials.setPassword(password);
        final Map<String, Object> initParams = new HashMap<>();
        initParams.put(FtpClient.FTP_AUTHENTICATIONS_PROPERTY, new FtpAuthentication[] { credentials });
        ftpClient.setInitParameterMap(initParams);

        ftpClient.doGet("ftp://localhost:" + FTP_PORT + "/");
        // Reaching this line means no exception was thrown.
        fail();
    } catch (final ChildUrlsException e) {
        final Set<RequestData> children = e.getChildUrlList();
        assertEquals(5, children.size());
        for (final RequestData child : children) {
            final String childUrl = child.getUrl();
            final boolean expected = childUrl.contains("dir1")
                    || childUrl.contains("dir2")
                    || childUrl.contains("text1.txt")
                    || childUrl.contains("text2.txt")
                    || childUrl.contains("text 3.txt");
            assertTrue(expected);
        }
    } finally {
        // Stop the server whether the assertions passed or not.
        if (server != null) {
            server.stop();
        }
    }
}
Aggregations