use of org.apache.commons.net.ftp.FTPFile in project fess-crawler by codelibs.
the class FtpClient method getResponseData.
/**
 * Builds the {@link ResponseData} for an FTP URI.
 *
 * <p>When the parsed URI has no file name (the root directory), no response
 * body is produced: the client is put back on {@code ftpClientQueue} and a
 * {@link ChildUrlsException} carrying the child URLs is thrown. The child
 * URLs are only collected when {@code includeContent} is true; otherwise the
 * set is empty.
 *
 * <p>For any other URI the parent directory is listed, the entry whose name
 * equals the requested name is looked up, and the work is handed over to
 * {@code updateResponseData}. The entry passed on is {@code null} when no
 * listing entry matched.
 *
 * @param uri the FTP URI to access
 * @param includeContent whether content (or, for the root, child URLs) is requested
 * @return the populated response data
 * @throws CrawlingAccessException if the listing fails with an I/O error or any
 *         other non-crawler exception occurs
 */
protected ResponseData getResponseData(final String uri, final boolean includeContent) {
    final ResponseData responseData = new ResponseData();
    try {
        responseData.setMethod(Constants.GET_METHOD);
        final FtpInfo ftpInfo = new FtpInfo(uri);
        responseData.setUrl(ftpInfo.toUrl());

        final FTPClient client = getClient(ftpInfo);
        client.changeWorkingDirectory(ftpInfo.getParent());
        validateRequest(client);

        if (ftpInfo.getName() == null) {
            // Root directory: report children instead of returning a body.
            final Set<RequestData> childRequests = new HashSet<>();
            if (includeContent) {
                try {
                    final FTPFile[] children = client.listFiles(ftpInfo.getParent(), FTPFileFilters.NON_NULL);
                    validateRequest(client);
                    for (int i = 0; i < children.length; i++) {
                        final String childUrl = ftpInfo.toChildUrl(children[i].getName());
                        childRequests.add(RequestDataBuilder.newRequestData().get().url(childUrl).build());
                    }
                } catch (final IOException e) {
                    // The connection is not reused after a failed listing.
                    disconnectInternalClient(client);
                    throw new CrawlingAccessException("Could not access " + uri, e);
                }
            }
            ftpClientQueue.offer(client);
            throw new ChildUrlsException(childRequests, this.getClass().getName() + "#getResponseData");
        }

        // Regular entry: find it in the listing of the current working directory.
        final FTPFile[] siblings = client.listFiles(null, FTPFileFilters.NON_NULL);
        validateRequest(client);
        FTPFile matched = null;
        for (int i = 0; i < siblings.length && matched == null; i++) {
            if (ftpInfo.getName().equals(siblings[i].getName())) {
                matched = siblings[i];
            }
        }
        updateResponseData(uri, includeContent, responseData, client, ftpInfo, matched);
        return responseData;
    } catch (final CrawlerSystemException e) {
        CloseableUtil.closeQuietly(responseData);
        throw e;
    } catch (final Exception e) {
        CloseableUtil.closeQuietly(responseData);
        throw new CrawlingAccessException("Could not access " + uri, e);
    }
}
use of org.apache.commons.net.ftp.FTPFile in project DataX by alibaba.
the class StandardFtpHelper method getListFiles.
/**
 * Recursively collects file paths below {@code directoryPath} into the
 * {@code sourceFiles} field and returns that same set.
 *
 * <p>The path may be a wildcard expression (containing {@code *} or
 * {@code ?}), a directory, or a single file. Symbolic links are rejected.
 * Listing entries that the FTP client could not parse (returned as
 * {@code null} array members) are skipped with a warning instead of
 * failing with a {@link NullPointerException}.
 *
 * @param directoryPath path to scan
 * @param parentLevel current recursion depth
 * @param maxTraversalLevel depth at which traversal is aborted
 * @return the accumulated {@code sourceFiles} set
 * @throws DataXException if the path does not exist, is or contains a symbolic
 *         link, the listing fails with an I/O error, or the maximum depth is reached
 */
@Override
public HashSet<String> getListFiles(String directoryPath, int parentLevel, int maxTraversalLevel) {
    if (parentLevel >= maxTraversalLevel) {
        // Maximum recursion depth exceeded.
        String message = String.format("获取path:[%s] 下文件列表时超出最大层数,请确认路径[%s]下不存在软连接文件", directoryPath, directoryPath);
        LOG.error(message);
        throw DataXException.asDataXException(FtpReaderErrorCode.OUT_MAX_DIRECTORY_LEVEL, message);
    }

    // Parent directory, always ending with the directory separator.
    String parentPath = null;
    int pathLen = directoryPath.length();
    if (directoryPath.contains("*") || directoryPath.contains("?")) {
        // The path is a wildcard expression.
        String subPath = UnstructuredStorageReaderUtil.getRegexPathParentPath(directoryPath);
        if (isDirExist(subPath)) {
            parentPath = subPath;
        } else {
            String message = String.format("不能进入目录:[%s]," + "请确认您的配置项path:[%s]存在,且配置的用户有权限进入", subPath, directoryPath);
            LOG.error(message);
            throw DataXException.asDataXException(FtpReaderErrorCode.FILE_NOT_EXISTS, message);
        }
    } else if (isDirExist(directoryPath)) {
        // The path is a directory.
        if (directoryPath.charAt(pathLen - 1) == IOUtils.DIR_SEPARATOR) {
            parentPath = directoryPath;
        } else {
            parentPath = directoryPath + IOUtils.DIR_SEPARATOR;
        }
    } else if (isFileExist(directoryPath)) {
        // The path points at a single file.
        sourceFiles.add(directoryPath);
        return sourceFiles;
    } else if (isSymbolicLink(directoryPath)) {
        // The path is a symbolic link.
        String message = String.format("文件:[%s]是链接文件,当前不支持链接文件的读取", directoryPath);
        LOG.error(message);
        throw DataXException.asDataXException(FtpReaderErrorCode.LINK_FILE, message);
    } else {
        String message = String.format("请确认您的配置项path:[%s]存在,且配置的用户有权限读取", directoryPath);
        LOG.error(message);
        throw DataXException.asDataXException(FtpReaderErrorCode.FILE_NOT_EXISTS, message);
    }

    try {
        FTPFile[] fs = ftpClient.listFiles(new String(directoryPath.getBytes(), FTP.DEFAULT_CONTROL_ENCODING));
        for (FTPFile ff : fs) {
            if (ff == null) {
                // listFiles may return null members for listing lines it failed to parse.
                LOG.warn(String.format("path:[%s] 下存在无法解析的文件列表项,已跳过", directoryPath));
                continue;
            }
            String strName = ff.getName();
            String filePath = parentPath + strName;
            if (ff.isDirectory()) {
                if (!strName.equals(".") && !strName.equals("..")) {
                    // Recurse into the sub-directory.
                    getListFiles(filePath, parentLevel + 1, maxTraversalLevel);
                }
            } else if (ff.isFile()) {
                // Regular file.
                sourceFiles.add(filePath);
            } else if (ff.isSymbolicLink()) {
                // Symbolic link.
                String message = String.format("文件:[%s]是链接文件,当前不支持链接文件的读取", filePath);
                LOG.error(message);
                throw DataXException.asDataXException(FtpReaderErrorCode.LINK_FILE, message);
            } else {
                String message = String.format("请确认path:[%s]存在,且配置的用户有权限读取", filePath);
                LOG.error(message);
                throw DataXException.asDataXException(FtpReaderErrorCode.FILE_NOT_EXISTS, message);
            }
        }
    } catch (IOException e) {
        String message = String.format("获取path:[%s] 下文件列表时发生I/O异常,请确认与ftp服务器的连接正常", directoryPath);
        LOG.error(message);
        throw DataXException.asDataXException(FtpReaderErrorCode.COMMAND_FTP_IO_EXCEPTION, message, e);
    }
    return sourceFiles;
}
use of org.apache.commons.net.ftp.FTPFile in project DataX by alibaba.
the class StandardFtpHelperImpl method getAllFilesInDir.
/**
 * Returns the names of the entries in {@code dir} whose name starts with
 * {@code prefixFileName}.
 *
 * <p>The working directory is changed to {@code dir} before listing.
 * Listing entries that the FTP client could not parse (returned as
 * {@code null} array members) are ignored rather than causing a
 * {@link NullPointerException}.
 *
 * @param dir directory to list
 * @param prefixFileName prefix an entry name must start with to be returned
 * @return the matching entry names; empty when nothing matches
 * @throws DataXException if the directory cannot be entered or the listing
 *         fails with an I/O error
 */
@Override
public Set<String> getAllFilesInDir(String dir, String prefixFileName) {
    Set<String> allFilesWithPointedPrefix = new HashSet<String>();
    try {
        boolean isDirExist = this.ftpClient.changeWorkingDirectory(dir);
        if (!isDirExist) {
            throw DataXException.asDataXException(FtpWriterErrorCode.COMMAND_FTP_IO_EXCEPTION, String.format("进入目录[%s]失败", dir));
        }
        this.printWorkingDirectory();
        FTPFile[] fs = this.ftpClient.listFiles(dir);
        LOG.debug(String.format("ls: %s", JSON.toJSONString(fs, SerializerFeature.UseSingleQuotes)));
        for (FTPFile ff : fs) {
            if (ff == null) {
                // listFiles may return null members for listing lines it failed to parse.
                continue;
            }
            String strName = ff.getName();
            if (strName.startsWith(prefixFileName)) {
                allFilesWithPointedPrefix.add(strName);
            }
        }
    } catch (IOException e) {
        String message = String.format("获取path:[%s] 下文件列表时发生I/O异常,请确认与ftp服务器的连接正常,拥有目录ls权限, errorMessage:%s", dir, e.getMessage());
        LOG.error(message);
        throw DataXException.asDataXException(FtpWriterErrorCode.COMMAND_FTP_IO_EXCEPTION, message, e);
    }
    return allFilesWithPointedPrefix;
}
use of org.apache.commons.net.ftp.FTPFile in project nutch by apache.
the class Client method retrieveList.
/**
 * Retrieve list reply for path
 *
 * <p>The data connection socket is always closed before this method returns
 * or throws, including when reading or parsing the listing fails.
 *
 * @param path a path on the FTP server
 * @param entries a initialized {@link List} of
 * {@link FTPFile}'s to populate with entries found at the path
 * @param limit optionally impose a download limit if this value
 * is &gt;= 0, otherwise no limit
 * @param parser a configured {@link FTPFileEntryParser}
 * @throws IOException if there is a fatal I/O error, could be related to
 * opening a passive data connection or retrieving data from the specified path
 * @throws FtpExceptionCanNotHaveDataConnection if an error occurs whilst
 * opening a passive data connection
 * @throws FtpExceptionUnknownForcedDataClose if there is a bad reply from the
 * FTP server
 * @throws FtpExceptionControlClosedByForcedDataClose some ftp servers will
 * close control channel if data channel socket is closed by our end before
 * all data has been read out
 */
public void retrieveList(String path, List<FTPFile> entries, int limit, FTPFileEntryParser parser) throws IOException, FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose, FtpExceptionControlClosedByForcedDataClose {
    Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path);
    if (socket == null) {
        throw new FtpExceptionCanNotHaveDataConnection("LIST " + ((path == null) ? "" : path));
    }

    boolean listingRead = false;
    try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(socket.getInputStream()));
        int count = 0;
        for (String line = parser.readNextEntry(reader); line != null; line = parser.readNextEntry(reader)) {
            FTPFile ftpFile = parser.parseFTPEntry(line);
            // skip non-formatted lines
            if (ftpFile == null) {
                continue;
            }
            entries.add(ftpFile);
            count += line.length();
            // here, cut off is up to the line when total bytes is just over limit
            if (limit >= 0 && count > limit) {
                break;
            }
        }
        listingRead = true;
    } finally {
        // The data socket is always closed here, whether or not the limit was
        // reached; different ftp servers respond differently, see below.
        if (listingRead) {
            socket.close();
        } else {
            // A failure is already propagating: release the socket without
            // letting a secondary close error replace the original exception.
            try {
                socket.close();
            } catch (IOException ignored) {
                // intentionally ignored, the original exception is more informative
            }
        }
    }

    try {
        int reply = getReply();
        if (!_notBadReply(reply)) {
            throw new FtpExceptionUnknownForcedDataClose(getReplyString());
        }
    } catch (FTPConnectionClosedException e) {
        throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
    }
}
use of org.apache.commons.net.ftp.FTPFile in project nutch by apache.
the class FtpResponse method getFileAsHttpResponse.
/**
 * Get ftp file as http response.
 *
 * <p>The file's attributes are read first via a LIST of {@code path}; they
 * populate the Content-Length and Last-Modified headers. The outcome is
 * reported through {@code code}:
 * <ul>
 * <li>200 with {@code content} set when the file was retrieved,</li>
 * <li>304 when the file's timestamp is not newer than {@code lastModified},</li>
 * <li>300 with a Location header when {@code path} turns out to be a directory,</li>
 * <li>404 when {@code path} is neither a file nor a directory,</li>
 * <li>400 when the listing was cut off or the server sent an unrecognized reply.</li>
 * </ul>
 * An empty listing is treated like a failed data connection (directory
 * check, then 404) instead of failing with an IndexOutOfBoundsException.
 *
 * @param path path of the file on the FTP server
 * @param lastModified timestamp in milliseconds to compare the file's timestamp against
 * @throws IOException if a fatal I/O error occurs while talking to the server
 */
private void getFileAsHttpResponse(String path, long lastModified) throws IOException {
    ByteArrayOutputStream os = null;
    List<FTPFile> list = null;
    try {
        // first get its possible attributes
        list = new LinkedList<FTPFile>();
        ftp.client.retrieveList(path, list, ftp.maxContentLength, ftp.parser);
        if (list.isEmpty()) {
            // No parseable entry for this path, so it is not a regular file.
            redirectToDirOrNotFound(path);
            return;
        }
        FTPFile ftpFile = list.get(0);
        this.headers.set(Response.CONTENT_LENGTH, Long.valueOf(ftpFile.getSize()).toString());
        this.headers.set(Response.LAST_MODIFIED, HttpDateFormat.toString(ftpFile.getTimestamp()));
        // don't retrieve the file if not changed.
        if (ftpFile.getTimestamp().getTimeInMillis() <= lastModified) {
            code = 304;
            return;
        }
        os = new ByteArrayOutputStream(ftp.getBufferSize());
        ftp.client.retrieveFile(path, os, ftp.maxContentLength);
        this.content = os.toByteArray();
        // http OK
        this.code = 200;
    } catch (FtpExceptionControlClosedByForcedDataClose e) {
        if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
            Ftp.LOG.info("delete client because server cut off control channel: " + e);
        }
        ftp.client = null;
        // os is still null when the exception came from retrieveList()
        // (not retrieveFile()) above
        if (os == null) {
            if (Ftp.LOG.isWarnEnabled()) {
                Ftp.LOG.warn("Please try larger maxContentLength for ftp.client.retrieveList(). " + e);
            }
            // in a way, this is our request fault
            // http Bad request
            this.code = 400;
            return;
        }
        // os is only assigned after list.get(0) succeeded, so the list is
        // known to be non-empty here.
        FTPFile ftpFile = list.get(0);
        this.headers.set(Response.CONTENT_LENGTH, Long.valueOf(ftpFile.getSize()).toString());
        this.headers.set(Response.LAST_MODIFIED, HttpDateFormat.toString(ftpFile.getTimestamp()));
        this.content = os.toByteArray();
        if (ftpFile.getTimestamp().getTimeInMillis() <= lastModified) {
            code = 304;
            return;
        }
        // http OK
        this.code = 200;
    } catch (FtpExceptionCanNotHaveDataConnection e) {
        redirectToDirOrNotFound(path);
    } catch (FtpExceptionUnknownForcedDataClose e) {
        // in a way, this is our request fault
        if (Ftp.LOG.isWarnEnabled()) {
            Ftp.LOG.warn("Unrecognized reply after forced close of data channel. " + "If this is acceptable, please modify Client.java accordingly. " + e);
        }
        // http Bad Request
        this.code = 400;
    }
}

// Answers with a redirect (300) when path is a directory the server lets us
// enter, and with Not Found (404) otherwise.
private void redirectToDirOrNotFound(String path) throws IOException {
    if (FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
        // it is not a file, but dir, so redirect as a dir
        this.headers.set(Response.LOCATION, path + "/");
        // http redirect
        this.code = 300;
        // fixme, should we do ftp.client.cwd("/"), back to top dir?
    } else {
        // it is not a dir either
        // http Not Found
        this.code = 404;
    }
}
Aggregations