Search in sources:

Example 36 with FTPFile

use of org.apache.commons.net.ftp.FTPFile in project fess-crawler by codelibs.

Class FtpClient, method getResponseData.

/**
 * Builds the {@link ResponseData} for the given FTP URI.
 *
 * <p>The method marks the response as a GET, resolves the URI through
 * {@code FtpInfo}, obtains a client via {@code getClient} and changes into the
 * parent directory of the target. When the URI carries no file name it is
 * treated as the root directory: the child URLs are collected (only when
 * {@code includeContent} is set), the client is offered back to
 * {@code ftpClientQueue} and a {@link ChildUrlsException} is thrown. Otherwise
 * the current directory is listed, the entry whose name equals the requested
 * name is looked up, and the result (possibly {@code null}) is handed to
 * {@code updateResponseData}.
 *
 * @param uri the FTP URI to access
 * @param includeContent whether child URLs of the root directory are collected;
 *        also forwarded to {@code updateResponseData}
 * @return the populated response data
 * @throws ChildUrlsException when the URI denotes the root directory
 * @throws CrawlingAccessException when the access fails for any non-crawler exception
 */
protected ResponseData getResponseData(final String uri, final boolean includeContent) {
    final ResponseData responseData = new ResponseData();
    try {
        responseData.setMethod(Constants.GET_METHOD);
        final FtpInfo ftpInfo = new FtpInfo(uri);
        responseData.setUrl(ftpInfo.toUrl());

        final FTPClient client = getClient(ftpInfo);
        client.changeWorkingDirectory(ftpInfo.getParent());
        validateRequest(client);

        if (ftpInfo.getName() == null) {
            // No file name: the URI points at the root directory.
            final Set<RequestData> childRequests = new HashSet<>();
            if (includeContent) {
                try {
                    final FTPFile[] children = client.listFiles(ftpInfo.getParent(), FTPFileFilters.NON_NULL);
                    validateRequest(client);
                    for (final FTPFile child : children) {
                        final String childUrl = ftpInfo.toChildUrl(child.getName());
                        childRequests.add(RequestDataBuilder.newRequestData().get().url(childUrl).build());
                    }
                } catch (final IOException e) {
                    // The listing failed on I/O: drop this connection instead of re-queueing it.
                    disconnectInternalClient(client);
                    throw new CrawlingAccessException("Could not access " + uri, e);
                }
            }
            // Hand the client back before signalling the child URLs to the caller.
            ftpClientQueue.offer(client);
            throw new ChildUrlsException(childRequests, this.getClass().getName() + "#getResponseData");
        }

        // Look the target up among the entries of the (already entered) parent directory.
        final FTPFile[] siblings = client.listFiles(null, FTPFileFilters.NON_NULL);
        validateRequest(client);
        FTPFile target = null;
        for (int i = 0; i < siblings.length && target == null; i++) {
            if (ftpInfo.getName().equals(siblings[i].getName())) {
                target = siblings[i];
            }
        }
        updateResponseData(uri, includeContent, responseData, client, ftpInfo, target);
    } catch (final CrawlerSystemException e) {
        // Crawler exceptions (including ChildUrlsException above) pass through unchanged.
        CloseableUtil.closeQuietly(responseData);
        throw e;
    } catch (final Exception e) {
        CloseableUtil.closeQuietly(responseData);
        throw new CrawlingAccessException("Could not access " + uri, e);
    }
    return responseData;
}
Also used : ChildUrlsException(org.codelibs.fess.crawler.exception.ChildUrlsException) CrawlingAccessException(org.codelibs.fess.crawler.exception.CrawlingAccessException) ResponseData(org.codelibs.fess.crawler.entity.ResponseData) FTPFile(org.apache.commons.net.ftp.FTPFile) IOException(java.io.IOException) FTPClient(org.apache.commons.net.ftp.FTPClient) CrawlingAccessException(org.codelibs.fess.crawler.exception.CrawlingAccessException) CrawlerLoginFailureException(org.codelibs.fess.crawler.exception.CrawlerLoginFailureException) MaxLengthExceededException(org.codelibs.fess.crawler.exception.MaxLengthExceededException) CrawlerSystemException(org.codelibs.fess.crawler.exception.CrawlerSystemException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) ChildUrlsException(org.codelibs.fess.crawler.exception.ChildUrlsException) RequestData(org.codelibs.fess.crawler.entity.RequestData) CrawlerSystemException(org.codelibs.fess.crawler.exception.CrawlerSystemException) HashSet(java.util.HashSet)

Example 37 with FTPFile

use of org.apache.commons.net.ftp.FTPFile in project DataX by alibaba.

Class StandardFtpHelper, method getListFiles.

/**
 * Recursively collects the paths of the files found under {@code directoryPath}
 * into {@code sourceFiles} and returns that set.
 *
 * <p>{@code directoryPath} may be a wildcard expression (containing {@code *} or
 * {@code ?}), a directory, or a single file. Symbolic links are rejected with a
 * {@code DataXException}, as are paths that cannot be resolved.
 *
 * @param directoryPath the path (or wildcard expression) to scan
 * @param parentLevel the current recursion depth
 * @param maxTraversalLevel the depth at which recursion is aborted with an error
 * @return the accumulated set of file paths ({@code sourceFiles})
 */
@Override
public HashSet<String> getListFiles(String directoryPath, int parentLevel, int maxTraversalLevel) {
    if (parentLevel >= maxTraversalLevel) {
        // Maximum recursion depth exceeded.
        String message = String.format("获取path:[%s] 下文件列表时超出最大层数,请确认路径[%s]下不存在软连接文件", directoryPath, directoryPath);
        LOG.error(message);
        throw DataXException.asDataXException(FtpReaderErrorCode.OUT_MAX_DIRECTORY_LEVEL, message);
    }

    // Parent directory of the listed entries, always ending with '/'.
    final String parentPath;
    if (directoryPath.contains("*") || directoryPath.contains("?")) {
        // The path is a wildcard expression: list from its non-wildcard parent.
        String subPath = UnstructuredStorageReaderUtil.getRegexPathParentPath(directoryPath);
        if (!isDirExist(subPath)) {
            String message = String.format("不能进入目录:[%s]," + "请确认您的配置项path:[%s]存在,且配置的用户有权限进入", subPath, directoryPath);
            LOG.error(message);
            throw DataXException.asDataXException(FtpReaderErrorCode.FILE_NOT_EXISTS, message);
        }
        parentPath = subPath;
    } else if (isDirExist(directoryPath)) {
        // The path is a directory: make sure it carries a trailing separator.
        char lastChar = directoryPath.charAt(directoryPath.length() - 1);
        parentPath = (lastChar == IOUtils.DIR_SEPARATOR) ? directoryPath : directoryPath + IOUtils.DIR_SEPARATOR;
    } else if (isFileExist(directoryPath)) {
        // The path points at one concrete file.
        sourceFiles.add(directoryPath);
        return sourceFiles;
    } else if (isSymbolicLink(directoryPath)) {
        // The path is a symbolic link, which is not supported.
        String message = String.format("文件:[%s]是链接文件,当前不支持链接文件的读取", directoryPath);
        LOG.error(message);
        throw DataXException.asDataXException(FtpReaderErrorCode.LINK_FILE, message);
    } else {
        String message = String.format("请确认您的配置项path:[%s]存在,且配置的用户有权限读取", directoryPath);
        LOG.error(message);
        throw DataXException.asDataXException(FtpReaderErrorCode.FILE_NOT_EXISTS, message);
    }

    try {
        FTPFile[] entries = ftpClient.listFiles(new String(directoryPath.getBytes(), FTP.DEFAULT_CONTROL_ENCODING));
        for (FTPFile entry : entries) {
            String entryName = entry.getName();
            String entryPath = parentPath + entryName;
            if (entry.isDirectory()) {
                // Recurse into sub-directories, skipping the self/parent entries.
                if (!entryName.equals(".") && !entryName.equals("..")) {
                    getListFiles(entryPath, parentLevel + 1, maxTraversalLevel);
                }
                continue;
            }
            if (entry.isFile()) {
                // Regular file: record it.
                sourceFiles.add(entryPath);
                continue;
            }
            if (entry.isSymbolicLink()) {
                // Symbolic links are not supported.
                String message = String.format("文件:[%s]是链接文件,当前不支持链接文件的读取", entryPath);
                LOG.error(message);
                throw DataXException.asDataXException(FtpReaderErrorCode.LINK_FILE, message);
            }
            String message = String.format("请确认path:[%s]存在,且配置的用户有权限读取", entryPath);
            LOG.error(message);
            throw DataXException.asDataXException(FtpReaderErrorCode.FILE_NOT_EXISTS, message);
        }
    } catch (IOException e) {
        String message = String.format("获取path:[%s] 下文件列表时发生I/O异常,请确认与ftp服务器的连接正常", directoryPath);
        LOG.error(message);
        throw DataXException.asDataXException(FtpReaderErrorCode.COMMAND_FTP_IO_EXCEPTION, message, e);
    }
    return sourceFiles;
}
Also used : FTPFile(org.apache.commons.net.ftp.FTPFile) IOException(java.io.IOException)

Example 38 with FTPFile

use of org.apache.commons.net.ftp.FTPFile in project DataX by alibaba.

Class StandardFtpHelperImpl, method getAllFilesInDir.

/**
 * Returns the names of all entries in {@code dir} whose name starts with
 * {@code prefixFileName}.
 *
 * <p>The FTP client first changes into {@code dir}; a failure to do so, or an
 * I/O error while listing, is reported as a {@code DataXException} with
 * {@code FtpWriterErrorCode.COMMAND_FTP_IO_EXCEPTION}.
 *
 * @param dir the remote directory to list
 * @param prefixFileName the prefix an entry name must start with to be returned
 * @return the matching entry names (names only, without the directory part)
 */
@Override
public Set<String> getAllFilesInDir(String dir, String prefixFileName) {
    Set<String> matchedNames = new HashSet<String>();
    try {
        if (!this.ftpClient.changeWorkingDirectory(dir)) {
            throw DataXException.asDataXException(FtpWriterErrorCode.COMMAND_FTP_IO_EXCEPTION, String.format("进入目录[%s]失败", dir));
        }
        this.printWorkingDirectory();

        FTPFile[] entries = this.ftpClient.listFiles(dir);
        LOG.debug(String.format("ls: %s", JSON.toJSONString(entries, SerializerFeature.UseSingleQuotes)));
        for (FTPFile entry : entries) {
            String entryName = entry.getName();
            if (!entryName.startsWith(prefixFileName)) {
                continue;
            }
            matchedNames.add(entryName);
        }
    } catch (IOException e) {
        String message = String.format("获取path:[%s] 下文件列表时发生I/O异常,请确认与ftp服务器的连接正常,拥有目录ls权限, errorMessage:%s", dir, e.getMessage());
        LOG.error(message);
        throw DataXException.asDataXException(FtpWriterErrorCode.COMMAND_FTP_IO_EXCEPTION, message, e);
    }
    return matchedNames;
}
Also used : FTPFile(org.apache.commons.net.ftp.FTPFile) IOException(java.io.IOException) HashSet(java.util.HashSet)

Example 39 with FTPFile

use of org.apache.commons.net.ftp.FTPFile in project nutch by apache.

Class Client, method retrieveList.

/**
 * Retrieve list reply for path
 *
 * <p>The passive data socket is always closed before this method returns or
 * throws, including when reading or parsing the listing fails.
 *
 * @param path a path on the FTP server
 * @param entries an initialized {@link List} of
 * {@link FTPFile}'s to populate with entries found at the path
 * @param limit optionally impose a download limit if this value
 * is &gt;= 0, otherwise no limit
 * @param parser a configured {@link FTPFileEntryParser}
 * @throws IOException if there is a fatal I/O error, could be related to
 * opening a passive data connection or retrieving data from the specified path
 * @throws FtpExceptionCanNotHaveDataConnection if an error occurs whilst
 * opening a passive data connection
 * @throws FtpExceptionUnknownForcedDataClose if there is a bad reply from the
 * FTP server
 * @throws FtpExceptionControlClosedByForcedDataClose some ftp servers will
 * close control channel if data channel socket is closed by our end before
 * all data has been read out
 */
public void retrieveList(String path, List<FTPFile> entries, int limit, FTPFileEntryParser parser) throws IOException, FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose, FtpExceptionControlClosedByForcedDataClose {
    Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path);
    if (socket == null) {
        throw new FtpExceptionCanNotHaveDataConnection("LIST " + ((path == null) ? "" : path));
    }
    try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(socket.getInputStream()));
        // running total of the characters consumed from parsed listing lines
        int count = 0;
        String line = parser.readNextEntry(reader);
        while (line != null) {
            FTPFile ftpFile = parser.parseFTPEntry(line);
            // skip non-formatted lines
            if (ftpFile == null) {
                line = parser.readNextEntry(reader);
                continue;
            }
            entries.add(ftpFile);
            count += line.length();
            // here, cut off is up to the line when total bytes is just over limit
            if (limit >= 0 && count > limit) {
                break;
            }
            line = parser.readNextEntry(reader);
        }
    } finally {
        // you always close here, whether or not the limit forced an early stop,
        // and also when reading failed, so the data socket never leaks.
        // however different ftp servers respond differently, see below.
        socket.close();
    }
    try {
        int reply = getReply();
        if (!_notBadReply(reply)) {
            throw new FtpExceptionUnknownForcedDataClose(getReplyString());
        }
    } catch (FTPConnectionClosedException e) {
        // the server dropped the control channel in response to our data-channel close
        throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) FTPConnectionClosedException(org.apache.commons.net.ftp.FTPConnectionClosedException) BufferedReader(java.io.BufferedReader) FTPFile(org.apache.commons.net.ftp.FTPFile) Socket(java.net.Socket)

Example 40 with FTPFile

use of org.apache.commons.net.ftp.FTPFile in project nutch by apache.

Class FtpResponse, method getFileAsHttpResponse.

/**
 * Gets an ftp file as an http response: fills in {@code code}, {@code headers}
 * and {@code content} of this response.
 *
 * <p>Resulting codes set by this method: 200 when the file was retrieved, 304
 * when its timestamp is not newer than {@code lastModified}, 300 with a
 * {@code Location} header when the path turns out to be a directory, 404 when
 * it is neither a file nor a directory, and 400 when the server reacted badly
 * to the forced close of the data channel.
 *
 * @param path the path of the file on the ftp server
 * @param lastModified time in milliseconds; the file is not retrieved when its
 *        timestamp is less than or equal to this value
 * @throws IOException if talking to the ftp server fails
 */
private void getFileAsHttpResponse(String path, long lastModified) throws IOException {
    ByteArrayOutputStream os = null;
    List<FTPFile> list = null;
    try {
        // first get its possible attributes
        list = new LinkedList<FTPFile>();
        ftp.client.retrieveList(path, list, ftp.maxContentLength, ftp.parser);
        if (list.isEmpty()) {
            // The listing had no parsable entry (missing path or empty directory);
            // list.get(0) would throw an unchecked IndexOutOfBoundsException here.
            redirectToDirOrNotFound(path);
            return;
        }
        FTPFile ftpFile = list.get(0);
        this.headers.set(Response.CONTENT_LENGTH, Long.toString(ftpFile.getSize()));
        this.headers.set(Response.LAST_MODIFIED, HttpDateFormat.toString(ftpFile.getTimestamp()));
        // don't retrieve the file if not changed.
        if (ftpFile.getTimestamp().getTimeInMillis() <= lastModified) {
            code = 304;
            return;
        }
        os = new ByteArrayOutputStream(ftp.getBufferSize());
        ftp.client.retrieveFile(path, os, ftp.maxContentLength);
        this.content = os.toByteArray();
        // http OK
        this.code = 200;
    } catch (FtpExceptionControlClosedByForcedDataClose e) {
        if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
            Ftp.LOG.info("delete client because server cut off control channel: " + e);
        }
        ftp.client = null;
        // os is still null when the exception was thrown by retrieveList()
        // (not retrieveFile()) above,
        if (os == null) {
            if (Ftp.LOG.isWarnEnabled()) {
                Ftp.LOG.warn("Please try larger maxContentLength for ftp.client.retrieveList(). " + e);
            }
            // in a way, this is our request fault
            // http Bad request
            this.code = 400;
            return;
        }
        // retrieveFile() was reached, so the listing had at least one entry
        FTPFile ftpFile = list.get(0);
        this.headers.set(Response.CONTENT_LENGTH, Long.toString(ftpFile.getSize()));
        this.headers.set(Response.LAST_MODIFIED, HttpDateFormat.toString(ftpFile.getTimestamp()));
        this.content = os.toByteArray();
        if (ftpFile.getTimestamp().getTimeInMillis() <= lastModified) {
            code = 304;
            return;
        }
        // http OK
        this.code = 200;
    } catch (FtpExceptionCanNotHaveDataConnection e) {
        redirectToDirOrNotFound(path);
    } catch (FtpExceptionUnknownForcedDataClose e) {
        // in a way, this is our request fault
        if (Ftp.LOG.isWarnEnabled()) {
            Ftp.LOG.warn("Unrecognized reply after forced close of data channel. " + "If this is acceptable, please modify Client.java accordingly. " + e);
        }
        // http Bad Request
        this.code = 400;
    }
}

// Answers with a redirect to "path/" when the server accepts path as a directory, otherwise with 404.
private void redirectToDirOrNotFound(String path) throws IOException {
    if (FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
        // it is not a file, but dir, so redirect as a dir
        this.headers.set(Response.LOCATION, path + "/");
        // http redirect
        this.code = 300;
        // fixme, should we do ftp.client.cwd("/"), back to top dir?
    } else {
        // it is not a dir either
        // http Not Found
        this.code = 404;
    }
}
Also used : FTPFile(org.apache.commons.net.ftp.FTPFile) ByteArrayOutputStream(java.io.ByteArrayOutputStream)

Aggregations

FTPFile (org.apache.commons.net.ftp.FTPFile)120 IOException (java.io.IOException)59 FTPClient (org.apache.commons.net.ftp.FTPClient)34 Test (org.junit.Test)32 File (java.io.File)28 InputStream (java.io.InputStream)16 ArrayList (java.util.ArrayList)15 FrameworkException (org.structr.common.error.FrameworkException)15 Tx (org.structr.core.graph.Tx)15 FtpTest (org.structr.web.files.FtpTest)15 FileOutputStream (java.io.FileOutputStream)11 OutputStream (java.io.OutputStream)9 ByteArrayInputStream (java.io.ByteArrayInputStream)8 BuildException (org.apache.tools.ant.BuildException)8 List (java.util.List)7 Matchers.containsString (org.hamcrest.Matchers.containsString)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 BeanFactory (org.springframework.beans.factory.BeanFactory)5 LiteralExpression (org.springframework.expression.common.LiteralExpression)5 HashSet (java.util.HashSet)4