Example 6 with MaxLengthExceededException

Use of org.codelibs.fess.crawler.exception.MaxLengthExceededException in project fess-crawler by codelibs.

From the class HcHttpClient, method processHttpMethod.

protected ResponseData processHttpMethod(final String url, final HttpUriRequest httpRequest) {
    try {
        processRobotsTxt(url);
    } catch (final CrawlingAccessException e) {
        if (logger.isInfoEnabled()) {
            final StringBuilder buf = new StringBuilder(100);
            buf.append(e.getMessage());
            if (e.getCause() != null) {
                buf.append(e.getCause().getMessage());
            }
            logger.info(buf.toString());
        } else if (logger.isDebugEnabled()) {
            logger.debug("Crawling Access Exception at " + url, e);
        }
    }
    // add request headers
    for (final Header header : requestHeaderList) {
        httpRequest.addHeader(header);
    }
    ResponseData responseData = new ResponseData();
    HttpEntity httpEntity = null;
    try {
        // get the response content
        final HttpResponse response = executeHttpClient(httpRequest);
        httpEntity = response.getEntity();
        final int httpStatusCode = response.getStatusLine().getStatusCode();
        // redirect
        if (isRedirectHttpStatus(httpStatusCode)) {
            final Header locationHeader = response.getFirstHeader("location");
            if (locationHeader == null) {
                logger.warn("Invalid redirect location at " + url);
            } else {
                final String redirectLocation;
                if (locationHeader.getValue().startsWith("/")) {
                    redirectLocation = buildRedirectLocation(url, locationHeader.getValue());
                } else {
                    redirectLocation = locationHeader.getValue();
                }
                responseData = new ResponseData();
                responseData.setRedirectLocation(redirectLocation);
                return responseData;
            }
        }
        String contentType = null;
        final Header contentTypeHeader = response.getFirstHeader("Content-Type");
        if (contentTypeHeader != null) {
            contentType = contentTypeHeader.getValue();
            final int idx = contentType.indexOf(';');
            if (idx > 0) {
                contentType = contentType.substring(0, idx);
                if (APPLICATION_OCTET_STREAM.equals(contentType)) {
                    contentType = null;
                }
            }
        }
        long contentLength = 0;
        String contentEncoding = Constants.UTF_8;
        if (httpEntity == null) {
            responseData.setResponseBody(new byte[0]);
            if (contentType == null) {
                contentType = defaultMimeType;
            }
        } else {
            final InputStream responseBodyStream = httpEntity.getContent();
            final File outputFile = File.createTempFile("crawler-HcHttpClient-", ".out");
            DeferredFileOutputStream dfos = null;
            try {
                try {
                    dfos = new DeferredFileOutputStream((int) maxCachedContentSize, outputFile);
                    CopyUtil.copy(responseBodyStream, dfos);
                    dfos.flush();
                } finally {
                    CloseableUtil.closeQuietly(dfos);
                }
            } catch (final Exception e) {
                if (!outputFile.delete()) {
                    logger.warn("Could not delete " + outputFile.getAbsolutePath());
                }
                throw e;
            }
            if (dfos.isInMemory()) {
                responseData.setResponseBody(dfos.getData());
                contentLength = dfos.getData().length;
                if (!outputFile.delete()) {
                    logger.warn("Could not delete " + outputFile.getAbsolutePath());
                }
                if (contentType == null) {
                    try (InputStream is = new ByteArrayInputStream(dfos.getData())) {
                        contentType = mimeTypeHelper.getContentType(is, url);
                    } catch (final Exception e) {
                        logger.debug("Failed to detect mime-type.", e);
                        contentType = defaultMimeType;
                    }
                }
            } else {
                responseData.setResponseBody(outputFile, true);
                contentLength = outputFile.length();
                if (contentType == null) {
                    try (InputStream is = new FileInputStream(outputFile)) {
                        contentType = mimeTypeHelper.getContentType(is, url);
                    } catch (final Exception e) {
                        logger.debug("Failed to detect mime-type.", e);
                        contentType = defaultMimeType;
                    }
                }
            }
            final Header contentEncodingHeader = httpEntity.getContentEncoding();
            if (contentEncodingHeader != null) {
                contentEncoding = contentEncodingHeader.getValue();
            }
        }
        // check file size
        if (contentLengthHelper != null) {
            final long maxLength = contentLengthHelper.getMaxLength(contentType);
            if (contentLength > maxLength) {
                throw new MaxLengthExceededException("The content length (" + contentLength + " byte) is over " + maxLength + " byte. The url is " + url);
            }
        }
        responseData.setUrl(url);
        responseData.setCharSet(contentEncoding);
        if (httpRequest instanceof HttpHead) {
            responseData.setMethod(Constants.HEAD_METHOD);
        } else {
            responseData.setMethod(Constants.GET_METHOD);
        }
        responseData.setHttpStatusCode(httpStatusCode);
        for (final Header header : response.getAllHeaders()) {
            responseData.addMetaData(header.getName(), header.getValue());
        }
        responseData.setMimeType(contentType);
        final Header contentLengthHeader = response.getFirstHeader("Content-Length");
        if (contentLengthHeader == null) {
            responseData.setContentLength(contentLength);
        } else {
            final String value = contentLengthHeader.getValue();
            try {
                responseData.setContentLength(Long.parseLong(value));
            } catch (final Exception e) {
                responseData.setContentLength(contentLength);
            }
        }
        checkMaxContentLength(responseData);
        final Header lastModifiedHeader = response.getFirstHeader("Last-Modified");
        if (lastModifiedHeader != null) {
            final String value = lastModifiedHeader.getValue();
            if (StringUtil.isNotBlank(value)) {
                final Date d = parseLastModified(value);
                if (d != null) {
                    responseData.setLastModified(d);
                }
            }
        }
        return responseData;
    } catch (final UnknownHostException e) {
        closeResources(httpRequest, responseData);
        throw new CrawlingAccessException("Unknown host(" + e.getMessage() + "): " + url, e);
    } catch (final NoRouteToHostException e) {
        closeResources(httpRequest, responseData);
        throw new CrawlingAccessException("No route to host(" + e.getMessage() + "): " + url, e);
    } catch (final ConnectException e) {
        closeResources(httpRequest, responseData);
        throw new CrawlingAccessException("Connection time out(" + e.getMessage() + "): " + url, e);
    } catch (final SocketException e) {
        closeResources(httpRequest, responseData);
        throw new CrawlingAccessException("Socket exception(" + e.getMessage() + "): " + url, e);
    } catch (final IOException e) {
        closeResources(httpRequest, responseData);
        throw new CrawlingAccessException("I/O exception(" + e.getMessage() + "): " + url, e);
    } catch (final CrawlerSystemException e) {
        closeResources(httpRequest, responseData);
        throw e;
    } catch (final Exception e) {
        closeResources(httpRequest, responseData);
        throw new CrawlerSystemException("Failed to access " + url, e);
    } finally {
        EntityUtils.consumeQuietly(httpEntity);
    }
}
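The branching on dfos.isInMemory() above follows from how DeferredFileOutputStream buffers: bodies up to the threshold stay in memory, anything larger spills into the temp file. Below is a minimal, self-contained sketch of that behavior; the sizes and file names are illustrative, not taken from the project.

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.InputStream;

import org.apache.commons.io.output.DeferredFileOutputStream;

public class DeferredBufferDemo {
    public static void main(final String[] args) throws Exception {
        final File outputFile = File.createTempFile("demo-", ".out");
        final byte[] body = new byte[16 * 1024]; // a pretend 16 KiB response body
        try (InputStream in = new ByteArrayInputStream(body);
                DeferredFileOutputStream dfos = new DeferredFileOutputStream(8 * 1024, outputFile)) {
            in.transferTo(dfos);
            dfos.flush();
            // 16 KiB exceeds the 8 KiB threshold, so the data spilled to disk.
            System.out.println("in memory? " + dfos.isInMemory()); // prints: in memory? false
        } finally {
            if (!outputFile.delete()) {
                System.err.println("Could not delete " + outputFile.getAbsolutePath());
            }
        }
    }
}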
Also used: SocketException (java.net.SocketException), HttpEntity (org.apache.http.HttpEntity), UnknownHostException (java.net.UnknownHostException), CrawlingAccessException (org.codelibs.fess.crawler.exception.CrawlingAccessException), MaxLengthExceededException (org.codelibs.fess.crawler.exception.MaxLengthExceededException), ByteArrayInputStream (java.io.ByteArrayInputStream), FileInputStream (java.io.FileInputStream), InputStream (java.io.InputStream), ResponseData (org.codelibs.fess.crawler.entity.ResponseData), HttpResponse (org.apache.http.HttpResponse), IOException (java.io.IOException), NoRouteToHostException (java.net.NoRouteToHostException), CrawlerSystemException (org.codelibs.fess.crawler.exception.CrawlerSystemException), ParseException (java.text.ParseException), ConnectException (java.net.ConnectException), MalformedURLException (java.net.MalformedURLException), HttpHead (org.apache.http.client.methods.HttpHead), Date (java.util.Date), Header (org.apache.http.Header), BasicHeader (org.apache.http.message.BasicHeader), DeferredFileOutputStream (org.apache.commons.io.output.DeferredFileOutputStream), File (java.io.File)
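For callers, the key behavior is the size check after buffering: once the measured content length exceeds the ContentLengthHelper cap for the detected MIME type, the whole fetch fails with MaxLengthExceededException. Here is a minimal sketch of that guard as a standalone check; the map-based lookup and class name are illustrative assumptions, and only the exception type is the real fess-crawler class.

import java.util.Map;

import org.codelibs.fess.crawler.exception.MaxLengthExceededException;

public class LengthGuard {
    private final Map<String, Long> maxLengthByMimeType;
    private final long defaultMaxLength;

    public LengthGuard(final Map<String, Long> maxLengthByMimeType, final long defaultMaxLength) {
        this.maxLengthByMimeType = maxLengthByMimeType;
        this.defaultMaxLength = defaultMaxLength;
    }

    public void check(final String url, final String mimeType, final long contentLength) {
        // Mirrors contentLengthHelper.getMaxLength(contentType) in the method above.
        final long maxLength = maxLengthByMimeType.getOrDefault(mimeType, defaultMaxLength);
        if (contentLength > maxLength) {
            throw new MaxLengthExceededException("The content length (" + contentLength
                    + " byte) is over " + maxLength + " byte. The url is " + url);
        }
    }
}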

Example 7 with MaxLengthExceededException

Use of org.codelibs.fess.crawler.exception.MaxLengthExceededException in project fess-crawler by codelibs.

From the class TarExtractor, method getTextInternal.

protected String getTextInternal(final InputStream in, final MimeTypeHelper mimeTypeHelper, final ExtractorFactory extractorFactory) {
    final StringBuilder buf = new StringBuilder(1000);
    ArchiveInputStream ais = null;
    try {
        ais = archiveStreamFactory.createArchiveInputStream("tar", in);
        TarArchiveEntry entry = null;
        long contentSize = 0;
        while ((entry = (TarArchiveEntry) ais.getNextEntry()) != null) {
            contentSize += entry.getSize();
            if (maxContentSize != -1 && contentSize > maxContentSize) {
                throw new MaxLengthExceededException("Extracted size is " + contentSize + " > " + maxContentSize);
            }
            final String filename = entry.getName();
            final String mimeType = mimeTypeHelper.getContentType(null, filename);
            if (mimeType != null) {
                final Extractor extractor = extractorFactory.getExtractor(mimeType);
                if (extractor != null) {
                    try {
                        final Map<String, String> map = new HashMap<>();
                        map.put(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
                        buf.append(extractor.getText(new IgnoreCloseInputStream(ais), map).getContent());
                        buf.append('\n');
                    } catch (final Exception e) {
                        if (logger.isDebugEnabled()) {
                            logger.debug("Exception in an internal extractor.", e);
                        }
                    }
                }
            }
        }
    } catch (final MaxLengthExceededException e) {
        throw e;
    } catch (final Exception e) {
        if (buf.length() == 0) {
            throw new ExtractException("Could not extract a content.", e);
        }
    } finally {
        CloseableUtil.closeQuietly(ais);
    }
    return buf.toString().trim();
}
Also used: ArchiveInputStream (org.apache.commons.compress.archivers.ArchiveInputStream), ExtractException (org.codelibs.fess.crawler.exception.ExtractException), MaxLengthExceededException (org.codelibs.fess.crawler.exception.MaxLengthExceededException), HashMap (java.util.HashMap), Extractor (org.codelibs.fess.crawler.extractor.Extractor), IgnoreCloseInputStream (org.codelibs.fess.crawler.util.IgnoreCloseInputStream), TarArchiveEntry (org.apache.commons.compress.archivers.tar.TarArchiveEntry), CrawlerSystemException (org.codelibs.fess.crawler.exception.CrawlerSystemException)
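The guard here is cumulative: each entry's declared size is added to a running total before any extraction work, and -1 means unlimited. A self-contained sketch of the same pattern written directly against commons-compress; the file path is a placeholder.

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.codelibs.fess.crawler.exception.MaxLengthExceededException;

public class TarSizeGuard {
    public static void scan(final String path, final long maxContentSize) throws Exception {
        long contentSize = 0;
        try (InputStream in = new BufferedInputStream(new FileInputStream(path));
                TarArchiveInputStream tais = new TarArchiveInputStream(in)) {
            TarArchiveEntry entry;
            while ((entry = tais.getNextTarEntry()) != null) {
                // Fail fast on the declared sizes, before extracting any entry content.
                contentSize += entry.getSize();
                if (maxContentSize != -1 && contentSize > maxContentSize) {
                    throw new MaxLengthExceededException(
                            "Extracted size is " + contentSize + " > " + maxContentSize);
                }
            }
        }
    }
}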

Example 8 with MaxLengthExceededException

Use of org.codelibs.fess.crawler.exception.MaxLengthExceededException in project fess-crawler by codelibs.

From the class ZipExtractor, method getText.

@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new CrawlerSystemException("The inputstream is null.");
    }
    final MimeTypeHelper mimeTypeHelper = getMimeTypeHelper();
    final ExtractorFactory extractorFactory = getExtractorFactory();
    final StringBuilder buf = new StringBuilder(1000);
    try (final ArchiveInputStream ais = archiveStreamFactory.createArchiveInputStream(in.markSupported() ? in : new BufferedInputStream(in))) {
        ZipArchiveEntry entry = null;
        long contentSize = 0;
        while ((entry = (ZipArchiveEntry) ais.getNextEntry()) != null) {
            contentSize += entry.getSize();
            if (maxContentSize != -1 && contentSize > maxContentSize) {
                throw new MaxLengthExceededException("Extracted size is " + contentSize + " > " + maxContentSize);
            }
            final String filename = entry.getName();
            final String mimeType = mimeTypeHelper.getContentType(null, filename);
            if (mimeType != null) {
                final Extractor extractor = extractorFactory.getExtractor(mimeType);
                if (extractor != null) {
                    try {
                        final Map<String, String> map = new HashMap<>();
                        map.put(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
                        buf.append(extractor.getText(new IgnoreCloseInputStream(ais), map).getContent());
                        buf.append('\n');
                    } catch (final Exception e) {
                        if (logger.isDebugEnabled()) {
                            logger.debug("Exception in an internal extractor.", e);
                        }
                    }
                }
            }
        }
    } catch (final MaxLengthExceededException e) {
        throw e;
    } catch (final Exception e) {
        if (buf.length() == 0) {
            throw new ExtractException("Could not extract a content.", e);
        }
    }
    return new ExtractData(buf.toString().trim());
}
Also used: ExtractException (org.codelibs.fess.crawler.exception.ExtractException), ExtractData (org.codelibs.fess.crawler.entity.ExtractData), MaxLengthExceededException (org.codelibs.fess.crawler.exception.MaxLengthExceededException), HashMap (java.util.HashMap), MimeTypeHelper (org.codelibs.fess.crawler.helper.MimeTypeHelper), ExtractorFactory (org.codelibs.fess.crawler.extractor.ExtractorFactory), CrawlerSystemException (org.codelibs.fess.crawler.exception.CrawlerSystemException), ArchiveInputStream (org.apache.commons.compress.archivers.ArchiveInputStream), BufferedInputStream (java.io.BufferedInputStream), ZipArchiveEntry (org.apache.commons.compress.archivers.zip.ZipArchiveEntry), Extractor (org.codelibs.fess.crawler.extractor.Extractor), IgnoreCloseInputStream (org.codelibs.fess.crawler.util.IgnoreCloseInputStream)
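A hypothetical caller for this method. The component name "zipExtractor" and the container variable are assumptions (the getComponent lookup mirrors the style FtpClient uses in Example 9); the extractor resolves its MimeTypeHelper and ExtractorFactory from the crawler container, so it needs to come from a configured container rather than a bare new.

import java.io.FileInputStream;
import java.io.InputStream;
import java.util.HashMap;

import org.codelibs.fess.crawler.container.CrawlerContainer;
import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
import org.codelibs.fess.crawler.extractor.impl.ZipExtractor;

public class ZipExtractorUsage {
    public static String extractOrSkip(final CrawlerContainer crawlerContainer, final String path) throws Exception {
        // "zipExtractor" is an assumed component name for illustration.
        final ZipExtractor extractor = crawlerContainer.getComponent("zipExtractor");
        try (InputStream in = new FileInputStream(path)) {
            return extractor.getText(in, new HashMap<>()).getContent();
        } catch (final MaxLengthExceededException e) {
            // Raised as soon as the summed entry sizes pass maxContentSize.
            return "";
        }
    }
}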

Example 9 with MaxLengthExceededException

Use of org.codelibs.fess.crawler.exception.MaxLengthExceededException in project fess-crawler by codelibs.

From the class FtpClient, method updateResponseData.

protected void updateResponseData(final String uri, final boolean includeContent, final ResponseData responseData, FTPClient client, final FtpInfo ftpInfo, FTPFile file) {
    if (file == null) {
        responseData.setHttpStatusCode(Constants.NOT_FOUND_STATUS_CODE);
        responseData.setCharSet(charset);
        responseData.setContentLength(0);
        ftpClientQueue.offer(client);
        return;
    }
    if (file.isSymbolicLink()) {
        final String link = file.getLink();
        String redirect = null;
        if (link == null) {
            responseData.setHttpStatusCode(Constants.BAD_REQUEST_STATUS_CODE);
            responseData.setCharSet(charset);
            responseData.setContentLength(0);
            ftpClientQueue.offer(client);
            return;
        } else if (link.startsWith("/")) {
            redirect = ftpInfo.toUrl(file.getLink());
        } else if (link.startsWith("../")) {
            redirect = ftpInfo.toChildUrl(file.getLink());
        } else {
            redirect = ftpInfo.toChildUrl("../" + file.getLink());
        }
        if (!uri.equals(redirect)) {
            responseData.setHttpStatusCode(Constants.OK_STATUS);
            responseData.setCharSet(charset);
            responseData.setContentLength(0);
            responseData.setRedirectLocation(redirect);
            ftpClientQueue.offer(client);
            return;
        }
    }
    if (file.isFile()) {
        responseData.setHttpStatusCode(Constants.OK_STATUS_CODE);
        responseData.setCharSet(Constants.UTF_8);
        responseData.setLastModified(file.getTimestamp().getTime());
        // check file size
        responseData.setContentLength(file.getSize());
        checkMaxContentLength(responseData);
        if (file.getUser() != null) {
            responseData.addMetaData(FTP_FILE_USER, file.getUser());
        }
        if (file.getGroup() != null) {
            responseData.addMetaData(FTP_FILE_GROUP, file.getGroup());
        }
        if (includeContent) {
            File tempFile = null;
            File outputFile = null;
            try {
                tempFile = File.createTempFile("ftp-", ".tmp");
                try (OutputStream out = new BufferedOutputStream(new FileOutputStream(tempFile))) {
                    if (!client.retrieveFile(ftpInfo.getName(), out)) {
                        throw new CrawlingAccessException("Failed to retrieve: " + ftpInfo.toUrl());
                    }
                }
                final MimeTypeHelper mimeTypeHelper = crawlerContainer.getComponent("mimeTypeHelper");
                try (InputStream is = new FileInputStream(tempFile)) {
                    responseData.setMimeType(mimeTypeHelper.getContentType(is, file.getName()));
                } catch (final Exception e) {
                    responseData.setMimeType(mimeTypeHelper.getContentType(null, file.getName()));
                }
                if (contentLengthHelper != null) {
                    final long maxLength = contentLengthHelper.getMaxLength(responseData.getMimeType());
                    if (responseData.getContentLength() > maxLength) {
                        throw new MaxLengthExceededException("The content length (" + responseData.getContentLength() + " byte) is over " + maxLength + " byte. The url is " + uri);
                    }
                }
                responseData.setCharSet(geCharSet(tempFile));
                if (tempFile.length() < maxCachedContentSize) {
                    try (InputStream contentStream = new BufferedInputStream(new FileInputStream(tempFile))) {
                        responseData.setResponseBody(InputStreamUtil.getBytes(contentStream));
                    }
                } else {
                    outputFile = File.createTempFile("crawler-FtpClient-", ".out");
                    CopyUtil.copy(tempFile, outputFile);
                    responseData.setResponseBody(outputFile, true);
                }
                ftpClientQueue.offer(client);
            } catch (final CrawlingAccessException e) {
                ftpClientQueue.offer(client);
                throw e;
            } catch (final Exception e) {
                logger.warn("I/O Exception.", e);
                disconnectInternalClient(client);
                responseData.setHttpStatusCode(Constants.SERVER_ERROR_STATUS_CODE);
            } finally {
                if (tempFile != null && !tempFile.delete()) {
                    logger.warn("Could not delete " + tempFile.getAbsolutePath());
                }
            }
        }
    } else if (file.isDirectory() || file.isSymbolicLink()) {
        final Set<RequestData> requestDataSet = new HashSet<>();
        if (includeContent) {
            try {
                final FTPFile[] ftpFiles = client.listFiles(ftpInfo.getName(), FTPFileFilters.NON_NULL);
                validateRequest(client);
                for (final FTPFile f : ftpFiles) {
                    final String childUri = ftpInfo.toChildUrl(f.getName());
                    requestDataSet.add(RequestDataBuilder.newRequestData().get().url(childUri).build());
                }
            } catch (final IOException e) {
                disconnectInternalClient(client);
                throw new CrawlingAccessException("Could not access " + uri, e);
            }
        }
        ftpClientQueue.offer(client);
        throw new ChildUrlsException(requestDataSet, this.getClass().getName() + "#getResponseData");
    } else {
        responseData.setHttpStatusCode(Constants.BAD_REQUEST_STATUS_CODE);
        responseData.setCharSet(charset);
        responseData.setContentLength(0);
        ftpClientQueue.offer(client);
    }
}
Also used: ChildUrlsException (org.codelibs.fess.crawler.exception.ChildUrlsException), HashSet (java.util.HashSet), Set (java.util.Set), CrawlingAccessException (org.codelibs.fess.crawler.exception.CrawlingAccessException), MaxLengthExceededException (org.codelibs.fess.crawler.exception.MaxLengthExceededException), MimeTypeHelper (org.codelibs.fess.crawler.helper.MimeTypeHelper), BufferedInputStream (java.io.BufferedInputStream), FileInputStream (java.io.FileInputStream), InputStream (java.io.InputStream), BufferedOutputStream (java.io.BufferedOutputStream), OutputStream (java.io.OutputStream), FileOutputStream (java.io.FileOutputStream), FTPFile (org.apache.commons.net.ftp.FTPFile), IOException (java.io.IOException), CrawlerLoginFailureException (org.codelibs.fess.crawler.exception.CrawlerLoginFailureException), CrawlerSystemException (org.codelibs.fess.crawler.exception.CrawlerSystemException), MalformedURLException (java.net.MalformedURLException), File (java.io.File)
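The maxLength that triggers the MaxLengthExceededException above comes from the injected ContentLengthHelper. A minimal configuration sketch, assuming the helper exposes addMaxLength and setDefaultMaxLength mutators; both names are assumptions for illustration.

import org.codelibs.fess.crawler.helper.ContentLengthHelper;

public class ContentLengthSetup {
    public static ContentLengthHelper create() {
        final ContentLengthHelper helper = new ContentLengthHelper();
        // Assumed setters: cap unlisted MIME types at 10 MiB, HTML at 2 MiB.
        helper.setDefaultMaxLength(10L * 1024L * 1024L);
        helper.addMaxLength("text/html", 2L * 1024L * 1024L);
        return helper;
    }
}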

Example 10 with MaxLengthExceededException

Use of org.codelibs.fess.crawler.exception.MaxLengthExceededException in project fess-crawler by codelibs.

From the class LhaExtractor, method getText.

@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new CrawlerSystemException("The inputstream is null.");
    }
    final MimeTypeHelper mimeTypeHelper = getMimeTypeHelper();
    final ExtractorFactory extractorFactory = getExtractorFactory();
    final StringBuilder buf = new StringBuilder(1000);
    File tempFile = null;
    LhaFile lhaFile = null;
    try {
        tempFile = File.createTempFile("crawler-", ".lzh");
        try (FileOutputStream fos = new FileOutputStream(tempFile)) {
            CopyUtil.copy(in, fos);
        }
        lhaFile = new LhaFile(tempFile);
        @SuppressWarnings("unchecked") final Enumeration<LhaHeader> entries = lhaFile.entries();
        long contentSize = 0;
        while (entries.hasMoreElements()) {
            final LhaHeader head = entries.nextElement();
            contentSize += head.getOriginalSize();
            if (maxContentSize != -1 && contentSize > maxContentSize) {
                throw new MaxLengthExceededException("Extracted size is " + contentSize + " > " + maxContentSize);
            }
            final String filename = head.getPath();
            final String mimeType = mimeTypeHelper.getContentType(null, filename);
            if (mimeType != null) {
                final Extractor extractor = extractorFactory.getExtractor(mimeType);
                if (extractor != null) {
                    InputStream is = null;
                    try {
                        is = lhaFile.getInputStream(head);
                        final Map<String, String> map = new HashMap<>();
                        map.put(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
                        buf.append(extractor.getText(new IgnoreCloseInputStream(is), map).getContent());
                        buf.append('\n');
                    } catch (final Exception e) {
                        if (logger.isDebugEnabled()) {
                            logger.debug("Exception in an internal extractor.", e);
                        }
                    } finally {
                        CloseableUtil.closeQuietly(is);
                    }
                }
            }
        }
    } catch (final MaxLengthExceededException e) {
        throw e;
    } catch (final Exception e) {
        throw new ExtractException("Could not extract a content.", e);
    } finally {
        if (lhaFile != null) {
            try {
                lhaFile.close();
            } catch (final IOException e) {
                // ignore
            }
        }
        if (tempFile != null && !tempFile.delete()) {
            logger.warn("Failed to delete " + tempFile.getAbsolutePath());
        }
    }
    return new ExtractData(buf.toString().trim());
}
Also used: ExtractException (org.codelibs.fess.crawler.exception.ExtractException), ExtractData (org.codelibs.fess.crawler.entity.ExtractData), MaxLengthExceededException (org.codelibs.fess.crawler.exception.MaxLengthExceededException), HashMap (java.util.HashMap), MimeTypeHelper (org.codelibs.fess.crawler.helper.MimeTypeHelper), ExtractorFactory (org.codelibs.fess.crawler.extractor.ExtractorFactory), IgnoreCloseInputStream (org.codelibs.fess.crawler.util.IgnoreCloseInputStream), InputStream (java.io.InputStream), LhaFile (jp.gr.java_conf.dangan.util.lha.LhaFile), IOException (java.io.IOException), CrawlerSystemException (org.codelibs.fess.crawler.exception.CrawlerSystemException), LhaHeader (jp.gr.java_conf.dangan.util.lha.LhaHeader), FileOutputStream (java.io.FileOutputStream), Extractor (org.codelibs.fess.crawler.extractor.Extractor), File (java.io.File)
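Because the Tar, Zip, and Lha extractors all rethrow MaxLengthExceededException before wrapping other failures in ExtractException, a caller can treat the size cap as a distinct, recoverable case. A hypothetical wrapper follows; the skip-on-oversize policy is the illustration, not project behavior.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;

import org.codelibs.fess.crawler.exception.ExtractException;
import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
import org.codelibs.fess.crawler.extractor.Extractor;

public class SafeExtract {
    public static String extractOrSkip(final Extractor extractor, final String path) {
        try (InputStream in = new FileInputStream(path)) {
            return extractor.getText(in, new HashMap<>()).getContent();
        } catch (final MaxLengthExceededException e) {
            // Oversized archive: skip this document rather than fail the crawl.
            return "";
        } catch (final ExtractException e) {
            // Nothing could be extracted at all.
            return "";
        } catch (final IOException e) {
            return "";
        }
    }
}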

Aggregations

MaxLengthExceededException (org.codelibs.fess.crawler.exception.MaxLengthExceededException): 12
InputStream (java.io.InputStream): 8
CrawlerSystemException (org.codelibs.fess.crawler.exception.CrawlerSystemException): 8
File (java.io.File): 5
IOException (java.io.IOException): 5
CrawlingAccessException (org.codelibs.fess.crawler.exception.CrawlingAccessException): 5
MimeTypeHelper (org.codelibs.fess.crawler.helper.MimeTypeHelper): 5
BufferedInputStream (java.io.BufferedInputStream): 4
MalformedURLException (java.net.MalformedURLException): 4
ResponseData (org.codelibs.fess.crawler.entity.ResponseData): 4
FileInputStream (java.io.FileInputStream): 3
Date (java.util.Date): 3
HashMap (java.util.HashMap): 3
HashSet (java.util.HashSet): 3
ExtractException (org.codelibs.fess.crawler.exception.ExtractException): 3
Extractor (org.codelibs.fess.crawler.extractor.Extractor): 3
IgnoreCloseInputStream (org.codelibs.fess.crawler.util.IgnoreCloseInputStream): 3
FileOutputStream (java.io.FileOutputStream): 2
ConnectException (java.net.ConnectException): 2
NoRouteToHostException (java.net.NoRouteToHostException): 2