Search in sources :

Example 46 with ArchiveInputStream

use of org.apache.commons.compress.archivers.ArchiveInputStream in project BWAPI4J by OpenBW.

the class DummyDataUtils method readIntegerArrayFromArchiveFile.

/**
 * Reads all integers stored in one entry of a bzip2-compressed tar archive.
 *
 * <p>The archive is opened through {@code createInputStreamForDummyDataSet}, the entry is
 * selected by {@code getArchiveEntry} using the short hash derived from {@code mapHash}, and
 * every line of that entry is split with {@code regex}; each token is trimmed and parsed as an
 * {@code int}.
 *
 * @param archiveFilename name of the archive handed to {@code createInputStreamForDummyDataSet}
 * @param mapHash         map hash from which the short hash used for the entry lookup is derived
 * @param regex           delimiter expression passed to {@link String#split(String)} for each line
 * @return the parsed values, in the order they appear in the entry
 * @throws IOException           if the archive cannot be opened or decompressed
 * @throws NumberFormatException if a token is not a valid integer
 */
public static int[] readIntegerArrayFromArchiveFile(final String archiveFilename, final String mapHash, final String regex) throws IOException {
    // The raw stream is the first managed resource so it is released even when one of the
    // wrapping constructors (bzip2 header check, tar setup) throws.
    try (final InputStream inputStream = createInputStreamForDummyDataSet(archiveFilename);
        final ArchiveInputStream tarIn = new TarArchiveInputStream(new BZip2CompressorInputStream(inputStream));
        // Explicit charset: decoding must not depend on the platform default.
        final BufferedReader buffer = new BufferedReader(new InputStreamReader(tarIn, java.nio.charset.StandardCharsets.UTF_8))) {
        final String mapShortHash = determineMapShortHash(mapHash);
        // The reader above is lazy, so nothing is consumed from tarIn until the lookup below
        // has selected the entry to read.
        final ArchiveEntry nextEntry = getArchiveEntry(tarIn, mapShortHash);
        Assert.assertNotNull(nextEntry);
        final int[] read = buffer.lines()
                .flatMap(line -> Stream.of(line.split(regex)))
                .map(String::trim)
                .mapToInt(Integer::parseInt)
                .toArray();
        logger.debug("Read {} values from {}", read.length, archiveFilename);
        return read;
    }
}
Also used : TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ArchiveEntry(org.apache.commons.compress.archivers.ArchiveEntry) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) List(java.util.List) Stream(java.util.stream.Stream) Logger(org.apache.logging.log4j.Logger) Paths(java.nio.file.Paths) BufferedReader(java.io.BufferedReader) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) Assert(org.junit.Assert) LogManager(org.apache.logging.log4j.LogManager) InputStream(java.io.InputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) InputStreamReader(java.io.InputStreamReader) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) InputStream(java.io.InputStream) BufferedReader(java.io.BufferedReader) ArchiveEntry(org.apache.commons.compress.archivers.ArchiveEntry) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) Stream(java.util.stream.Stream) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) InputStream(java.io.InputStream)

Example 47 with ArchiveInputStream

use of org.apache.commons.compress.archivers.ArchiveInputStream in project agile-service by open-hand.

the class StaticFileCompressServiceImpl method unCompressedByApache.

/**
 * Extracts an archive with the Apache Commons Compress stream API, uploads every usable entry
 * and records one {@code StaticFileLineDTO} per uploaded file.
 *
 * <p>The flow is the same for every format this API can read; the concrete archive stream is
 * chosen from the file suffix by {@code getArchiveInputStream}.
 *
 * @param staticFileCompress            extraction parameters (source stream, size, encoding, prefix path, history)
 * @param projectId                     project id
 * @param organizationId                organization id
 * @param suffix                        file suffix used to select the archive stream
 * @param staticFileCompressHistoryList extraction operation history records
 * @throws IOException on I/O failure while reading the archive
 */
private void unCompressedByApache(StaticFileCompressDTO staticFileCompress, Long projectId, Long organizationId, String suffix, List<StaticFileOperationHistoryDTO> staticFileCompressHistoryList) throws IOException {
    Long userId = DetailsHelper.getUserDetails().getUserId();
    int size = staticFileCompress.getSize();
    double process = 0.0;
    List<StaticFileLineDTO> lineList = new ArrayList<>();
    List<String> urls = new ArrayList<>();
    String prefixPath = staticFileCompress.getPrefixPath();
    try (BufferedInputStream bufferedInputStream = new BufferedInputStream(staticFileCompress.getIn());
        ArchiveInputStream in = getArchiveInputStream(bufferedInputStream, suffix, staticFileCompress.getEncode())) {
        ArchiveEntry entry;
        while (Objects.nonNull(entry = in.getNextEntry())) {
            // NOTE(review): progress is derived from available(), which is only an estimate of
            // the bytes left in the source stream - confirm this is accurate enough.
            int availableSize = bufferedInputStream.available();
            // Directories and entries whose data cannot be read are not uploaded.
            if (!entry.isDirectory() && in.canReadEntryData(entry)) {
                String entryName = entry.getName();
                // Skip redundant files by name first, so their content is never buffered in memory.
                if (entryName.contains(MACOSX) || entryName.contains(DS_STORE)) {
                    // As before, a skipped entry does not trigger a progress update.
                    continue;
                }
                byte[] bytes = inputToByte(in);
                if (bytes.length <= 0) {
                    // Empty files are skipped as well.
                    continue;
                }
                // Upload the file.
                String url = fileClient.uploadFile(organizationId, FileUploadBucket.AGILE_BUCKET.bucket(), null, getEntryFileName(entryName), bytes);
                urls.add(url);
                String relativePath = filePathService.generateRelativePath(url);
                StaticFileLineDTO staticFileLine = new StaticFileLineDTO(projectId, organizationId, staticFileCompress.getId(), relativePath, dealRelativePath(entryName, prefixPath));
                lineList.add(staticFileLine);
            }
            process = updateProcess(staticFileCompressHistoryList, staticFileCompress.getStaticFileCompressHistory(), size, (size - availableSize), process, staticFileCompress.getIssueId());
        }
        // Fetch the metadata of the uploaded files, keyed by relative path.
        List<FileDTO> files = fileClient.getFiles(organizationId, FileUploadBucket.AGILE_BUCKET.bucket(), urls);
        Map<String, FileDTO> fileMap = files.stream().collect(Collectors.toMap(file -> filePathService.generateRelativePath(file.getFileUrl()), file -> file));
        lineList.forEach(line -> {
            // Set the file type of the line and fill in its audit information.
            line.setId(snowflakeHelper.next());
            line.setCreatedBy(userId);
            line.setLastUpdatedBy(userId);
            FileDTO uploaded = fileMap.get(line.getUrl());
            line.setFileType(uploaded != null ? uploaded.getFileType() : null);
        });
        staticFileLineMapper.batchInsert(lineList);
        updateHistoryStatus(staticFileCompress.getStaticFileCompressHistory(), SUCCESS);
        staticFileCompress.setStatus(SUCCESS);
        sendProcess(staticFileCompressHistoryList, staticFileCompress.getStaticFileCompressHistory().getUserId(), projectId, staticFileCompress.getIssueId());
    }
}
Also used : StaticFileOperationHistoryDTO(io.choerodon.agile.infra.dto.StaticFileOperationHistoryDTO) StringUtils(org.apache.commons.lang.StringUtils) StaticFileLineDTO(io.choerodon.agile.infra.dto.StaticFileLineDTO) FileClient(org.hzero.boot.file.FileClient) BufferedInputStream(java.io.BufferedInputStream) URL(java.net.URL) StaticFileHeaderDTO(io.choerodon.agile.infra.dto.StaticFileHeaderDTO) LoggerFactory(org.slf4j.LoggerFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) io.choerodon.agile.infra.mapper(io.choerodon.agile.infra.mapper) BigDecimal(java.math.BigDecimal) Map(java.util.Map) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) RoundingMode(java.math.RoundingMode) Archive(com.github.junrar.Archive) SnowflakeHelper(io.choerodon.mybatis.helper.snowflake.SnowflakeHelper) DetailsHelper(io.choerodon.core.oauth.DetailsHelper) FilePathService(io.choerodon.agile.app.service.FilePathService) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) FileHeader(com.github.junrar.rarfile.FileHeader) List(java.util.List) Lazy(org.springframework.context.annotation.Lazy) Async(org.springframework.scheduling.annotation.Async) TypeToken(org.modelmapper.TypeToken) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ArchiveEntry(org.apache.commons.compress.archivers.ArchiveEntry) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ArrayList(java.util.ArrayList) StaticFileCompressDTO(io.choerodon.agile.infra.dto.StaticFileCompressDTO) ModelMapper(org.modelmapper.ModelMapper) RarException(com.github.junrar.exception.RarException) FileUploadBucket(io.choerodon.agile.infra.enums.FileUploadBucket) Service(org.springframework.stereotype.Service) CommonException(io.choerodon.core.exception.CommonException) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) 
StaticFileOperationHistorySocketVO(io.choerodon.agile.api.vo.StaticFileOperationHistorySocketVO) Logger(org.slf4j.Logger) MalformedURLException(java.net.MalformedURLException) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) StaticFileCompressService(io.choerodon.agile.app.service.StaticFileCompressService) FileDTO(org.hzero.boot.file.dto.FileDTO) MultipartFile(org.springframework.web.multipart.MultipartFile) MessageClientC7n(io.choerodon.core.client.MessageClientC7n) Transactional(org.springframework.transaction.annotation.Transactional) InputStream(java.io.InputStream) StaticFileLineDTO(io.choerodon.agile.infra.dto.StaticFileLineDTO) FileDTO(org.hzero.boot.file.dto.FileDTO) ArrayList(java.util.ArrayList) ArchiveEntry(org.apache.commons.compress.archivers.ArchiveEntry) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) BufferedInputStream(java.io.BufferedInputStream) StaticFileHeaderDTO(io.choerodon.agile.infra.dto.StaticFileHeaderDTO)

Example 48 with ArchiveInputStream

use of org.apache.commons.compress.archivers.ArchiveInputStream in project fess-crawler by codelibs.

the class TarExtractor method getTextInternal.

/**
 * Collects the text of every entry in a tar stream for which an extractor is available.
 *
 * <p>Entry sizes are summed as the archive is walked; once the sum exceeds
 * {@code maxContentSize} (unless that limit is {@code -1}) a
 * {@code MaxLengthExceededException} is thrown. A failure inside an entry's extractor is only
 * logged at debug level and the entry is skipped. Any other failure is rethrown as an
 * {@code ExtractException} only when no text has been collected yet; otherwise the text
 * gathered so far is returned.
 *
 * @param in               stream containing the tar archive
 * @param mimeTypeHelper   resolves a content type from an entry name
 * @param extractorFactory supplies the extractor for a content type
 * @return the collected text, one line break after each entry, trimmed
 */
protected String getTextInternal(final InputStream in, final MimeTypeHelper mimeTypeHelper, final ExtractorFactory extractorFactory) {
    final StringBuilder text = new StringBuilder(1000);
    ArchiveInputStream tarStream = null;
    try {
        tarStream = archiveStreamFactory.createArchiveInputStream("tar", in);
        long totalSize = 0;
        while (true) {
            final TarArchiveEntry tarEntry = (TarArchiveEntry) tarStream.getNextEntry();
            if (tarEntry == null) {
                break;
            }
            totalSize += tarEntry.getSize();
            if (maxContentSize != -1 && totalSize > maxContentSize) {
                throw new MaxLengthExceededException("Extracted size is " + totalSize + " > " + maxContentSize);
            }
            final String entryName = tarEntry.getName();
            final String contentType = mimeTypeHelper.getContentType(null, entryName);
            if (contentType == null) {
                continue;
            }
            final Extractor entryExtractor = extractorFactory.getExtractor(contentType);
            if (entryExtractor == null) {
                continue;
            }
            try {
                final Map<String, String> entryParams = new HashMap<>();
                entryParams.put(ExtractData.RESOURCE_NAME_KEY, entryName);
                // The wrapper keeps the extractor from closing the archive stream.
                final InputStream entryStream = new IgnoreCloseInputStream(tarStream);
                text.append(entryExtractor.getText(entryStream, entryParams).getContent());
                text.append('\n');
            } catch (final Exception e) {
                // Best effort: a broken entry must not abort the whole archive.
                if (logger.isDebugEnabled()) {
                    logger.debug("Exception in an internal extractor.", e);
                }
            }
        }
    } catch (final MaxLengthExceededException e) {
        // The size limit is always reported to the caller.
        throw e;
    } catch (final Exception e) {
        final boolean nothingExtracted = text.length() == 0;
        if (nothingExtracted) {
            throw new ExtractException("Could not extract a content.", e);
        }
    } finally {
        CloseableUtil.closeQuietly(tarStream);
    }
    return text.toString().trim();
}
Also used : ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) ExtractException(org.codelibs.fess.crawler.exception.ExtractException) MaxLengthExceededException(org.codelibs.fess.crawler.exception.MaxLengthExceededException) HashMap(java.util.HashMap) Extractor(org.codelibs.fess.crawler.extractor.Extractor) IgnoreCloseInputStream(org.codelibs.fess.crawler.util.IgnoreCloseInputStream) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) ExtractException(org.codelibs.fess.crawler.exception.ExtractException) MaxLengthExceededException(org.codelibs.fess.crawler.exception.MaxLengthExceededException) CrawlerSystemException(org.codelibs.fess.crawler.exception.CrawlerSystemException)

Example 49 with ArchiveInputStream

use of org.apache.commons.compress.archivers.ArchiveInputStream in project fess-crawler by codelibs.

the class ZipExtractor method getText.

/**
 * Collects the text of every archive entry for which an extractor is available.
 *
 * <p>Known entry sizes are summed as the archive is walked; once the sum exceeds
 * {@code maxContentSize} (unless that limit is {@code -1}) a
 * {@code MaxLengthExceededException} is thrown. A failure inside an entry's extractor is only
 * logged at debug level and the entry is skipped. Any other failure is rethrown as an
 * {@code ExtractException} only when no text has been collected yet; otherwise the text
 * gathered so far is returned.
 *
 * @param in     stream containing the archive; must not be {@code null}
 * @param params not read by this method
 * @return the collected text, one line break after each entry, trimmed
 * @throws CrawlerSystemException     if {@code in} is {@code null}
 * @throws MaxLengthExceededException if the summed entry sizes exceed the configured limit
 * @throws ExtractException           if reading fails before any text was collected
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new CrawlerSystemException("The inputstream is null.");
    }
    final MimeTypeHelper mimeTypeHelper = getMimeTypeHelper();
    final ExtractorFactory extractorFactory = getExtractorFactory();
    final StringBuilder buf = new StringBuilder(1000);
    // Format auto-detection needs mark/reset, hence the BufferedInputStream fallback.
    try (final ArchiveInputStream ais = archiveStreamFactory.createArchiveInputStream(in.markSupported() ? in : new BufferedInputStream(in))) {
        ZipArchiveEntry entry = null;
        long contentSize = 0;
        while ((entry = (ZipArchiveEntry) ais.getNextEntry()) != null) {
            final long entrySize = entry.getSize();
            // getSize() returns -1 when the size is not known; adding that would lower the
            // running total and weaken the limit check below.
            if (entrySize > 0) {
                contentSize += entrySize;
            }
            if (maxContentSize != -1 && contentSize > maxContentSize) {
                throw new MaxLengthExceededException("Extracted size is " + contentSize + " > " + maxContentSize);
            }
            final String filename = entry.getName();
            final String mimeType = mimeTypeHelper.getContentType(null, filename);
            if (mimeType != null) {
                final Extractor extractor = extractorFactory.getExtractor(mimeType);
                if (extractor != null) {
                    try {
                        final Map<String, String> map = new HashMap<>();
                        map.put(ExtractData.RESOURCE_NAME_KEY, filename);
                        // The wrapper keeps the extractor from closing the archive stream.
                        buf.append(extractor.getText(new IgnoreCloseInputStream(ais), map).getContent());
                        buf.append('\n');
                    } catch (final Exception e) {
                        // Best effort: a broken entry must not abort the whole archive.
                        if (logger.isDebugEnabled()) {
                            logger.debug("Exception in an internal extractor.", e);
                        }
                    }
                }
            }
        }
    } catch (final MaxLengthExceededException e) {
        // The size limit is always reported to the caller.
        throw e;
    } catch (final Exception e) {
        // Partial results are preferred over an error once some text has been collected.
        if (buf.length() == 0) {
            throw new ExtractException("Could not extract a content.", e);
        }
    }
    return new ExtractData(buf.toString().trim());
}
Also used : ExtractException(org.codelibs.fess.crawler.exception.ExtractException) ExtractData(org.codelibs.fess.crawler.entity.ExtractData) MaxLengthExceededException(org.codelibs.fess.crawler.exception.MaxLengthExceededException) HashMap(java.util.HashMap) MimeTypeHelper(org.codelibs.fess.crawler.helper.MimeTypeHelper) ExtractorFactory(org.codelibs.fess.crawler.extractor.ExtractorFactory) ExtractException(org.codelibs.fess.crawler.exception.ExtractException) MaxLengthExceededException(org.codelibs.fess.crawler.exception.MaxLengthExceededException) CrawlerSystemException(org.codelibs.fess.crawler.exception.CrawlerSystemException) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) BufferedInputStream(java.io.BufferedInputStream) CrawlerSystemException(org.codelibs.fess.crawler.exception.CrawlerSystemException) ZipArchiveEntry(org.apache.commons.compress.archivers.zip.ZipArchiveEntry) Extractor(org.codelibs.fess.crawler.extractor.Extractor) IgnoreCloseInputStream(org.codelibs.fess.crawler.util.IgnoreCloseInputStream)

Example 50 with ArchiveInputStream

use of org.apache.commons.compress.archivers.ArchiveInputStream in project selenium_java by sergueik.

the class FileExtractor method untarFolder.

/**
 * Unpacks a tar stream into {@code destinationFolder} and returns the path of the extracted
 * file whose name ends with one of {@code possibleFilenames}.
 *
 * <p>Entries for which {@code handlePathCreation} yields an empty name are not copied. If
 * several entries match, the last one encountered wins. The supplied stream is closed before
 * this method returns, whether extraction succeeds or fails.
 *
 * @param compressedFileInputStream stream positioned at the start of the tar data
 * @param destinationFolder         folder the entries are written to
 * @param possibleFilenames         file name suffixes identifying the wanted executable
 * @return the path of the matching extracted file, or an empty string if none matched
 * @throws IOException if reading the archive or writing a file fails
 */
private String untarFolder(InputStream compressedFileInputStream, String destinationFolder, List<String> possibleFilenames) throws IOException {
    String executablePath = "";
    // Closing the archive stream also closes the wrapped compressedFileInputStream, so a
    // single managed resource replaces the manual close calls.
    try (ArchiveInputStream archiveInputStream = new TarArchiveInputStream(compressedFileInputStream)) {
        // Shielded view handed to copyFileToDisk - presumably so that helper cannot close
        // the archive while entries are still being iterated.
        CloseShieldInputStream notClosableArchiveInputStream = new CloseShieldInputStream(archiveInputStream);
        ArchiveEntry currentFile;
        while ((currentFile = archiveInputStream.getNextEntry()) != null) {
            String name = this.handlePathCreation(currentFile.getName(), destinationFolder);
            if (name.length() > 0) {
                String extractedFile = copyFileToDisk(notClosableArchiveInputStream, destinationFolder, name);
                for (String expectedFileName : possibleFilenames) {
                    if (extractedFile.endsWith(expectedFileName)) {
                        executablePath = extractedFile;
                        // Further matches would assign the same value.
                        break;
                    }
                }
            }
        }
    }
    return executablePath;
}
Also used : TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) ZipArchiveEntry(org.apache.commons.compress.archivers.zip.ZipArchiveEntry) ArchiveEntry(org.apache.commons.compress.archivers.ArchiveEntry) CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream)

Aggregations

ArchiveInputStream (org.apache.commons.compress.archivers.ArchiveInputStream)117 InputStream (java.io.InputStream)62 ArchiveEntry (org.apache.commons.compress.archivers.ArchiveEntry)57 BufferedInputStream (java.io.BufferedInputStream)52 TarArchiveInputStream (org.apache.commons.compress.archivers.tar.TarArchiveInputStream)50 File (java.io.File)46 ZipArchiveEntry (org.apache.commons.compress.archivers.zip.ZipArchiveEntry)32 ArchiveOutputStream (org.apache.commons.compress.archivers.ArchiveOutputStream)29 Test (org.junit.jupiter.api.Test)28 TarArchiveEntry (org.apache.commons.compress.archivers.tar.TarArchiveEntry)27 ArchiveStreamFactory (org.apache.commons.compress.archivers.ArchiveStreamFactory)26 ZipFile (org.apache.commons.compress.archivers.zip.ZipFile)26 IOException (java.io.IOException)25 ZipArchiveInputStream (org.apache.commons.compress.archivers.zip.ZipArchiveInputStream)25 ArchiveException (org.apache.commons.compress.archivers.ArchiveException)16 GzipCompressorInputStream (org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream)16 ArrayList (java.util.ArrayList)14 JarArchiveEntry (org.apache.commons.compress.archivers.jar.JarArchiveEntry)14 FileInputStream (java.io.FileInputStream)13 Path (java.nio.file.Path)13