Search in sources :

Example 6 with FileData

use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.

the class ArchiveBean method doPack.

/**
 * Packs the given files into a zip archive at {@code targetArchiveFile}.
 *
 * <p>An existing target file is deleted first. Delete-type entries and duplicate entry names are
 * skipped. Local files (blank namespace) are read serially straight from the callback's stream
 * unless {@code useLocalFileMutliThread} is set; every other file is copied concurrently into a
 * temporary directory (sibling of the archive, named after the archive's base name) and zipped
 * from there. The temporary directory is removed before this method returns.
 *
 * @param targetArchiveFile the archive file to create
 * @param fileDatas the files to pack
 * @param callback supplies the content stream for each file; entries whose stream is
 *        {@code null} are not written to the archive
 * @return {@code true} if at least one entry was written to the archive
 * @throws ArchiveException if the existing archive cannot be deleted, or if retrieving, copying
 *         or zipping any entry fails (including interruption while waiting for a result)
 */
@SuppressWarnings("resource")
private boolean doPack(final File targetArchiveFile, List<FileData> fileDatas, final ArchiveRetriverCallback<FileData> callback) {
    // If the target archive already exists, delete it before packing.
    if (targetArchiveFile.exists() && !NioUtils.delete(targetArchiveFile, 3)) {
        throw new ArchiveException(String.format("[%s] exist and delete failed", targetArchiveFile.getAbsolutePath()));
    }
    boolean exist = false;
    boolean interrupted = false;
    ZipOutputStream zipOut = null;
    Set<String> entryNames = new HashSet<String>();
    // Completed retrieval tasks; the zip loop below consumes one future per entry name.
    BlockingQueue<Future<ArchiveEntry>> queue = new LinkedBlockingQueue<Future<ArchiveEntry>>();
    ExecutorCompletionService<ArchiveEntry> completionService = new ExecutorCompletionService<ArchiveEntry>(executor, queue);
    final File targetDir = new File(targetArchiveFile.getParentFile(), FilenameUtils.getBaseName(targetArchiveFile.getPath()));
    try {
        // Create the temporary working directory.
        FileUtils.forceMkdir(targetDir);
        zipOut = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(targetArchiveFile)));
        zipOut.setLevel(Deflater.BEST_SPEED);
        // Schedule retrieval of every entry (concurrently where applicable).
        for (final FileData fileData : fileDatas) {
            if (fileData.getEventType().isDelete()) {
                // Deletes are not packed; they are applied directly on the target side.
                continue;
            }
            String namespace = fileData.getNameSpace();
            String path = fileData.getPath();
            boolean isLocal = StringUtils.isBlank(namespace);
            String entryName;
            if (isLocal) {
                entryName = FilenameUtils.getPath(path) + FilenameUtils.getName(path);
            } else {
                entryName = namespace + File.separator + path;
            }
            // Skip files that map to an entry name already scheduled.
            if (!entryNames.add(entryName)) {
                continue;
            }
            final String name = entryName;
            if (isLocal && !useLocalFileMutliThread) {
                // Serial handling: no temporary file, the stream is queued as an already-done future.
                queue.add(new DummyFuture(new ArchiveEntry(name, callback.retrive(fileData))));
            } else {
                completionService.submit(new Callable<ArchiveEntry>() {

                    public ArchiveEntry call() throws Exception {
                        // Retrieval may fail; both streams are always closed.
                        InputStream input = null;
                        OutputStream output = null;
                        try {
                            input = callback.retrive(fileData);
                            if (input instanceof LazyFileInputStream) {
                                // Unwrap to the underlying stream.
                                input = ((LazyFileInputStream) input).getInputSteam();
                            }
                            if (input != null) {
                                File tmp = new File(targetDir, name);
                                // Try to create the parent path.
                                NioUtils.create(tmp.getParentFile(), false, 3);
                                output = new FileOutputStream(tmp);
                                // Copy the content into the temporary file.
                                NioUtils.copy(input, output);
                                return new ArchiveEntry(name, tmp);
                            } else {
                                return new ArchiveEntry(name);
                            }
                        } finally {
                            IOUtils.closeQuietly(input);
                            IOUtils.closeQuietly(output);
                        }
                    }
                });
            }
        }
        // Exactly one future was queued/submitted per distinct entry name.
        for (int i = 0; i < entryNames.size(); i++) {
            ArchiveEntry input = null;
            InputStream stream = null;
            try {
                input = queue.take().get();
                if (input == null) {
                    continue;
                }
                stream = input.getStream();
                if (stream == null) {
                    continue;
                }
                if (stream instanceof LazyFileInputStream) {
                    // Unwrap to the underlying stream.
                    stream = ((LazyFileInputStream) stream).getInputSteam();
                }
                exist = true;
                zipOut.putNextEntry(new ZipEntry(input.getName()));
                // Write the entry content into the zip stream.
                NioUtils.copy(stream, zipOut);
                zipOut.closeEntry();
            } finally {
                IOUtils.closeQuietly(stream);
            }
        }
        if (exist) {
            zipOut.finish();
        }
    } catch (InterruptedException e) {
        // Remember the interruption so the flag can be restored once cleanup is done.
        interrupted = true;
        throw new ArchiveException(e);
    } catch (Exception e) {
        throw new ArchiveException(e);
    } finally {
        IOUtils.closeQuietly(zipOut);
        try {
            // Remove the temporary directory.
            FileUtils.deleteDirectory(targetDir);
        } catch (IOException ignored) {
            // Best-effort cleanup: a leftover temp directory must not mask the real outcome.
        }
        if (interrupted) {
            // Restore the interrupt status that catching InterruptedException cleared.
            Thread.currentThread().interrupt();
        }
    }
    return exist;
}
Also used : FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) ZipOutputStream(de.schlichtherle.util.zip.ZipOutputStream) ZipEntry(de.schlichtherle.util.zip.ZipEntry) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) IOException(java.io.IOException) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) ExecutionException(java.util.concurrent.ExecutionException) ZipOutputStream(de.schlichtherle.util.zip.ZipOutputStream) FileOutputStream(java.io.FileOutputStream) Future(java.util.concurrent.Future) ZipFile(de.schlichtherle.util.zip.ZipFile) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream) FileData(com.alibaba.otter.shared.etl.model.FileData) HashSet(java.util.HashSet)

Example 7 with FileData

use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.

the class FileBatchConflictDetectServiceImpl method onFileConflictDetect.

/**
 * Performs the actual conflict detection for one file batch.
 *
 * <p>Delete-type files are always kept. Every other file is checked on the executor template:
 * the local file at the source path is stat'ed (with up to {@code retry} attempts) and, when it
 * exists, {@code accept} decides whether the source still needs to be synchronized. Files whose
 * current state could not be determined are kept as well.
 *
 * @param event carries the file batch to inspect
 * @return the original batch when it has no files, otherwise a new batch with the same identity
 *         containing only the files that should be synchronized
 */
private FileBatch onFileConflictDetect(FileConflictDetectEvent event) {
    final FileBatch fileBatch = event.getFileBatch();
    if (CollectionUtils.isEmpty(fileBatch.getFiles())) {
        return fileBatch;
    }
    ExecutorTemplate executorTemplate = executorTemplateGetter.get();
    try {
        MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
        executorTemplate.start();
        // Re-apply the pool size configured for this pipeline.
        Pipeline pipeline = configClientService.findPipeline(fileBatch.getIdentity().getPipelineId());
        executorTemplate.adjustPoolSize(pipeline.getParameters().getFileLoadPoolSize());
        // Both lists are filled from worker threads, hence the synchronized wrappers.
        final List<FileData> accepted = Collections.synchronizedList(new ArrayList<FileData>());
        final List<FileData> rejected = Collections.synchronizedList(new ArrayList<FileData>());
        for (final FileData source : fileBatch.getFiles()) {
            EventType eventType = source.getEventType();
            if (eventType.isDelete()) {
                accepted.add(source);
                continue;
            }
            executorTemplate.submit(new Runnable() {

                public void run() {
                    MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
                    // Update-type file: look up the current state of the file at the source path.
                    String namespace = source.getNameSpace();
                    String path = source.getPath();
                    FileData current = null;
                    for (int attempt = 0; attempt < retry; attempt++) {
                        // Any failure resets the state and triggers another attempt.
                        try {
                            if (!StringUtils.isBlank(namespace)) {
                                // remote file
                                throw new RuntimeException(source + " is not support!");
                            }
                            // local file
                            java.io.File localFile = new java.io.File(path);
                            if (localFile.exists()) {
                                long lastModified = localFile.lastModified();
                                long size = localFile.length();
                                current = new FileData();
                                current.setLastModifiedTime(lastModified);
                                current.setSize(size);
                            }
                            // No exception: stop retrying.
                            break;
                        } catch (Exception ex) {
                            current = null;
                        }
                    }
                    // Without a known current state the file is synchronized unconditionally.
                    boolean shouldSync = current == null || accept(current, source);
                    if (shouldSync) {
                        accepted.add(source);
                    } else {
                        rejected.add(source);
                    }
                }
            });
        }
        // Wait until every submitted check has finished.
        executorTemplate.waitForResult();
        if (pipeline.getParameters().getDumpEvent() && logger.isInfoEnabled()) {
            logger.info(FileloadDumper.dumpFilterFileDatas(fileBatch.getIdentity(), fileBatch.getFiles().size(), accepted.size(), rejected));
        }
        // Build the filtered batch.
        FileBatch filteredBatch = new FileBatch();
        filteredBatch.setIdentity(fileBatch.getIdentity());
        filteredBatch.setFiles(accepted);
        return filteredBatch;
    } finally {
        if (executorTemplate != null) {
            executorTemplateGetter.release(executorTemplate);
        }
        MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
    }
}
Also used : FileBatch(com.alibaba.otter.shared.etl.model.FileBatch) ExecutorTemplate(com.alibaba.otter.shared.common.utils.thread.ExecutorTemplate) EventType(com.alibaba.otter.shared.etl.model.EventType) ConflictEventType(com.alibaba.otter.node.etl.conflict.model.ConflictEventType) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) FileData(com.alibaba.otter.shared.etl.model.FileData)

Example 8 with FileData

use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.

the class FileExtractor method doFileExtract.

/**
 * Collects the file descriptors that correspond to the row changes in a batch.
 *
 * <p>DDL events are ignored. For every remaining event, each data-media pair configured for the
 * event's table that has a usable file resolver is asked for the files of that row; the results
 * are merged without duplicates. When the pipeline enables file detection, the collected list is
 * additionally passed to {@code doFileDetectCollector}.
 *
 * @param rowBatch the batch of row changes to inspect
 * @return the de-duplicated files referenced by the batch
 * @throws ExtractException if no data-media pairs are found for a table, or a configured
 *         resolver cannot be instantiated
 */
private List<FileData> doFileExtract(RowBatch rowBatch) {
    List<FileData> fileDatas = new ArrayList<FileData>();
    Pipeline pipeline = getPipeline(rowBatch.getIdentity().getPipelineId());
    List<EventData> eventDatas = rowBatch.getDatas();
    for (EventData eventData : eventDatas) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }
        List<DataMediaPair> dataMediaPairs = ConfigHelper.findDataMediaPairByMediaId(pipeline, eventData.getTableId());
        if (dataMediaPairs == null) {
            throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId() + " dataMediaPair is null,please check");
        }
        for (DataMediaPair dataMediaPair : dataMediaPairs) {
            // Skip pairs without resolver data, without a data type, or whose class path /
            // source text (whichever the type requires) is blank.
            if (dataMediaPair.getResolverData() == null
                || dataMediaPair.getResolverData().getExtensionDataType() == null
                || (dataMediaPair.getResolverData().getExtensionDataType().isClazz()
                    && StringUtils.isBlank(dataMediaPair.getResolverData().getClazzPath()))
                || (dataMediaPair.getResolverData().getExtensionDataType().isSource()
                    && StringUtils.isBlank(dataMediaPair.getResolverData().getSourceText()))) {
                continue;
            }
            // Resolver data is known to be non-null here, so no second null check is needed.
            FileResolver fileResolver = extensionFactory.getExtension(FileResolver.class, dataMediaPair.getResolverData());
            if (fileResolver == null) {
                throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId() + " the fileResolver className  = " + dataMediaPair.getResolverData().getClazzPath() + " is null ,please check the class");
            }
            if (fileResolver instanceof RemoteDirectoryFetcherAware) {
                RemoteDirectoryFetcherAware remoteDirectoryFetcherAware = (RemoteDirectoryFetcherAware) fileResolver;
                remoteDirectoryFetcherAware.setRemoteDirectoryFetcher(arandaRemoteDirectoryFetcher);
            }
            List<FileData> singleRowFileDatas = getSingleRowFileInfos(dataMediaPair.getId(), fileResolver, eventData);
            // De-duplicate while preserving first-seen order.
            for (FileData data : singleRowFileDatas) {
                if (!fileDatas.contains(data)) {
                    fileDatas.add(data);
                }
            }
        }
    }
    // Optionally run the duplicate-file-sync detection configured on the pipeline.
    if (pipeline.getParameters().getFileDetect()) {
        doFileDetectCollector(pipeline, fileDatas);
    }
    return fileDatas;
}
Also used : ExtractException(com.alibaba.otter.node.etl.extract.exceptions.ExtractException) RemoteDirectoryFetcherAware(com.alibaba.otter.shared.etl.extend.fileresolver.support.RemoteDirectoryFetcherAware) DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) ArrayList(java.util.ArrayList) FileResolver(com.alibaba.otter.shared.etl.extend.fileresolver.FileResolver) FileData(com.alibaba.otter.shared.etl.model.FileData) EventData(com.alibaba.otter.shared.etl.model.EventData) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Example 9 with FileData

use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.

the class FileExtractor method getSingleRowFileInfos.

/**
 * Resolves the files attached to a single row change.
 *
 * <p>The row's key columns and updated columns are handed to the resolver as a map from
 * upper-cased column name to column value (updated columns win on a name clash). Each returned
 * {@code FileInfo} is converted into a {@code FileData} tagged with the pair id, table id and
 * event type.
 *
 * @param pairId id of the data-media pair whose resolver is used
 * @param fileResolver resolver that maps a row to its files
 * @param eventData the row change
 * @return the files for this row; an empty list for deletes the resolver does not require, or
 *         when the resolver returns nothing
 */
private List<FileData> getSingleRowFileInfos(long pairId, FileResolver fileResolver, EventData eventData) {
    List<FileData> fileDatas = new ArrayList<FileData>();
    if (eventData.getEventType() == EventType.DELETE && !fileResolver.isDeleteRequired()) {
        return fileDatas;
    }
    // Upper-case with a fixed locale so keys do not depend on the JVM default locale
    // (e.g. "id" must become "ID" even under a Turkish default locale).
    Map<String, String> rowMap = new HashMap<String, String>();
    for (EventColumn keyColumn : eventData.getKeys()) {
        rowMap.put(keyColumn.getColumnName().toUpperCase(java.util.Locale.ENGLISH), keyColumn.getColumnValue());
    }
    for (EventColumn updatedColumn : eventData.getUpdatedColumns()) {
        rowMap.put(updatedColumn.getColumnName().toUpperCase(java.util.Locale.ENGLISH), updatedColumn.getColumnValue());
    }
    FileInfo[] fileInfos = fileResolver.getFileInfo(rowMap);
    if (fileInfos == null || fileInfos.length == 0) {
        return fileDatas;
    }
    for (FileInfo fileInfo : fileInfos) {
        FileData fileData = new FileData();
        // Record the id of the mapping rule that produced this file.
        fileData.setPairId(pairId);
        fileData.setTableId(eventData.getTableId());
        fileData.setEventType(eventData.getEventType());
        fileData.setLastModifiedTime(fileInfo.getLastModifiedTime());
        fileData.setNameSpace(fileInfo.getNamespace());
        fileData.setPath(fileInfo.getPath());
        fileData.setSize(fileInfo.getSize());
        fileDatas.add(fileData);
    }
    return fileDatas;
}
Also used : FileInfo(com.alibaba.otter.shared.etl.extend.fileresolver.FileInfo) HashMap(java.util.HashMap) EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) ArrayList(java.util.ArrayList) FileData(com.alibaba.otter.shared.etl.model.FileData)

Example 10 with FileData

use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.

the class FileLoadAction method moveFiles.

/**
 * Loads the given files concurrently using a fail-fast strategy.
 *
 * <p>One {@code FileLoadWorker} is submitted per file. A worker reports failure either by
 * returning a non-null exception or by throwing. As soon as any failure is observed the
 * remaining unfinished tasks are cancelled and the failure is rethrown.
 *
 * @param context the load context passed to every worker
 * @param fileDatas the files to load
 * @param rootDir root directory passed to every worker
 * @throws LoadException if any worker fails, or if waiting for results is interrupted; a
 *         non-{@code LoadException} cause is wrapped
 */
private void moveFiles(FileLoadContext context, List<FileData> fileDatas, File rootDir) {
    Exception exception = null;
    adjustPoolSize(context);
    ExecutorCompletionService<Exception> executorComplition = new ExecutorCompletionService<Exception>(executor);
    List<Future<Exception>> results = new ArrayList<Future<Exception>>();
    for (FileData fileData : fileDatas) {
        Future<Exception> future = executorComplition.submit(new FileLoadWorker(context, rootDir, fileData));
        results.add(future);
        // Fail fast: a task that is already done (e.g. executed by the submitting thread when the
        // pool uses CallerRunsPolicy) is checked immediately.
        if (future.isDone()) {
            try {
                exception = future.get();
            } catch (Exception e) {
                exception = e;
            }
            if (exception != null) {
                for (Future<Exception> result : results) {
                    if (!result.isDone() && !result.isCancelled()) {
                        result.cancel(true);
                    }
                }
                throw exception instanceof LoadException ? (LoadException) exception : new LoadException(exception);
            }
        }
    }
    int resultSize = results.size();
    int cursor = 0;
    while (cursor < resultSize) {
        try {
            Future<Exception> result = executorComplition.take();
            exception = result.get();
        } catch (InterruptedException e) {
            // Keep the interrupt visible to callers after it is wrapped below.
            Thread.currentThread().interrupt();
            exception = e;
            break;
        } catch (Exception e) {
            exception = e;
            break;
        }
        if (exception != null) {
            // A worker returned its failure; stop here so a later successful result cannot
            // overwrite it and so the remaining tasks get cancelled.
            break;
        }
        cursor++;
    }
    if (cursor != resultSize) {
        // A task failed: cancel everything that is still running.
        for (Future<Exception> future : results) {
            if (!future.isDone() && !future.isCancelled()) {
                future.cancel(true);
            }
        }
    }
    if (exception != null) {
        throw exception instanceof LoadException ? (LoadException) exception : new LoadException(exception);
    }
}
Also used : ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) FileData(com.alibaba.otter.shared.etl.model.FileData) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException) IOException(java.io.IOException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException)

Aggregations

FileData (com.alibaba.otter.shared.etl.model.FileData)24 File (java.io.File)15 ArrayList (java.util.ArrayList)10 Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)9 FileBatch (com.alibaba.otter.shared.etl.model.FileBatch)8 IOException (java.io.IOException)8 InputStream (java.io.InputStream)6 Identity (com.alibaba.otter.shared.etl.model.Identity)5 DataMediaPair (com.alibaba.otter.shared.common.model.config.data.DataMediaPair)4 EventColumn (com.alibaba.otter.shared.etl.model.EventColumn)4 EventData (com.alibaba.otter.shared.etl.model.EventData)4 RowBatch (com.alibaba.otter.shared.etl.model.RowBatch)4 FileInputStream (java.io.FileInputStream)4 BaseOtterTest (com.alibaba.otter.node.etl.BaseOtterTest)3 PipeException (com.alibaba.otter.node.etl.common.pipe.exception.PipeException)3 ArchiveBean (com.alibaba.otter.node.etl.common.pipe.impl.http.archive.ArchiveBean)3 LoadException (com.alibaba.otter.node.etl.load.exception.LoadException)3 BatchObject (com.alibaba.otter.shared.etl.model.BatchObject)3 FileNotFoundException (java.io.FileNotFoundException)3 FileOutputStream (java.io.FileOutputStream)3