Search in sources :

Example 11 with FileData

use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.

the class FileLoadAction method load.

/**
 * Loads the files of a batch, processing them bucket by bucket in the order returned by
 * {@code WeightBuckets.weights()}.
 *
 * <p>For every weight the method waits on the controller, then either dry-runs or moves the
 * files of that bucket, and finally signals the controller that the weight is done. The
 * dry-run flag is read once from the pipeline parameters and used for the whole call.
 *
 * <p>If the thread is interrupted while loading, the interrupt flag is restored and the
 * context is returned as populated up to that point. Any other failure is rethrown wrapped
 * in a {@link LoadException}. {@code rootDir} is deleted in every case.
 *
 * @param fileBatch the batch whose files should be loaded
 * @param rootDir temporary directory holding the files; must exist, always deleted on exit
 * @param controller coordinates the order in which weights are processed
 * @return the load context; per the original note it holds the successfully processed records
 * @throws LoadException if {@code rootDir} does not exist or loading fails
 */
public FileLoadContext load(FileBatch fileBatch, File rootDir, WeightController controller) {
    if (!rootDir.exists()) {
        throw new LoadException(rootDir.getPath() + " is not exist");
    }
    FileLoadContext context = buildContext(fileBatch.getIdentity());
    context.setPrepareDatas(fileBatch.getFiles());
    boolean isDryRun = context.getPipeline().getParameters().isDryRun();
    try {
        // Group the files by weight and announce the weights to the controller.
        WeightBuckets<FileData> buckets = buildWeightBuckets(fileBatch.getIdentity(), fileBatch.getFiles());
        List<Long> weights = buckets.weights();
        controller.start(weights);
        // Process the buckets one weight at a time.
        for (Long weight : weights) {
            controller.await(weight.intValue());
            // Parameterized logging checks the level itself, so no explicit guard is needed.
            logger.debug("##start load for weight:{}\n", weight);
            // Handle all files that share this weight.
            List<FileData> items = buckets.getItems(weight);
            if (isDryRun) {
                dryRun(context, items, rootDir);
            } else {
                moveFiles(context, items, rootDir);
            }
            controller.single(weight.intValue());
            logger.debug("##end load for weight:{}\n", weight);
        }
        dumpLoadResult("successed", fileBatch, context, isDryRun);
    } catch (InterruptedException e) {
        // Restore the interrupt flag for callers; the partially filled context is returned.
        Thread.currentThread().interrupt();
        dumpLoadResult("error", fileBatch, context, isDryRun);
    } catch (Exception e) {
        dumpLoadResult("error", fileBatch, context, isDryRun);
        throw new LoadException(e);
    } finally {
        // Remove the temporary directory whether or not the move succeeded.
        NioUtils.delete(rootDir, 3);
    }
    return context;
}

/**
 * Writes the load context to the per-pipeline load log when dumping is enabled or the load
 * is a dry run. The MDC key is always removed again, even if building the dump fails.
 */
private void dumpLoadResult(String status, FileBatch fileBatch, FileLoadContext context, boolean isDryRun) {
    if (!(dump || isDryRun)) {
        return;
    }
    MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
    try {
        logger.info(FileloadDumper.dumpContext(status, context));
    } finally {
        MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
    }
}
Also used : FileLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.FileLoadContext) FileData(com.alibaba.otter.shared.etl.model.FileData) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException) IOException(java.io.IOException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException)

Example 12 with FileData

use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.

the class FileLoadAction method buildWeightBuckets.

/**
 * Groups the given file records into buckets keyed by push weight.
 *
 * <p>For each record, the data-media pair is looked up by the record's pair id in the
 * pipeline resolved from {@code identity}; that pair's push weight selects the bucket.
 *
 * @param identity identity used to resolve the pipeline
 * @param datas file records to group
 * @return the buckets containing every record of {@code datas}
 */
private WeightBuckets<FileData> buildWeightBuckets(Identity identity, List<FileData> datas) {
    final WeightBuckets<FileData> grouped = new WeightBuckets<FileData>();
    for (FileData fileData : datas) {
        // Resolve the pair for this record and file the record under the pair's push weight.
        grouped.addItem(
            ConfigHelper.findDataMediaPair(getPipeline(identity), fileData.getPairId()).getPushWeight(),
            fileData);
    }
    return grouped;
}
Also used : DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) WeightBuckets(com.alibaba.otter.node.etl.load.loader.weight.WeightBuckets) FileData(com.alibaba.otter.shared.etl.model.FileData)

Example 13 with FileData

use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.

the class FileExtractor method extract.

/**
 * Extracts the file records referenced by the row batch of {@code dbBatch} and attaches them
 * to it as a new {@link FileBatch}.
 *
 * <p>The file batch receives its own {@link Identity} whose channel, pipeline and process ids
 * are copied from the row batch identity.
 *
 * @param dbBatch batch whose row data is scanned and which receives the resulting file batch
 * @throws ExtractException declared by the signature; raised by the extraction step
 */
public void extract(DbBatch dbBatch) throws ExtractException {
    final List<FileData> extracted = doFileExtract(dbBatch.getRowBatch());

    // Copy the three ids of the row batch identity into a fresh identity object.
    final Identity copy = new Identity();
    copy.setChannelId(dbBatch.getRowBatch().getIdentity().getChannelId());
    copy.setPipelineId(dbBatch.getRowBatch().getIdentity().getPipelineId());
    copy.setProcessId(dbBatch.getRowBatch().getIdentity().getProcessId());

    final FileBatch result = new FileBatch();
    result.setFiles(extracted);
    result.setIdentity(copy);
    dbBatch.setFileBatch(result);
}
Also used : FileBatch(com.alibaba.otter.shared.etl.model.FileBatch) Identity(com.alibaba.otter.shared.etl.model.Identity) FileData(com.alibaba.otter.shared.etl.model.FileData)

Example 14 with FileData

use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.

the class FileExtractor method doFileDetectCollector.

/**
 * Fills in last-modified time and size for every file record, using an executor template
 * obtained from {@code executorTemplateGetter} to process the records as submitted tasks.
 *
 * <p>Each task makes up to {@code retry} attempts. Records with a non-empty namespace are not
 * supported and fail every attempt. After a failed attempt both fields are set to
 * {@code Long.MIN_VALUE}; once all attempts are used up a warning is logged with the last
 * failure. The method waits for all tasks and always releases the executor template.
 *
 * @param pipeline supplies the pool size and the id used in log messages
 * @param fileDatas records to update in place
 */
private void doFileDetectCollector(Pipeline pipeline, List<FileData> fileDatas) {
    ExecutorTemplate template = executorTemplateGetter.get();
    try {
        template.start();
        // Re-apply the pool size configured for this pipeline.
        template.adjustPoolSize(pipeline.getParameters().getFileLoadPoolSize());
        for (final FileData fileData : fileDatas) {
            // Hand each record to the executor template as its own task.
            template.submit(new Runnable() {

                public void run() {
                    final boolean remote = StringUtils.isNotEmpty(fileData.getNameSpace());
                    Throwable lastFailure = null;
                    for (int attempt = 0; attempt < retry; attempt++) {
                        try {
                            if (remote) {
                                // remote file
                                throw new RuntimeException(fileData + " is not support!");
                            }
                            // Local file: read its metadata straight from the file system.
                            File local = new File(fileData.getPath());
                            fileData.setLastModifiedTime(local.lastModified());
                            fileData.setSize(local.length());
                            // Success, nothing left to do.
                            return;
                        } catch (Exception e) {
                            fileData.setLastModifiedTime(Long.MIN_VALUE);
                            fileData.setSize(Long.MIN_VALUE);
                            lastFailure = e;
                        }
                    }
                    // Only reached when no attempt succeeded.
                    logger.warn(String.format("FileDetectCollector is error! collect failed[%s]", fileData.getNameSpace() + "/" + fileData.getPath()), lastFailure);
                }
            });
        }
        long begin = System.currentTimeMillis();
        logger.info("start pipelinep[{}] waitFor FileData Size : {} ", pipeline.getId(), fileDatas.size());
        // Block until every submitted task has finished.
        template.waitForResult();
        logger.info("end pipelinep[{}] waitFor FileData cost : {} ms ", pipeline.getId(), (System.currentTimeMillis() - begin));
    } finally {
        if (template != null) {
            executorTemplateGetter.release(template);
        }
    }
}
Also used : ExecutorTemplate(com.alibaba.otter.shared.common.utils.thread.ExecutorTemplate) FileData(com.alibaba.otter.shared.etl.model.FileData) File(java.io.File) ExtractException(com.alibaba.otter.node.etl.extract.exceptions.ExtractException)

Example 15 with FileData

use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.

the class AttachmentHttpPipe method archiveFile.

/**
 * Packs the files of a batch into one archive under {@code htdocsDir} and builds the pipe key
 * that describes where it can be fetched.
 *
 * <p>Pool size and the local multi-thread switch are taken from the pipeline parameters. Files
 * with a non-empty namespace are not supported. A file that does not exist, or is not a
 * regular file, is reported to the per-pipeline load log and skipped by returning
 * {@code null} from the retriever callback. When encryption is enabled globally or for the
 * pipeline, the archive is encrypted and the key and crc are stored on the pipe key.
 *
 * @param fileBatch batch whose files are archived
 * @return the pipe key for the archive, or {@code null} if packing reported no success
 */
private HttpPipeKey archiveFile(final FileBatch fileBatch) {
    // Build the archive file name and target location.
    String filename = buildFileName(fileBatch.getIdentity(), ClassUtils.getShortClassName(fileBatch.getClass()));
    File file = new File(htdocsDir, filename);
    // Compress the file data of the batch.
    List<FileData> fileDatas = fileBatch.getFiles();
    Pipeline pipeline = configClientService.findPipeline(fileBatch.getIdentity().getPipelineId());
    int poolSize = pipeline.getParameters().getFileLoadPoolSize();
    boolean useLocalFileMutliThread = pipeline.getParameters().getUseLocalFileMutliThread();
    ArchiveBean archiveBean = getArchiveBean();
    // Adjust the thread pool size.
    archiveBean.adjustPoolSize(poolSize);
    // Configure whether local files are processed with multiple threads.
    archiveBean.setUseLocalFileMutliThread(useLocalFileMutliThread);
    boolean done = archiveBean.pack(file, fileDatas, new ArchiveRetriverCallback<FileData>() {

        public InputStream retrive(FileData fileData) {
            if (StringUtils.isNotEmpty(fileData.getNameSpace())) {
                throw new RuntimeException(fileData + " is not support!");
            }
            File source = new File(fileData.getPath());
            if (source.exists() && source.isFile()) {
                return new LazyFileInputStream(source);
            }
            // Missing file: record it in the per-pipeline load log. The MDC key is removed
            // afterwards so it does not stay attached to the thread running this callback.
            if (logger.isInfoEnabled()) {
                MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
                try {
                    logger.info(FileloadDumper.dumpMissFileDatas(fileBatch.getIdentity(), fileData));
                } finally {
                    MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
                }
            }
            return null;
        }
    });
    if (!done) {
        // Nothing was packed successfully; return without a key.
        return null;
    }
    HttpPipeKey key = new HttpPipeKey();
    key.setUrl(remoteUrlBuilder.getUrl(fileBatch.getIdentity().getPipelineId(), filename));
    key.setDataType(PipeDataType.FILE_BATCH);
    key.setIdentity(fileBatch.getIdentity());
    if (encrypt || pipeline.getParameters().getUseFileEncrypt()) {
        // Encrypt the archive and publish the key material on the pipe key.
        EncryptedData encryptedData = encryptFile(file);
        key.setKey(encryptedData.getKey());
        key.setCrc(encryptedData.getCrc());
    }
    return key;
}
Also used : LazyFileInputStream(com.alibaba.otter.node.etl.common.pipe.impl.http.archive.LazyFileInputStream) InputStream(java.io.InputStream) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) LazyFileInputStream(com.alibaba.otter.node.etl.common.pipe.impl.http.archive.LazyFileInputStream) EncryptedData(com.alibaba.otter.node.etl.common.io.EncryptedData) File(java.io.File) FileData(com.alibaba.otter.shared.etl.model.FileData) ArchiveBean(com.alibaba.otter.node.etl.common.pipe.impl.http.archive.ArchiveBean)

Aggregations

FileData (com.alibaba.otter.shared.etl.model.FileData)24 File (java.io.File)15 ArrayList (java.util.ArrayList)10 Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)9 FileBatch (com.alibaba.otter.shared.etl.model.FileBatch)8 IOException (java.io.IOException)8 InputStream (java.io.InputStream)6 Identity (com.alibaba.otter.shared.etl.model.Identity)5 DataMediaPair (com.alibaba.otter.shared.common.model.config.data.DataMediaPair)4 EventColumn (com.alibaba.otter.shared.etl.model.EventColumn)4 EventData (com.alibaba.otter.shared.etl.model.EventData)4 RowBatch (com.alibaba.otter.shared.etl.model.RowBatch)4 FileInputStream (java.io.FileInputStream)4 BaseOtterTest (com.alibaba.otter.node.etl.BaseOtterTest)3 PipeException (com.alibaba.otter.node.etl.common.pipe.exception.PipeException)3 ArchiveBean (com.alibaba.otter.node.etl.common.pipe.impl.http.archive.ArchiveBean)3 LoadException (com.alibaba.otter.node.etl.load.exception.LoadException)3 BatchObject (com.alibaba.otter.shared.etl.model.BatchObject)3 FileNotFoundException (java.io.FileNotFoundException)3 FileOutputStream (java.io.FileOutputStream)3