Search in sources :

Example 1 with PipeKey

use of com.alibaba.otter.node.etl.common.pipe.PipeKey in project otter by alibaba.

the class TransformTask method run.

/**
 * Scheduling loop of the transform stage.
 *
 * <p>While {@code running} is true, blocks on
 * {@code arbitrateEventService.transformEvent().await(pipelineId)} and, for every event
 * received, wraps a worker in a {@link SetlFuture} and hands it to {@code executorService}.
 * The worker reads the batch referenced by the event, transforms its row batch (and file
 * batch when present), forwards the result to the next node and signals completion.
 *
 * <p>An interrupt (as judged by {@code isInterrupt}) ends the loop; any other failure is
 * logged and reported through {@code sendRollbackTermin}, after which the loop continues.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData eventData = arbitrateEventService.transformEvent().await(pipelineId);
            Runnable worker = new Runnable() {

                @Override
                public void run() {
                    // Record the start timestamp only when profiling is enabled.
                    boolean profilingEnabled = isProfiling();
                    Long startedAt = profilingEnabled ? Long.valueOf(System.currentTimeMillis()) : null;
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    // Rename the pooled thread for the duration of the work; restored in finally.
                    Thread self = Thread.currentThread();
                    String previousName = self.getName();
                    self.setName(createTaskName(pipelineId, "transformWorker"));
                    try {
                        // Later on this could check whether the synced data really is row data.
                        List<PipeKey> sourceKeys = (List<PipeKey>) eventData.getDesc();
                        DbBatch batch = rowDataPipeDelegate.get(sourceKeys);
                        if (batch == null) {
                            // The batch can be missing: under memory pressure, network failures or
                            // long blocking the pipe read may fail, and the previous node may
                            // already have deleted the data.
                            processMissData(pipelineId, "transform miss data with keys:" + sourceKeys.toString());
                            return;
                        }

                        // Map the source table id to the target-side table id; field-level
                        // processing may be added here later. For now row batches and file
                        // batches are assumed not to need heterogeneous conversion.
                        Map<Class, BatchObject> transformedRows = otterTransformerFactory.transform(batch.getRowBatch());
                        // One pipeline may carry both MQ and DB sync types; only the EventData
                        // result is kept here.
                        batch.setRowBatch((RowBatch) transformedRows.get(EventData.class));
                        if (batch.getFileBatch() != null) {
                            Map<Class, BatchObject> transformedFiles = otterTransformerFactory.transform(batch.getFileBatch());
                            batch.setFileBatch((FileBatch) transformedFiles.get(FileData.class));
                        }

                        // Hand the batch over to the next stage and remember the new keys.
                        List<PipeKey> forwardedKeys = rowDataPipeDelegate.put(batch, eventData.getNextNid());
                        eventData.setDesc(forwardedKeys);
                        if (profilingEnabled) {
                            Long finishedAt = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.TRANSFORM, new AggregationItem(startedAt, finishedAt));
                        }
                        // Signal that this event has been fully processed.
                        arbitrateEventService.transformEvent().single(eventData);
                    } catch (Throwable e) {
                        if (isInterrupt(e)) {
                            logger.info(String.format("[%s] transformWork executor is interrrupt! data:%s", pipelineId, eventData), e);
                        } else {
                            logger.error(String.format("[%s] transformWork executor is error! data:%s", pipelineId, eventData), e);
                            sendRollbackTermin(pipelineId, e);
                        }
                    } finally {
                        self.setName(previousName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };
            // Wrap the worker as a pending task so it can be cancelled when the thread shuts down.
            SetlFuture pendingTask = new SetlFuture(StageType.TRANSFORM, eventData.getProcessId(), pendingFuture, worker);
            executorService.execute(pendingTask);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] transformTask is interrupted!", pipelineId), e);
                return;
            }
            logger.error(String.format("[%s] transformTask is error!", pipelineId), e);
            sendRollbackTermin(pipelineId, e);
        }
    }
}
Also used : PipeKey(com.alibaba.otter.node.etl.common.pipe.PipeKey) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData) BatchObject(com.alibaba.otter.shared.etl.model.BatchObject) AggregationItem(com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem) List(java.util.List) SetlFuture(com.alibaba.otter.node.etl.extract.SetlFuture)

Example 2 with PipeKey

use of com.alibaba.otter.node.etl.common.pipe.PipeKey in project otter by alibaba.

the class RowDataPipeDelegate method get.

/**
 * Fetches the batch referenced by the given pipe keys.
 *
 * <p>Null keys are skipped. A {@code MemoryPipeKey} short-circuits: its batch is returned
 * immediately. For {@code HttpPipeKey}s the row data is read synchronously when the key's
 * data type is a db batch; otherwise the attachment is downloaded asynchronously on
 * {@code executorService} and, once available, set as the batch root. {@code RpcPipeKey}s
 * are read through the rpc pipe.
 *
 * @param keys pipe keys describing where the data lives; must not be null
 * @return the batch read from the pipes; may be null if a pipe returned null
 * @throws PipeException if a key type is unknown, or if waiting for the attachment download
 *         fails or is interrupted (the thread's interrupt status is restored in that case)
 */
public DbBatch get(List<PipeKey> keys) {
    Assert.notNull(keys);
    DbBatch dbBatch = new DbBatch();
    Future<File> future = null;
    for (final PipeKey key : keys) {
        if (key == null) {
            // Ignore empty keys.
            continue;
        }
        if (key instanceof MemoryPipeKey) {
            dbBatch = rowDataMemoryPipe.get((MemoryPipeKey) key);
            // In-memory data is complete on its own: return right away.
            return dbBatch;
        } else if (key instanceof HttpPipeKey) {
            if (key.getDataType().isDbBatch()) {
                // Row data download.
                dbBatch = rowDataHttpPipe.get((HttpPipeKey) key);
            } else {
                // Attachment download runs in the background while the remaining keys are handled.
                future = executorService.submit(new Callable<File>() {

                    public File call() throws Exception {
                        try {
                            HttpPipeKey pipeKey = (HttpPipeKey) key;
                            MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipeKey.getIdentity().getPipelineId()));
                            return attachmentHttpPipe.get(pipeKey);
                        } finally {
                            MDC.remove(OtterConstants.splitPipelineLogFileKey);
                        }
                    }
                });
            }
        } else if (key instanceof RpcPipeKey) {
            dbBatch = rowDataRpcPipe.get((RpcPipeKey) key);
        } else {
            throw new PipeException("unknow_PipeKey", key.toString());
        }
    }
    // NOTE(review): when dbBatch is null a submitted attachment download is neither awaited
    // nor cancelled — confirm whether that is intended.
    if (future != null && dbBatch != null) {
        try {
            dbBatch.setRoot(future.get());
        } catch (InterruptedException e) {
            // Do not lose the interrupt: restore the flag so callers and the pool can observe it.
            Thread.currentThread().interrupt();
            throw new PipeException(e);
        } catch (Exception e) {
            throw new PipeException(e);
        }
    }
    return dbBatch;
}
Also used : HttpPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.http.HttpPipeKey) MemoryPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.memory.MemoryPipeKey) MemoryPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.memory.MemoryPipeKey) RpcPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.rpc.RpcPipeKey) HttpPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.http.HttpPipeKey) PipeKey(com.alibaba.otter.node.etl.common.pipe.PipeKey) PipeException(com.alibaba.otter.node.etl.common.pipe.exception.PipeException) File(java.io.File) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) PipeException(com.alibaba.otter.node.etl.common.pipe.exception.PipeException) RpcPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.rpc.RpcPipeKey)

Example 3 with PipeKey

use of com.alibaba.otter.node.etl.common.pipe.PipeKey in project otter by alibaba.

the class RowDataPipeDelegate method put.

/**
 * Delivers the given batch to the node identified by {@code nid}.
 *
 * <p>For the local node the batch is stored in the memory pipe. Otherwise the row data is
 * sent through the rpc or http pipe according to the pipeline's {@code PipeChooseMode}
 * (in automatic mode, rpc is used when {@code calculateSize(data)} does not exceed
 * {@code sizeThresold}, http otherwise). A non-empty file batch is uploaded concurrently
 * through the attachment http pipe and its key is appended after the row data key.
 *
 * @param data the batch to deliver
 * @param nid id of the destination node
 * @return the pipe keys under which the data can be fetched
 * @throws PipeException if the choose mode is unsupported or any pipe operation fails; a
 *         {@code PipeException} raised by a pipe is propagated unchanged rather than being
 *         wrapped a second time. If the wait for the attachment upload is interrupted, the
 *         thread's interrupt status is restored before the exception is thrown.
 */
public List<PipeKey> put(final DbBatch data, Long nid) throws PipeException {
    List<PipeKey> keys = new ArrayList<PipeKey>();
    if (isLocal(nid)) {
        keys.add(rowDataMemoryPipe.put(data));
    } else {
        Future<PipeKey> future = null;
        Pipeline pipeline = configClientService.findPipeline(data.getRowBatch().getIdentity().getPipelineId());
        if (data.getFileBatch() != null && !CollectionUtils.isEmpty(data.getFileBatch().getFiles())) {
            // Upload attachments in the background while the row data is being sent.
            future = executorService.submit(new Callable<PipeKey>() {

                public PipeKey call() throws Exception {
                    try {
                        MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(data.getFileBatch().getIdentity().getPipelineId()));
                        return attachmentHttpPipe.put(data.getFileBatch());
                    } finally {
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            });
        }
        try {
            PipeChooseMode pipeChooseMode = pipeline.getParameters().getPipeChooseType();
            if (pipeChooseMode.isAutomatic()) {
                if (calculateSize(data) <= sizeThresold) {
                    keys.add(rowDataRpcPipe.put(data));
                } else {
                    keys.add(rowDataHttpPipe.put(data));
                }
            } else if (pipeChooseMode.isRpc()) {
                keys.add(rowDataRpcPipe.put(data));
            } else if (pipeChooseMode.isHttp()) {
                keys.add(rowDataHttpPipe.put(data));
            } else {
                throw new PipeException("pipeChooseMode is error!" + pipeChooseMode);
            }
            // Wait for the attachment upload to finish.
            if (future != null) {
                keys.add(future.get());
            }
        } catch (InterruptedException e) {
            // Do not lose the interrupt: restore the flag before translating the exception.
            Thread.currentThread().interrupt();
            throw new PipeException(e);
        } catch (PipeException e) {
            // Already the right type; avoid wrapping it in another PipeException.
            throw e;
        } catch (Exception e) {
            throw new PipeException(e);
        }
    }
    return keys;
}
Also used : PipeChooseMode(com.alibaba.otter.shared.common.model.config.pipeline.PipelineParameter.PipeChooseMode) ArrayList(java.util.ArrayList) MemoryPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.memory.MemoryPipeKey) RpcPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.rpc.RpcPipeKey) HttpPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.http.HttpPipeKey) PipeKey(com.alibaba.otter.node.etl.common.pipe.PipeKey) PipeException(com.alibaba.otter.node.etl.common.pipe.exception.PipeException) Callable(java.util.concurrent.Callable) PipeException(com.alibaba.otter.node.etl.common.pipe.exception.PipeException) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Example 4 with PipeKey

use of com.alibaba.otter.node.etl.common.pipe.PipeKey in project otter by alibaba.

the class ExtractTask method run.

/**
 * Scheduling loop of the extract stage.
 *
 * <p>While {@code running} is true, blocks on
 * {@code arbitrateEventService.extractEvent().await(pipelineId)} and submits one worker per
 * event (wrapped in a {@link SetlFuture}) to {@code executorService}. The worker reads the
 * batch referenced by the event, runs {@code otterExtractorFactory.extract} on it,
 * optionally runs file conflict detection, forwards the batch to the next node and signals
 * completion.
 *
 * <p>An interrupt (as judged by {@code isInterrupt}) ends the loop; any other failure is
 * logged and reported through {@code sendRollbackTermin}, after which the loop continues.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData eventData = arbitrateEventService.extractEvent().await(pipelineId);
            Runnable worker = new Runnable() {

                public void run() {
                    // Record the start timestamp only when profiling is enabled.
                    boolean profilingEnabled = isProfiling();
                    Long startedAt = profilingEnabled ? Long.valueOf(System.currentTimeMillis()) : null;
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    // Rename the pooled thread for the duration of the work; restored in finally.
                    Thread self = Thread.currentThread();
                    String previousName = self.getName();
                    self.setName(createTaskName(pipelineId, "ExtractWorker"));
                    try {
                        // NOTE(review): 'pipeline' is not declared in this method, so this
                        // assigns state outside the worker — confirm that is safe when
                        // several workers run concurrently.
                        pipeline = configClientService.findPipeline(pipelineId);
                        List<PipeKey> sourceKeys = (List<PipeKey>) eventData.getDesc();
                        long targetNodeId = eventData.getNextNid();
                        DbBatch batch = rowDataPipeDelegate.get(sourceKeys);
                        if (batch == null) {
                            // The batch can be missing: under memory pressure, network failures or
                            // long blocking the pipe read may fail, and the previous node may
                            // already have deleted the data.
                            processMissData(pipelineId, "extract miss data with keys:" + sourceKeys.toString());
                            return;
                        }

                        // Re-assemble the data.
                        otterExtractorFactory.extract(batch);

                        // File comparison is needed only when files are being synced and the
                        // pipeline asks for file detection (the original comment describes this
                        // as checking whether images changed between the China and US sides).
                        boolean detectFiles = batch.getFileBatch() != null
                                              && !CollectionUtils.isEmpty(batch.getFileBatch().getFiles())
                                              && pipeline.getParameters().getFileDetect();
                        if (detectFiles) {
                            FileBatch detected = fileBatchConflictDetectService.detect(batch.getFileBatch(), targetNodeId);
                            batch.setFileBatch(detected);
                        }

                        // Hand the batch over to the next node and remember the new keys.
                        List<PipeKey> forwardedKeys = rowDataPipeDelegate.put(batch, targetNodeId);
                        eventData.setDesc(forwardedKeys);
                        if (profilingEnabled) {
                            Long finishedAt = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.EXTRACT, new AggregationItem(startedAt, finishedAt));
                        }
                        arbitrateEventService.extractEvent().single(eventData);
                    } catch (Throwable e) {
                        if (isInterrupt(e)) {
                            logger.info(String.format("[%d] extractwork executor is interrrupt! data:%s", pipelineId, eventData), e);
                        } else {
                            logger.error(String.format("[%d] extractwork executor is error! data:%s", pipelineId, eventData), e);
                            sendRollbackTermin(pipelineId, e);
                        }
                    } finally {
                        self.setName(previousName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };
            // Wrap the worker as a pending task so it can be cancelled when the thread shuts down.
            SetlFuture pendingTask = new SetlFuture(StageType.EXTRACT, eventData.getProcessId(), pendingFuture, worker);
            executorService.execute(pendingTask);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] extractTask is interrupted!", pipelineId), e);
                return;
            }
            logger.error(String.format("[%s] extractTask is error!", pipelineId), e);
            sendRollbackTermin(pipelineId, e);
        }
    }
}
Also used : FileBatch(com.alibaba.otter.shared.etl.model.FileBatch) PipeKey(com.alibaba.otter.node.etl.common.pipe.PipeKey) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData) AggregationItem(com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem) List(java.util.List)

Example 5 with PipeKey

use of com.alibaba.otter.node.etl.common.pipe.PipeKey in project otter by alibaba.

the class LoadTask method run.

/**
 * Scheduling loop of the load stage.
 *
 * <p>While {@code running} is true, blocks on
 * {@code arbitrateEventService.loadEvent().await(pipelineId)} and submits one worker per
 * event (wrapped in a {@link SetlFuture}) to {@code executorService}. The worker reads the
 * batch referenced by the event, loads it through {@code otterLoaderFactory} and signals
 * completion. If a failure occurs after the load itself returned, every
 * {@code DbLoadContext} produced by the load is passed to {@code dbLoadInterceptor.error}.
 *
 * <p>An interrupt (as judged by {@code isInterrupt}) ends the loop; any other failure is
 * logged and reported through {@code sendRollbackTermin}, after which the loop continues.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData eventData = arbitrateEventService.loadEvent().await(pipelineId);
            Runnable worker = new Runnable() {

                public void run() {
                    // Record the start timestamp only when profiling is enabled.
                    boolean profilingEnabled = isProfiling();
                    Long startedAt = profilingEnabled ? Long.valueOf(System.currentTimeMillis()) : null;
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    // Rename the pooled thread for the duration of the work; restored in finally.
                    Thread self = Thread.currentThread();
                    String previousName = self.getName();
                    self.setName(createTaskName(pipelineId, "LoadWorker"));
                    // Stays null until the load call has returned.
                    List<LoadContext> loadedContexts = null;
                    try {
                        // Later on this could check whether the synced data really is row data.
                        List<PipeKey> sourceKeys = (List<PipeKey>) eventData.getDesc();
                        DbBatch batch = rowDataPipeDelegate.get(sourceKeys);
                        if (batch == null) {
                            // The batch can be missing: under memory pressure, network failures or
                            // long blocking the pipe read may fail, and the previous node may
                            // already have deleted the data.
                            processMissData(pipelineId, "load miss data with keys:" + sourceKeys.toString());
                            return;
                        }

                        // Perform the actual load.
                        otterLoaderFactory.setStartTime(batch.getRowBatch().getIdentity(), eventData.getStartTime());
                        loadedContexts = otterLoaderFactory.load(batch);
                        if (profilingEnabled) {
                            Long finishedAt = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.LOAD, new AggregationItem(startedAt, finishedAt));
                        }
                        // Signal that this event has been fully processed.
                        arbitrateEventService.loadEvent().single(eventData);
                    } catch (Throwable e) {
                        if (isInterrupt(e)) {
                            logger.info(String.format("[%s] loadWork executor is interrrupt! data:%s", pipelineId, eventData), e);
                        } else {
                            logger.error(String.format("[%s] loadWork executor is error! data:%s", pipelineId, eventData), e);
                        }

                        if (loadedContexts != null) {
                            // The load succeeded but a later step (such as notifying the arbiter)
                            // failed, so the loaded contexts are handed to the interceptor's
                            // error handling to be recorded.
                            for (LoadContext loaded : loadedContexts) {
                                if (!(loaded instanceof DbLoadContext)) {
                                    continue;
                                }
                                try {
                                    dbLoadInterceptor.error((DbLoadContext) loaded);
                                } catch (Throwable ie) {
                                    logger.error(String.format("[%s] interceptor process error failed!", pipelineId), ie);
                                }
                            }
                        }

                        // Interrupt status is deliberately re-evaluated after the interceptor pass.
                        if (!isInterrupt(e)) {
                            sendRollbackTermin(pipelineId, e);
                        }
                    } finally {
                        self.setName(previousName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };
            // Wrap the worker as a pending task so it can be cancelled when the thread shuts down.
            SetlFuture pendingTask = new SetlFuture(StageType.LOAD, eventData.getProcessId(), pendingFuture, worker);
            executorService.execute(pendingTask);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] loadTask is interrupted!", pipelineId), e);
                // Original note: release the lock.
                return;
            }
            logger.error(String.format("[%s] loadTask is error!", pipelineId), e);
            // Original note: release the lock first, then send the rollback signal. An explicit
            // arbitrateEventService.loadEvent().release(pipelineId) call was left disabled here.
            sendRollbackTermin(pipelineId, e);
        }
    }
}
Also used : PipeKey(com.alibaba.otter.node.etl.common.pipe.PipeKey) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData) DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) LoadContext(com.alibaba.otter.node.etl.load.loader.LoadContext) AggregationItem(com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem) List(java.util.List) SetlFuture(com.alibaba.otter.node.etl.extract.SetlFuture)

Aggregations

PipeKey (com.alibaba.otter.node.etl.common.pipe.PipeKey)6 DbBatch (com.alibaba.otter.shared.etl.model.DbBatch)5 AggregationItem (com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem)4 EtlEventData (com.alibaba.otter.shared.arbitrate.model.EtlEventData)4 SetlFuture (com.alibaba.otter.node.etl.extract.SetlFuture)3 List (java.util.List)3 PipeException (com.alibaba.otter.node.etl.common.pipe.exception.PipeException)2 HttpPipeKey (com.alibaba.otter.node.etl.common.pipe.impl.http.HttpPipeKey)2 MemoryPipeKey (com.alibaba.otter.node.etl.common.pipe.impl.memory.MemoryPipeKey)2 RpcPipeKey (com.alibaba.otter.node.etl.common.pipe.impl.rpc.RpcPipeKey)2 LoadContext (com.alibaba.otter.node.etl.load.loader.LoadContext)1 DbLoadContext (com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext)1 Message (com.alibaba.otter.node.etl.select.selector.Message)1 TerminEventData (com.alibaba.otter.shared.arbitrate.model.TerminEventData)1 Channel (com.alibaba.otter.shared.common.model.config.channel.Channel)1 Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)1 PipeChooseMode (com.alibaba.otter.shared.common.model.config.pipeline.PipelineParameter.PipeChooseMode)1 BatchObject (com.alibaba.otter.shared.etl.model.BatchObject)1 EventData (com.alibaba.otter.shared.etl.model.EventData)1 FileBatch (com.alibaba.otter.shared.etl.model.FileBatch)1