Search in sources :

Example 1 with LoadContext

use of com.alibaba.otter.node.etl.load.loader.LoadContext in project otter by alibaba.

the class DataBatchLoader method load.

/**
 * Loads a {@link DbBatch} by submitting its file batch and its (split) row batches to the
 * executor, then waiting for every submitted task to complete.
 *
 * <p>If waiting is interrupted or any task fails, the remaining unfinished tasks are cancelled,
 * every already-finished {@link DbLoadContext} is handed to {@code dbInterceptor.error(...)},
 * and a {@link LoadException} wrapping the original cause is thrown.
 *
 * @param data the batch to load; its row batch and file batch are each optional
 * @return the {@link DbLoadContext} results of all tasks when every task completed successfully
 *         (results that are not {@link DbLoadContext} instances are not collected)
 * @throws LoadException if waiting for a task is interrupted or a task ends with an exception
 */
public List<LoadContext> load(DbBatch data) {
    final RowBatch rowBatch = data.getRowBatch();
    final FileBatch fileBatch = data.getFileBatch();
    // Guard on fileBatch itself: the previous check tested rowBatch here, which threw an NPE
    // when fileBatch was null and silently skipped the files when only rowBatch was null.
    boolean existFileBatch = (fileBatch != null && !CollectionUtils.isEmpty(fileBatch.getFiles()) && data.getRoot() != null);
    boolean existRowBatch = (rowBatch != null && !CollectionUtils.isEmpty(rowBatch.getDatas()));
    int count = 0;
    List<RowBatch> rowBatchs = null;
    if (existRowBatch) {
        // Classify and merge by medium content; each medium gets its own load channel.
        rowBatchs = split(rowBatch);
        count += rowBatchs.size();
    }
    if (existFileBatch) {
        // The whole file batch counts as a single task.
        count += 1;
    }
    WeightController controller = new WeightController(count);
    List<Future> futures = new ArrayList<Future>();
    ExecutorCompletionService completionService = new ExecutorCompletionService(executorService);
    if (existFileBatch) {
        submitFileBatch(futures, completionService, fileBatch, data.getRoot(), controller);
    }
    if (existRowBatch) {
        submitRowBatch(futures, completionService, rowBatchs, controller);
    }
    // First drain the asynchronous results in completion order, stopping at the first failure;
    // index ends up as the number of tasks that completed successfully before that point.
    List<LoadContext> processedContexts = new ArrayList<LoadContext>();
    int index = 0;
    LoadException exception = null;
    while (index < futures.size()) {
        try {
            // take() blocks and may itself be interrupted.
            Future future = completionService.take();
            future.get();
        } catch (InterruptedException e) {
            exception = new LoadException(e);
            break;
        } catch (ExecutionException e) {
            exception = new LoadException(e);
            break;
        }
        index++;
    }
    // As soon as any task reports an exception, the whole scheduling round is abandoned.
    if (index < futures.size()) {
        // Fewer completions than futures means an error occurred: cancel whatever has not
        // finished, and pass the finished results to the interceptor's error hook
        // (per the original note, for recording/duplicate-load filtering).
        for (int errorIndex = 0; errorIndex < futures.size(); errorIndex++) {
            Future future = futures.get(errorIndex);
            if (future.isDone()) {
                try {
                    LoadContext loadContext = (LoadContext) future.get();
                    if (loadContext instanceof DbLoadContext) {
                        // Error handling: let the interceptor record this context.
                        dbInterceptor.error((DbLoadContext) loadContext);
                    }
                } catch (InterruptedException e) {
                    // ignore: the primary failure is already captured in 'exception'
                } catch (ExecutionException e) {
                    // ignore: this task failed itself, so it has no result to record
                } catch (Exception e) {
                    logger.error("interceptor process error failed", e);
                }
            } else {
                // Cancel tasks that have not finished yet.
                future.cancel(true);
            }
        }
    } else {
        for (int i = 0; i < futures.size(); i++) {
            // Collect the results of the tasks that completed successfully.
            Future future = futures.get(i);
            try {
                LoadContext loadContext = (LoadContext) future.get();
                if (loadContext instanceof DbLoadContext) {
                    processedContexts.add((DbLoadContext) loadContext);
                }
            } catch (InterruptedException e) {
                // ignore: every future was already drained successfully in the loop above
            } catch (ExecutionException e) {
                // ignore: every future was already drained successfully in the loop above
            }
        }
    }
    if (exception != null) {
        throw exception;
    } else {
        return processedContexts;
    }
}
Also used : FileBatch(com.alibaba.otter.shared.etl.model.FileBatch) ArrayList(java.util.ArrayList) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException) BeansException(org.springframework.beans.BeansException) ExecutionException(java.util.concurrent.ExecutionException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) Future(java.util.concurrent.Future) DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) FileLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.FileLoadContext) LoadContext(com.alibaba.otter.node.etl.load.loader.LoadContext) WeightController(com.alibaba.otter.node.etl.load.loader.weight.WeightController) ExecutionException(java.util.concurrent.ExecutionException)

Example 2 with LoadContext

use of com.alibaba.otter.node.etl.load.loader.LoadContext in project otter by alibaba.

the class LoadTask method run.

/**
 * Main loop of the load task: while {@code running} is set, waits for the next load event of
 * this pipeline, wraps the actual load work in a {@link SetlFuture} and hands it to the
 * executor.
 *
 * <p>The loop returns when waiting/submitting fails with an interrupt (as judged by
 * {@code isInterrupt}); any other failure is logged and answered with a rollback termin.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData etlEventData = arbitrateEventService.loadEvent().await(pipelineId);
            Runnable loadWork = new Runnable() {

                public void run() {
                    // Profiling: remember the start timestamp only when profiling is enabled.
                    final boolean profiling = isProfiling();
                    final Long startedAt = profiling ? System.currentTimeMillis() : null;
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    // Rename the worker thread for the duration of this task; restored in finally.
                    final Thread worker = Thread.currentThread();
                    final String originalThreadName = worker.getName();
                    worker.setName(createTaskName(pipelineId, "LoadWorker"));
                    List<LoadContext> loaded = null;
                    try {
                        // Original note: a later version could check whether the synced data is row data.
                        List<PipeKey> pipeKeys = (List<PipeKey>) etlEventData.getDesc();
                        DbBatch batch = rowDataPipeDelegate.get(pipeKeys);
                        // The pipe may yield null (per the original note: memory pressure, network
                        // trouble or long blocking may mean the previous node already deleted the data).
                        if (batch == null) {
                            processMissData(pipelineId, "load miss data with keys:" + pipeKeys.toString());
                            return;
                        }
                        // Perform the actual data load.
                        otterLoaderFactory.setStartTime(batch.getRowBatch().getIdentity(), etlEventData.getStartTime());
                        loaded = otterLoaderFactory.load(batch);
                        if (profiling) {
                            Long finishedAt = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.LOAD, new AggregationItem(startedAt, finishedAt));
                        }
                        // Signal completion of this event once processing is done.
                        arbitrateEventService.loadEvent().single(etlEventData);
                    } catch (Throwable e) {
                        if (isInterrupt(e)) {
                            logger.info(String.format("[%s] loadWork executor is interrrupt! data:%s", pipelineId, etlEventData), e);
                        } else {
                            logger.error(String.format("[%s] loadWork executor is error! data:%s", pipelineId, etlEventData), e);
                        }
                        if (loaded != null) {
                            // A non-null result means the load itself returned and a later step
                            // failed (e.g. notifying the arbiter); report each DB context to the
                            // interceptor so it can be recorded.
                            for (LoadContext context : loaded) {
                                try {
                                    if (context instanceof DbLoadContext) {
                                        dbLoadInterceptor.error((DbLoadContext) context);
                                    }
                                } catch (Throwable ie) {
                                    logger.error(String.format("[%s] interceptor process error failed!", pipelineId), ie);
                                }
                            }
                        }
                        // Re-evaluated on purpose (not cached): only non-interrupt failures roll back.
                        if (!isInterrupt(e)) {
                            sendRollbackTermin(pipelineId, e);
                        }
                    } finally {
                        worker.setName(originalThreadName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };
            // Build a pending task so it can be abandoned when the threads are shut down.
            SetlFuture loadFuture = new SetlFuture(StageType.LOAD, etlEventData.getProcessId(), pendingFuture, loadWork);
            executorService.execute(loadFuture);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] loadTask is interrupted!", pipelineId), e);
                // Interrupted: leave the loop.
                return;
            }
            logger.error(String.format("[%s] loadTask is error!", pipelineId), e);
            // Original note: release the lock first, then send the rollback signal
            // (the explicit release call is disabled in the original code).
            sendRollbackTermin(pipelineId, e);
        }
    }
}
Also used : PipeKey(com.alibaba.otter.node.etl.common.pipe.PipeKey) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData) DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) LoadContext(com.alibaba.otter.node.etl.load.loader.LoadContext) AggregationItem(com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem) List(java.util.List) SetlFuture(com.alibaba.otter.node.etl.extract.SetlFuture)

Aggregations

LoadContext (com.alibaba.otter.node.etl.load.loader.LoadContext)2 DbLoadContext (com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext)2 AggregationItem (com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem)1 PipeKey (com.alibaba.otter.node.etl.common.pipe.PipeKey)1 SetlFuture (com.alibaba.otter.node.etl.extract.SetlFuture)1 LoadException (com.alibaba.otter.node.etl.load.exception.LoadException)1 FileLoadContext (com.alibaba.otter.node.etl.load.loader.db.context.FileLoadContext)1 WeightController (com.alibaba.otter.node.etl.load.loader.weight.WeightController)1 EtlEventData (com.alibaba.otter.shared.arbitrate.model.EtlEventData)1 DbBatch (com.alibaba.otter.shared.etl.model.DbBatch)1 FileBatch (com.alibaba.otter.shared.etl.model.FileBatch)1 RowBatch (com.alibaba.otter.shared.etl.model.RowBatch)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 ExecutionException (java.util.concurrent.ExecutionException)1 ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService)1 Future (java.util.concurrent.Future)1 BeansException (org.springframework.beans.BeansException)1