Search in sources :

Example 1 with DbLoadContext

use of com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext in project otter by alibaba.

the class DbLoadAction method load.

/**
 * Loads one {@link RowBatch} and returns the {@link DbLoadContext} built for it.
 * <p>
 * A batch recognised by {@code isDdlDatas} is handed to {@code doDdl}. Any other batch is split
 * into weight buckets and loaded bucket by bucket, with each bucket guarded by
 * {@code controller.await(...)} / {@code controller.single(...)}.
 * The original note on this method says the result holds the records processed successfully.
 *
 * @param rowBatch the batch to load; checked with {@code Assert.notNull}
 * @param controller weight controller used to start, await and signal each weight
 * @return the context created for this batch; also returned when there is no data to load or
 *         when the load was interrupted
 * @throws LoadException wrapping any exception other than {@link InterruptedException} raised
 *         while loading
 */
public DbLoadContext load(RowBatch rowBatch, WeightController controller) {
    Assert.notNull(rowBatch);
    Identity identity = rowBatch.getIdentity();
    DbLoadContext context = buildContext(identity);
    try {
        List<EventData> datas = rowBatch.getDatas();
        context.setPrepareDatas(datas);
        // Read the prepared data back from the context
        // (original note: filter out data that has already been loaded).
        datas = context.getPrepareDatas();
        if (datas == null || datas.size() == 0) {
            logger.info("##no eventdata for load, return");
            return context;
        }
        // All data has already been grouped by data media source in DbBatchLoader, and each
        // source medium is handled by its own DbLoadAction, so when setting the media source
        // it is enough to take the source of the first record.
        context.setDataMediaSource(ConfigHelper.findDataMedia(context.getPipeline(), datas.get(0).getTableId()).getSource());
        interceptor.prepare(context);
        // Re-read the prepared data after interceptor.prepare
        // (original note: filter out data that has already been loaded).
        datas = context.getPrepareDatas();
        // Mainly about DDL idempotence: ideally one DDL per batch, so that a failure or a
        // rollback only concerns that single SQL statement.
        if (isDdlDatas(datas)) {
            doDdl(context, datas);
        } else {
            WeightBuckets<EventData> buckets = buildWeightBuckets(context, datas);
            List<Long> weights = buckets.weights();
            // weights may be empty, but start() still has to be called.
            controller.start(weights);
            if (CollectionUtils.isEmpty(datas)) {
                logger.info("##no eventdata for load");
            }
            // Adjust the thread pool according to the manager configuration.
            adjustPoolSize(context);
            // Adjust the runtime parameters.
            adjustConfig(context);
            // Process the data, one weight at a time.
            for (int i = 0; i < weights.size(); i++) {
                Long weight = weights.get(i);
                controller.await(weight.intValue());
                // Process the data that shares this weight.
                List<EventData> items = buckets.getItems(weight);
                logger.debug("##start load for weight:" + weight);
                // Pre-process the data:
                // merge multiple I/U/D operations on the same pk into one.
                items = DbLoadMerger.merge(items);
                // Group the items by I/U/D.
                DbLoadData loadData = new DbLoadData();
                doBefore(items, context, loadData);
                // Perform the load.
                doLoad(context, loadData);
                controller.single(weight.intValue());
                logger.debug("##end load for weight:" + weight);
            }
        }
        interceptor.commit(context);
    } catch (InterruptedException e) {
        // Restore the interrupt flag and report the error to the interceptor; nothing is
        // rethrown here, so execution falls through to the final return.
        Thread.currentThread().interrupt();
        interceptor.error(context);
    } catch (Exception e) {
        interceptor.error(context);
        throw new LoadException(e);
    }
    // Return the successfully processed records.
    return context;
}
Also used : DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) Identity(com.alibaba.otter.shared.etl.model.Identity) EventData(com.alibaba.otter.shared.etl.model.EventData) DataIntegrityViolationException(org.springframework.dao.DataIntegrityViolationException) DataAccessException(org.springframework.dao.DataAccessException) SQLException(java.sql.SQLException) DeadlockLoserDataAccessException(org.springframework.dao.DeadlockLoserDataAccessException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException)

Example 2 with DbLoadContext

use of com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext in project otter by alibaba.

the class DbLoadAction method buildContext.

/**
 * Creates a fresh {@link DbLoadContext} for the given identity and fills in the channel and
 * pipeline looked up through {@code configClientService}.
 *
 * @param identity identity supplying the channel id and pipeline id
 * @return a new context carrying the identity, channel and pipeline
 */
private DbLoadContext buildContext(Identity identity) {
    DbLoadContext loadContext = new DbLoadContext();
    loadContext.setIdentity(identity);
    // Resolve both configuration objects before storing them, as the lookups may fail.
    Channel resolvedChannel = configClientService.findChannel(identity.getChannelId());
    Pipeline resolvedPipeline = configClientService.findPipeline(identity.getPipelineId());
    loadContext.setChannel(resolvedChannel);
    loadContext.setPipeline(resolvedPipeline);
    return loadContext;
}
Also used : DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) Channel(com.alibaba.otter.shared.common.model.config.channel.Channel) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Example 3 with DbLoadContext

use of com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext in project otter by alibaba.

the class LoadTask method run.

/**
 * Main loop of the load task.
 * <p>
 * While {@code running} is true it waits for the next load event of {@code pipelineId}, wraps
 * the load work in a {@link Runnable}, and submits it to {@code executorService} inside a
 * {@code SetlFuture}. An interrupt while waiting or submitting ends the loop; any other error
 * is logged and followed by {@code sendRollbackTermin}.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData etlEventData = arbitrateEventService.loadEvent().await(pipelineId);
            Runnable task = new Runnable() {

                public void run() {
                    // Set up profiling information.
                    boolean profiling = isProfiling();
                    Long profilingStartTime = null;
                    if (profiling) {
                        profilingStartTime = System.currentTimeMillis();
                    }
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    // Remember the current thread name so it can be restored in the finally block.
                    String currentName = Thread.currentThread().getName();
                    Thread.currentThread().setName(createTaskName(pipelineId, "LoadWorker"));
                    // Stays null until otterLoaderFactory.load has returned.
                    List<LoadContext> processedContexts = null;
                    try {
                        // Later on this could check whether the synchronised data is rowData.
                        List<PipeKey> keys = (List<PipeKey>) etlEventData.getDesc();
                        DbBatch dbBatch = rowDataPipeDelegate.get(keys);
                        // The result may be null: with insufficient memory or a network failure
                        // and long blocking, fetching from the pipe can fail, and the data may
                        // already have been deleted by the previous node.
                        if (dbBatch == null) {
                            processMissData(pipelineId, "load miss data with keys:" + keys.toString());
                            return;
                        }
                        // Perform the data load.
                        otterLoaderFactory.setStartTime(dbBatch.getRowBatch().getIdentity(), etlEventData.getStartTime());
                        processedContexts = otterLoaderFactory.load(dbBatch);
                        if (profiling) {
                            Long profilingEndTime = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.LOAD, new AggregationItem(profilingStartTime, profilingEndTime));
                        }
                        // After processing, signal (single) that this event is done.
                        arbitrateEventService.loadEvent().single(etlEventData);
                    } catch (Throwable e) {
                        // Interrupts are logged at info level, everything else at error level.
                        if (!isInterrupt(e)) {
                            logger.error(String.format("[%s] loadWork executor is error! data:%s", pipelineId, etlEventData), e);
                        } else {
                            logger.info(String.format("[%s] loadWork executor is interrrupt! data:%s", pipelineId, etlEventData), e);
                        }
                        if (processedContexts != null) {
                            // The load itself succeeded but a later step (original note: notifying
                            // the arbitrator) failed; this has to be recorded to the store.
                            for (LoadContext context : processedContexts) {
                                try {
                                    if (context instanceof DbLoadContext) {
                                        dbLoadInterceptor.error((DbLoadContext) context);
                                    }
                                } catch (Throwable ie) {
                                    // A failing interceptor must not stop the remaining contexts.
                                    logger.error(String.format("[%s] interceptor process error failed!", pipelineId), ie);
                                }
                            }
                        }
                        // Only non-interrupt failures trigger a rollback termin.
                        if (!isInterrupt(e)) {
                            sendRollbackTermin(pipelineId, e);
                        }
                    } finally {
                        Thread.currentThread().setName(currentName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };
            // Build a pending task so that it can be stopped when the thread is shut down.
            SetlFuture extractFuture = new SetlFuture(StageType.LOAD, etlEventData.getProcessId(), pendingFuture, task);
            executorService.execute(extractFuture);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] loadTask is interrupted!", pipelineId), e);
                // Original note: release the lock. This branch only leaves the loop.
                return;
            } else {
                logger.error(String.format("[%s] loadTask is error!", pipelineId), e);
                // arbitrateEventService.loadEvent().release(pipelineId); //
                // Original note for the disabled call above: release the lock.
                // Unlock first, then send the rollback signal.
                sendRollbackTermin(pipelineId, e);
            }
        }
    }
}
Also used : PipeKey(com.alibaba.otter.node.etl.common.pipe.PipeKey) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData) DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) LoadContext(com.alibaba.otter.node.etl.load.loader.LoadContext) AggregationItem(com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem) List(java.util.List) SetlFuture(com.alibaba.otter.node.etl.extract.SetlFuture)

Example 4 with DbLoadContext

use of com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext in project otter by alibaba.

the class DataBatchLoader method load.

/**
 * Loads a {@link DbBatch} by submitting its file batch and its row batches (split per data
 * medium) to {@code executorService}, then waits for all submitted tasks.
 * <p>
 * If every task finishes normally, the {@link DbLoadContext} results are collected and
 * returned. If any task fails or the wait is interrupted, the tasks already done have their
 * {@link DbLoadContext} reported to {@code dbInterceptor.error}, the remaining tasks are
 * cancelled, and the failure is rethrown as a {@link LoadException}.
 *
 * @param data the batch to load; its row batch and file batch may each be null or empty
 * @return the {@link DbLoadContext} instances of all tasks, when every task succeeded
 * @throws LoadException wrapping the {@link InterruptedException} or {@link ExecutionException}
 *         that stopped the wait
 */
public List<LoadContext> load(DbBatch data) {
    final RowBatch rowBatch = data.getRowBatch();
    final FileBatch fileBatch = data.getFileBatch();
    // Guard on fileBatch itself: the earlier check tested rowBatch here, which threw a
    // NullPointerException for a null fileBatch and skipped file loading for a null rowBatch.
    boolean existFileBatch = (fileBatch != null && !CollectionUtils.isEmpty(fileBatch.getFiles()) && data.getRoot() != null);
    boolean existRowBatch = (rowBatch != null && !CollectionUtils.isEmpty(rowBatch.getDatas()));
    int count = 0;
    List<RowBatch> rowBatchs = null;
    if (existRowBatch) {
        // Classify and merge by data medium; each medium gets its own load channel.
        rowBatchs = split(rowBatch);
        count += rowBatchs.size();
    }
    if (existFileBatch) {
        count += 1;
    }
    WeightController controller = new WeightController(count);
    List<Future> futures = new ArrayList<Future>();
    ExecutorCompletionService completionService = new ExecutorCompletionService(executorService);
    if (existFileBatch) {
        submitFileBatch(futures, completionService, fileBatch, data.getRoot(), controller);
    }
    if (existRowBatch) {
        submitRowBatch(futures, completionService, rowBatchs, controller);
    }
    // First wait for the asynchronous results and remember how many completed without error.
    List<LoadContext> processedContexts = new ArrayList<LoadContext>();
    int index = 0;
    LoadException exception = null;
    while (index < futures.size()) {
        try {
            // This wait may itself be interrupted.
            Future future = completionService.take();
            future.get();
        } catch (InterruptedException e) {
            exception = new LoadException(e);
            break;
        } catch (ExecutionException e) {
            exception = new LoadException(e);
            break;
        }
        index++;
    }
    // As soon as any task returns with an exception, the whole scheduling is abandoned.
    if (index < futures.size()) {
        // Fewer completions than tasks means an error occurred: cancel the unfinished tasks and
        // collect the finished results so they can be recorded for duplicate-load filtering.
        for (int errorIndex = 0; errorIndex < futures.size(); errorIndex++) {
            Future future = futures.get(errorIndex);
            if (future.isDone()) {
                try {
                    LoadContext loadContext = (LoadContext) future.get();
                    if (loadContext instanceof DbLoadContext) {
                        // Run the error handling, which records the context to the store.
                        dbInterceptor.error((DbLoadContext) loadContext);
                    }
                } catch (InterruptedException e) {
                    // ignore: the primary failure is already captured in 'exception'
                } catch (ExecutionException e) {
                    // ignore: a failed task has no context to record
                } catch (Exception e) {
                    logger.error("interceptor process error failed", e);
                }
            } else {
                // Cancel tasks that have not finished yet.
                future.cancel(true);
            }
        }
    } else {
        for (int i = 0; i < futures.size(); i++) {
            // Collect the results of the tasks that completed correctly.
            Future future = futures.get(i);
            try {
                LoadContext loadContext = (LoadContext) future.get();
                if (loadContext instanceof DbLoadContext) {
                    processedContexts.add((DbLoadContext) loadContext);
                }
            } catch (InterruptedException e) {
                // ignore: every future already completed in the wait loop above
            } catch (ExecutionException e) {
                // ignore: every future already completed successfully in the wait loop above
            }
        }
    }
    if (exception != null) {
        throw exception;
    } else {
        return processedContexts;
    }
}
Also used : FileBatch(com.alibaba.otter.shared.etl.model.FileBatch) ArrayList(java.util.ArrayList) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException) BeansException(org.springframework.beans.BeansException) ExecutionException(java.util.concurrent.ExecutionException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) Future(java.util.concurrent.Future) DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) FileLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.FileLoadContext) LoadContext(com.alibaba.otter.node.etl.load.loader.LoadContext) WeightController(com.alibaba.otter.node.etl.load.loader.weight.WeightController) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

DbLoadContext (com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext)4 LoadException (com.alibaba.otter.node.etl.load.exception.LoadException)2 LoadContext (com.alibaba.otter.node.etl.load.loader.LoadContext)2 AggregationItem (com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem)1 PipeKey (com.alibaba.otter.node.etl.common.pipe.PipeKey)1 SetlFuture (com.alibaba.otter.node.etl.extract.SetlFuture)1 FileLoadContext (com.alibaba.otter.node.etl.load.loader.db.context.FileLoadContext)1 WeightController (com.alibaba.otter.node.etl.load.loader.weight.WeightController)1 EtlEventData (com.alibaba.otter.shared.arbitrate.model.EtlEventData)1 Channel (com.alibaba.otter.shared.common.model.config.channel.Channel)1 Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)1 DbBatch (com.alibaba.otter.shared.etl.model.DbBatch)1 EventData (com.alibaba.otter.shared.etl.model.EventData)1 FileBatch (com.alibaba.otter.shared.etl.model.FileBatch)1 Identity (com.alibaba.otter.shared.etl.model.Identity)1 RowBatch (com.alibaba.otter.shared.etl.model.RowBatch)1 SQLException (java.sql.SQLException)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 ExecutionException (java.util.concurrent.ExecutionException)1