use of com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext in project otter by alibaba.
the class DbLoadAction method load.
/**
 * Returns the records that have been processed successfully.
 */
public DbLoadContext load(RowBatch rowBatch, WeightController controller) {
    Assert.notNull(rowBatch);
    Identity identity = rowBatch.getIdentity();
    DbLoadContext context = buildContext(identity);
    try {
        List<EventData> datas = rowBatch.getDatas();
        context.setPrepareDatas(datas);
        // filter out records that have already been loaded
        datas = context.getPrepareDatas();
        if (datas == null || datas.size() == 0) {
            logger.info("##no eventdata for load, return");
            return context;
        }
        // All data has already been grouped by DataMediaSource in DbBatchLoader, and each
        // source medium is handled by its own DbLoadAction, so taking the source of the
        // first record is sufficient when setting the media source.
        context.setDataMediaSource(ConfigHelper.findDataMedia(context.getPipeline(), datas.get(0).getTableId()).getSource());
        interceptor.prepare(context);
        // filter out records that have already been loaded
        datas = context.getPrepareDatas();
        // For DDL idempotency, keep one DDL per batch whenever possible, so a failure or
        // rollback affects only that single statement.
        if (isDdlDatas(datas)) {
            doDdl(context, datas);
        } else {
            WeightBuckets<EventData> buckets = buildWeightBuckets(context, datas);
            List<Long> weights = buckets.weights();
            // start() must be called even if weights is empty
            controller.start(weights);
            if (CollectionUtils.isEmpty(datas)) {
                logger.info("##no eventdata for load");
            }
            // adjust the thread pool according to the manager configuration
            adjustPoolSize(context);
            // adjust the runtime parameters
            adjustConfig(context);
            // process the data
            for (int i = 0; i < weights.size(); i++) {
                Long weight = weights.get(i);
                controller.await(weight.intValue());
                // process all records under the same weight
                List<EventData> items = buckets.getItems(weight);
                logger.debug("##start load for weight:" + weight);
                // pre-process: merge multiple I/U/D operations on the same primary key
                items = DbLoadMerger.merge(items);
                // group the records by I/U/D
                DbLoadData loadData = new DbLoadData();
                doBefore(items, context, loadData);
                // execute the load
                doLoad(context, loadData);
                controller.single(weight.intValue());
                logger.debug("##end load for weight:" + weight);
            }
        }
        interceptor.commit(context);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        interceptor.error(context);
    } catch (Exception e) {
        interceptor.error(context);
        throw new LoadException(e);
    }
    // return the successfully processed records
    return context;
}
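The weight loop above relies on a simple contract from WeightController: start(weights) registers the plan, await(weight) blocks until every smaller weight has finished, and single(weight) marks a weight as done so larger ones may proceed. A minimal single-JVM sketch of that contract (an illustration only; otter's real WeightController coordinates weights across nodes) might look like this:
import java.util.List;
import java.util.TreeSet;

// Not otter's WeightController: a local stand-in that mirrors the
// start/await/single ordering used by DbLoadAction.load above.
class SimpleWeightBarrier {

    private final TreeSet<Long> pending = new TreeSet<Long>();

    public synchronized void start(List<Long> weights) {
        pending.addAll(weights);
    }

    public synchronized void await(long weight) throws InterruptedException {
        // a weight may run only when no smaller weight is still unfinished
        while (!pending.headSet(weight).isEmpty()) {
            wait();
        }
    }

    public synchronized void single(long weight) {
        pending.remove(weight); // mark this weight as finished
        notifyAll();            // wake threads waiting on larger weights
    }
}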
use of com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext in project otter by alibaba.
the class DbLoadAction method buildContext.
private DbLoadContext buildContext(Identity identity) {
    DbLoadContext context = new DbLoadContext();
    context.setIdentity(identity);
    Channel channel = configClientService.findChannel(identity.getChannelId());
    Pipeline pipeline = configClientService.findPipeline(identity.getPipelineId());
    context.setChannel(channel);
    context.setPipeline(pipeline);
    return context;
}
use of com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext in project otter by alibaba.
the class LoadTask method run.
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData etlEventData = arbitrateEventService.loadEvent().await(pipelineId);
            Runnable task = new Runnable() {

                public void run() {
                    // set up profiling information
                    boolean profiling = isProfiling();
                    Long profilingStartTime = null;
                    if (profiling) {
                        profilingStartTime = System.currentTimeMillis();
                    }
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    String currentName = Thread.currentThread().getName();
                    Thread.currentThread().setName(createTaskName(pipelineId, "LoadWorker"));
                    List<LoadContext> processedContexts = null;
                    try {
                        // later on we could check whether the synced data is rowData
                        List<PipeKey> keys = (List<PipeKey>) etlEventData.getDesc();
                        DbBatch dbBatch = rowDataPipeDelegate.get(keys);
                        // dbBatch may be null: fetching from the pipe can fail after a long block
                        // caused by memory pressure or a network problem, and the previous node
                        // may already have deleted the data
                        if (dbBatch == null) {
                            processMissData(pipelineId, "load miss data with keys:" + keys.toString());
                            return;
                        }
                        // perform the data load
                        otterLoaderFactory.setStartTime(dbBatch.getRowBatch().getIdentity(), etlEventData.getStartTime());
                        processedContexts = otterLoaderFactory.load(dbBatch);
                        if (profiling) {
                            Long profilingEndTime = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.LOAD, new AggregationItem(profilingStartTime, profilingEndTime));
                        }
                        // notify single() once processing has completed
                        arbitrateEventService.loadEvent().single(etlEventData);
                    } catch (Throwable e) {
                        if (!isInterrupt(e)) {
                            logger.error(String.format("[%s] loadWork executor is error! data:%s", pipelineId, etlEventData), e);
                        } else {
                            logger.info(String.format("[%s] loadWork executor is interrupted! data:%s", pipelineId, etlEventData), e);
                        }
                        if (processedContexts != null) {
                            // the load itself succeeded but notifying the arbitrator failed,
                            // so the records must still be written to the store
                            for (LoadContext context : processedContexts) {
                                try {
                                    if (context instanceof DbLoadContext) {
                                        dbLoadInterceptor.error((DbLoadContext) context);
                                    }
                                } catch (Throwable ie) {
                                    logger.error(String.format("[%s] interceptor process error failed!", pipelineId), ie);
                                }
                            }
                        }
                        if (!isInterrupt(e)) {
                            sendRollbackTermin(pipelineId, e);
                        }
                    } finally {
                        Thread.currentThread().setName(currentName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };
            // build a pending task so it can be aborted when the thread pool shuts down
            SetlFuture extractFuture = new SetlFuture(StageType.LOAD, etlEventData.getProcessId(), pendingFuture, task);
            executorService.execute(extractFuture);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] loadTask is interrupted!", pipelineId), e);
                // release the lock
                return;
            } else {
                logger.error(String.format("[%s] loadTask is error!", pipelineId), e);
                // arbitrateEventService.loadEvent().release(pipelineId); // release the lock
                // release the lock first, then send the rollback signal
                sendRollbackTermin(pipelineId, e);
            }
        }
    }
}
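run() brackets every worker with MDC bookkeeping so each pipeline's log lines land in the right file, and the finally block restores the thread's previous state. That save/restore idiom can be isolated into a small helper; below is a sketch where the key name "pipelineId" is a hypothetical placeholder for OtterConstants.splitPipelineLogFileKey:
import org.slf4j.MDC;

// Sketch of the per-pipeline MDC pattern used in run() above: tag the current
// thread's logs with the pipeline id for the duration of one task, then restore
// whatever value was there before.
public final class MdcScope implements AutoCloseable {

    private static final String KEY = "pipelineId"; // hypothetical key name

    private final String previous;

    public MdcScope(long pipelineId) {
        previous = MDC.get(KEY);
        MDC.put(KEY, String.valueOf(pipelineId));
    }

    @Override
    public void close() {
        if (previous != null) {
            MDC.put(KEY, previous); // restore the outer scope's value
        } else {
            MDC.remove(KEY);
        }
    }
}
A caller would wrap one unit of work in try (MdcScope scope = new MdcScope(pipelineId)) { ... } so the restore happens even when the task throws.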
use of com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext in project otter by alibaba.
the class DataBatchLoader method load.
public List<LoadContext> load(DbBatch data) {
    final RowBatch rowBatch = data.getRowBatch();
    final FileBatch fileBatch = data.getFileBatch();
    boolean existFileBatch = (fileBatch != null && !CollectionUtils.isEmpty(fileBatch.getFiles()) && data.getRoot() != null);
    boolean existRowBatch = (rowBatch != null && !CollectionUtils.isEmpty(rowBatch.getDatas()));
    int count = 0;
    List<RowBatch> rowBatchs = null;
    if (existRowBatch) {
        // group and merge by media source; each source gets its own load channel
        rowBatchs = split(rowBatch);
        count += rowBatchs.size();
    }
    if (existFileBatch) {
        count += 1;
    }
    WeightController controller = new WeightController(count);
    List<Future> futures = new ArrayList<Future>();
    ExecutorCompletionService completionService = new ExecutorCompletionService(executorService);
    if (existFileBatch) {
        submitFileBatch(futures, completionService, fileBatch, data.getRoot(), controller);
    }
    if (existRowBatch) {
        submitRowBatch(futures, completionService, rowBatchs, controller);
    }
    // collect the asynchronous results first, remembering the index where an error occurred
    List<LoadContext> processedContexts = new ArrayList<LoadContext>();
    int index = 0;
    LoadException exception = null;
    while (index < futures.size()) {
        try {
            // take() may also be interrupted
            Future future = completionService.take();
            future.get();
        } catch (InterruptedException e) {
            exception = new LoadException(e);
            break;
        } catch (ExecutionException e) {
            exception = new LoadException(e);
            break;
        }
        index++;
    }
    // if any task came back with an exception, abort the whole schedule
    if (index < futures.size()) {
        // index < size means an error occurred: cancel the unfinished futures and collect
        // the finished results so duplicates can be filtered on re-entry
        for (int errorIndex = 0; errorIndex < futures.size(); errorIndex++) {
            Future future = futures.get(errorIndex);
            if (future.isDone()) {
                try {
                    LoadContext loadContext = (LoadContext) future.get();
                    if (loadContext instanceof DbLoadContext) {
                        // record the error to the store
                        dbInterceptor.error((DbLoadContext) loadContext);
                    }
                } catch (InterruptedException e) {
                    // ignore
                } catch (ExecutionException e) {
                    // ignore
                } catch (Exception e) {
                    logger.error("interceptor process error failed", e);
                }
            } else {
                // cancel whatever has not finished yet
                future.cancel(true);
            }
        }
    } else {
        for (int i = 0; i < futures.size(); i++) {
            // collect the successfully processed results
            Future future = futures.get(i);
            try {
                LoadContext loadContext = (LoadContext) future.get();
                if (loadContext instanceof DbLoadContext) {
                    processedContexts.add((DbLoadContext) loadContext);
                }
            } catch (InterruptedException e) {
                // ignore
            } catch (ExecutionException e) {
                // ignore
            }
        }
    }
    if (exception != null) {
        throw exception;
    } else {
        return processedContexts;
    }
}
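The take()/get() loop, the break on the first failure, and the cancel(true) sweep over unfinished futures form a fail-fast pattern on ExecutorCompletionService. A self-contained sketch of the same pattern with dummy tasks (plain JDK code for illustration, not otter code):
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;

// Submit several tasks, consume results in completion order, stop at the first
// failure, then cancel everything still running.
public class FailFastLoadDemo {

    public static void main(String[] args) throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        ExecutorCompletionService<String> completion = new ExecutorCompletionService<String>(pool);
        List<Future<String>> futures = new ArrayList<Future<String>>();
        for (int i = 0; i < 4; i++) {
            final int id = i;
            futures.add(completion.submit(new Callable<String>() {

                public String call() throws Exception {
                    if (id == 2) {
                        throw new IllegalStateException("load " + id + " failed");
                    }
                    Thread.sleep(100L * id);
                    return "load " + id + " ok";
                }
            }));
        }
        int finished = 0;
        Exception error = null;
        while (finished < futures.size()) {
            try {
                completion.take().get(); // the first failed task surfaces here
            } catch (InterruptedException e) {
                error = e;
                break;
            } catch (ExecutionException e) {
                error = e;
                break;
            }
            finished++;
        }
        if (finished < futures.size()) {
            for (Future<String> f : futures) {
                if (!f.isDone()) {
                    f.cancel(true); // interrupt the loads that have not finished
                }
            }
        }
        pool.shutdown();
        System.out.println(error != null ? "aborted: " + error : "all done");
    }
}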