use of com.alibaba.otter.node.etl.load.loader.LoadContext in project otter by alibaba.
the class DataBatchLoader method load.
public List<LoadContext> load(DbBatch data) {
final RowBatch rowBatch = data.getRowBatch();
final FileBatch fileBatch = data.getFileBatch();
boolean existFileBatch = (rowBatch != null && !CollectionUtils.isEmpty(fileBatch.getFiles()) && data.getRoot() != null);
boolean existRowBatch = (rowBatch != null && !CollectionUtils.isEmpty(rowBatch.getDatas()));
int count = 0;
List<RowBatch> rowBatchs = null;
if (existRowBatch) {
// 根据介质内容进行分类合并,每个介质一个载入通道
rowBatchs = split(rowBatch);
count += rowBatchs.size();
}
if (existFileBatch) {
count += 1;
}
WeightController controller = new WeightController(count);
List<Future> futures = new ArrayList<Future>();
ExecutorCompletionService completionService = new ExecutorCompletionService(executorService);
if (existFileBatch) {
submitFileBatch(futures, completionService, fileBatch, data.getRoot(), controller);
}
if (existRowBatch) {
submitRowBatch(futures, completionService, rowBatchs, controller);
}
// 先获取一下异步处理的结果,记录一下出错的index
List<LoadContext> processedContexts = new ArrayList<LoadContext>();
int index = 0;
LoadException exception = null;
while (index < futures.size()) {
try {
// 它也可能被打断
Future future = completionService.take();
future.get();
} catch (InterruptedException e) {
exception = new LoadException(e);
break;
} catch (ExecutionException e) {
exception = new LoadException(e);
break;
}
index++;
}
// 任何一个线程返回,出现了异常,就退出整个调度
if (index < futures.size()) {
// 小于代表有错误,需要对未完成的记录进行cancel操作,对已完成的结果进行收集,做重复录入过滤记录
for (int errorIndex = 0; errorIndex < futures.size(); errorIndex++) {
Future future = futures.get(errorIndex);
if (future.isDone()) {
try {
LoadContext loadContext = (LoadContext) future.get();
if (loadContext instanceof DbLoadContext) {
// 做一下出错处理,记录到store中
dbInterceptor.error((DbLoadContext) loadContext);
}
} catch (InterruptedException e) {
// ignore
} catch (ExecutionException e) {
// ignore
} catch (Exception e) {
logger.error("interceptor process error failed", e);
}
} else {
// 对未完成的进行取消
future.cancel(true);
}
}
} else {
for (int i = 0; i < futures.size(); i++) {
// 收集一下正确处理完成的结果
Future future = futures.get(i);
try {
LoadContext loadContext = (LoadContext) future.get();
if (loadContext instanceof DbLoadContext) {
processedContexts.add((DbLoadContext) loadContext);
}
} catch (InterruptedException e) {
// ignore
} catch (ExecutionException e) {
// ignore
}
}
}
if (exception != null) {
throw exception;
} else {
return processedContexts;
}
}
use of com.alibaba.otter.node.etl.load.loader.LoadContext in project otter by alibaba.
the class LoadTask method run.
public void run() {
MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
while (running) {
try {
final EtlEventData etlEventData = arbitrateEventService.loadEvent().await(pipelineId);
Runnable task = new Runnable() {
public void run() {
// 设置profiling信息
boolean profiling = isProfiling();
Long profilingStartTime = null;
if (profiling) {
profilingStartTime = System.currentTimeMillis();
}
MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
String currentName = Thread.currentThread().getName();
Thread.currentThread().setName(createTaskName(pipelineId, "LoadWorker"));
List<LoadContext> processedContexts = null;
try {
// 后续可判断同步数据是否为rowData
List<PipeKey> keys = (List<PipeKey>) etlEventData.getDesc();
DbBatch dbBatch = rowDataPipeDelegate.get(keys);
// 可能拿到为null,因为内存不足或者网络异常,长时间阻塞时,导致从pipe拿数据出现异常,数据可能被上一个节点已经删除
if (dbBatch == null) {
processMissData(pipelineId, "load miss data with keys:" + keys.toString());
return;
}
// 进行数据load处理
otterLoaderFactory.setStartTime(dbBatch.getRowBatch().getIdentity(), etlEventData.getStartTime());
processedContexts = otterLoaderFactory.load(dbBatch);
if (profiling) {
Long profilingEndTime = System.currentTimeMillis();
stageAggregationCollector.push(pipelineId, StageType.LOAD, new AggregationItem(profilingStartTime, profilingEndTime));
}
// 处理完成后通知single已完成
arbitrateEventService.loadEvent().single(etlEventData);
} catch (Throwable e) {
if (!isInterrupt(e)) {
logger.error(String.format("[%s] loadWork executor is error! data:%s", pipelineId, etlEventData), e);
} else {
logger.info(String.format("[%s] loadWork executor is interrrupt! data:%s", pipelineId, etlEventData), e);
}
if (processedContexts != null) {
// 说明load成功了,但是通知仲裁器失败了,需要记录下记录到store
for (LoadContext context : processedContexts) {
try {
if (context instanceof DbLoadContext) {
dbLoadInterceptor.error((DbLoadContext) context);
}
} catch (Throwable ie) {
logger.error(String.format("[%s] interceptor process error failed!", pipelineId), ie);
}
}
}
if (!isInterrupt(e)) {
sendRollbackTermin(pipelineId, e);
}
} finally {
Thread.currentThread().setName(currentName);
MDC.remove(OtterConstants.splitPipelineLogFileKey);
}
}
};
// 构造pending任务,可在关闭线程时退出任务
SetlFuture extractFuture = new SetlFuture(StageType.LOAD, etlEventData.getProcessId(), pendingFuture, task);
executorService.execute(extractFuture);
} catch (Throwable e) {
if (isInterrupt(e)) {
logger.info(String.format("[%s] loadTask is interrupted!", pipelineId), e);
// 释放锁
return;
} else {
logger.error(String.format("[%s] loadTask is error!", pipelineId), e);
// arbitrateEventService.loadEvent().release(pipelineId); //
// 释放锁
// 先解除lock,后发送rollback信号
sendRollbackTermin(pipelineId, e);
}
}
}
}
Aggregations