use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.
the class DbLoadAction method load.
/**
 * The returned result contains the records that were processed successfully.
 */
public DbLoadContext load(RowBatch rowBatch, WeightController controller) {
    Assert.notNull(rowBatch);
    Identity identity = rowBatch.getIdentity();
    DbLoadContext context = buildContext(identity);
    try {
        List<EventData> datas = rowBatch.getDatas();
        context.setPrepareDatas(datas);
        // filter out duplicated records
        datas = context.getPrepareDatas();
        if (datas == null || datas.size() == 0) {
            logger.info("##no eventdata for load, return");
            return context;
        }
        // All data has already been grouped by DataMediaSource in DbBatchLoader, and each
        // source media type is handled by its own DbLoadAction, so taking the source of
        // the first record is enough when setting the media source.
        context.setDataMediaSource(ConfigHelper.findDataMedia(context.getPipeline(), datas.get(0).getTableId()).getSource());
        interceptor.prepare(context);
        // filter out duplicated records
        datas = context.getPrepareDatas();
        // Mainly to keep DDL idempotent: one DDL per batch where possible, so a failure
        // or rollback only affects that single SQL statement.
        if (isDdlDatas(datas)) {
            doDdl(context, datas);
        } else {
            WeightBuckets<EventData> buckets = buildWeightBuckets(context, datas);
            List<Long> weights = buckets.weights();
            // weights may be empty, but start must still be called
            controller.start(weights);
            if (CollectionUtils.isEmpty(datas)) {
                logger.info("##no eventdata for load");
            }
            // adjust the thread pool according to the manager configuration
            adjustPoolSize(context);
            // adjust runtime parameters
            adjustConfig(context);
            // process the data
            for (int i = 0; i < weights.size(); i++) {
                Long weight = weights.get(i);
                controller.await(weight.intValue());
                // process the data under the same weight
                List<EventData> items = buckets.getItems(weight);
                logger.debug("##start load for weight:" + weight);
                // pre-process: merge multiple I/U/D operations on the same primary key
                items = DbLoadMerger.merge(items);
                // group the records by I/U/D type
                DbLoadData loadData = new DbLoadData();
                doBefore(items, context, loadData);
                // execute the load
                doLoad(context, loadData);
                controller.single(weight.intValue());
                logger.debug("##end load for weight:" + weight);
            }
        }
        interceptor.commit(context);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        interceptor.error(context);
    } catch (Exception e) {
        interceptor.error(context);
        throw new LoadException(e);
    }
    // return the successfully processed records
    return context;
}
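For readers unfamiliar with the weight mechanism: records carry a weight, and a weight's bucket is processed only after the controller releases it, in ascending order. The sketch below is not otter's API, just a minimal standalone illustration of the bucket-by-weight ordering using a TreeMap; all names in it are hypothetical.

import java.util.*;

public class WeightBucketSketch {

    // Hypothetical stand-in for otter's WeightBuckets: items grouped by weight,
    // iterated in ascending weight order (TreeMap keeps keys sorted).
    public static void main(String[] args) {
        TreeMap<Long, List<String>> buckets = new TreeMap<Long, List<String>>();
        buckets.computeIfAbsent(2L, k -> new ArrayList<>()).add("row-c");
        buckets.computeIfAbsent(1L, k -> new ArrayList<>()).add("row-a");
        buckets.computeIfAbsent(1L, k -> new ArrayList<>()).add("row-b");

        for (Map.Entry<Long, List<String>> bucket : buckets.entrySet()) {
            // In the real code, controller.await(weight) blocks here until this weight
            // may be processed, and controller.single(weight) signals completion after.
            System.out.println("load weight " + bucket.getKey() + " -> " + bucket.getValue());
        }
    }
}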
use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.
the class DbLoadAction method doDdl.
/**
 * Executes DDL statements. The logic is simple: invoke them serially.
 *
 * @param context
 * @param eventDatas
 */
private void doDdl(DbLoadContext context, List<EventData> eventDatas) {
    for (final EventData data : eventDatas) {
        DataMedia dataMedia = ConfigHelper.findDataMedia(context.getPipeline(), data.getTableId());
        final DbDialect dbDialect = dbDialectFactory.getDbDialect(context.getIdentity().getPipelineId(),
                                                                  (DbMediaSource) dataMedia.getSource());
        Boolean skipDdlException = context.getPipeline().getParameters().getSkipDdlException();
        try {
            Boolean result = dbDialect.getJdbcTemplate().execute(new StatementCallback<Boolean>() {

                public Boolean doInStatement(Statement stmt) throws SQLException, DataAccessException {
                    Boolean result = true;
                    if (dbDialect instanceof MysqlDialect && StringUtils.isNotEmpty(data.getDdlSchemaName())) {
                        // For MySQL, switch to the schema the DDL ran under on the source database.
                        // result &= stmt.execute("use " +
                        // data.getDdlSchemaName());
                        // Quote the schema name so that reserved words such as "Order"
                        // do not raise an error and block synchronization.
                        result &= stmt.execute("use `" + data.getDdlSchemaName() + "`");
                    }
                    result &= stmt.execute(data.getSql());
                    return result;
                }
            });
            if (result) {
                // record the SQL as processed successfully
                context.getProcessedDatas().add(data);
            } else {
                context.getFailedDatas().add(data);
            }
        } catch (Throwable e) {
            if (skipDdlException) {
                // do skip
                logger.warn("skip exception for ddl : {} , caused by {}", data, ExceptionUtils.getFullStackTrace(e));
            } else {
                throw new LoadException(e);
            }
        }
    }
}
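The backtick quoting above matters whenever a MySQL schema name collides with a reserved word. A plain-JDBC sketch of the same idea, with the JdbcTemplate/StatementCallback plumbing stripped away; the connection URL and credentials are placeholders, not otter configuration:

import java.sql.*;

public class DdlSketch {

    // Executes a DDL statement after switching to the schema it was captured under.
    // Backticks keep reserved-word schema names such as `Order` from breaking USE.
    // The returned value is the combined Statement.execute result, mirroring the
    // callback above (execute returns true only when a ResultSet is produced).
    static boolean executeDdl(Connection conn, String schemaName, String ddl) throws SQLException {
        try (Statement stmt = conn.createStatement()) {
            boolean result = true;
            if (schemaName != null && !schemaName.isEmpty()) {
                result &= stmt.execute("use `" + schemaName + "`");
            }
            result &= stmt.execute(ddl);
            return result;
        }
    }

    public static void main(String[] args) throws SQLException {
        // placeholder URL and credentials
        try (Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/", "user", "pass")) {
            executeDdl(conn, "Order", "alter table `Order`.t add column c int");
        }
    }
}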
use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.
the class DbLoadMerger method mergeUpdate.
private static void mergeUpdate(EventData eventData, Map<RowKey, EventData> result) {
    RowKey rowKey = new RowKey(eventData.getTableId(), eventData.getSchemaName(), eventData.getTableName(), eventData.getKeys());
    if (!CollectionUtils.isEmpty(eventData.getOldKeys())) {
        // The primary key changed; cascading key changes (1->2, then 2->3) must be handled.
        RowKey oldKey = new RowKey(eventData.getTableId(), eventData.getSchemaName(), eventData.getTableName(), eventData.getOldKeys());
        if (!result.containsKey(oldKey)) {
            // no cascading needed
            result.put(rowKey, eventData);
        } else {
            EventData oldEventData = result.get(oldKey);
            eventData.setSize(oldEventData.getSize() + eventData.getSize());
            // If the previous change was an INSERT, turn this record's eventType into INSERT
            // and copy over column values present in the previous record but missing here.
            if (oldEventData.getEventType() == EventType.INSERT) {
                eventData.setEventType(EventType.INSERT);
                // remove the record stored under the old primary key
                result.remove(oldKey);
                EventData mergeEventData = replaceColumnValue(eventData, oldEventData);
                // clear oldKeys; an INSERT record does not need them
                mergeEventData.getOldKeys().clear();
                result.put(rowKey, mergeEventData);
            } else if (oldEventData.getEventType() == EventType.UPDATE) {
                // remove the record stored under the old primary key
                result.remove(oldKey);
                // If the previous change was an UPDATE, copy over data present in the
                // previous record but missing here.
                EventData mergeEventData = replaceColumnValue(eventData, oldEventData);
                result.put(rowKey, mergeEventData);
            } else {
                throw new LoadException("delete(has old pks) + update impossible happened!");
            }
        }
    } else {
        if (!result.containsKey(rowKey)) {
            // no primary key change
            result.put(rowKey, eventData);
        } else {
            EventData oldEventData = result.get(rowKey);
            // If the previous change was an INSERT, turn this record's eventType into INSERT
            // and copy over column values present in the previous record but missing here.
            if (oldEventData.getEventType() == EventType.INSERT) {
                eventData.setEventType(EventType.INSERT);
                EventData mergeEventData = replaceColumnValue(eventData, oldEventData);
                result.put(rowKey, mergeEventData);
            } else if (oldEventData.getEventType() == EventType.UPDATE) {
                // A 1->2 key change may be followed by a plain update of 2.
                // If the previous change was an UPDATE, copy over data present in the
                // previous record but missing here.
                EventData mergeEventData = replaceColumnValue(eventData, oldEventData);
                result.put(rowKey, mergeEventData);
            } else if (oldEventData.getEventType() == EventType.DELETE) {
                // abnormal case (delete + update): just overwrite with the update
                result.put(rowKey, eventData);
            }
        }
    }
}
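The merge rules above reduce to a small event-type transition table: INSERT followed by UPDATE collapses to INSERT, UPDATE followed by UPDATE stays UPDATE, and DELETE followed by UPDATE (an abnormal sequence) becomes UPDATE; only when the update also changed the primary key is delete + update treated as impossible and thrown as a LoadException. A standalone sketch of just that decision, with otter's column copying (replaceColumnValue) omitted:

public class MergeSketch {

    enum EventType { INSERT, UPDATE, DELETE }

    // Given the previously merged event type and a new UPDATE on the same row key,
    // return the merged event type (column-level merging omitted).
    static EventType mergeUpdateType(EventType previous) {
        switch (previous) {
            case INSERT:
                return EventType.INSERT; // I + U => I, so a replay still inserts the full row
            case UPDATE:
                return EventType.UPDATE; // U + U => U
            case DELETE:
                return EventType.UPDATE; // abnormal D + U: keep the update
            default:
                throw new IllegalStateException("unreachable");
        }
    }

    public static void main(String[] args) {
        System.out.println(mergeUpdateType(EventType.INSERT)); // INSERT
        System.out.println(mergeUpdateType(EventType.UPDATE)); // UPDATE
        System.out.println(mergeUpdateType(EventType.DELETE)); // UPDATE
    }
}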
use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.
the class FileLoadAction method moveFiles.
/**
 * Loads files with multiple threads, using a fast-fail strategy.
 */
private void moveFiles(FileLoadContext context, List<FileData> fileDatas, File rootDir) {
    Exception exception = null;
    adjustPoolSize(context);
    ExecutorCompletionService<Exception> executorCompletion = new ExecutorCompletionService<Exception>(executor);
    List<Future<Exception>> results = new ArrayList<Future<Exception>>();
    for (FileData fileData : fileDatas) {
        Future<Exception> future = executorCompletion.submit(new FileLoadWorker(context, rootDir, fileData));
        results.add(future);
        // fast fail
        if (future.isDone()) {
            // If the task ran in the submitting thread (the pool uses CallerRunsPolicy),
            // check its result immediately.
            try {
                exception = future.get();
            } catch (Exception e) {
                exception = e;
            }
            if (exception != null) {
                for (Future<Exception> result : results) {
                    if (!result.isDone() && !result.isCancelled()) {
                        result.cancel(true);
                    }
                }
                throw exception instanceof LoadException ? (LoadException) exception : new LoadException(exception);
            }
        }
    }
    int resultSize = results.size();
    int cursor = 0;
    while (cursor < resultSize) {
        try {
            Future<Exception> result = executorCompletion.take();
            exception = result.get();
        } catch (Exception e) {
            exception = e;
            break;
        }
        cursor++;
    }
    if (cursor != resultSize) {
        // a task failed; cancel all in-flight tasks immediately
        for (Future<Exception> future : results) {
            if (!future.isDone() && !future.isCancelled()) {
                future.cancel(true);
            }
        }
    }
    if (exception != null) {
        throw exception instanceof LoadException ? (LoadException) exception : new LoadException(exception);
    }
}
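The fast-fail pattern above — submit Callable<Exception> tasks, drain the completion service in completion order, and cancel everything on the first failure — can be reproduced standalone. A minimal sketch; the sleeping lambda is a hypothetical stand-in for FileLoadWorker:

import java.util.*;
import java.util.concurrent.*;

public class FastFailSketch {

    public static void main(String[] args) throws InterruptedException {
        ExecutorService executor = Executors.newFixedThreadPool(4);
        ExecutorCompletionService<Exception> completion = new ExecutorCompletionService<Exception>(executor);
        List<Future<Exception>> results = new ArrayList<Future<Exception>>();

        for (int i = 0; i < 8; i++) {
            final int id = i;
            // Workers report failure by *returning* an exception rather than throwing,
            // the same convention FileLoadWorker uses above.
            results.add(completion.submit(() -> {
                Thread.sleep(100);
                return id == 3 ? new Exception("worker " + id + " failed") : null;
            }));
        }

        Exception failure = null;
        for (int i = 0; i < results.size() && failure == null; i++) {
            try {
                failure = completion.take().get(); // first *completed* task, not submission order
            } catch (Exception e) {
                failure = e;
            }
        }
        if (failure != null) {
            for (Future<Exception> f : results) {
                f.cancel(true); // cancel in-flight work as soon as one task fails
            }
            System.out.println("fast-fail: " + failure.getMessage());
        }
        executor.shutdown();
    }
}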
use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.
the class FileLoadAction method load.
/**
 * The returned result contains the records that were processed successfully.
 */
public FileLoadContext load(FileBatch fileBatch, File rootDir, WeightController controller) {
    if (!rootDir.exists()) {
        throw new LoadException(rootDir.getPath() + " does not exist");
    }
    FileLoadContext context = buildContext(fileBatch.getIdentity());
    context.setPrepareDatas(fileBatch.getFiles());
    boolean isDryRun = context.getPipeline().getParameters().isDryRun();
    try {
        // info of the files that were copied successfully
        WeightBuckets<FileData> buckets = buildWeightBuckets(fileBatch.getIdentity(), fileBatch.getFiles());
        List<Long> weights = buckets.weights();
        controller.start(weights);
        // process the data
        for (int i = 0; i < weights.size(); i++) {
            Long weight = weights.get(i);
            controller.await(weight.intValue());
            if (logger.isDebugEnabled()) {
                logger.debug("##start load for weight:{}\n", weight);
            }
            // process the data under the same weight
            List<FileData> items = buckets.getItems(weight);
            if (context.getPipeline().getParameters().isDryRun()) {
                dryRun(context, items, rootDir);
            } else {
                moveFiles(context, items, rootDir);
            }
            controller.single(weight.intValue());
            if (logger.isDebugEnabled()) {
                logger.debug("##end load for weight:{}\n", weight);
            }
        }
        if (dump || isDryRun) {
            MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
            logger.info(FileloadDumper.dumpContext("successed", context));
            MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        if (dump || isDryRun) {
            MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
            logger.info(FileloadDumper.dumpContext("error", context));
            MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
        }
    } catch (Exception e) {
        if (dump || isDryRun) {
            MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
            logger.info(FileloadDumper.dumpContext("error", context));
            MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
        }
        throw new LoadException(e);
    } finally {
        // delete the temp directory whether or not the move succeeded
        NioUtils.delete(rootDir, 3);
    }
    return context;
}
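The MDC calls around the dump are what route per-pipeline load logs into separate files: a value keyed by splitPipelineLoadLogFileKey is attached to the current thread, and a sifting appender in the logging configuration can split output by that key. A generic SLF4J sketch of the pattern; the "pipelineId" key and appender wiring are illustrative, not otter's actual configuration:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;

public class MdcSketch {

    private static final Logger logger = LoggerFactory.getLogger(MdcSketch.class);

    public static void main(String[] args) {
        String pipelineId = "42"; // illustrative pipeline id
        MDC.put("pipelineId", pipelineId); // a SiftingAppender keyed on "pipelineId"
                                           // would send this line to its own log file
        try {
            logger.info("load finished for pipeline {}", pipelineId);
        } finally {
            MDC.remove("pipelineId"); // always clean up; MDC state is per-thread
        }
    }
}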