use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.
the class DbLoadAction method load.
/**
 * Returns the context holding the records that were processed successfully.
 */
public DbLoadContext load(RowBatch rowBatch, WeightController controller) {
    Assert.notNull(rowBatch);
    Identity identity = rowBatch.getIdentity();
    DbLoadContext context = buildContext(identity);
    try {
        List<EventData> datas = rowBatch.getDatas();
        context.setPrepareDatas(datas);
        // filter out records that were already loaded (duplicate-entry filtering)
        datas = context.getPrepareDatas();
        if (datas == null || datas.size() == 0) {
            logger.info("##no eventdata for load, return");
            return context;
        }
        // DbBatchLoader has already grouped the data by DataMediaSource, and each
        // source medium is handled by its own DbLoadAction, so taking the source
        // of the first record is enough when setting the media source.
        context.setDataMediaSource(ConfigHelper.findDataMedia(context.getPipeline(), datas.get(0).getTableId()).getSource());
        interceptor.prepare(context);
        // filter out records that were already loaded (duplicate-entry filtering)
        datas = context.getPrepareDatas();
        // For DDL idempotency, keep one DDL per batch whenever possible, so a
        // failure or rollback affects only that single statement.
        if (isDdlDatas(datas)) {
            doDdl(context, datas);
        } else {
            WeightBuckets<EventData> buckets = buildWeightBuckets(context, datas);
            List<Long> weights = buckets.weights();
            // start must be called even when weights is empty
            controller.start(weights);
            if (CollectionUtils.isEmpty(datas)) {
                logger.info("##no eventdata for load");
            }
            // adjust the thread pool according to the manager configuration
            adjustPoolSize(context);
            // adjust runtime parameters
            adjustConfig(context);
            // process the data
            for (int i = 0; i < weights.size(); i++) {
                Long weight = weights.get(i);
                controller.await(weight.intValue());
                // process all data under the same weight
                List<EventData> items = buckets.getItems(weight);
                logger.debug("##start load for weight:" + weight);
                // pre-process: merge multiple I/U/D operations on the same primary key
                items = DbLoadMerger.merge(items);
                // group the merged data by I/U/D type
                DbLoadData loadData = new DbLoadData();
                doBefore(items, context, loadData);
                // execute the load
                doLoad(context, loadData);
                controller.single(weight.intValue());
                logger.debug("##end load for weight:" + weight);
            }
        }
        interceptor.commit(context);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        interceptor.error(context);
    } catch (Exception e) {
        interceptor.error(context);
        throw new LoadException(e);
    }
    // return the successfully processed records
    return context;
}
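
The weight loop above leans on DbLoadMerger.merge to collapse multiple I/U/D events on the same primary key into one net operation before loading. A minimal, self-contained sketch of that merge idea follows; the Event record and EventType enum are hypothetical simplifications, and Otter's real merger also reconciles column lists and delete/insert sequences.

import java.util.*;

public class MergeSketch {

    enum EventType { INSERT, UPDATE, DELETE }

    record Event(long pk, EventType type) {}

    // Keep only the last-writer-wins net operation per primary key, preserving
    // first-seen key order (which a LinkedHashMap provides).
    static Collection<Event> merge(List<Event> events) {
        Map<Long, Event> merged = new LinkedHashMap<>();
        for (Event e : events) {
            Event prev = merged.get(e.pk());
            if (prev != null && prev.type() == EventType.INSERT && e.type() == EventType.UPDATE) {
                // an INSERT followed by an UPDATE must still be applied as an INSERT,
                // because the row does not exist at the target yet
                e = new Event(e.pk(), EventType.INSERT);
            }
            merged.put(e.pk(), e);
        }
        return merged.values();
    }

    public static void main(String[] args) {
        List<Event> batch = List.of(
            new Event(1, EventType.INSERT),
            new Event(1, EventType.UPDATE), // collapses into the INSERT
            new Event(2, EventType.DELETE));
        System.out.println(merge(batch)); // [Event[pk=1, type=INSERT], Event[pk=2, type=DELETE]]
    }
}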
use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.
the class DbLoadAction method doDdl.
/**
 * Executes the DDL statements. The logic is deliberately simple: serial execution.
 *
 * @param context
 * @param eventDatas
 */
private void doDdl(DbLoadContext context, List<EventData> eventDatas) {
    for (final EventData data : eventDatas) {
        DataMedia dataMedia = ConfigHelper.findDataMedia(context.getPipeline(), data.getTableId());
        final DbDialect dbDialect = dbDialectFactory.getDbDialect(context.getIdentity().getPipelineId(), (DbMediaSource) dataMedia.getSource());
        Boolean skipDdlException = context.getPipeline().getParameters().getSkipDdlException();
        try {
            Boolean result = dbDialect.getJdbcTemplate().execute(new StatementCallback<Boolean>() {

                public Boolean doInStatement(Statement stmt) throws SQLException, DataAccessException {
                    Boolean result = true; // must start as true, or the &= chain below could never succeed
                    if (dbDialect instanceof MysqlDialect && StringUtils.isNotEmpty(data.getDdlSchemaName())) {
                        // For MySQL, switch to the schema the DDL ran under on the source.
                        // result &= stmt.execute("use " + data.getDdlSchemaName());
                        // Quote the name so that a schema named after a reserved word
                        // such as "Order" no longer errors out and breaks the sync.
                        result &= stmt.execute("use `" + data.getDdlSchemaName() + "`");
                    }
                    result &= stmt.execute(data.getSql());
                    return result;
                }
            });
            if (result) {
                // record as a successfully processed statement
                context.getProcessedDatas().add(data);
            } else {
                context.getFailedDatas().add(data);
            }
        } catch (Throwable e) {
            if (skipDdlException) {
                // do skip
                logger.warn("skip exception for ddl : {} , caused by {}", data, ExceptionUtils.getFullStackTrace(e));
            } else {
                throw new LoadException(e);
            }
        }
    }
}
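
The backtick quoting above is what allows a schema named after a reserved word (such as "Order") to be selected before the DDL runs. A minimal JDBC sketch of the same idea, assuming a reachable MySQL instance, an existing schema named Order, and placeholder credentials:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class UseSchemaSketch {
    public static void main(String[] args) throws Exception {
        // placeholder URL and credentials; substitute your own
        try (Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/", "user", "pass");
             Statement stmt = conn.createStatement()) {
            String schema = "Order"; // a reserved word used as a database name
            // stmt.execute("use " + schema);     // fails: syntax error near 'Order'
            stmt.execute("use `" + schema + "`"); // works: the identifier is quoted
            stmt.execute("CREATE TABLE IF NOT EXISTS t (id INT PRIMARY KEY)");
        }
    }
}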
use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.
the class DatabaseExtractor method extract.
@Override
public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    Assert.notNull(dbBatch.getRowBatch());
    // read the configuration
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    boolean mustDb = pipeline.getParameters().getSyncConsistency().isMedia();
    // in row mode a database back-query is mandatory
    boolean isRow = pipeline.getParameters().getSyncMode().isRow();
    // adjust the thread pool once per run; the Extractor itself is pooled
    adjustPoolSize(pipeline.getParameters().getExtractPoolSize());
    ExecutorCompletionService completionService = new ExecutorCompletionService(executor);
    // submit the work concurrently
    ExtractException exception = null;
    // process each table
    List<DataItem> items = new ArrayList<DataItem>();
    List<Future> futures = new ArrayList<Future>();
    List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
    for (EventData eventData : eventDatas) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }
        DataItem item = new DataItem(eventData);
        // In row mode, check whether the event already carries every column of the
        // row; if columns are missing, run a database query to fill them in.
        boolean flag = mustDb || (eventData.getSyncConsistency() != null && eventData.getSyncConsistency().isMedia());
        // Extra case: with Oracle, erosa sometimes produces records containing only
        // the primary key and no changed columns, which also requires a back-query.
        if (!flag && CollectionUtils.isEmpty(eventData.getUpdatedColumns())) {
            DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId());
            if (dataMedia.getSource().getType().isOracle()) {
                flag |= true;
                // Treat this data uniformly as a remedy operation; the record may no
                // longer exist in the database when erosa queries it back.
                eventData.setRemedy(true);
            }
        }
        if (isRow && !flag) {
            // check once up front to avoid contention across the worker threads;
            // for views there is a second check later on
            flag = checkNeedDbForRowMode(pipeline, eventData);
        }
        if (flag && (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate())) {
            // a back-query is needed: submit it for parallel execution
            Future future = completionService.submit(new DatabaseExtractWorker(pipeline, item), null);
            if (future.isDone()) {
                // Check immediately: with a CallerRuns policy the task may have run in
                // place, so fail fast on exceptions instead of waiting for all tasks.
                try {
                    future.get();
                } catch (InterruptedException e) {
                    // cancel the remaining tasks and exit immediately
                    cancel(futures);
                    throw new ExtractException(e);
                } catch (ExecutionException e) {
                    // cancel the remaining tasks and exit immediately
                    cancel(futures);
                    throw new ExtractException(e);
                }
            }
            // track the submitted task
            futures.add(future);
        }
        // keep the original order
        items.add(item);
    }
    // collect the results
    int index = 0;
    while (index < futures.size()) {
        // loop over every submitted task
        try {
            // take() may also be interrupted
            Future future = completionService.take();
            future.get();
        } catch (InterruptedException e) {
            exception = new ExtractException(e);
            // if any future failed, exit the loop
            break;
        } catch (ExecutionException e) {
            exception = new ExtractException(e);
            // if any future failed, exit the loop
            break;
        }
        index++;
    }
    if (index < futures.size()) {
        // index < size means a failure occurred: cancel the unfinished tasks; the
        // completed results are still collected for duplicate-entry filtering
        cancel(futures);
        throw exception;
    } else {
        // all tasks succeeded; build the result preserving the original order
        for (int i = 0; i < items.size(); i++) {
            DataItem item = items.get(i);
            if (item.filter) {
                // drop filtered data, e.g. rows that no longer existed during the back-query
                eventDatas.remove(item.getEventData());
            }
        }
    }
}
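
The method above is an instance of a fail-fast fan-out: submit tasks to an ExecutorCompletionService, take() completions as they arrive, and cancel everything on the first failure. A stripped-down, self-contained sketch of the pattern (the failing task is contrived for demonstration):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;

public class FailFastFanOut {
    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newFixedThreadPool(4);
        CompletionService<Void> completionService = new ExecutorCompletionService<>(executor);
        List<Future<Void>> futures = new ArrayList<>();
        for (int i = 0; i < 8; i++) {
            final int n = i;
            futures.add(completionService.submit(() -> {
                if (n == 5) throw new IllegalStateException("task " + n + " failed");
                return null;
            }));
        }
        try {
            // take() yields futures in completion order, so the first
            // failure surfaces as early as possible
            for (int i = 0; i < futures.size(); i++) {
                completionService.take().get();
            }
        } catch (ExecutionException e) {
            for (Future<Void> f : futures) {
                f.cancel(true); // interrupt whatever has not finished yet
            }
            throw new RuntimeException("aborted after first failure", e.getCause());
        } finally {
            executor.shutdown();
        }
    }
}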
use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.
the class FileExtractor method doFileExtract.
/**
 * Returns the FileData entries corresponding to this batch of changed rows.
 *
 * @param rowBatch
 * @return
 */
private List<FileData> doFileExtract(RowBatch rowBatch) {
    List<FileData> fileDatas = new ArrayList<FileData>();
    // process the data
    Pipeline pipeline = getPipeline(rowBatch.getIdentity().getPipelineId());
    List<EventData> eventDatas = rowBatch.getDatas();
    for (EventData eventData : eventDatas) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }
        List<DataMediaPair> dataMediaPairs = ConfigHelper.findDataMediaPairByMediaId(pipeline, eventData.getTableId());
        if (dataMediaPairs == null) {
            throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId() + " dataMediaPair is null,please check");
        }
        for (DataMediaPair dataMediaPair : dataMediaPairs) {
            if (dataMediaPair.getResolverData() == null
                || dataMediaPair.getResolverData().getExtensionDataType() == null
                || (dataMediaPair.getResolverData().getExtensionDataType().isClazz() && StringUtils.isBlank(dataMediaPair.getResolverData().getClazzPath()))
                || (dataMediaPair.getResolverData().getExtensionDataType().isSource() && StringUtils.isBlank(dataMediaPair.getResolverData().getSourceText()))) {
                continue;
            }
            FileResolver fileResolver = null;
            if (dataMediaPair.getResolverData() != null) {
                fileResolver = extensionFactory.getExtension(FileResolver.class, dataMediaPair.getResolverData());
            } else {
                continue;
            }
            if (fileResolver == null) {
                throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId() + " the fileResolver className = " + dataMediaPair.getResolverData().getClazzPath() + " is null ,please check the class");
            }
            if (fileResolver instanceof RemoteDirectoryFetcherAware) {
                RemoteDirectoryFetcherAware remoteDirectoryFetcherAware = (RemoteDirectoryFetcherAware) fileResolver;
                remoteDirectoryFetcherAware.setRemoteDirectoryFetcher(arandaRemoteDirectoryFetcher);
            }
            List<FileData> singleRowFileDatas = getSingleRowFileInfos(dataMediaPair.getId(), fileResolver, eventData);
            // deduplicate
            for (FileData data : singleRowFileDatas) {
                if (!fileDatas.contains(data)) {
                    fileDatas.add(data);
                }
            }
        }
    }
    // check whether duplicate-file (image) sync detection is required
    if (pipeline.getParameters().getFileDetect()) {
        doFileDetectCollector(pipeline, fileDatas);
    }
    return fileDatas;
}
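
The contains()-based deduplication above is O(n^2) in the number of FileData entries. As a hedged alternative, assuming FileData implements equals()/hashCode() consistently, a LinkedHashSet gives the same order-preserving result in O(n):

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

public class DedupSketch {
    // Order-preserving deduplication in O(n) instead of the O(n^2)
    // contains()-based loop; relies on equals()/hashCode() of the elements.
    static <T> List<T> dedup(List<T> input) {
        Set<T> seen = new LinkedHashSet<>(input);
        return new ArrayList<>(seen);
    }

    public static void main(String[] args) {
        System.out.println(dedup(List.of("a", "b", "a", "c", "b"))); // [a, b, c]
    }
}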
use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.
the class GroupExtractor method extract.
@Override
public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    Assert.notNull(dbBatch.getRowBatch());
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    List<DataMediaPair> dataMediaPairs = pipeline.getPairs();
    /**
     * Key = tableId<br>
     * Value = the list of this table's column groups that need to be synced<br>
     */
    Map<Long, List<ColumnGroup>> groupColumns = new HashMap<Long, List<ColumnGroup>>();
    for (DataMediaPair dataMediaPair : dataMediaPairs) {
        List<ColumnGroup> columnGroups = dataMediaPair.getColumnGroups();
        if (!CollectionUtils.isEmpty(columnGroups)) {
            groupColumns.put(dataMediaPair.getSource().getId(), columnGroups);
        }
    }
    List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
    for (EventData eventData : eventDatas) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }
        List<ColumnGroup> columnGroups = groupColumns.get(eventData.getTableId());
        if (!CollectionUtils.isEmpty(columnGroups)) {
            for (ColumnGroup columnGroup : columnGroups) {
                if (columnGroup != null && !CollectionUtils.isEmpty(columnGroup.getColumnPairs())) {
                    groupFilter(eventData, columnGroup);
                }
            }
        }
    }
}
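
GroupExtractor follows a common shape: index the configuration by key once (tableId to column groups), then run each event only through its matching groups. A minimal sketch of that filtering idea with hypothetical simplified types (Otter's actual groupFilter operates on EventData column lists, not a plain map):

import java.util.*;

public class GroupFilterSketch {

    record Event(long tableId, Map<String, String> columns) {}

    // Keep only the columns listed in the group for events whose table has a
    // configured column group; events without a group pass through untouched.
    static Event filter(Event event, Map<Long, Set<String>> groupsByTable) {
        Set<String> keep = groupsByTable.get(event.tableId());
        if (keep == null || keep.isEmpty()) {
            return event;
        }
        Map<String, String> filtered = new LinkedHashMap<>(event.columns());
        filtered.keySet().retainAll(keep);
        return new Event(event.tableId(), filtered);
    }

    public static void main(String[] args) {
        Map<Long, Set<String>> groups = Map.of(1L, Set.of("id", "name"));
        Event e = new Event(1L, Map.of("id", "7", "name", "x", "secret", "y"));
        System.out.println(filter(e, groups).columns()); // {id=7, name=x} (entry order may vary)
    }
}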