Usage of com.alibaba.otter.node.etl.extract.exceptions.ExtractException in the otter project by Alibaba.
Class DatabaseExtractor, method extract:
/**
 * Decides, for every non-DDL event in the batch, whether a database look-up is needed and
 * runs the required look-ups concurrently through a {@code DatabaseExtractWorker} per event.
 *
 * <p>A look-up is requested when the pipeline or the event itself asks for media-level
 * consistency, when an Oracle-sourced event carries no updated columns, or when row mode
 * reports (via {@code checkNeedDbForRowMode}) that a look-up is needed. Only insert and update
 * events are submitted. After all tasks complete, events whose {@code DataItem} was flagged
 * {@code filter} are removed from the batch.
 *
 * <p>If the calling thread is interrupted while waiting, outstanding tasks are cancelled, the
 * thread's interrupt status is restored and an {@link ExtractException} is thrown.
 *
 * @param dbBatch batch to process; it and its row batch must not be null
 * @throws ExtractException if any look-up task fails or waiting for a task is interrupted
 */
@Override
public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    Assert.notNull(dbBatch.getRowBatch());
    // Read the pipeline configuration once.
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    boolean mustDb = pipeline.getParameters().getSyncConsistency().isMedia();
    // In row mode a database look-up may be mandatory.
    boolean isRow = pipeline.getParameters().getSyncMode().isRow();
    // Resize the thread pool; the extractor itself is pooled.
    adjustPoolSize(pipeline.getParameters().getExtractPoolSize());
    ExecutorCompletionService completionService = new ExecutorCompletionService(executor);
    ExtractException exception = null;
    // Remembers whether the wait loop below was interrupted, so the flag can be restored.
    boolean interrupted = false;
    List<DataItem> items = new ArrayList<DataItem>();
    List<Future> futures = new ArrayList<Future>();
    List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
    for (EventData eventData : eventDatas) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }
        DataItem item = new DataItem(eventData);
        // Media-level consistency (pipeline-wide or per event) always forces a look-up.
        boolean flag = mustDb || (eventData.getSyncConsistency() != null && eventData.getSyncConsistency().isMedia());
        // Extra case: Oracle (erosa) results sometimes carry only the primary key and no
        // changed columns, so a look-up is needed.
        if (!flag && CollectionUtils.isEmpty(eventData.getUpdatedColumns())) {
            DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId());
            if (dataMedia.getSource().getType().isOracle()) {
                flag = true;
                // Treat such data as a remedy operation; the record may no longer exist
                // in the database either.
                eventData.setRemedy(true);
            }
        }
        if (isRow && !flag) {
            // Check up-front to avoid contention inside the worker threads.
            // For views there is a further check later on.
            flag = checkNeedDbForRowMode(pipeline, eventData);
        }
        if (flag && (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate())) {
            // Submit the look-up for parallel execution.
            Future future = completionService.submit(new DatabaseExtractWorker(pipeline, item), null);
            if (future.isDone()) {
                // The task may already have run in the caller's thread (caller-runs policy),
                // so check right away and fail fast instead of waiting for all tasks.
                try {
                    future.get();
                } catch (InterruptedException e) {
                    // Cancel outstanding work, restore the interrupt status and leave.
                    cancel(futures);
                    Thread.currentThread().interrupt();
                    throw new ExtractException(e);
                } catch (ExecutionException e) {
                    // Cancel outstanding work and leave.
                    cancel(futures);
                    throw new ExtractException(e);
                }
            }
            // Remember the submitted task.
            futures.add(future);
        }
        // Keep items in their original order.
        items.add(item);
    }
    // Wait for every submitted task.
    int index = 0;
    while (index < futures.size()) {
        try {
            // take() may itself be interrupted.
            Future future = completionService.take();
            future.get();
        } catch (InterruptedException e) {
            interrupted = true;
            exception = new ExtractException(e);
            // Stop at the first failure.
            break;
        } catch (ExecutionException e) {
            exception = new ExtractException(e);
            // Stop at the first failure.
            break;
        }
        index++;
    }
    if (index < futures.size()) {
        // Fewer completions than submissions means a failure: cancel whatever is still pending.
        cancel(futures);
        if (interrupted) {
            // Do not swallow the interruption; let callers further up observe it.
            Thread.currentThread().interrupt();
        }
        throw exception;
    } else {
        // Everything succeeded: drop the events flagged for filtering, e.g. records that
        // no longer exist in the database.
        for (DataItem item : items) {
            if (item.filter) {
                eventDatas.remove(item.getEventData());
            }
        }
    }
}
Usage of com.alibaba.otter.node.etl.extract.exceptions.ExtractException in the otter project by Alibaba.
Class FileExtractor, method doFileExtract:
/**
 * Builds the list of {@code FileData} entries that correspond to the change events of a batch.
 *
 * <p>DDL events are skipped. For every other event, each matching {@code DataMediaPair} that
 * has usable resolver data (non-null, with an extension type, and a non-blank class path or
 * source text as applicable) is resolved to a {@code FileResolver}, which is then used to
 * obtain the file entries for that row. Entries already in the result are not added again.
 * When the pipeline has file detection enabled, {@code doFileDetectCollector} is applied to
 * the collected entries before returning.
 *
 * @param rowBatch batch whose events are inspected
 * @return the de-duplicated file entries, in the order they were first encountered
 * @throws ExtractException if no media pairs are found for an event's table id, or the
 *         resolver extension cannot be obtained
 */
private List<FileData> doFileExtract(RowBatch rowBatch) {
    List<FileData> fileDatas = new ArrayList<FileData>();
    Pipeline pipeline = getPipeline(rowBatch.getIdentity().getPipelineId());
    List<EventData> eventDatas = rowBatch.getDatas();
    for (EventData eventData : eventDatas) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }
        List<DataMediaPair> dataMediaPairs = ConfigHelper.findDataMediaPairByMediaId(pipeline, eventData.getTableId());
        if (dataMediaPairs == null) {
            throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId() + " dataMediaPair is null,please check");
        }
        for (DataMediaPair dataMediaPair : dataMediaPairs) {
            // Skip pairs without a usable resolver definition. This guard also guarantees that
            // the resolver data is non-null below, so no second null check is needed.
            if (dataMediaPair.getResolverData() == null
                || dataMediaPair.getResolverData().getExtensionDataType() == null
                || (dataMediaPair.getResolverData().getExtensionDataType().isClazz()
                    && StringUtils.isBlank(dataMediaPair.getResolverData().getClazzPath()))
                || (dataMediaPair.getResolverData().getExtensionDataType().isSource()
                    && StringUtils.isBlank(dataMediaPair.getResolverData().getSourceText()))) {
                continue;
            }
            FileResolver fileResolver = extensionFactory.getExtension(FileResolver.class, dataMediaPair.getResolverData());
            if (fileResolver == null) {
                throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId() + " the fileResolver className = " + dataMediaPair.getResolverData().getClazzPath() + " is null ,please check the class");
            }
            if (fileResolver instanceof RemoteDirectoryFetcherAware) {
                // Hand the remote directory fetcher to resolvers that ask for it.
                RemoteDirectoryFetcherAware remoteDirectoryFetcherAware = (RemoteDirectoryFetcherAware) fileResolver;
                remoteDirectoryFetcherAware.setRemoteDirectoryFetcher(arandaRemoteDirectoryFetcher);
            }
            List<FileData> singleRowFileDatas = getSingleRowFileInfos(dataMediaPair.getId(), fileResolver, eventData);
            // De-duplicate while preserving first-seen order.
            for (FileData data : singleRowFileDatas) {
                if (!fileDatas.contains(data)) {
                    fileDatas.add(data);
                }
            }
        }
    }
    // Optionally run the duplicate-sync detection for files.
    if (pipeline.getParameters().getFileDetect()) {
        doFileDetectCollector(pipeline, fileDatas);
    }
    return fileDatas;
}
Usage of com.alibaba.otter.node.etl.extract.exceptions.ExtractException in the otter project by Alibaba.
Class FreedomExtractor, method extract:
/**
 * Rewrites events that were captured from the pipeline's system buffer table so that they
 * describe the business table they refer to.
 *
 * <p>Only events whose schema and table name match the configured system schema and buffer
 * table (case-insensitively) are touched. DDL events on that table are left alone. When
 * {@code skipFreedom} is set the matching events are removed. Insert and update events are
 * rewritten: the target table is resolved from the {@code TABLE_ID} column (or from
 * {@code FULL_NAME} when the id is not positive), the event type is taken from the
 * {@code TYPE} column (update becomes insert), the keys are rebuilt from {@code PK_DATA},
 * the columns are cleared, and the event is marked row mode, media consistency and remedy.
 * Any other event type on the buffer table is removed, as is any event whose rewrite fails;
 * failures are logged rather than propagated.
 *
 * @param dbBatch batch to process; must not be null
 * @throws ExtractException declared by the signature; failures inside the per-event rewrite
 *         are caught and logged, and the affected event is dropped
 */
public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    // Read the pipeline configuration.
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    boolean skipFreedom = pipeline.getParameters().getSkipFreedom();
    String bufferSchema = pipeline.getParameters().getSystemSchema();
    String bufferTable = pipeline.getParameters().getSystemBufferTable();
    List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
    // A set keeps the look-ups of the final removeAll cheap.
    Set<EventData> removeDatas = new HashSet<EventData>();
    for (EventData eventData : eventDatas) {
        if (StringUtils.equalsIgnoreCase(bufferSchema, eventData.getSchemaName()) && StringUtils.equalsIgnoreCase(bufferTable, eventData.getTableName())) {
            if (eventData.getEventType().isDdl()) {
                continue;
            }
            if (skipFreedom) {
                // Buffer-table events are to be ignored altogether.
                removeDatas.add(eventData);
                continue;
            }
            // Only insert / update records are processed.
            if (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate()) {
                // Rewrite the EventData according to the system table's definition.
                EventColumn tableIdColumn = getMatchColumn(eventData.getColumns(), TABLE_ID);
                // Resolve the media that belongs to the table id.
                try {
                    DataMedia dataMedia = null;
                    Long tableId = Long.valueOf(tableIdColumn.getColumnValue());
                    eventData.setTableId(tableId);
                    if (tableId <= 0) {
                        // No usable id: look the media up by "schema.table" from the full name.
                        EventColumn fullNameColumn = getMatchColumn(eventData.getColumns(), FULL_NAME);
                        if (fullNameColumn != null) {
                            String[] names = StringUtils.split(fullNameColumn.getColumnValue(), ".");
                            if (names.length >= 2) {
                                dataMedia = ConfigHelper.findSourceDataMedia(pipeline, names[0], names[1]);
                                eventData.setTableId(dataMedia.getId());
                            } else {
                                // Report the offending full name itself; concatenating the
                                // array would only print its identity hash.
                                throw new ConfigException("no such DataMedia " + fullNameColumn.getColumnValue());
                            }
                        }
                    } else {
                        // An explicit table id must be matched strictly; if nothing is found
                        // the record does not need to be synchronised.
                        dataMedia = ConfigHelper.findDataMedia(pipeline, tableId);
                    }
                    DbDialect dbDialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) dataMedia.getSource());
                    // Account for multi-table patterns such as offer[1-128].
                    if (!dataMedia.getNameMode().getMode().isSingle() || !dataMedia.getNamespaceMode().getMode().isSingle()) {
                        boolean hasError = true;
                        EventColumn fullNameColumn = getMatchColumn(eventData.getColumns(), FULL_NAME);
                        if (fullNameColumn != null) {
                            String[] names = StringUtils.split(fullNameColumn.getColumnValue(), ".");
                            if (names.length >= 2) {
                                eventData.setSchemaName(names[0]);
                                eventData.setTableName(names[1]);
                                hasError = false;
                            }
                        }
                        if (hasError) {
                            // The concrete table cannot be determined: log it and skip the record.
                            logger.warn("dataMedia mode:{} , fullname:{} ", dataMedia.getMode(), fullNameColumn == null ? null : fullNameColumn.getColumnValue());
                            removeDatas.add(eventData);
                            continue;
                        }
                    } else {
                        eventData.setSchemaName(dataMedia.getNamespace());
                        eventData.setTableName(dataMedia.getName());
                    }
                    // Apply the business event type.
                    EventColumn typeColumn = getMatchColumn(eventData.getColumns(), TYPE);
                    EventType eventType = EventType.valuesOf(typeColumn.getColumnValue());
                    eventData.setEventType(eventType);
                    if (eventType.isUpdate()) {
                        // Force update to insert so that the target side can run a merge sql.
                        eventData.setEventType(EventType.INSERT);
                    } else if (eventType.isDdl()) {
                        dbDialect.reloadTable(eventData.getSchemaName(), eventData.getTableName());
                        // Drop the current record.
                        removeDatas.add(eventData);
                        continue;
                    }
                    // Rebuild the business primary-key columns.
                    EventColumn pkDataColumn = getMatchColumn(eventData.getColumns(), PK_DATA);
                    String pkData = pkDataColumn.getColumnValue();
                    String[] pks = StringUtils.split(pkData, PK_SPLIT);
                    Table table = dbDialect.findTable(eventData.getSchemaName(), eventData.getTableName());
                    List<EventColumn> newColumns = new ArrayList<EventColumn>();
                    Column[] primaryKeyColumns = table.getPrimaryKeyColumns();
                    if (primaryKeyColumns.length > pks.length) {
                        throw new ExtractException("data pk column size not match , data:" + eventData.toString());
                    }
                    // Build one key column per primary-key column, in table column order.
                    Column[] allColumns = table.getColumns();
                    int pkIndex = 0;
                    for (int i = 0; i < allColumns.length; i++) {
                        Column column = allColumns[i];
                        if (column.isPrimaryKey()) {
                            EventColumn newColumn = new EventColumn();
                            // The index is the column's position within the table.
                            newColumn.setIndex(i);
                            newColumn.setColumnName(column.getName());
                            newColumn.setColumnType(column.getTypeCode());
                            newColumn.setColumnValue(pks[pkIndex]);
                            newColumn.setKey(true);
                            newColumn.setNull(pks[pkIndex] == null);
                            newColumn.setUpdate(true);
                            newColumns.add(newColumn);
                            pkIndex++;
                        }
                    }
                    // Install the rebuilt data.
                    eventData.setKeys(newColumns);
                    eventData.setOldKeys(new ArrayList<EventColumn>());
                    eventData.setColumns(new ArrayList<EventColumn>());
                    // Row record plus database look-up.
                    eventData.setSyncMode(SyncMode.ROW);
                    eventData.setSyncConsistency(SyncConsistency.MEDIA);
                    eventData.setRemedy(true);
                    // Default to 1kb; sizing by binlog size might select rpc transfer and
                    // exhaust memory.
                    eventData.setSize(1024);
                } catch (ConfigException e) {
                    // Ignored on purpose: the system table is shared, so this record may
                    // belong to a different sync channel.
                    logger.info("find DataMedia error " + eventData.toString(), e);
                    removeDatas.add(eventData);
                    continue;
                } catch (Throwable e) {
                    // Any other failure: log it and drop the record.
                    logger.warn("process freedom data error " + eventData.toString(), e);
                    removeDatas.add(eventData);
                    continue;
                }
            } else {
                // Neither insert nor update: drop the record.
                removeDatas.add(eventData);
            }
        }
    }
    if (!CollectionUtils.isEmpty(removeDatas)) {
        eventDatas.removeAll(removeDatas);
    }
}
Usage of com.alibaba.otter.node.etl.extract.exceptions.ExtractException in the otter project by Alibaba.
Class ProcessorExtractor, method extract:
/**
 * Runs the configured {@code EventProcessor} filters over every event of the batch and removes
 * the events a processor rejects.
 *
 * <p>Processors that implement {@code DataSourceFetcherAware} receive a data-source fetcher and
 * are executed through the executor template; all other processors run inline, and the first
 * inline rejection stops the remaining pairs for that event.
 *
 * @param param batch whose row events are filtered
 * @throws ExtractException if no media pairs are configured for an event's table id
 */
public void extract(DbBatch param) throws ExtractException {
    ExecutorTemplate template = null;
    try {
        RowBatch batch = param.getRowBatch();
        final Pipeline pipeline = getPipeline(batch.getIdentity().getPipelineId());
        List<EventData> rows = batch.getDatas();
        // Synchronized because worker threads add to it; a set keeps removeAll look-ups cheap.
        final Set<EventData> rejected = Collections.synchronizedSet(new HashSet<EventData>());

        template = executorTemplateGetter.get();
        template.start();
        // Re-apply the configured pool size.
        template.adjustPoolSize(pipeline.getParameters().getExtractPoolSize());

        for (final EventData row : rows) {
            List<DataMediaPair> pairs = ConfigHelper.findDataMediaPairByMediaId(pipeline, row.getTableId());
            if (pairs == null) {
                throw new ExtractException("ERROR ## the dataMediaId = " + row.getTableId() + " dataMediaPair is null,please check");
            }

            for (DataMediaPair pair : pairs) {
                if (!pair.isExistFilter()) {
                    continue;
                }

                final EventProcessor processor = extensionFactory.getExtension(EventProcessor.class, pair.getFilterData());

                if (!(processor instanceof DataSourceFetcherAware)) {
                    // Plain processor: run it inline and stop at the first rejection.
                    if (!processor.process(row)) {
                        rejected.add(row);
                        break;
                    }
                    continue;
                }

                // Processor wants database access: supply a fetcher, then run it on the pool.
                DataSourceFetcher fetcher = new DataSourceFetcher() {
                    @Override
                    public DataSource fetch(Long tableId) {
                        DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, tableId);
                        return dataSourceService.getDataSource(pipeline.getId(), dataMedia.getSource());
                    }
                };
                ((DataSourceFetcherAware) processor).setDataSourceFetcher(fetcher);

                Runnable task = new Runnable() {
                    @Override
                    public void run() {
                        MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipeline.getId()));
                        if (!processor.process(row)) {
                            // Mark the event for removal.
                            rejected.add(row);
                        }
                    }
                };
                template.submit(task);
            }
        }

        // Wait until every submitted task has finished.
        template.waitForResult();
        if (!CollectionUtils.isEmpty(rejected)) {
            rows.removeAll(rejected);
        }
    } finally {
        if (template != null) {
            executorTemplateGetter.release(template);
        }
    }
}
Usage of com.alibaba.otter.node.etl.extract.exceptions.ExtractException in the otter project by Alibaba.
Class ViewExtractor, method extract:
/**
 * Restricts each non-DDL event to the columns configured for its table.
 *
 * <p>Tables without configured column pairs are left untouched (all columns are synchronised).
 * For the others, columns, keys and (when present) old keys are passed through
 * {@code columnFilter}. An event left without keys is an error; an update left with nothing to
 * synchronise and no key change is removed from the batch.
 *
 * @param dbBatch batch to process; it and its row batch must not be null
 * @throws ExtractException if an event has no primary-key columns after filtering
 */
@Override
public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    Assert.notNull(dbBatch.getRowBatch());

    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    List<DataMediaPair> mediaPairs = pipeline.getPairs();

    // Both maps are keyed by source table id: the column pairs to sync, and the pair mode.
    Map<Long, List<ColumnPair>> pairsByTable = new HashMap<Long, List<ColumnPair>>();
    Map<Long, ColumnPairMode> modeByTable = new HashMap<Long, ColumnPairMode>();
    for (DataMediaPair mediaPair : mediaPairs) {
        List<ColumnPair> configured = mediaPair.getColumnPairs();
        Long sourceId = mediaPair.getSource().getId();
        modeByTable.put(sourceId, mediaPair.getColumnPairMode());
        // No column pairs means full-column sync, so nothing is registered.
        if (!CollectionUtils.isEmpty(configured)) {
            pairsByTable.put(sourceId, configured);
        }
    }

    List<EventData> events = dbBatch.getRowBatch().getDatas();
    // A set keeps the look-ups of the final removeAll cheap.
    Set<EventData> discarded = new HashSet<EventData>();
    for (EventData event : events) {
        if (event.getEventType().isDdl()) {
            continue;
        }

        List<ColumnPair> wanted = pairsByTable.get(event.getTableId());
        if (CollectionUtils.isEmpty(wanted)) {
            continue;
        }

        // Keep only the columns that are configured for synchronisation.
        ColumnPairMode mode = modeByTable.get(event.getTableId());
        event.setColumns(columnFilter(event.getColumns(), wanted, mode));
        event.setKeys(columnFilter(event.getKeys(), wanted, mode));
        if (!CollectionUtils.isEmpty(event.getOldKeys())) {
            event.setOldKeys(columnFilter(event.getOldKeys(), wanted, mode));
        }

        if (CollectionUtils.isEmpty(event.getKeys())) {
            // No primary key left: report an error.
            throw new ExtractException(String.format("eventData after viewExtractor has no pks , pls check! identity:%s, new eventData:%s", dbBatch.getRowBatch().getIdentity().toString(), event.toString()));
        }

        if (!event.getEventType().isUpdate()) {
            continue;
        }
        // An update with no columns left to sync and no key change would produce invalid
        // sql, so it is dropped.
        boolean nothingToSync = CollectionUtils.isEmpty(event.getColumns()) || CollectionUtils.isEmpty(event.getUpdatedColumns());
        if (nothingToSync && CollectionUtils.isEmpty(event.getOldKeys())) {
            discarded.add(event);
        }
    }

    if (!CollectionUtils.isEmpty(discarded)) {
        events.removeAll(discarded);
    }
}
Aggregations