use of com.alibaba.otter.node.etl.select.exceptions.SelectException in project otter by alibaba.
the class MessageParser method parse.
/**
 * Converts the {@code Entry} objects delivered by canal into otter's internal {@code EventData} objects.
 *
 * <pre>
 * Filtering applied while parsing:
 * 1. Transaction begin/end entries only act as transaction boundaries and are never emitted.
 * 2. Rows of the loopback mark tables (retl.retl_client / retl.retl_mark) are used to detect loopback
 *    transactions and are dropped themselves; row data of a loopback transaction is dropped as well,
 *    unless loopback remedy is enabled and the transaction qualifies for it.
 * 3. Heartbeat rows (retl.xdual) are dropped by {@code internParse(Pipeline, Entry)}.
 * </pre>
 *
 * @param pipelineId id of the pipeline whose configuration drives the parsing
 * @param datas canal entries, in the order they were received
 * @return the parsed events; empty when everything was filtered out
 * @throws SelectException wrapping any exception raised while parsing
 */
public List<EventData> parse(Long pipelineId, List<Entry> datas) throws SelectException {
    List<EventData> eventDatas = new ArrayList<EventData>();
    Pipeline pipeline = configClientService.findPipeline(pipelineId);
    List<Entry> transactionDataBuffer = new ArrayList<Entry>();
    // "hz" is the home site: data coming from us->hz has to be looped back to us, which requires the
    // loopback remedy algorithm to be switched on.
    PipelineParameter pipelineParameter = pipeline.getParameters();
    boolean enableLoopbackRemedy = pipelineParameter.isEnableRemedy() && pipelineParameter.isHome()
                                   && pipelineParameter.getRemedyAlgorithm().isLoopback();
    // true once the current transaction has been identified as loopback data
    boolean isLoopback = false;
    // true when the loopback data was produced by a regular otter sync; business code may write the mark
    // manually to suppress syncing, and such data must not be remedied
    boolean needLoopback = false;
    long now = System.currentTimeMillis();
    try {
        for (Entry entry : datas) {
            switch (entry.getEntryType()) {
                case TRANSACTIONBEGIN: {
                    isLoopback = false;
                    break;
                }
                case ROWDATA: {
                    String tableName = entry.getHeader().getTableName();
                    // is this the loopback mark table (retl_mark)?
                    boolean isMarkTable = tableName.equalsIgnoreCase(pipelineParameter.getSystemMarkTable());
                    if (isMarkTable) {
                        RowChange rowChange = RowChange.parseFrom(entry.getStoreValue());
                        if (!rowChange.getIsDdl()) {
                            int loopback = checkLoopback(pipeline, rowChange.getRowDatas(0));
                            // 2: loopback data produced by a regular sync, the only kind that is remedied
                            needLoopback |= (loopback == 2);
                            isLoopback |= (loopback > 0);
                        }
                    }
                    // otter 3.0 compatible mark table: its schema is arbitrary, so only the table name is compared
                    boolean isCompatibleLoopback = tableName.equalsIgnoreCase(compatibleMarkTable);
                    if (isCompatibleLoopback) {
                        RowChange rowChange = RowChange.parseFrom(entry.getStoreValue());
                        if (!rowChange.getIsDdl()) {
                            int loopback = checkCompatibleLoopback(pipeline, rowChange.getRowDatas(0));
                            needLoopback |= (loopback == 2);
                            isLoopback |= (loopback > 0);
                        }
                    }
                    boolean keepTransaction = !isLoopback || (enableLoopbackRemedy && needLoopback);
                    if (keepTransaction && !isMarkTable && !isCompatibleLoopback) {
                        transactionDataBuffer.add(entry);
                    }
                    break;
                }
                case TRANSACTIONEND: {
                    if (!isLoopback || (enableLoopbackRemedy && needLoopback)) {
                        flushTransactionBuffer(pipeline, transactionDataBuffer, needLoopback, now, eventDatas);
                    }
                    // reset the per-transaction state
                    isLoopback = false;
                    needLoopback = false;
                    transactionDataBuffer.clear();
                    break;
                }
                default:
                    break;
            }
        }
        // the last transaction may not have been closed by a TRANSACTIONEND entry; flush what is left
        if (!isLoopback || (enableLoopbackRemedy && needLoopback)) {
            flushTransactionBuffer(pipeline, transactionDataBuffer, needLoopback, now, eventDatas);
        }
    } catch (Exception e) {
        throw new SelectException(e);
    }
    return eventDatas;
}

/**
 * Parses every buffered entry of one transaction and appends the resulting events to {@code eventDatas}.
 *
 * @param pipeline pipeline the entries belong to
 * @param transactionDataBuffer buffered row-data entries of the transaction
 * @param needLoopback whether the transaction is loopback data that has to be remedied
 * @param now reference time in milliseconds used to measure the event delay
 * @param eventDatas output list receiving the parsed events
 */
private void flushTransactionBuffer(Pipeline pipeline, List<Entry> transactionDataBuffer, boolean needLoopback,
                                    long now, List<EventData> eventDatas) {
    for (Entry bufferEntry : transactionDataBuffer) {
        List<EventData> parseDatas = internParse(pipeline, bufferEntry);
        if (CollectionUtils.isEmpty(parseDatas)) {
            // nothing to emit, e.g. internParse returns null for ignored ddl/heartbeat entries
            continue;
        }
        // rough size estimate: spread the entry length evenly over its events
        long totalSize = bufferEntry.getHeader().getEventLength();
        long eachSize = totalSize / parseDatas.size();
        for (EventData eventData : parseDatas) {
            if (eventData == null) {
                continue;
            }
            eventData.setSize(eachSize);
            if (needLoopback) {
                // when the delay exceeds the configured threshold the row has to be re-read from the database
                if (now - eventData.getExecuteTime() > 1000 * pipeline.getParameters().getRemedyDelayThresoldForMedia()) {
                    eventData.setSyncConsistency(SyncConsistency.MEDIA);
                } else {
                    eventData.setSyncConsistency(SyncConsistency.BASE);
                }
                // flag the event as remedy data, regardless of whether the transaction end was seen
                eventData.setRemedy(true);
            }
            eventDatas.add(eventData);
        }
    }
}
use of com.alibaba.otter.node.etl.select.exceptions.SelectException in project otter by alibaba.
the class SelectTask method processTermin.
/**
 * Waits for the termin event that belongs to the given batch/process and then acks or rolls back the batch.
 *
 * @param lastStatus status returned for the previous batch ({@code true} means it was acked)
 * @param batchId id of the batch handled by this task; {@code -1} disables the batch id comparison
 * @param processId id of the process handled by this task; {@code -1} disables the process id comparison
 * @return {@code true} when the termin event is of a normal type and the batch was acked,
 *         {@code false} when it was rolled back
 * @throws InterruptedException if interrupted while waiting between retries
 * @throws SelectException if no matching termin event arrives within 30 attempts, or if the previous
 *         batch was rolled back while this one is acked
 */
private boolean processTermin(boolean lastStatus, Long batchId, Long processId) throws InterruptedException {
    SelectException mismatch = null;
    TerminEventData terminData = null;
    for (int attempt = 0; attempt < 30; attempt++) {
        // Termin events are handled asynchronously on the load side and travel over the network,
        // so they can occasionally arrive out of order.
        terminData = arbitrateEventService.terminEvent().await(pipelineId);
        Long terminBatchId = terminData.getBatchId();
        Long terminProcessId = terminData.getProcessId();
        if (terminBatchId == null) {
            // A rollback issued by the manager may carry no batch id; compare the process id instead.
            if (processId != -1L && !processId.equals(terminProcessId)) {
                mismatch = new SelectException("unmatched processId, SelectTask batchId = " + batchId + " processId = " + processId + " and Termin Event: " + terminData.toString());
            } else {
                mismatch = null;
            }
        } else if (batchId != -1L && !batchId.equals(terminBatchId)) {
            mismatch = new SelectException("unmatched terminId, SelectTask batchId = " + batchId + " processId = " + processId + " and Termin Event: " + terminData.toString());
        } else {
            mismatch = null;
        }
        if (mismatch == null) {
            // batch id / process id match, stop waiting
            break;
        }
        // wait one second for the next termin event
        Thread.sleep(1000);
    }
    if (mismatch != null) {
        throw mismatch;
    }
    if (needCheck) {
        checkContinueWork();
    }
    boolean status = terminData.getType().isNormal();
    if (!lastStatus && status) {
        // the previous batch failed but this one succeeded: the scheduling is out of order
        throw new SelectException(String.format("last status is rollback , but now [batchId:%d , processId:%d] is ack", batchId, terminData.getProcessId()));
    }
    if (status) {
        ack(batchId);
        sendDelayStat(pipelineId, terminData.getEndTime(), terminData.getFirstTime());
    } else {
        rollback(batchId);
    }
    // finally acknowledge the termin event itself
    arbitrateEventService.terminEvent().ack(terminData);
    return status;
}
use of com.alibaba.otter.node.etl.select.exceptions.SelectException in project otter by alibaba.
the class MessageParser method internParse.
/**
 * Builds one {@code EventData} from a single canal row change.<br>
 * Oracle: only the changed column values are available.<br>
 * <i>insert: all data is taken from afterColumns<br>
 * <i>delete: all data is taken from beforeColumns<br>
 * <i>update: before holds all primary keys plus the old values of the changed non-key columns, after holds
 * the new key and non-key values; for a composite key only the changed key columns are present<br>
 * Mysql: the complete row before and after the change is available.<br>
 * <i>insert: all data is taken from afterColumns<br>
 * <i>delete: all data is taken from beforeColumns<br>
 * <i>update: beforeColumns holds the full row before the change, afterColumns the full row after it<br>
 *
 * @param pipeline pipeline the entry belongs to
 * @param entry canal entry providing the header (schema, table, execute time, source type)
 * @param rowChange row change the row belongs to, providing the event type
 * @param rowData the row to convert
 * @return the populated event
 * @throws SelectException if no primary key column could be determined for the row
 */
private EventData internParse(Pipeline pipeline, Entry entry, RowChange rowChange, RowData rowData) {
    EventData eventData = new EventData();
    eventData.setTableName(entry.getHeader().getTableName());
    eventData.setSchemaName(entry.getHeader().getSchemaName());
    eventData.setEventType(EventType.valueOf(rowChange.getEventType().name()));
    eventData.setExecuteTime(entry.getHeader().getExecuteTime());
    EventType eventType = eventData.getEventType();
    TableInfoHolder tableHolder = null;
    // tables of the system schema are not configured as data media, so no media lookup for them
    if (!StringUtils.equalsIgnoreCase(pipeline.getParameters().getSystemSchema(), eventData.getSchemaName())) {
        boolean useTableTransform = pipeline.getParameters().getUseTableTransform();
        Table table = null;
        DataMediaPair dataMediaPair = ConfigHelper.findDataMediaPairBySourceName(pipeline,
            eventData.getSchemaName(),
            eventData.getTableName());
        DataMedia dataMedia = dataMediaPair.getSource();
        eventData.setTableId(dataMedia.getId());
        DataMedia targetDataMedia = dataMediaPair.getTarget();
        if (useTableTransform || dataMedia.getSource().getType().isOracle()) {
            // Oracle always needs a meta lookup; the same lookup is done when table meta transformation is
            // enabled (e.g. the oracle parser may report a non-physical primary key that has to be trusted).
            DbDialect dbDialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) dataMedia.getSource());
            table = dbDialect.findTable(eventData.getSchemaName(), eventData.getTableName());
            if (table == null) {
                logger.warn("find table[{}.{}] is null , may be drop table.", eventData.getSchemaName(), eventData.getTableName());
            }
            // For a DRDS target the shard columns of the target table are treated as primary keys of the
            // source table.
            DbDialect targetDbDialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) targetDataMedia.getSource());
            if (targetDbDialect.isDRDS()) {
                String targetSchemaName = buildName(eventData.getSchemaName(), dataMedia.getNamespaceMode(), targetDataMedia.getNamespaceMode());
                // the target table name is derived from the source TABLE name and the name modes
                String targetTableName = buildName(eventData.getTableName(), dataMedia.getNameMode(), targetDataMedia.getNameMode());
                String shardColumns = targetDbDialect.getShardColumns(targetSchemaName, targetTableName);
                // without table meta (see the warning above) there is no column to mark
                if (table != null && StringUtils.isNotEmpty(shardColumns)) {
                    for (String key : StringUtils.split(shardColumns, ',')) {
                        org.apache.ddlutils.model.Column col = table.findColumn(key, false);
                        if (col != null) {
                            col.setPrimaryKey(true);
                        } else {
                            logger.warn(String.format("shardColumn %s in table[%s.%s] is not found", key, eventData.getSchemaName(), eventData.getTableName()));
                        }
                    }
                }
            }
            tableHolder = new TableInfoHolder(dbDialect, table, useTableTransform);
        }
    }
    List<Column> beforeColumns = rowData.getBeforeColumnsList();
    List<Column> afterColumns = rowData.getAfterColumnsList();
    String tableName = eventData.getSchemaName() + "." + eventData.getTableName();
    // In row mode every column has to be flagged as updated; other settings may also require all columns.
    boolean isRowMode = pipeline.getParameters().getSyncMode().isRow();
    boolean needAllColumns = isRowMode || checkNeedAllColumns(pipeline);
    boolean isOracle = entry.getHeader().getSourceType() == CanalEntry.Type.ORACLE;
    boolean isMysql = entry.getHeader().getSourceType() == CanalEntry.Type.MYSQL;
    // primary keys after the change
    Map<String, EventColumn> keyColumns = new LinkedHashMap<String, EventColumn>();
    // primary keys before the change
    Map<String, EventColumn> oldKeyColumns = new LinkedHashMap<String, EventColumn>();
    // non-key columns
    Map<String, EventColumn> notKeyColumns = new LinkedHashMap<String, EventColumn>();
    if (eventType.isInsert() || eventType.isDelete()) {
        // insert reads the new row image, delete the old one; every column counts as updated
        List<Column> rowColumns = eventType.isInsert() ? afterColumns : beforeColumns;
        for (Column column : rowColumns) {
            Map<String, EventColumn> target = isKey(tableHolder, tableName, column) ? keyColumns : notKeyColumns;
            target.put(column.getName(), copyEventColumn(column, true, tableHolder));
        }
    } else if (eventType.isUpdate()) {
        for (Column column : beforeColumns) {
            if (isKey(tableHolder, tableName, column)) {
                oldKeyColumns.put(column.getName(), copyEventColumn(column, true, tableHolder));
                // Also record it as the new key: since mysql 5.6 the "minimal" row image may leave the
                // primary key out of the after image, so it has to be taken from the before image.
                keyColumns.put(column.getName(), copyEventColumn(column, true, tableHolder));
            } else if (needAllColumns && isOracle) {
                // For oracle, unchanged columns are missing from the after image, so remember the
                // non-key columns of the before image when all columns are required.
                notKeyColumns.put(column.getName(), copyEventColumn(column, isRowMode, tableHolder));
            }
        }
        for (Column column : afterColumns) {
            if (isKey(tableHolder, tableName, column)) {
                // primary key after the change
                keyColumns.put(column.getName(), copyEventColumn(column, true, tableHolder));
            } else if (needAllColumns || isOracle || column.getUpdated()) {
                // Oracle only sends the changed columns, so everything in its after image is an update;
                // mysql sends the full row and flags the changed columns individually.
                boolean isUpdate = !isMysql || column.getUpdated();
                // in row mode every column counts as updated
                notKeyColumns.put(column.getName(), copyEventColumn(column, isRowMode || isUpdate, tableHolder));
            }
        }
        if (isOracle) {
            // oracle specific reconciliation of old and new keys
            checkUpdateKeyColumns(oldKeyColumns, keyColumns);
        }
    }
    if (keyColumns.isEmpty()) {
        // tables without a primary key are not supported
        throw new SelectException("this rowdata has no pks , entry: " + entry.toString() + " and rowData: " + rowData);
    }
    List<EventColumn> keys = new ArrayList<EventColumn>(keyColumns.values());
    List<EventColumn> oldKeys = new ArrayList<EventColumn>(oldKeyColumns.values());
    List<EventColumn> columns = new ArrayList<EventColumn>(notKeyColumns.values());
    Collections.sort(keys, new EventColumnIndexComparable());
    Collections.sort(oldKeys, new EventColumnIndexComparable());
    Collections.sort(columns, new EventColumnIndexComparable());
    eventData.setKeys(keys);
    if (eventData.getEventType().isUpdate() && !oldKeys.equals(keys)) {
        // the primary key itself changed: keep the keys from before the change as well
        eventData.setOldKeys(oldKeys);
    }
    eventData.setColumns(columns);
    return eventData;
}
use of com.alibaba.otter.node.etl.select.exceptions.SelectException in project otter by alibaba.
the class MessageParser method internParse.
/**
 * Parses one canal entry into the events it contains.
 *
 * @param pipeline pipeline the entry belongs to
 * @param entry canal row-data entry
 * @return the parsed events; {@code null} when the entry is ignored (query events, ddl or heartbeat rows of
 *         the system schema, ddl while ddl sync is disabled, ddl on a table without a configured source
 *         media); a single ddl event when ddl sync is enabled; otherwise one event per row
 * @throws SelectException if the entry payload cannot be decoded
 */
private List<EventData> internParse(Pipeline pipeline, Entry entry) {
    RowChange rowChange = null;
    try {
        rowChange = RowChange.parseFrom(entry.getStoreValue());
    } catch (Exception e) {
        throw new SelectException("parser of canal-event has an error , data:" + entry.toString(), e);
    }
    if (rowChange == null) {
        return null;
    }
    String schemaName = entry.getHeader().getSchemaName();
    String tableName = entry.getHeader().getTableName();
    EventType eventType = EventType.valueOf(rowChange.getEventType().name());
    if (eventType.isQuery()) {
        // query events are ignored
        return null;
    }
    if (StringUtils.equalsIgnoreCase(pipeline.getParameters().getSystemSchema(), schemaName)) {
        // system schema: ddl is never synced
        if (eventType.isDdl()) {
            return null;
        }
        if (StringUtils.equalsIgnoreCase(pipeline.getParameters().getSystemDualTable(), tableName)) {
            // heartbeat table rows are ignored
            return null;
        }
    } else if (eventType.isDdl()) {
        // a rename may refer to a table that is not configured, so a missing media is tolerated for it
        boolean notExistReturnNull = eventType.isRename();
        DataMedia dataMedia = ConfigHelper.findSourceDataMedia(pipeline, schemaName, tableName, notExistReturnNull);
        if (dataMedia != null && (eventType.isCreate() || eventType.isAlter() || eventType.isRename())) {
            // CREATE/ALTER/RENAME change the table structure: refresh the cached meta
            DbDialect dbDialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) dataMedia.getSource());
            dbDialect.reloadTable(schemaName, tableName);
        }
        boolean ddlSync = pipeline.getParameters().getDdlSync();
        if (!ddlSync) {
            return null;
        }
        if (dataMedia == null) {
            // no configured source media means there is no table id to attach; skip instead of failing on null
            logger.warn("source data media of [{}.{}] is not found , ignore this ddl event.", schemaName, tableName);
            return null;
        }
        EventData ddlEvent = new EventData();
        ddlEvent.setSchemaName(schemaName);
        ddlEvent.setTableName(tableName);
        ddlEvent.setEventType(eventType);
        ddlEvent.setExecuteTime(entry.getHeader().getExecuteTime());
        ddlEvent.setSql(rowChange.getSql());
        ddlEvent.setDdlSchemaName(rowChange.getDdlSchemaName());
        ddlEvent.setTableId(dataMedia.getId());
        return Arrays.asList(ddlEvent);
    }
    // regular dml: one event per row
    List<EventData> eventDatas = new ArrayList<EventData>();
    for (RowData rowData : rowChange.getRowDatasList()) {
        EventData eventData = internParse(pipeline, entry, rowChange, rowData);
        if (eventData != null) {
            eventDatas.add(eventData);
        }
    }
    return eventDatas;
}
use of com.alibaba.otter.node.etl.select.exceptions.SelectException in project otter by alibaba.
the class CanalFilterSupport method makeFilterExpression.
/**
 * Builds the comma separated filter expression for a pipeline: the names collected from the source media
 * of every pair, followed by the system mark, buffer and dual tables that are not among them.
 *
 * @param pipeline pipeline whose pairs and system table settings are read
 * @return the filter expression
 * @throws SelectException if the pipeline has no data media pair
 */
public static String makeFilterExpression(Pipeline pipeline) {
    List<DataMediaPair> pairs = pipeline.getPairs();
    if (pairs.isEmpty()) {
        throw new SelectException("ERROR ## the pair is empty,the pipeline id = " + pipeline.getId());
    }
    // collect the names contributed by each pair's source media
    Set<String> mediaNames = new HashSet<String>();
    for (DataMediaPair pair : pairs) {
        DataMedia.ModeValue namespaceMode = pair.getSource().getNamespaceMode();
        DataMedia.ModeValue nameMode = pair.getSource().getNameMode();
        if (namespaceMode.getMode().isSingle()) {
            buildFilter(mediaNames, namespaceMode.getSingleValue(), nameMode, false);
        } else if (namespaceMode.getMode().isMulti()) {
            for (String namespace : namespaceMode.getMultiValue()) {
                buildFilter(mediaNames, namespace, nameMode, false);
            }
        } else if (namespaceMode.getMode().isWildCard()) {
            buildFilter(mediaNames, namespaceMode.getSingleValue(), nameMode, true);
        }
    }
    // join the collected names with commas
    StringBuilder expression = new StringBuilder();
    boolean first = true;
    for (String mediaName : mediaNames) {
        if (!first) {
            expression.append(",");
        }
        expression.append(mediaName);
        first = false;
    }
    // the system tables always have to pass the filter: append the ones not collected yet
    String systemSchema = pipeline.getParameters().getSystemSchema();
    String[] systemTables = {
        systemSchema + "." + pipeline.getParameters().getSystemMarkTable(),
        systemSchema + "." + pipeline.getParameters().getSystemBufferTable(),
        systemSchema + "." + pipeline.getParameters().getSystemDualTable()
    };
    for (String systemTable : systemTables) {
        if (!mediaNames.contains(systemTable)) {
            expression.append(",").append(systemTable);
        }
    }
    return expression.toString();
}
Aggregations