Search in sources :

Example 36 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class MessageParser method internParse.

/**
 * Converts a single canal {@code RowData} into an otter {@link EventData}.
 *
 * <p>How the source databases report changes (as described by the original authors):
 * <ul>
 * <li>Oracle only delivers the column values that changed.
 * <ul>
 * <li>insert: all changed data is taken from afterColumns</li>
 * <li>delete: all changed data is taken from beforeColumns</li>
 * <li>update: "before" holds every primary key plus the previous non-key values, "after" holds the
 * new primary key and non-key values; for a composite key only the changed key columns are present</li>
 * </ul>
 * </li>
 * <li>MySQL delivers the complete row image before and after the change.
 * <ul>
 * <li>insert: all changed data is taken from afterColumns</li>
 * <li>delete: all changed data is taken from beforeColumns</li>
 * <li>update: beforeColumns holds the full row before the change, afterColumns the full row after it</li>
 * </ul>
 * </li>
 * </ul>
 *
 * @param pipeline pipeline whose parameters and media pairs drive the conversion
 * @param entry canal entry supplying the header (schema, table, execute time, source type)
 * @param rowChange row change supplying the event type
 * @param rowData row image (before/after column lists) to convert
 * @return the populated event; never {@code null}
 * @throws SelectException if no key column could be determined for the row
 */
private EventData internParse(Pipeline pipeline, Entry entry, RowChange rowChange, RowData rowData) {
    EventData eventData = new EventData();
    eventData.setTableName(entry.getHeader().getTableName());
    eventData.setSchemaName(entry.getHeader().getSchemaName());
    eventData.setEventType(EventType.valueOf(rowChange.getEventType().name()));
    eventData.setExecuteTime(entry.getHeader().getExecuteTime());
    EventType eventType = eventData.getEventType();
    TableInfoHolder tableHolder = null;
    // Rows from the system schema are not mapped onto a configured media pair.
    if (!StringUtils.equalsIgnoreCase(pipeline.getParameters().getSystemSchema(), eventData.getSchemaName())) {
        boolean useTableTransform = pipeline.getParameters().getUseTableTransform();
        Table table = null;
        DataMediaPair dataMediaPair = ConfigHelper.findDataMediaPairBySourceName(pipeline, eventData.getSchemaName(), eventData.getTableName());
        DataMedia dataMedia = dataMediaPair.getSource();
        eventData.setTableId(dataMedia.getId());
        // Target side of the pair
        DataMedia targetDataMedia = dataMediaPair.getTarget();
        if (useTableTransform || dataMedia.getSource().getType().isOracle()) {
            // Oracle needs one extra meta lookup.
            // If table-meta transformation is enabled, look the table information up as well.
            // E.g. the oracle erosa parser may have used a non-physical primary key; use it directly and trust erosa.
            DbDialect dbDialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) dataMedia.getSource());
            // Query the meta information
            table = dbDialect.findTable(eventData.getSchemaName(), eventData.getTableName());
            if (table == null) {
                logger.warn("find table[{}.{}] is null , may be drop table.", eventData.getSchemaName(), eventData.getTableName());
            }
            // Fetch the shard columns of the target and mark them as primary key on the source table.
            // This requires source and target schema/table names to correspond.
            DbDialect targetDbDialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) targetDataMedia.getSource());
            // Without source meta there is nothing to mark; skipping avoids an NPE on table.findColumn.
            if (targetDbDialect.isDRDS() && table != null) {
                String targetSchemaName = buildName(eventData.getSchemaName(), dataMedia.getNamespaceMode(), targetDataMedia.getNamespaceMode());
                // The table name must be derived from the source TABLE name (not the schema name).
                String targetTableName = buildName(eventData.getTableName(), dataMedia.getNameMode(), targetDataMedia.getNameMode());
                String shardColumns = targetDbDialect.getShardColumns(targetSchemaName, targetTableName);
                if (StringUtils.isNotEmpty(shardColumns)) {
                    String[] columns = StringUtils.split(shardColumns, ',');
                    for (String key : columns) {
                        org.apache.ddlutils.model.Column col = table.findColumn(key, false);
                        if (col != null) {
                            col.setPrimaryKey(true);
                        } else {
                            logger.warn(String.format("shardColumn %s in table[%s.%s] is not found", key, eventData.getSchemaName(), eventData.getTableName()));
                        }
                    }
                }
            }
            tableHolder = new TableInfoHolder(dbDialect, table, useTableTransform);
        }
    }
    List<Column> beforeColumns = rowData.getBeforeColumnsList();
    List<Column> afterColumns = rowData.getAfterColumnsList();
    String tableName = eventData.getSchemaName() + "." + eventData.getTableName();
    // Decide whether all columns are needed.
    // In row mode every field has to be flagged as updated.
    boolean isRowMode = pipeline.getParameters().getSyncMode().isRow();
    boolean needAllColumns = isRowMode || checkNeedAllColumns(pipeline);
    // Primary keys after the change
    Map<String, EventColumn> keyColumns = new LinkedHashMap<String, EventColumn>();
    // Primary keys before the change
    Map<String, EventColumn> oldKeyColumns = new LinkedHashMap<String, EventColumn>();
    // Changed non-key columns
    Map<String, EventColumn> notKeyColumns = new LinkedHashMap<String, EventColumn>();
    if (eventType.isInsert()) {
        for (Column column : afterColumns) {
            if (isKey(tableHolder, tableName, column)) {
                keyColumns.put(column.getName(), copyEventColumn(column, true, tableHolder));
            } else {
                // only effective for mysql
                notKeyColumns.put(column.getName(), copyEventColumn(column, true, tableHolder));
            }
        }
    } else if (eventType.isDelete()) {
        for (Column column : beforeColumns) {
            if (isKey(tableHolder, tableName, column)) {
                keyColumns.put(column.getName(), copyEventColumn(column, true, tableHolder));
            } else {
                // only effective for mysql
                notKeyColumns.put(column.getName(), copyEventColumn(column, true, tableHolder));
            }
        }
    } else if (eventType.isUpdate()) {
        // Collect the primary keys as they were before the change.
        for (Column column : beforeColumns) {
            if (isKey(tableHolder, tableName, column)) {
                oldKeyColumns.put(column.getName(), copyEventColumn(column, true, tableHolder));
                // Record it as the new key too: since mysql 5.6 introduced the minimal row image,
                // "after" may carry no key information and it has to be taken from "before".
                keyColumns.put(column.getName(), copyEventColumn(column, true, tableHolder));
            } else {
                if (needAllColumns && entry.getHeader().getSourceType() == CanalEntry.Type.ORACLE) {
                    // For whole-row sync on oracle, keep the non-key fields from "before", because
                    // fields left untouched by the update are absent from afterColumns.
                    notKeyColumns.put(column.getName(), copyEventColumn(column, isRowMode, tableHolder));
                }
            }
        }
        for (Column column : afterColumns) {
            if (isKey(tableHolder, tableName, column)) {
                // Primary key after the change (overrides the value taken from "before")
                keyColumns.put(column.getName(), copyEventColumn(column, true, tableHolder));
            } else if (needAllColumns || entry.getHeader().getSourceType() == CanalEntry.Type.ORACLE || column.getUpdated()) {
                // On update, oracle and mysql deliver changed non-key values differently: oracle sends
                // only the changed fields; mysql sends both images, so only changed fields are taken.
                // For oracle, everything in "after" is a changed field.
                boolean isUpdate = true;
                if (entry.getHeader().getSourceType() == CanalEntry.Type.MYSQL) {
                    // mysql's "after" contains unchanged data as well, so trust the per-column flag
                    isUpdate = column.getUpdated();
                }
                // In row mode every field counts as updated
                notKeyColumns.put(column.getName(), copyEventColumn(column, isRowMode || isUpdate, tableHolder));
            }
        }
        if (entry.getHeader().getSourceType() == CanalEntry.Type.ORACLE) {
            // Oracle-specific post-processing of the key columns
            checkUpdateKeyColumns(oldKeyColumns, keyColumns);
        }
    }
    List<EventColumn> keys = new ArrayList<EventColumn>(keyColumns.values());
    List<EventColumn> oldKeys = new ArrayList<EventColumn>(oldKeyColumns.values());
    List<EventColumn> columns = new ArrayList<EventColumn>(notKeyColumns.values());
    Collections.sort(keys, new EventColumnIndexComparable());
    Collections.sort(oldKeys, new EventColumnIndexComparable());
    Collections.sort(columns, new EventColumnIndexComparable());
    if (!keyColumns.isEmpty()) {
        eventData.setKeys(keys);
        if (eventData.getEventType().isUpdate() && !oldKeys.equals(keys)) {
            // Update whose key changed: remember the old keys (the key values before the change)
            eventData.setOldKeys(oldKeys);
        }
        eventData.setColumns(columns);
    } else {
        // Synchronizing tables without a primary key (treating every column as key, mysql only)
        // was prototyped here at one point but is disabled; such rows are rejected.
        throw new SelectException("this rowdata has no pks , entry: " + entry.toString() + " and rowData: " + rowData);
    }
    return eventData;
}
Also used : Table(org.apache.ddlutils.model.Table) DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) EventType(com.alibaba.otter.shared.etl.model.EventType) EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) ArrayList(java.util.ArrayList) SelectException(com.alibaba.otter.node.etl.select.exceptions.SelectException) EventData(com.alibaba.otter.shared.etl.model.EventData) LinkedHashMap(java.util.LinkedHashMap) EventColumnIndexComparable(com.alibaba.otter.shared.etl.model.EventColumnIndexComparable) EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) Column(com.alibaba.otter.canal.protocol.CanalEntry.Column) DbDialect(com.alibaba.otter.node.etl.common.db.dialect.DbDialect) DataMedia(com.alibaba.otter.shared.common.model.config.data.DataMedia)

Example 37 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class MessageParser method internParse.

/**
 * Parses one canal {@code Entry} into the list of otter events it represents.
 *
 * <p>Query events, DDL on the system schema, rows of the system dual (heartbeat) table, and DDL
 * events that are not to be synchronized yield {@code null}. A synchronized DDL event yields a
 * single-element list; a DML entry yields one event per contained row.
 *
 * @param pipeline pipeline whose parameters (system schema, dual table, ddlSync) and media
 *        configuration are consulted
 * @param entry canal entry to parse
 * @return the parsed events, or {@code null} when the entry is to be ignored
 * @throws SelectException if the entry's stored value cannot be parsed as a {@code RowChange}
 */
private List<EventData> internParse(Pipeline pipeline, Entry entry) {
    RowChange rowChange = null;
    try {
        rowChange = RowChange.parseFrom(entry.getStoreValue());
    } catch (Exception e) {
        throw new SelectException("parser of canal-event has an error , data:" + entry.toString(), e);
    }
    if (rowChange == null) {
        return null;
    }
    String schemaName = entry.getHeader().getSchemaName();
    String tableName = entry.getHeader().getTableName();
    EventType eventType = EventType.valueOf(rowChange.getEventType().name());
    // Handle DDL-like operations first
    if (eventType.isQuery()) {
        // query events are simply ignored
        return null;
    }
    // First check whether this belongs to the system schema
    if (StringUtils.equalsIgnoreCase(pipeline.getParameters().getSystemSchema(), schemaName)) {
        // nothing to do for DDL on the system schema
        if (eventType.isDdl()) {
            return null;
        }
        if (StringUtils.equalsIgnoreCase(pipeline.getParameters().getSystemDualTable(), tableName)) {
            // heartbeat table data is ignored
            return null;
        }
    } else {
        if (eventType.isDdl()) {
            // For RENAME the lookup is asked to return null instead of failing on an unknown table.
            boolean notExistReturnNull = false;
            if (eventType.isRename()) {
                notExistReturnNull = true;
            }
            DataMedia dataMedia = ConfigHelper.findSourceDataMedia(pipeline, schemaName, tableName, notExistReturnNull);
            // CREATE/ALTER/RENAME require the table meta information to be reloaded.
            if (dataMedia != null && (eventType.isCreate() || eventType.isAlter() || eventType.isRename())) {
                DbDialect dbDialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) dataMedia.getSource());
                // refresh the meta information
                dbDialect.reloadTable(schemaName, tableName);
            }
            boolean ddlSync = pipeline.getParameters().getDdlSync();
            // Without a configured source media there is no table id to attach, so the DDL is
            // dropped instead of failing with a NullPointerException on dataMedia.getId().
            if (dataMedia != null && ddlSync) {
                // build the DDL event
                EventData eventData = new EventData();
                eventData.setSchemaName(schemaName);
                eventData.setTableName(tableName);
                eventData.setEventType(eventType);
                eventData.setExecuteTime(entry.getHeader().getExecuteTime());
                eventData.setSql(rowChange.getSql());
                eventData.setDdlSchemaName(rowChange.getDdlSchemaName());
                eventData.setTableId(dataMedia.getId());
                return Arrays.asList(eventData);
            } else {
                return null;
            }
        }
    }
    // DML: convert every row of the change individually
    List<EventData> eventDatas = new ArrayList<EventData>();
    for (RowData rowData : rowChange.getRowDatasList()) {
        EventData eventData = internParse(pipeline, entry, rowChange, rowData);
        if (eventData != null) {
            eventDatas.add(eventData);
        }
    }
    return eventDatas;
}
Also used : RowData(com.alibaba.otter.canal.protocol.CanalEntry.RowData) RowChange(com.alibaba.otter.canal.protocol.CanalEntry.RowChange) EventType(com.alibaba.otter.shared.etl.model.EventType) DbDialect(com.alibaba.otter.node.etl.common.db.dialect.DbDialect) SelectException(com.alibaba.otter.node.etl.select.exceptions.SelectException) ArrayList(java.util.ArrayList) SelectException(com.alibaba.otter.node.etl.select.exceptions.SelectException) TransformException(com.alibaba.otter.node.etl.transform.exception.TransformException) DataMedia(com.alibaba.otter.shared.common.model.config.data.DataMedia) EventData(com.alibaba.otter.shared.etl.model.EventData)

Example 38 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class OtterTransformerTest method test_rowData_oracle_mysql.

/**
 * Feeds one UPDATE event (two key columns, seven data columns) for an oracle-source /
 * mysql-target media pair through the transform factory in ROW sync mode and expects
 * exactly one record in the resulting row batch.
 */
@Test
public void test_rowData_oracle_mysql() {
    // --- pipeline #100 with a single media pair ---
    final Pipeline pipeline = new Pipeline();
    pipeline.setId(100L);

    DataMediaPair mediaPair = new DataMediaPair();
    mediaPair.setId(1L);
    mediaPair.setPipelineId(pipeline.getId());
    mediaPair.setPullWeight(1L);
    mediaPair.setPushWeight(1L);

    DbDataMedia sourceMedia = getOracleMedia();
    sourceMedia.setId(1L);
    mediaPair.setSource(sourceMedia);

    DbDataMedia targetMedia = getMysqlMedia();
    mediaPair.setTarget(targetMedia);

    List<DataMediaPair> mediaPairs = new ArrayList<DataMediaPair>();
    mediaPairs.add(mediaPair);
    pipeline.setPairs(mediaPairs);

    PipelineParameter parameters = new PipelineParameter();
    parameters.setSyncMode(SyncMode.ROW);
    pipeline.setParameters(parameters);

    // --- any pipeline lookup resolves to the pipeline built above ---
    new NonStrictExpectations() {

        {
            configClientService.findPipeline(anyLong);
            returns(pipeline);
        }
    };

    // --- batch identity ---
    Identity batchIdentity = new Identity();
    batchIdentity.setChannelId(100L);
    batchIdentity.setPipelineId(100L);
    batchIdentity.setProcessId(100L);

    RowBatch input = new RowBatch();
    input.setIdentity(batchIdentity);

    // --- the UPDATE event on srf.columns ---
    EventData update = new EventData();
    update.setTableId(1L);
    update.setSchemaName("srf");
    update.setTableName("columns");
    update.setEventType(EventType.UPDATE);
    update.setExecuteTime(100L);

    // key columns
    update.getKeys().add(buildColumn("id", Types.NUMERIC, "1", true, false));
    update.getKeys().add(buildColumn("name", Types.VARCHAR, "ljh", true, false));

    // data columns
    update.getColumns().add(buildColumn("alias_name", Types.CHAR, "hello", false, false));
    update.getColumns().add(buildColumn("amount", Types.NUMERIC, "100.01", false, false));
    update.getColumns().add(buildColumn("text_b", Types.BLOB, "[116,101,120,116,95,98]", false, false));
    update.getColumns().add(buildColumn("text_c", Types.CLOB, "text_c", false, false));
    update.getColumns().add(buildColumn("curr_date", Types.DATE, "2011-01-01", false, false));
    update.getColumns().add(buildColumn("gmt_create", Types.DATE, "2011-01-01 11:11:11", false, false));
    update.getColumns().add(buildColumn("gmt_modify", Types.DATE, "2011-01-01 11:11:11", false, false));
    input.merge(update);

    // --- transform and verify ---
    Map<Class, BatchObject> transformed = otterTransformFactory.transform(input);
    RowBatch output = (RowBatch) transformed.get(EventData.class);
    want.number(output.getDatas().size()).isEqualTo(1);
}
Also used : DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) ArrayList(java.util.ArrayList) EventData(com.alibaba.otter.shared.etl.model.EventData) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) BatchObject(com.alibaba.otter.shared.etl.model.BatchObject) PipelineParameter(com.alibaba.otter.shared.common.model.config.pipeline.PipelineParameter) Identity(com.alibaba.otter.shared.etl.model.Identity) DbDataMedia(com.alibaba.otter.shared.common.model.config.data.db.DbDataMedia) Test(org.testng.annotations.Test) BaseDbTest(com.alibaba.otter.node.etl.BaseDbTest)

Example 39 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class HttpPipeIntegration method test_rowData.

/**
 * Round-trips a {@code DbBatch} (one row event plus one file entry) through the row-data HTTP
 * pipe and verifies that the batch read back is non-null and carries the same identity.
 */
@Test
public void test_rowData() {
    final Node currentNode = new Node();
    currentNode.setId(1L);
    currentNode.setIp("127.0.0.1");
    currentNode.setParameters(new NodeParameter());
    final Pipeline pipeline = new Pipeline();
    pipeline.getParameters().setRetriever(RetrieverType.ARIA2C);
    // mock the config client: current node and any pipeline lookup
    new NonStrictExpectations() {

        {
            configClientService.currentNode();
            returns(currentNode);
            configClientService.findPipeline(anyLong);
            returns(pipeline);
        }
    };
    Identity identity = new Identity();
    identity.setChannelId(100L);
    identity.setPipelineId(100L);
    identity.setProcessId(100L);

    // file part of the batch
    FileBatch fileBatch = new FileBatch();
    fileBatch.setIdentity(identity);
    File localFile = new File(tmp, "httpPipeTest.jpg");
    FileData localFileData = new FileData();
    localFileData.setPath(localFile.getPath());
    localFileData.setEventType(EventType.INSERT);
    localFileData.setLastModifiedTime(new Date().getTime());
    localFileData.setSize(100L);
    localFileData.setTableId(1L);
    fileBatch.getFiles().add(localFileData);

    // row part of the batch: a single INSERT on otter.test
    RowBatch rowBatch = new RowBatch();
    rowBatch.setIdentity(identity);
    EventData eventData = new EventData();
    eventData.setTableId(1L);
    eventData.setSchemaName("otter");
    eventData.setTableName("test");
    eventData.setEventType(EventType.INSERT);
    eventData.setExecuteTime(100L);
    EventColumn primaryKey = new EventColumn();
    primaryKey.setColumnName("id");
    primaryKey.setColumnType(1);
    primaryKey.setColumnValue("1");
    primaryKey.setKey(true);
    primaryKey.setNull(false);
    eventData.getKeys().add(primaryKey);
    EventColumn column = new EventColumn();
    column.setColumnName("name");
    column.setColumnType(1);
    column.setColumnValue("test");
    column.setKey(false);
    column.setNull(false);
    eventData.getColumns().add(column);
    rowBatch.merge(eventData);

    DbBatch dbBatch = new DbBatch();
    dbBatch.setRowBatch(rowBatch);
    dbBatch.setFileBatch(fileBatch);

    // write through the pipe and read back
    HttpPipeKey key = rowDataHttpPipe.put(dbBatch);
    DbBatch target = rowDataHttpPipe.get(key);

    // Check for null BEFORE dereferencing, and actually terminate the boolean assertion:
    // want.bool(...) on its own only builds an asserter and verifies nothing.
    want.object(target).notNull();
    want.bool(target.getRowBatch().getIdentity().equals(identity)).is(true);
}
Also used : FileBatch(com.alibaba.otter.shared.etl.model.FileBatch) EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) Node(com.alibaba.otter.shared.common.model.config.node.Node) Date(java.util.Date) EventData(com.alibaba.otter.shared.etl.model.EventData) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) HttpPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.http.HttpPipeKey) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) NodeParameter(com.alibaba.otter.shared.common.model.config.node.NodeParameter) Identity(com.alibaba.otter.shared.etl.model.Identity) File(java.io.File) FileData(com.alibaba.otter.shared.etl.model.FileData) Test(org.testng.annotations.Test) BaseOtterTest(com.alibaba.otter.node.etl.BaseOtterTest)

Example 40 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class DatabaseExtractorTest method test_override_field.

/**
 * Builds a FIELD-mode pipeline whose consistency is BASE, merges events that each set their own
 * consistency to MEDIA into one row batch, runs the database extractor over it, and expects the
 * batch to contain {@code count} records afterwards.
 */
public void test_override_field() {
    final Pipeline pipeline = new Pipeline();
    pipeline.setId(100L);
    pipeline.getParameters().setSyncMode(SyncMode.FIELD);
    // pipeline-wide ("global") consistency setting
    pipeline.getParameters().setSyncConsistency(SyncConsistency.BASE);

    int start = RandomUtils.nextInt();
    int count = 10;
    List<DataMediaPair> oraclePairs = getDataMediaPairForOracle(start, count);
    pipeline.setPairs(oraclePairs);

    // lookups of pipeline 100 resolve to the pipeline built above
    new NonStrictExpectations() {

        {
            configClientService.findPipeline(100L);
            returns(pipeline);
        }
    };

    // assemble the input: for every table id, one event per row id in [start, start + count)
    RowBatch batch = new RowBatch();
    batch.setIdentity(identity);
    final int upperBound = start + count;
    for (int tableId = start; tableId < upperBound; tableId++) {
        for (int rowId = start; rowId < upperBound; rowId++) {
            EventData event = getEventData(tableId, rowId);
            event.setSchemaName("srf");
            event.setTableName("columns");
            // per-event consistency differs from the pipeline-wide one
            event.setSyncConsistency(SyncConsistency.MEDIA);
            batch.merge(event);
        }
    }

    DbBatch wrapped = new DbBatch(batch);
    databaseExtractor.extract(wrapped);
    want.number(batch.getDatas().size()).isEqualTo(count);
}
Also used : DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) EventData(com.alibaba.otter.shared.etl.model.EventData) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Aggregations

EventData (com.alibaba.otter.shared.etl.model.EventData)48 ArrayList (java.util.ArrayList)20 Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)19 EventColumn (com.alibaba.otter.shared.etl.model.EventColumn)18 DataMediaPair (com.alibaba.otter.shared.common.model.config.data.DataMediaPair)16 Test (org.testng.annotations.Test)16 BaseDbTest (com.alibaba.otter.node.etl.BaseDbTest)15 RowBatch (com.alibaba.otter.shared.etl.model.RowBatch)14 Identity (com.alibaba.otter.shared.etl.model.Identity)9 MapMaker (com.google.common.collect.MapMaker)9 RowKey (com.alibaba.otter.node.etl.load.loader.db.DbLoadMerger.RowKey)8 DataMedia (com.alibaba.otter.shared.common.model.config.data.DataMedia)8 DbBatch (com.alibaba.otter.shared.etl.model.DbBatch)7 DbDialect (com.alibaba.otter.node.etl.common.db.dialect.DbDialect)5 ExtractException (com.alibaba.otter.node.etl.extract.exceptions.ExtractException)5 LoadException (com.alibaba.otter.node.etl.load.exception.LoadException)4 FileData (com.alibaba.otter.shared.etl.model.FileData)4 SelectException (com.alibaba.otter.node.etl.select.exceptions.SelectException)3 Channel (com.alibaba.otter.shared.common.model.config.channel.Channel)3 EventType (com.alibaba.otter.shared.etl.model.EventType)3