Search in sources:

Example 1 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

In the class OtterTransformerTest, the method test_rowData_mysql_oracle.

@Test
public void test_rowData_mysql_oracle() {
    final Pipeline pipeline = new Pipeline();
    pipeline.setId(100L);
    List<DataMediaPair> pairs = new ArrayList<DataMediaPair>();
    DataMediaPair pair1 = new DataMediaPair();
    pair1.setId(1L);
    pair1.setPipelineId(pipeline.getId());
    pair1.setPullWeight(1L);
    pair1.setPushWeight(1L);
    DbDataMedia mysqlMedia = getMysqlMedia();
    mysqlMedia.setId(1L);
    pair1.setSource(mysqlMedia);
    DbDataMedia oracleMedia = getOracleMedia();
    pair1.setTarget(oracleMedia);
    pairs.add(pair1);
    pipeline.setPairs(pairs);
    PipelineParameter param = new PipelineParameter();
    param.setSyncMode(SyncMode.ROW);
    pipeline.setParameters(param);
    new NonStrictExpectations() {

        {
            configClientService.findPipeline(anyLong);
            returns(pipeline);
        }
    };
    Identity identity = new Identity();
    identity.setChannelId(100L);
    identity.setPipelineId(100L);
    identity.setProcessId(100L);
    RowBatch rowBatch = new RowBatch();
    rowBatch.setIdentity(identity);
    EventData eventData = new EventData();
    eventData.setTableId(1L);
    eventData.setSchemaName("srf");
    eventData.setTableName("columns");
    eventData.setEventType(EventType.UPDATE);
    eventData.setExecuteTime(100L);
    eventData.getKeys().add(buildColumn("id", Types.INTEGER, "1", true, false));
    eventData.getKeys().add(buildColumn("name", Types.VARCHAR, "ljh", true, false));
    eventData.getColumns().add(buildColumn("alias_name", Types.CHAR, "hello", false, false));
    eventData.getColumns().add(buildColumn("amount", Types.DECIMAL, "100.01", false, false));
    eventData.getColumns().add(buildColumn("text_b", Types.BLOB, "[116,101,120,116,95,98]", false, false));
    eventData.getColumns().add(buildColumn("text_c", Types.CLOB, "text_c", false, false));
    eventData.getColumns().add(buildColumn("curr_date", Types.DATE, "2011-01-01", false, false));
    eventData.getColumns().add(buildColumn("gmt_create", Types.TIMESTAMP, "2011-01-01 11:11:11", false, false));
    eventData.getColumns().add(buildColumn("gmt_modify", Types.TIMESTAMP, "2011-01-01 11:11:11", false, false));
    rowBatch.merge(eventData);
    Map<Class, BatchObject> batchs = otterTransformFactory.transform(rowBatch);
    RowBatch result = (RowBatch) batchs.get(EventData.class);
    want.number(result.getDatas().size()).isEqualTo(1);
}
Also used : DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) ArrayList(java.util.ArrayList) EventData(com.alibaba.otter.shared.etl.model.EventData) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) BatchObject(com.alibaba.otter.shared.etl.model.BatchObject) PipelineParameter(com.alibaba.otter.shared.common.model.config.pipeline.PipelineParameter) Identity(com.alibaba.otter.shared.etl.model.Identity) DbDataMedia(com.alibaba.otter.shared.common.model.config.data.db.DbDataMedia) Test(org.testng.annotations.Test) BaseDbTest(com.alibaba.otter.node.etl.BaseDbTest)

Example 2 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

In the class RowDataTransformer, the method transform.

public EventData transform(EventData data, OtterTransformerContext context) {
    EventData result = new EventData();
    // 处理Table转化
    DataMedia dataMedia = context.getDataMediaPair().getTarget();
    result.setPairId(context.getDataMediaPair().getId());
    result.setTableId(dataMedia.getId());
    // 需要特殊处理下multi场景
    buildName(data, result, context.getDataMediaPair());
    result.setEventType(data.getEventType());
    result.setExecuteTime(data.getExecuteTime());
    result.setSyncConsistency(data.getSyncConsistency());
    result.setRemedy(data.isRemedy());
    result.setSyncMode(data.getSyncMode());
    result.setSize(data.getSize());
    result.setHint(data.getHint());
    result.setWithoutSchema(data.isWithoutSchema());
    if (data.getEventType().isDdl()) {
        // ddl不需要处理字段
        if (StringUtils.equalsIgnoreCase(result.getSchemaName(), data.getSchemaName()) && StringUtils.equalsIgnoreCase(result.getTableName(), data.getTableName())) {
            // 是否需要对ddl sql进行转化,暂时不支持异构,必须保证源表和目标表的名字相同
            result.setDdlSchemaName(data.getDdlSchemaName());
            result.setSql(data.getSql());
            return result;
        } else {
            throw new TransformException("no support ddl for [" + data.getSchemaName() + "." + data.getTableName() + "] to [" + result.getSchemaName() + "." + result.getTableName() + "] , sql :" + data.getSql());
        }
    }
    Multimap<String, String> translateColumnNames = HashMultimap.create();
    if (context.getDataMediaPair().getColumnPairMode().isInclude()) {
        // 只针对正向匹配进行名字映射,exclude不做处理
        List<ColumnPair> columnPairs = context.getDataMediaPair().getColumnPairs();
        for (ColumnPair columnPair : columnPairs) {
            translateColumnNames.put(columnPair.getSourceColumn().getName(), columnPair.getTargetColumn().getName());
        }
    }
    // 准备一下table meta
    DataMediaPair dataMediaPair = context.getDataMediaPair();
    boolean useTableTransform = context.getPipeline().getParameters().getUseTableTransform();
    boolean enableCompatibleMissColumn = context.getPipeline().getParameters().getEnableCompatibleMissColumn();
    TableInfoHolder tableHolder = null;
    if (useTableTransform || enableCompatibleMissColumn) {
        // 控制一下是否需要反查table
        // meta信息,如果同构数据库,完全没必要反查
        // 获取目标库的表信息
        DbDialect dbDialect = dbDialectFactory.getDbDialect(dataMediaPair.getPipelineId(), (DbMediaSource) dataMedia.getSource());
        Table table = dbDialect.findTable(result.getSchemaName(), result.getTableName());
        tableHolder = new TableInfoHolder(table, useTableTransform, enableCompatibleMissColumn);
    }
    // 处理column转化
    List<EventColumn> otherColumns = translateColumns(result, data.getColumns(), context.getDataMediaPair(), translateColumnNames, tableHolder);
    translatePkColumn(result, data.getKeys(), data.getOldKeys(), otherColumns, context.getDataMediaPair(), translateColumnNames, tableHolder);
    result.setColumns(otherColumns);
    return result;
}
Also used : ColumnPair(com.alibaba.otter.shared.common.model.config.data.ColumnPair) DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) Table(org.apache.ddlutils.model.Table) EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) TransformException(com.alibaba.otter.node.etl.transform.exception.TransformException) EventData(com.alibaba.otter.shared.etl.model.EventData) DbDialect(com.alibaba.otter.node.etl.common.db.dialect.DbDialect) DataMedia(com.alibaba.otter.shared.common.model.config.data.DataMedia)

Example 3 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

In the class MessageParser, the method parse.

/**
     * 将对应canal送出来的Entry对象解析为otter使用的内部对象
     * 
     * <pre>
     * 需要处理数据过滤:
     * 1. Transaction Begin/End过滤
     * 2. retl.retl_client/retl.retl_mark 回环标记处理以及后续的回环数据过滤
     * 3. retl.xdual canal心跳表数据过滤
     * </pre>
     */
public List<EventData> parse(Long pipelineId, List<Entry> datas) throws SelectException {
    List<EventData> eventDatas = new ArrayList<EventData>();
    Pipeline pipeline = configClientService.findPipeline(pipelineId);
    List<Entry> transactionDataBuffer = new ArrayList<Entry>();
    // hz为主站点,us->hz的数据,需要回环同步会us。并且需要开启回环补救算法
    PipelineParameter pipelineParameter = pipeline.getParameters();
    boolean enableLoopbackRemedy = pipelineParameter.isEnableRemedy() && pipelineParameter.isHome() && pipelineParameter.getRemedyAlgorithm().isLoopback();
    boolean isLoopback = false;
    // 判断是否属于需要loopback处理的类型,只处理正常otter同步产生的回环数据,因为会有业务方手工屏蔽同步的接口,避免回环
    boolean needLoopback = false;
    long now = new Date().getTime();
    try {
        for (Entry entry : datas) {
            switch(entry.getEntryType()) {
                case TRANSACTIONBEGIN:
                    isLoopback = false;
                    break;
                case ROWDATA:
                    String tableName = entry.getHeader().getTableName();
                    // 判断是否是回环表retl_mark
                    boolean isMarkTable = tableName.equalsIgnoreCase(pipeline.getParameters().getSystemMarkTable());
                    if (isMarkTable) {
                        RowChange rowChange = RowChange.parseFrom(entry.getStoreValue());
                        if (!rowChange.getIsDdl()) {
                            int loopback = checkLoopback(pipeline, rowChange.getRowDatas(0));
                            if (loopback == 2) {
                                // 只处理正常同步产生的回环数据
                                needLoopback |= true;
                            }
                            isLoopback |= loopback > 0;
                        }
                    }
                    // 检查下otter3.0的回环表,对应的schmea会比较随意,所以不做比较
                    boolean isCompatibleLoopback = tableName.equalsIgnoreCase(compatibleMarkTable);
                    if (isCompatibleLoopback) {
                        RowChange rowChange = RowChange.parseFrom(entry.getStoreValue());
                        if (!rowChange.getIsDdl()) {
                            int loopback = checkCompatibleLoopback(pipeline, rowChange.getRowDatas(0));
                            if (loopback == 2) {
                                // 只处理正常同步产生的回环数据
                                needLoopback |= true;
                            }
                            isLoopback |= loopback > 0;
                        }
                    }
                    if ((!isLoopback || (enableLoopbackRemedy && needLoopback)) && !isMarkTable && !isCompatibleLoopback) {
                        transactionDataBuffer.add(entry);
                    }
                    break;
                case TRANSACTIONEND:
                    if (!isLoopback || (enableLoopbackRemedy && needLoopback)) {
                        // 添加数据解析
                        for (Entry bufferEntry : transactionDataBuffer) {
                            List<EventData> parseDatas = internParse(pipeline, bufferEntry);
                            if (CollectionUtils.isEmpty(parseDatas)) {
                                // 可能为空,针对ddl返回时就为null
                                continue;
                            }
                            // 初步计算一下事件大小
                            long totalSize = bufferEntry.getHeader().getEventLength();
                            long eachSize = totalSize / parseDatas.size();
                            for (EventData eventData : parseDatas) {
                                if (eventData == null) {
                                    continue;
                                }
                                // 记录一下大小
                                eventData.setSize(eachSize);
                                if (needLoopback) {
                                    // 如果延迟超过指定的阀值,则设置为需要反查db
                                    if (now - eventData.getExecuteTime() > 1000 * pipeline.getParameters().getRemedyDelayThresoldForMedia()) {
                                        eventData.setSyncConsistency(SyncConsistency.MEDIA);
                                    } else {
                                        eventData.setSyncConsistency(SyncConsistency.BASE);
                                    }
                                    eventData.setRemedy(true);
                                }
                                eventDatas.add(eventData);
                            }
                        }
                    }
                    isLoopback = false;
                    needLoopback = false;
                    transactionDataBuffer.clear();
                    break;
                default:
                    break;
            }
        }
        // 添加最后一次的数据,可能没有TRANSACTIONEND
        if (!isLoopback || (enableLoopbackRemedy && needLoopback)) {
            // 添加数据解析
            for (Entry bufferEntry : transactionDataBuffer) {
                List<EventData> parseDatas = internParse(pipeline, bufferEntry);
                if (CollectionUtils.isEmpty(parseDatas)) {
                    // 可能为空,针对ddl返回时就为null
                    continue;
                }
                // 初步计算一下事件大小
                long totalSize = bufferEntry.getHeader().getEventLength();
                long eachSize = totalSize / parseDatas.size();
                for (EventData eventData : parseDatas) {
                    if (eventData == null) {
                        continue;
                    }
                    // 记录一下大小
                    eventData.setSize(eachSize);
                    if (needLoopback) {
                        // 如果延迟超过指定的阀值,则设置为需要反查db
                        if (now - eventData.getExecuteTime() > 1000 * pipeline.getParameters().getRemedyDelayThresoldForMedia()) {
                            eventData.setSyncConsistency(SyncConsistency.MEDIA);
                        } else {
                            eventData.setSyncConsistency(SyncConsistency.BASE);
                        }
                    }
                    eventDatas.add(eventData);
                }
            }
        }
    } catch (Exception e) {
        throw new SelectException(e);
    }
    return eventDatas;
}
Also used : RowChange(com.alibaba.otter.canal.protocol.CanalEntry.RowChange) ArrayList(java.util.ArrayList) SelectException(com.alibaba.otter.node.etl.select.exceptions.SelectException) EventData(com.alibaba.otter.shared.etl.model.EventData) Date(java.util.Date) SelectException(com.alibaba.otter.node.etl.select.exceptions.SelectException) TransformException(com.alibaba.otter.node.etl.transform.exception.TransformException) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) Entry(com.alibaba.otter.canal.protocol.CanalEntry.Entry) CanalEntry(com.alibaba.otter.canal.protocol.CanalEntry) PipelineParameter(com.alibaba.otter.shared.common.model.config.pipeline.PipelineParameter)

Example 4 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

In the class DatabaseExtractorTest, the method test_global_row.

@Test
public void test_global_row() {
    final Pipeline pipeline = new Pipeline();
    pipeline.setId(100L);
    pipeline.getParameters().setSyncMode(SyncMode.ROW);
    // 设置为全局
    pipeline.getParameters().setSyncConsistency(SyncConsistency.MEDIA);
    int start = RandomUtils.nextInt();
    int count = 10;
    List<DataMediaPair> pairs = getDataMediaPairForMysql(start, count);
    pipeline.setPairs(pairs);
    new NonStrictExpectations() {

        {
            configClientService.findPipeline(100L);
            returns(pipeline);
        }
    };
    // 构造数据
    RowBatch rowBatch = new RowBatch();
    rowBatch.setIdentity(identity);
    for (int tableId = start; tableId < start + count; tableId++) {
        for (int i = start; i < start + count; i++) {
            EventData eventData = getEventData(tableId, i);
            eventData.setSchemaName("srf");
            eventData.setTableName("columns");
            rowBatch.merge(eventData);
        }
    }
    databaseExtractor.extract(new DbBatch(rowBatch));
    want.number(rowBatch.getDatas().size()).isEqualTo(count);
}
Also used : DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) EventData(com.alibaba.otter.shared.etl.model.EventData) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) Test(org.testng.annotations.Test) BaseDbTest(com.alibaba.otter.node.etl.BaseDbTest)

Example 5 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

In the class DatabaseExtractorTest, the method getEventData.

private EventData getEventData(long tableId, int value) {
    EventData eventData = new EventData();
    eventData.setTableId(tableId);
    eventData.setEventType(EventType.INSERT);
    eventData.setExecuteTime(new Date().getTime());
    eventData.setKeys(getPrimary(value));
    eventData.setColumns(getColumn(value));
    return eventData;
}
Also used : EventData(com.alibaba.otter.shared.etl.model.EventData) Date(java.util.Date)

Aggregations

EventData (com.alibaba.otter.shared.etl.model.EventData)48 ArrayList (java.util.ArrayList)20 Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)19 EventColumn (com.alibaba.otter.shared.etl.model.EventColumn)18 DataMediaPair (com.alibaba.otter.shared.common.model.config.data.DataMediaPair)16 Test (org.testng.annotations.Test)16 BaseDbTest (com.alibaba.otter.node.etl.BaseDbTest)15 RowBatch (com.alibaba.otter.shared.etl.model.RowBatch)14 Identity (com.alibaba.otter.shared.etl.model.Identity)9 MapMaker (com.google.common.collect.MapMaker)9 RowKey (com.alibaba.otter.node.etl.load.loader.db.DbLoadMerger.RowKey)8 DataMedia (com.alibaba.otter.shared.common.model.config.data.DataMedia)8 DbBatch (com.alibaba.otter.shared.etl.model.DbBatch)7 DbDialect (com.alibaba.otter.node.etl.common.db.dialect.DbDialect)5 ExtractException (com.alibaba.otter.node.etl.extract.exceptions.ExtractException)5 LoadException (com.alibaba.otter.node.etl.load.exception.LoadException)4 FileData (com.alibaba.otter.shared.etl.model.FileData)4 SelectException (com.alibaba.otter.node.etl.select.exceptions.SelectException)3 Channel (com.alibaba.otter.shared.common.model.config.channel.Channel)3 EventType (com.alibaba.otter.shared.etl.model.EventType)3