Search in sources :

Example 26 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class DbLoadMerger method mergeDelete.

/**
 * Merges a DELETE event into the per-row merge result.
 *
 * <p>When no event is recorded yet under the row key built from the event's keys, the event is
 * stored as-is. Otherwise the sizes of both events are summed onto {@code eventData} and it
 * replaces the earlier entry. If the earlier entry carried old keys (a primary key change),
 * the delete is re-keyed to those old keys.
 *
 * @param eventData the DELETE event to merge; its size, keys and old keys may be modified
 * @param result    the merge result keyed by row, updated in place
 */
private static void mergeDelete(EventData eventData, Map<RowKey, EventData> result) {
    // Original intent: a delete only needs its primary keys, not its columns (this method itself
    // does not touch the columns). Delete columns could be exposed later for data-warehouse targets.
    final RowKey currentKey = new RowKey(eventData.getTableId(), eventData.getSchemaName(), eventData.getTableName(), eventData.getKeys());
    if (!result.containsKey(currentKey)) {
        result.put(currentKey, eventData);
        return;
    }

    final EventData previous = result.get(currentKey);
    eventData.setSize(previous.getSize() + eventData.getSize());

    if (CollectionUtils.isEmpty(previous.getOldKeys())) {
        // No primary key change on the earlier event: drop oldKeys and overwrite in place.
        eventData.getOldKeys().clear();
        result.put(currentKey, eventData);
        return;
    }

    // The earlier event changed the primary key. For an insert/update -> delete sequence the
    // delete has to target the primary key of the earlier record.
    eventData.setKeys(previous.getOldKeys());
    // Drop oldKeys.
    eventData.getOldKeys().clear();
    // Remove the stale entry; the keys changed, so a fresh RowKey has to be built.
    result.remove(currentKey);
    final RowKey rekeyed = new RowKey(eventData.getTableId(), eventData.getSchemaName(), eventData.getTableName(), eventData.getKeys());
    result.put(rekeyed, eventData);
}
Also used : EventData(com.alibaba.otter.shared.etl.model.EventData)

Example 27 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class DbLoadMerger method mergeInsert.

/**
 * Merges an INSERT event into the per-row merge result.
 *
 * <p>When no event is recorded yet for the row key, the event is stored as-is. Otherwise the
 * sizes are summed onto {@code eventData}; a previous DELETE is simply replaced, while a
 * previous UPDATE or INSERT is logged and merged through {@code replaceColumnValue}. Any other
 * previous event type leaves the map entry unchanged.
 *
 * @param eventData the INSERT event to merge; its size may be modified
 * @param result    the merge result keyed by row, updated in place
 */
private static void mergeInsert(EventData eventData, Map<RowKey, EventData> result) {
    // Insert handling without primary key change.
    final RowKey key = new RowKey(eventData.getTableId(), eventData.getSchemaName(), eventData.getTableName(), eventData.getKeys());
    if (!result.containsKey(key)) {
        result.put(key, eventData);
        return;
    }

    final EventData previous = result.get(key);
    eventData.setSize(previous.getSize() + eventData.getSize());

    final EventType previousType = previous.getEventType();
    if (previousType == EventType.DELETE) {
        // The previous change was a delete: the insert replaces it outright.
        result.put(key, eventData);
        return;
    }
    if (previousType != EventType.UPDATE && previousType != EventType.INSERT) {
        return;
    }

    // An update preceding an insert is logically impossible; the only plausible cause is a
    // Freedom intervention where an Insert record was injected manually. Freedom generally
    // discourages Insert and recommends update/delete only; update goes through merge SQL by
    // default (insert if absent).
    logger.warn("update-insert/insert-insert happend. before[{}] , after[{}]", previous, eventData);
    // Replace with the insert, copying over field values that exist on the previous event but
    // not on this one.
    final EventData merged = replaceColumnValue(eventData, previous);
    // An insert record has no use for oldKeys.
    merged.getOldKeys().clear();
    result.put(key, merged);
}
Also used : EventData(com.alibaba.otter.shared.etl.model.EventData)

Example 28 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class DbLoadMerger method mergeUpdate.

/**
 * Merges an UPDATE event into the per-row merge result.
 *
 * <p>Two paths exist. Without old keys on the event (no primary key change) the event is
 * merged with whatever is already stored under its own row key. With old keys present, the
 * event is merged with the entry stored under the <em>old</em> key, which resolves cascaded
 * primary key changes such as 1-&gt;2 followed by 2-&gt;3, and the result is stored under
 * the new key.
 *
 * @param eventData the UPDATE event to merge; its type and size may be modified
 * @param result    the merge result keyed by row, updated in place
 * @throws LoadException if the event changes the primary key and the entry found under the old
 *                       key is neither an INSERT nor an UPDATE
 */
private static void mergeUpdate(EventData eventData, Map<RowKey, EventData> result) {
    final RowKey newKey = new RowKey(eventData.getTableId(), eventData.getSchemaName(), eventData.getTableName(), eventData.getKeys());

    if (CollectionUtils.isEmpty(eventData.getOldKeys())) {
        // No primary key change.
        if (!result.containsKey(newKey)) {
            result.put(newKey, eventData);
            return;
        }
        // NOTE(review): unlike the primary-key-change path below, this path does not add the
        // previous event's size to eventData here - confirm whether that is intended.
        final EventData sameRowPrevious = result.get(newKey);
        final EventType sameRowType = sameRowPrevious.getEventType();
        if (sameRowType == EventType.INSERT) {
            // Previous change was an insert: this event becomes an insert too, and field values
            // present on the previous event but missing here are copied over.
            eventData.setEventType(EventType.INSERT);
            result.put(newKey, replaceColumnValue(eventData, sameRowPrevious));
        } else if (sameRowType == EventType.UPDATE) {
            // May be the "1->2, then update 2" situation. Previous change was an update: copy
            // over the data present on the previous event but missing here.
            result.put(newKey, replaceColumnValue(eventData, sameRowPrevious));
        } else if (sameRowType == EventType.DELETE) {
            // Abnormal case, delete + update: simply keep the update.
            result.put(newKey, eventData);
        }
        return;
    }

    // Primary key change; cascaded changes (1->2, 2->3) have to be resolved.
    final RowKey formerKey = new RowKey(eventData.getTableId(), eventData.getSchemaName(), eventData.getTableName(), eventData.getOldKeys());
    if (!result.containsKey(formerKey)) {
        // Nothing to cascade.
        result.put(newKey, eventData);
        return;
    }

    final EventData chained = result.get(formerKey);
    eventData.setSize(chained.getSize() + eventData.getSize());

    final EventType chainedType = chained.getEventType();
    final boolean followsInsert = chainedType == EventType.INSERT;
    if (!followsInsert && chainedType != EventType.UPDATE) {
        throw new LoadException("delete(has old pks) + update impossible happed!");
    }

    if (followsInsert) {
        // Previous change was an insert: this event becomes an insert as well.
        eventData.setEventType(EventType.INSERT);
    }
    // Remove the entry stored under the old primary key of the current change.
    result.remove(formerKey);
    // Copy over field values present on the previous event but missing on this one.
    final EventData merged = replaceColumnValue(eventData, chained);
    if (followsInsert) {
        // An insert record has no use for oldKeys.
        merged.getOldKeys().clear();
    }
    result.put(newKey, merged);
}
Also used : EventData(com.alibaba.otter.shared.etl.model.EventData) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException)

Example 29 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class FreedomExtractor method extract.

/**
 * Rewrites records captured from the pipeline's system buffer table into events on the business
 * table they describe, and removes buffer-table records that must not be synchronized.
 *
 * <p>For every event whose schema/table match the configured system schema and buffer table:
 * DDL events are left untouched; when the pipeline skips freedom data the event is removed;
 * insert/update events are rewritten (table id, schema/table name, event type, primary keys,
 * sync mode) from the TABLE_ID, FULL_NAME, TYPE and PK_DATA columns; every other event type is
 * removed. Any failure while rewriting a record is logged and the record is removed, so the
 * {@code ExtractException} raised for a primary-key count mismatch does not propagate.
 *
 * @param dbBatch the batch whose row data is filtered and rewritten in place; must not be null
 * @throws ExtractException declared by the signature
 */
public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    // Read the configuration.
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    boolean skipFreedom = pipeline.getParameters().getSkipFreedom();
    String bufferSchema = pipeline.getParameters().getSystemSchema();
    String bufferTable = pipeline.getParameters().getSystemBufferTable();
    List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
    // A set speeds up the lookups performed by removeAll.
    Set<EventData> removeDatas = new HashSet<EventData>();
    for (EventData eventData : eventDatas) {
        if (StringUtils.equalsIgnoreCase(bufferSchema, eventData.getSchemaName()) && StringUtils.equalsIgnoreCase(bufferTable, eventData.getTableName())) {
            if (eventData.getEventType().isDdl()) {
                continue;
            }
            if (skipFreedom) {
                // Freedom data is configured to be ignored.
                removeDatas.add(eventData);
                continue;
            }
            // Only insert / update records are processed.
            if (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate()) {
                // Rewrite the EventData according to the system table definition.
                EventColumn tableIdColumn = getMatchColumn(eventData.getColumns(), TABLE_ID);
                // Resolve the media information for the table id.
                try {
                    DataMedia dataMedia = null;
                    Long tableId = Long.valueOf(tableIdColumn.getColumnValue());
                    eventData.setTableId(tableId);
                    if (tableId <= 0) {
                        // No usable table id: look the media up by schema + table name taken
                        // from the full_name column.
                        // NOTE(review): when the FULL_NAME column is absent, dataMedia stays
                        // null and the dereference below fails; the generic catch then drops
                        // the record.
                        EventColumn fullNameColumn = getMatchColumn(eventData.getColumns(), FULL_NAME);
                        if (fullNameColumn != null) {
                            String[] names = StringUtils.split(fullNameColumn.getColumnValue(), ".");
                            if (names.length >= 2) {
                                dataMedia = ConfigHelper.findSourceDataMedia(pipeline, names[0], names[1]);
                                eventData.setTableId(dataMedia.getId());
                            } else {
                                // Report the raw column value; concatenating the String[] itself
                                // would only print the array's identity, not the name.
                                throw new ConfigException("no such DataMedia " + fullNameColumn.getColumnValue());
                            }
                        }
                    } else {
                        // A table id was given: look it up strictly by id. If it is not found,
                        // the record does not need to be synchronized.
                        dataMedia = ConfigHelper.findDataMedia(pipeline, tableId);
                    }
                    DbDialect dbDialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) dataMedia.getSource());
                    // Account for multi-table configurations such as offer[1-128].
                    if (!dataMedia.getNameMode().getMode().isSingle() || !dataMedia.getNamespaceMode().getMode().isSingle()) {
                        boolean hasError = true;
                        EventColumn fullNameColumn = getMatchColumn(eventData.getColumns(), FULL_NAME);
                        if (fullNameColumn != null) {
                            String[] names = StringUtils.split(fullNameColumn.getColumnValue(), ".");
                            if (names.length >= 2) {
                                eventData.setSchemaName(names[0]);
                                eventData.setTableName(names[1]);
                                hasError = false;
                            }
                        }
                        if (hasError) {
                            // Something is wrong; record it.
                            logger.warn("dataMedia mode:{} , fullname:{} ", dataMedia.getMode(), fullNameColumn == null ? null : fullNameColumn.getColumnValue());
                            removeDatas.add(eventData);
                            // Skip this record.
                            continue;
                        }
                    } else {
                        eventData.setSchemaName(dataMedia.getNamespace());
                        eventData.setTableName(dataMedia.getName());
                    }
                    // Update the business event type.
                    EventColumn typeColumn = getMatchColumn(eventData.getColumns(), TYPE);
                    EventType eventType = EventType.valuesOf(typeColumn.getColumnValue());
                    eventData.setEventType(eventType);
                    if (eventType.isUpdate()) {
                        // Force update to insert so the target side executes merge sql.
                        eventData.setEventType(EventType.INSERT);
                    } else if (eventType.isDdl()) {
                        dbDialect.reloadTable(eventData.getSchemaName(), eventData.getTableName());
                        // Remove the current record.
                        removeDatas.add(eventData);
                        continue;
                    }
                    // Rebuild the business primary key columns.
                    EventColumn pkDataColumn = getMatchColumn(eventData.getColumns(), PK_DATA);
                    String pkData = pkDataColumn.getColumnValue();
                    String[] pks = StringUtils.split(pkData, PK_SPLIT);
                    Table table = dbDialect.findTable(eventData.getSchemaName(), eventData.getTableName());
                    List<EventColumn> newColumns = new ArrayList<EventColumn>();
                    Column[] primaryKeyColumns = table.getPrimaryKeyColumns();
                    if (primaryKeyColumns.length > pks.length) {
                        throw new ExtractException("data pk column size not match , data:" + eventData.toString());
                    }
                    // Build the key columns; pkIndex walks the pk values in table column order.
                    Column[] allColumns = table.getColumns();
                    int pkIndex = 0;
                    for (int i = 0; i < allColumns.length; i++) {
                        Column column = allColumns[i];
                        if (column.isPrimaryKey()) {
                            EventColumn newColumn = new EventColumn();
                            // Set the column index.
                            newColumn.setIndex(i);
                            newColumn.setColumnName(column.getName());
                            newColumn.setColumnType(column.getTypeCode());
                            newColumn.setColumnValue(pks[pkIndex]);
                            newColumn.setKey(true);
                            newColumn.setNull(pks[pkIndex] == null);
                            newColumn.setUpdate(true);
                            // Add to the record.
                            newColumns.add(newColumn);
                            pkIndex++;
                        }
                    }
                    // Set the data.
                    eventData.setKeys(newColumns);
                    eventData.setOldKeys(new ArrayList<EventColumn>());
                    eventData.setColumns(new ArrayList<EventColumn>());
                    // Row mode with lookup against the source media.
                    eventData.setSyncMode(SyncMode.ROW);
                    eventData.setSyncConsistency(SyncConsistency.MEDIA);
                    eventData.setRemedy(true);
                    // Default to 1kb; sizing by the binlog size could select rpc transport and
                    // lead to running out of memory.
                    eventData.setSize(1024);
                } catch (ConfigException e) {
                    // Ignored: the system table is shared, so this record may be picked up by a
                    // pipeline it does not belong to.
                    logger.info("find DataMedia error " + eventData.toString(), e);
                    removeDatas.add(eventData);
                    continue;
                } catch (Throwable e) {
                    // Any other failure is logged and the record is dropped.
                    logger.warn("process freedom data error " + eventData.toString(), e);
                    removeDatas.add(eventData);
                    continue;
                }
            } else {
                // Remove this record.
                removeDatas.add(eventData);
            }
        }
    }
    if (!CollectionUtils.isEmpty(removeDatas)) {
        eventDatas.removeAll(removeDatas);
    }
}
Also used : ExtractException(com.alibaba.otter.node.etl.extract.exceptions.ExtractException) Table(org.apache.ddlutils.model.Table) EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) EventType(com.alibaba.otter.shared.etl.model.EventType) ArrayList(java.util.ArrayList) ConfigException(com.alibaba.otter.shared.common.model.config.ConfigException) EventData(com.alibaba.otter.shared.etl.model.EventData) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) Column(org.apache.ddlutils.model.Column) DbDialect(com.alibaba.otter.node.etl.common.db.dialect.DbDialect) DataMedia(com.alibaba.otter.shared.common.model.config.data.DataMedia) HashSet(java.util.HashSet)

Example 30 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class ProcessorExtractor method extract.

/**
 * Runs the configured {@code EventProcessor} filters over every event of the batch and removes
 * the events a processor rejects.
 *
 * <p>Processors that implement {@code DataSourceFetcherAware} receive a data source fetcher and
 * are executed through the executor template; all other processors run inline, and the first
 * inline rejection stops further processing of that event. The executor template is always
 * released in the {@code finally} block.
 *
 * @param param the batch whose row data is filtered in place
 * @throws ExtractException if no data media pair list is found for an event's table id
 */
public void extract(DbBatch param) throws ExtractException {
    ExecutorTemplate template = null;
    try {
        final RowBatch batch = param.getRowBatch();
        final Pipeline pipeline = getPipeline(batch.getIdentity().getPipelineId());
        final List<EventData> rows = batch.getDatas();
        // A set speeds up the lookups performed by removeAll; synchronized because pool tasks add to it.
        final Set<EventData> rejected = Collections.synchronizedSet(new HashSet<EventData>());

        template = executorTemplateGetter.get();
        template.start();
        // Re-apply the configured pool size.
        template.adjustPoolSize(pipeline.getParameters().getExtractPoolSize());

        for (final EventData row : rows) {
            final List<DataMediaPair> pairs = ConfigHelper.findDataMediaPairByMediaId(pipeline, row.getTableId());
            if (pairs == null) {
                throw new ExtractException("ERROR ## the dataMediaId = " + row.getTableId() + " dataMediaPair is null,please check");
            }

            for (DataMediaPair pair : pairs) {
                if (!pair.isExistFilter()) {
                    continue;
                }

                final EventProcessor processor = extensionFactory.getExtension(EventProcessor.class, pair.getFilterData());
                if (!(processor instanceof DataSourceFetcherAware)) {
                    // Plain processor: run inline.
                    if (processor.process(row)) {
                        continue;
                    }
                    // Rejected: mark for removal and stop processing this event.
                    rejected.add(row);
                    break;
                }

                ((DataSourceFetcherAware) processor).setDataSourceFetcher(new DataSourceFetcher() {

                    @Override
                    public DataSource fetch(Long tableId) {
                        final DataMedia media = ConfigHelper.findDataMedia(pipeline, tableId);
                        return dataSourceService.getDataSource(pipeline.getId(), media.getSource());
                    }
                });
                template.submit(new Runnable() {

                    @Override
                    public void run() {
                        MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipeline.getId()));
                        if (!processor.process(row)) {
                            // Rejected: mark for removal.
                            rejected.add(row);
                        }
                    }
                });
            }
        }

        // Wait until every submitted task has finished.
        template.waitForResult();
        if (!rejected.isEmpty()) {
            rows.removeAll(rejected);
        }
    } finally {
        if (template != null) {
            executorTemplateGetter.release(template);
        }
    }
}
Also used : ExtractException(com.alibaba.otter.node.etl.extract.exceptions.ExtractException) ExecutorTemplate(com.alibaba.otter.shared.common.utils.thread.ExecutorTemplate) DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) DataSourceFetcher(com.alibaba.otter.shared.etl.extend.processor.support.DataSourceFetcher) DataSourceFetcherAware(com.alibaba.otter.shared.etl.extend.processor.support.DataSourceFetcherAware) EventData(com.alibaba.otter.shared.etl.model.EventData) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) DataSource(javax.sql.DataSource) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) EventProcessor(com.alibaba.otter.shared.etl.extend.processor.EventProcessor) DataMedia(com.alibaba.otter.shared.common.model.config.data.DataMedia)

Aggregations

- EventData (com.alibaba.otter.shared.etl.model.EventData): 48
- ArrayList (java.util.ArrayList): 20
- Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline): 19
- EventColumn (com.alibaba.otter.shared.etl.model.EventColumn): 18
- DataMediaPair (com.alibaba.otter.shared.common.model.config.data.DataMediaPair): 16
- Test (org.testng.annotations.Test): 16
- BaseDbTest (com.alibaba.otter.node.etl.BaseDbTest): 15
- RowBatch (com.alibaba.otter.shared.etl.model.RowBatch): 14
- Identity (com.alibaba.otter.shared.etl.model.Identity): 9
- MapMaker (com.google.common.collect.MapMaker): 9
- RowKey (com.alibaba.otter.node.etl.load.loader.db.DbLoadMerger.RowKey): 8
- DataMedia (com.alibaba.otter.shared.common.model.config.data.DataMedia): 8
- DbBatch (com.alibaba.otter.shared.etl.model.DbBatch): 7
- DbDialect (com.alibaba.otter.node.etl.common.db.dialect.DbDialect): 5
- ExtractException (com.alibaba.otter.node.etl.extract.exceptions.ExtractException): 5
- LoadException (com.alibaba.otter.node.etl.load.exception.LoadException): 4
- FileData (com.alibaba.otter.shared.etl.model.FileData): 4
- SelectException (com.alibaba.otter.node.etl.select.exceptions.SelectException): 3
- Channel (com.alibaba.otter.shared.common.model.config.channel.Channel): 3
- EventType (com.alibaba.otter.shared.etl.model.EventType): 3