Search in sources :

Example 21 with EventColumn

use of com.alibaba.otter.shared.etl.model.EventColumn in project otter by alibaba.

the class FileExtractor method getSingleRowFileInfos.

/**
 * Resolves the files that belong to a single row change and wraps each of them in a
 * {@code FileData}.
 *
 * @param pairId id of the mapping rule, recorded on every produced {@code FileData}
 * @param fileResolver resolver asked for the file descriptors of the row
 * @param eventData the row change being inspected
 * @return a new list with one {@code FileData} per resolved file; empty when the event is a
 *         DELETE and the resolver does not require deletes, or when the resolver yields no files
 */
private List<FileData> getSingleRowFileInfos(long pairId, FileResolver fileResolver, EventData eventData) {
    List<FileData> result = new ArrayList<FileData>();
    boolean isDelete = eventData.getEventType() == EventType.DELETE;
    if (isDelete && !fileResolver.isDeleteRequired()) {
        return result;
    }

    // Keys go first so that an updated column with the same upper-cased name overrides the key value.
    List<EventColumn> rowColumns = new ArrayList<EventColumn>(eventData.getKeys());
    rowColumns.addAll(eventData.getUpdatedColumns());

    Map<String, String> valuesByUpperName = new HashMap<String, String>();
    for (EventColumn rowColumn : rowColumns) {
        String upperName = rowColumn.getColumnName().toUpperCase();
        valuesByUpperName.put(upperName, rowColumn.getColumnValue());
    }

    FileInfo[] resolved = fileResolver.getFileInfo(valuesByUpperName);
    if (resolved == null) {
        return result;
    }
    for (int idx = 0; idx < resolved.length; idx++) {
        FileInfo info = resolved[idx];
        FileData data = new FileData();
        // Remember the id of the concrete mapping rule.
        data.setPairId(pairId);
        data.setTableId(eventData.getTableId());
        data.setEventType(eventData.getEventType());
        data.setLastModifiedTime(info.getLastModifiedTime());
        data.setNameSpace(info.getNamespace());
        data.setPath(info.getPath());
        data.setSize(info.getSize());
        result.add(data);
    }
    return result;
}
Also used : FileInfo(com.alibaba.otter.shared.etl.extend.fileresolver.FileInfo) HashMap(java.util.HashMap) EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) ArrayList(java.util.ArrayList) FileData(com.alibaba.otter.shared.etl.model.FileData)

Example 22 with EventColumn

use of com.alibaba.otter.shared.etl.model.EventColumn in project otter by alibaba.

the class GroupExtractor method groupFilter.

/**
 * Completes the columns of a column group when at least one member of the group changed.
 *
 * <p>The set of changed names is built from the updated columns plus every key whose value
 * differs from the old key at the same position. When {@code containsInGroupColumn} reports a
 * hit for that set, every source column of the group is either forced to {@code update=true}
 * (if it is already present in the event's columns) or appended as a new value-less column,
 * and the event's sync consistency is switched to {@code SyncConsistency.MEDIA}.
 *
 * <p>Column names are compared case-insensitively throughout, including the primary-key
 * exclusion, so a key that differs from the group column only by case is not appended.
 *
 * @param eventData the row change to complete; its column list may be mutated
 * @param columnGroup the group whose column pairs are checked
 */
private void groupFilter(EventData eventData, ColumnGroup columnGroup) {
    List<EventColumn> addColumns = new ArrayList<EventColumn>();
    // Names of the columns that actually changed within this event.
    Set<String> updatedColumns = new HashSet<String>();
    Set<String> pks = new HashSet<String>();
    // Note: only the columns that really need to be synchronized are collected here.
    for (EventColumn column : eventData.getUpdatedColumns()) {
        updatedColumns.add(column.getColumnName());
    }
    for (EventColumn pk : eventData.getKeys()) {
        pks.add(pk.getColumnName());
    }
    if (!CollectionUtils.isEmpty(eventData.getOldKeys())) {
        // Handle changed primary keys: old and new keys are paired by position.
        int i = 0;
        for (EventColumn pk : eventData.getKeys()) {
            if (!StringUtils.equals(pk.getColumnValue(), eventData.getOldKeys().get(i).getColumnValue())) {
                updatedColumns.add(pk.getColumnName());
            }
            i++;
        }
    }
    if (!containsInGroupColumn(updatedColumns, columnGroup.getColumnPairs())) {
        return;
    }
    // Intersect the changed columns (plus changed keys) with the columns of the group.
    for (ColumnPair columnPair : columnGroup.getColumnPairs()) {
        String columnName = columnPair.getSourceColumn().getName();
        boolean presentInEvent = false;
        for (EventColumn column : eventData.getColumns()) {
            if (StringUtils.equalsIgnoreCase(columnName, column.getColumnName())) {
                presentInEvent = true;
                if (!column.isUpdate()) {
                    // A column not marked for sync is forced to update=true so it gets synchronized.
                    column.setUpdate(true);
                }
                break;
            }
        }
        if (presentInEvent) {
            continue;
        }
        // No record exists for this group column. Check whether it is a primary key using the
        // same case-insensitive comparison as above; a plain Set.contains would miss keys whose
        // name differs only by case.
        boolean isPrimaryKey = false;
        for (String pkName : pks) {
            if (StringUtils.equalsIgnoreCase(pkName, columnName)) {
                isPrimaryKey = true;
                break;
            }
        }
        if (!isPrimaryKey) {
            // Only non-key columns are added for reverse lookup; keys do not need one.
            EventColumn addColumn = new EventColumn();
            addColumn.setColumnName(columnName);
            addColumn.setUpdate(true);
            addColumns.add(addColumn);
        }
    }
    if (!CollectionUtils.isEmpty(addColumns)) {
        // Append the missing columns and request a lookup against the media.
        eventData.getColumns().addAll(addColumns);
        eventData.setSyncConsistency(SyncConsistency.MEDIA);
    }
}
Also used : ColumnPair(com.alibaba.otter.shared.common.model.config.data.ColumnPair) EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet)

Example 23 with EventColumn

use of com.alibaba.otter.shared.etl.model.EventColumn in project otter by alibaba.

the class DbLoadMerger method replaceColumnValue.

/**
 * Folds an earlier change of a row into a later one.
 *
 * <p>Columns that exist only in the old event are appended to the new event's column list, and
 * for columns present in both (names compared case-insensitively) the update flags are OR-ed.
 * The merged list is then sorted with {@code EventColumnIndexComparable}. The old event's old
 * keys always replace those of the new event; sync consistency, sync mode and the remedy flag
 * are copied over only when set on the old event, and the sizes of both events are summed.
 *
 * @param newEventData the later event; it is modified in place
 * @param oldEventData the earlier event whose data is merged in
 * @return {@code newEventData}, after merging
 */
private static EventData replaceColumnValue(EventData newEventData, EventData oldEventData) {
    List<EventColumn> merged = newEventData.getColumns();
    List<EventColumn> onlyInOld = new ArrayList<EventColumn>();

    for (EventColumn previous : oldEventData.getColumns()) {
        boolean matched = false;
        for (EventColumn current : merged) {
            if (!previous.getColumnName().equalsIgnoreCase(current.getColumnName())) {
                continue;
            }
            // Merge the isUpdate flag of both versions of the column.
            boolean updated = current.isUpdate() || previous.isUpdate();
            current.setUpdate(updated);
            matched = true;
        }
        if (!matched) {
            onlyInOld.add(previous);
        }
    }
    merged.addAll(onlyInOld);

    // Restore the ordering of the combined column list.
    Collections.sort(merged, new EventColumnIndexComparable());

    // Carry the old keys of the previous change over to this change.
    newEventData.setOldKeys(oldEventData.getOldKeys());

    if (null != oldEventData.getSyncConsistency()) {
        newEventData.setSyncConsistency(oldEventData.getSyncConsistency());
    }
    if (null != oldEventData.getSyncMode()) {
        newEventData.setSyncMode(oldEventData.getSyncMode());
    }
    if (oldEventData.isRemedy()) {
        newEventData.setRemedy(oldEventData.isRemedy());
    }

    newEventData.setSize(oldEventData.getSize() + newEventData.getSize());
    return newEventData;
}
Also used : EventColumnIndexComparable(com.alibaba.otter.shared.etl.model.EventColumnIndexComparable) EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) ArrayList(java.util.ArrayList)

Example 24 with EventColumn

use of com.alibaba.otter.shared.etl.model.EventColumn in project otter by alibaba.

the class SqlBuilderLoadInterceptor method buildColumnNames.

/**
 * Extracts the column names of the given columns, preserving their order.
 *
 * @param columns columns to read the names from
 * @return an array with one name per column, in list order
 */
private String[] buildColumnNames(List<EventColumn> columns) {
    String[] names = new String[columns.size()];
    int position = 0;
    for (EventColumn current : columns) {
        names[position] = current.getColumnName();
        position++;
    }
    return names;
}
Also used : EventColumn(com.alibaba.otter.shared.etl.model.EventColumn)

Example 25 with EventColumn

use of com.alibaba.otter.shared.etl.model.EventColumn in project otter by alibaba.

the class FreedomExtractor method extract.

/**
 * Rewrites the records of the system buffer table found in the batch into row events of the
 * business table they refer to, and removes the buffer records that must not be synchronized.
 *
 * <p>Only events whose schema and table match the pipeline's system schema and system buffer
 * table (case-insensitively) are touched. For each such non-DDL event:
 * <ul>
 * <li>it is removed when the pipeline's skip-freedom parameter is set;</li>
 * <li>it is removed when it is neither an insert nor an update;</li>
 * <li>otherwise its table id, schema, table name, event type and keys are rebuilt from the
 * buffer-table columns; any failure while doing so is logged and the event is removed.</li>
 * </ul>
 *
 * @param dbBatch the batch whose row data is rewritten in place; must not be null
 * @throws ExtractException declared by the signature; failures raised while rewriting a single
 *         record are caught, logged and cause that record to be dropped
 */
public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    // Read the pipeline configuration.
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    boolean skipFreedom = pipeline.getParameters().getSkipFreedom();
    String bufferSchema = pipeline.getParameters().getSystemSchema();
    String bufferTable = pipeline.getParameters().getSystemBufferTable();
    List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
    // Use a set to speed up the lookups performed while removing.
    Set<EventData> removeDatas = new HashSet<EventData>();
    for (EventData eventData : eventDatas) {
        if (StringUtils.equalsIgnoreCase(bufferSchema, eventData.getSchemaName()) && StringUtils.equalsIgnoreCase(bufferTable, eventData.getTableName())) {
            if (eventData.getEventType().isDdl()) {
                continue;
            }
            if (skipFreedom) {
                // Buffer records are to be ignored for this pipeline.
                removeDatas.add(eventData);
                continue;
            }
            // Only insert / update records are processed.
            if (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate()) {
                // Rewrite the EventData according to the definition of the system table.
                EventColumn tableIdColumn = getMatchColumn(eventData.getColumns(), TABLE_ID);
                // Look up the media information for the referenced tableId.
                try {
                    DataMedia dataMedia = null;
                    Long tableId = Long.valueOf(tableIdColumn.getColumnValue());
                    eventData.setTableId(tableId);
                    if (tableId <= 0) {
                        // No usable table id: try to find the media by full name (schema + table name).
                        EventColumn fullNameColumn = getMatchColumn(eventData.getColumns(), FULL_NAME);
                        if (fullNameColumn != null) {
                            String[] names = StringUtils.split(fullNameColumn.getColumnValue(), ".");
                            if (names.length >= 2) {
                                dataMedia = ConfigHelper.findSourceDataMedia(pipeline, names[0], names[1]);
                                eventData.setTableId(dataMedia.getId());
                            } else {
                                // Report the raw full name; concatenating the array itself would only
                                // print its identity string.
                                throw new ConfigException("no such DataMedia " + fullNameColumn.getColumnValue());
                            }
                        }
                    } else {
                        // A table id was given, so the lookup must be strictly by id; if nothing is
                        // found the record does not need to be synchronized.
                        dataMedia = ConfigHelper.findDataMedia(pipeline, tableId);
                    }
                    DbDialect dbDialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) dataMedia.getSource());
                    // Take multi-value configurations such as offer[1-128] into account.
                    if (!dataMedia.getNameMode().getMode().isSingle() || !dataMedia.getNamespaceMode().getMode().isSingle()) {
                        boolean hasError = true;
                        EventColumn fullNameColumn = getMatchColumn(eventData.getColumns(), FULL_NAME);
                        if (fullNameColumn != null) {
                            String[] names = StringUtils.split(fullNameColumn.getColumnValue(), ".");
                            if (names.length >= 2) {
                                eventData.setSchemaName(names[0]);
                                eventData.setTableName(names[1]);
                                hasError = false;
                            }
                        }
                        if (hasError) {
                            // The full name is missing or malformed: log it.
                            logger.warn("dataMedia mode:{} , fullname:{} ", dataMedia.getMode(), fullNameColumn == null ? null : fullNameColumn.getColumnValue());
                            removeDatas.add(eventData);
                            // Skip this record.
                            continue;
                        }
                    } else {
                        eventData.setSchemaName(dataMedia.getNamespace());
                        eventData.setTableName(dataMedia.getName());
                    }
                    // Update the business event type.
                    EventColumn typeColumn = getMatchColumn(eventData.getColumns(), TYPE);
                    EventType eventType = EventType.valuesOf(typeColumn.getColumnValue());
                    eventData.setEventType(eventType);
                    if (eventType.isUpdate()) {
                        // An update is forced to insert so that a merge sql can be executed on the
                        // target side.
                        eventData.setEventType(EventType.INSERT);
                    } else if (eventType.isDdl()) {
                        dbDialect.reloadTable(eventData.getSchemaName(), eventData.getTableName());
                        // Drop the current record.
                        removeDatas.add(eventData);
                        continue;
                    }
                    // Rebuild the business primary key columns.
                    EventColumn pkDataColumn = getMatchColumn(eventData.getColumns(), PK_DATA);
                    String pkData = pkDataColumn.getColumnValue();
                    String[] pks = StringUtils.split(pkData, PK_SPLIT);
                    Table table = dbDialect.findTable(eventData.getSchemaName(), eventData.getTableName());
                    List<EventColumn> newColumns = new ArrayList<EventColumn>();
                    Column[] primaryKeyColumns = table.getPrimaryKeyColumns();
                    if (primaryKeyColumns.length > pks.length) {
                        throw new ExtractException("data pk column size not match , data:" + eventData.toString());
                    }
                    // Build one key column per primary key of the table, in table column order.
                    Column[] allColumns = table.getColumns();
                    int pkIndex = 0;
                    for (int i = 0; i < allColumns.length; i++) {
                        Column column = allColumns[i];
                        if (column.isPrimaryKey()) {
                            EventColumn newColumn = new EventColumn();
                            // The index is the position of the column within the table.
                            newColumn.setIndex(i);
                            newColumn.setColumnName(column.getName());
                            newColumn.setColumnType(column.getTypeCode());
                            newColumn.setColumnValue(pks[pkIndex]);
                            newColumn.setKey(true);
                            newColumn.setNull(pks[pkIndex] == null);
                            newColumn.setUpdate(true);
                            // Add it to the record.
                            newColumns.add(newColumn);
                            pkIndex++;
                        }
                    }
                    // Install the rebuilt data.
                    eventData.setKeys(newColumns);
                    eventData.setOldKeys(new ArrayList<EventColumn>());
                    eventData.setColumns(new ArrayList<EventColumn>());
                    // Row mode plus reverse lookup against the media.
                    eventData.setSyncMode(SyncMode.ROW);
                    eventData.setSyncConsistency(SyncConsistency.MEDIA);
                    eventData.setRemedy(true);
                    // Default to 1kb; sizing by the binlog size could select rpc transfer and
                    // exhaust memory.
                    eventData.setSize(1024);
                } catch (ConfigException e) {
                    // Ignored: the system table is shared, so this record may be picked up by a
                    // pipeline it does not belong to.
                    logger.info("find DataMedia error " + eventData.toString(), e);
                    removeDatas.add(eventData);
                    continue;
                } catch (Throwable e) {
                    // Any other failure: log it and drop the record.
                    logger.warn("process freedom data error " + eventData.toString(), e);
                    removeDatas.add(eventData);
                    continue;
                }
            } else {
                // Neither insert nor update: drop the record.
                removeDatas.add(eventData);
            }
        }
    }
    if (!CollectionUtils.isEmpty(removeDatas)) {
        eventDatas.removeAll(removeDatas);
    }
}
Also used : ExtractException(com.alibaba.otter.node.etl.extract.exceptions.ExtractException) Table(org.apache.ddlutils.model.Table) EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) EventType(com.alibaba.otter.shared.etl.model.EventType) ArrayList(java.util.ArrayList) ConfigException(com.alibaba.otter.shared.common.model.config.ConfigException) EventData(com.alibaba.otter.shared.etl.model.EventData) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) Column(org.apache.ddlutils.model.Column) DbDialect(com.alibaba.otter.node.etl.common.db.dialect.DbDialect) DataMedia(com.alibaba.otter.shared.common.model.config.data.DataMedia) HashSet(java.util.HashSet)

Aggregations

EventColumn (com.alibaba.otter.shared.etl.model.EventColumn)38 EventData (com.alibaba.otter.shared.etl.model.EventData)18 ArrayList (java.util.ArrayList)13 Test (org.testng.annotations.Test)9 BaseDbTest (com.alibaba.otter.node.etl.BaseDbTest)8 RowKey (com.alibaba.otter.node.etl.load.loader.db.DbLoadMerger.RowKey)8 MapMaker (com.google.common.collect.MapMaker)8 DbDialect (com.alibaba.otter.node.etl.common.db.dialect.DbDialect)4 Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)4 FileData (com.alibaba.otter.shared.etl.model.FileData)4 Table (org.apache.ddlutils.model.Table)4 ColumnPair (com.alibaba.otter.shared.common.model.config.data.ColumnPair)3 DataMedia (com.alibaba.otter.shared.common.model.config.data.DataMedia)3 EventType (com.alibaba.otter.shared.etl.model.EventType)3 FileBatch (com.alibaba.otter.shared.etl.model.FileBatch)3 RowBatch (com.alibaba.otter.shared.etl.model.RowBatch)3 File (java.io.File)3 PipeException (com.alibaba.otter.node.etl.common.pipe.exception.PipeException)2 BatchProto (com.alibaba.otter.node.etl.model.protobuf.BatchProto)2 TransformException (com.alibaba.otter.node.etl.transform.exception.TransformException)2