Search in sources:

Example 21 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class DbLoadAction method load.

/**
 * Loads one row batch through this action and returns the load context.
 * <p>
 * A batch recognised by {@code isDdlDatas} is handed to {@code doDdl}; any
 * other batch is split into weight buckets and loaded one weight at a time,
 * using the {@link WeightController} to coordinate each weight
 * ({@code start}, then {@code await}/{@code single} per weight).
 *
 * @param rowBatch the batch to load; must not be null
 * @param controller the weight controller used to order the per-weight loads
 * @return the load context built for this batch (the original author notes
 *         that it carries the successfully processed records)
 * @throws LoadException wrapping any exception other than
 *         {@link InterruptedException} raised while loading
 */
public DbLoadContext load(RowBatch rowBatch, WeightController controller) {
    Assert.notNull(rowBatch);
    DbLoadContext context = buildContext(rowBatch.getIdentity());
    try {
        context.setPrepareDatas(rowBatch.getDatas());
        // Read the data back from the context (original note: duplicate-entry filtering).
        List<EventData> datas = context.getPrepareDatas();
        if (datas == null || datas.isEmpty()) {
            logger.info("##no eventdata for load, return");
            return context;
        }

        // Original note: DbBatchLoader has already grouped the data by data media source,
        // and each source is handled by its own DbLoadAction, so the source of the first
        // record is enough to set the media source for the whole batch.
        EventData first = datas.get(0);
        context.setDataMediaSource(ConfigHelper.findDataMedia(context.getPipeline(), first.getTableId()).getSource());
        interceptor.prepare(context);
        // Re-read the data after the interceptor has prepared the context
        // (original note: duplicate-entry filtering).
        datas = context.getPrepareDatas();

        // Original note: for DDL idempotence, keep one DDL per batch where possible so
        // that a failure or rollback only concerns that single statement.
        if (isDdlDatas(datas)) {
            doDdl(context, datas);
        } else {
            WeightBuckets<EventData> buckets = buildWeightBuckets(context, datas);
            List<Long> weights = buckets.weights();
            // start() must be called even when the weight list is empty.
            controller.start(weights);
            if (CollectionUtils.isEmpty(datas)) {
                logger.info("##no eventdata for load");
            }
            // Adjust the thread pool according to the manager configuration.
            adjustPoolSize(context);
            // Adjust the runtime parameters.
            adjustConfig(context);

            for (Long weight : weights) {
                int weightValue = weight.intValue();
                controller.await(weightValue);
                logger.debug("##start load for weight:" + weight);
                // Merge repeated I/U/D operations on the same primary key within this weight.
                List<EventData> merged = DbLoadMerger.merge(buckets.getItems(weight));
                // Classify the merged records by I/U/D into the load data holder.
                DbLoadData loadData = new DbLoadData();
                doBefore(merged, context, loadData);
                // Perform the actual load.
                doLoad(context, loadData);
                controller.single(weightValue);
                logger.debug("##end load for weight:" + weight);
            }
        }
        interceptor.commit(context);
    } catch (InterruptedException e) {
        // Restore the interrupt flag, report the error, then fall through and
        // return the context without throwing.
        Thread.currentThread().interrupt();
        interceptor.error(context);
    } catch (Exception e) {
        interceptor.error(context);
        throw new LoadException(e);
    }
    // Return the context describing what was processed.
    return context;
}
Also used : DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) Identity(com.alibaba.otter.shared.etl.model.Identity) EventData(com.alibaba.otter.shared.etl.model.EventData) DataIntegrityViolationException(org.springframework.dao.DataIntegrityViolationException) DataAccessException(org.springframework.dao.DataAccessException) SQLException(java.sql.SQLException) DeadlockLoserDataAccessException(org.springframework.dao.DeadlockLoserDataAccessException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException)

Example 22 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class DbLoadAction method doDdl.

/**
 * Executes the DDL statements of a batch serially, in list order.
 * <p>
 * A statement counts as successful when it completes without throwing and is
 * then added to {@code context.getProcessedDatas()}. The boolean returned by
 * {@link Statement#execute(String)} is deliberately ignored: it only reports
 * whether the first result is a {@code ResultSet} (false for DDL), not
 * whether the statement succeeded.
 * <p>
 * When a statement throws and the pipeline's skip-DDL-exception setting is
 * {@code true}, the failure is logged and the loop continues with the next
 * statement; otherwise the failure is rethrown wrapped in a
 * {@link LoadException}.
 *
 * @param context the load context that supplies the pipeline and collects the processed records
 * @param eventDatas the DDL events to execute
 * @throws LoadException if a statement fails and DDL exceptions are not skipped
 */
private void doDdl(DbLoadContext context, List<EventData> eventDatas) {
    for (final EventData data : eventDatas) {
        DataMedia dataMedia = ConfigHelper.findDataMedia(context.getPipeline(), data.getTableId());
        final DbDialect dbDialect = dbDialectFactory.getDbDialect(context.getIdentity().getPipelineId(), (DbMediaSource) dataMedia.getSource());
        Boolean skipDdlException = context.getPipeline().getParameters().getSkipDdlException();
        try {
            dbDialect.getJdbcTemplate().execute(new StatementCallback<Boolean>() {

                public Boolean doInStatement(Statement stmt) throws SQLException, DataAccessException {
                    if (dbDialect instanceof MysqlDialect && StringUtils.isNotEmpty(data.getDdlSchemaName())) {
                        // For MySQL, switch to the schema the DDL was executed in on the source side.
                        // The name is back-quoted so that a schema named after a keyword
                        // (e.g. "Order") does not break the statement.
                        stmt.execute("use `" + data.getDdlSchemaName() + "`");
                    }
                    stmt.execute(data.getSql());
                    // Reaching this point without an exception means the DDL was applied.
                    return Boolean.TRUE;
                }
            });
            // Record the statement as successfully processed.
            context.getProcessedDatas().add(data);
        } catch (Throwable e) {
            // Null-safe check: an unset flag must not raise a NullPointerException here,
            // which would mask the original DDL failure.
            if (Boolean.TRUE.equals(skipDdlException)) {
                // do skip
                logger.warn("skip exception for ddl : {} , caused by {}", data, ExceptionUtils.getFullStackTrace(e));
            } else {
                throw new LoadException(e);
            }
        }
    }
}
Also used : MysqlDialect(com.alibaba.otter.node.etl.common.db.dialect.mysql.MysqlDialect) SQLException(java.sql.SQLException) DbDialect(com.alibaba.otter.node.etl.common.db.dialect.DbDialect) PreparedStatement(java.sql.PreparedStatement) Statement(java.sql.Statement) EventData(com.alibaba.otter.shared.etl.model.EventData) DataMedia(com.alibaba.otter.shared.common.model.config.data.DataMedia) DataAccessException(org.springframework.dao.DataAccessException) DeadlockLoserDataAccessException(org.springframework.dao.DeadlockLoserDataAccessException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException)

Example 23 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class DatabaseExtractor method extract.

/**
 * Walks the non-DDL events of the batch and, for inserts/updates that need it,
 * submits a {@code DatabaseExtractWorker} to the executor, then waits for all
 * submitted workers. Items whose {@code filter} flag is set afterwards are
 * removed from the batch's event list.
 * <p>
 * If the calling thread is interrupted while waiting, its interrupt status is
 * restored before the pending work is cancelled and the exception is thrown.
 *
 * @param dbBatch the batch to process; it and its row batch must not be null
 * @throws ExtractException if a worker fails or the wait is interrupted
 */
@Override
public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    Assert.notNull(dbBatch.getRowBatch());
    // Read the configuration.
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    boolean mustDb = pipeline.getParameters().getSyncConsistency().isMedia();
    // Original note: row mode always requires a database look-up.
    boolean isRow = pipeline.getParameters().getSyncMode().isRow();
    // Adjust the thread pool size from the pipeline parameters
    // (original note: extractors are pooled).
    adjustPoolSize(pipeline.getParameters().getExtractPoolSize());
    ExecutorCompletionService completionService = new ExecutorCompletionService(executor);
    // Holds the failure, if any, seen while collecting worker results.
    ExtractException exception = null;
    // Process every event.
    List<DataItem> items = new ArrayList<DataItem>();
    List<Future> futures = new ArrayList<Future>();
    List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
    for (EventData eventData : eventDatas) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }
        DataItem item = new DataItem(eventData);
        // A look-up is needed when the pipeline or the event itself asks for media consistency.
        boolean flag = mustDb || (eventData.getSyncConsistency() != null && eventData.getSyncConsistency().isMedia());
        // Extra case (original note): Oracle erosa sometimes delivers only the primary key
        // without any changed column, so a look-up is needed.
        if (!flag && CollectionUtils.isEmpty(eventData.getUpdatedColumns())) {
            DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId());
            if (dataMedia.getSource().getType().isOracle()) {
                flag = true;
                // Original note: such data is treated as a remedy operation, because the
                // record may no longer exist when it is looked up.
                eventData.setRemedy(true);
            }
        }
        if (isRow && !flag) {
            // Decide up front to avoid contention inside the worker threads
            // (original note: views are checked again later).
            flag = checkNeedDbForRowMode(pipeline, eventData);
        }
        if (flag && (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate())) {
            // Submit the look-up for parallel execution.
            Future future = completionService.submit(new DatabaseExtractWorker(pipeline, item), null);
            if (future.isDone()) {
                // Check right away: per the original note the task may have run in the
                // caller's thread, and a failure should surface immediately instead of
                // after all other tasks have been submitted.
                try {
                    future.get();
                } catch (InterruptedException e) {
                    // Keep the interrupt visible to callers, cancel, and bail out.
                    Thread.currentThread().interrupt();
                    cancel(futures);
                    throw new ExtractException(e);
                } catch (ExecutionException e) {
                    // Cancel and bail out.
                    cancel(futures);
                    throw new ExtractException(e);
                }
            }
            // Remember the submitted task.
            futures.add(future);
        }
        // Keep the items in their original order.
        items.add(item);
    }
    // Collect the results of every submitted task.
    int index = 0;
    while (index < futures.size()) {
        try {
            // take() blocks and may itself be interrupted.
            Future future = completionService.take();
            future.get();
        } catch (InterruptedException e) {
            // Keep the interrupt visible to callers; stop waiting.
            Thread.currentThread().interrupt();
            exception = new ExtractException(e);
            break;
        } catch (ExecutionException e) {
            // Stop as soon as one task has failed.
            exception = new ExtractException(e);
            break;
        }
        index++;
    }
    if (index < futures.size()) {
        // Fewer completions than submissions means a failure occurred:
        // cancel whatever is still pending and report the failure.
        cancel(futures);
        throw exception;
    } else {
        // Everything succeeded: drop the items flagged for filtering
        // (original note: e.g. the record no longer exists in the database).
        for (int i = 0; i < items.size(); i++) {
            DataItem item = items.get(i);
            if (item.filter) {
                eventDatas.remove(item.getEventData());
            }
        }
    }
}
Also used : ExtractException(com.alibaba.otter.node.etl.extract.exceptions.ExtractException) ArrayList(java.util.ArrayList) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) EventData(com.alibaba.otter.shared.etl.model.EventData) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) Future(java.util.concurrent.Future) ExecutionException(java.util.concurrent.ExecutionException) DataMedia(com.alibaba.otter.shared.common.model.config.data.DataMedia)

Example 24 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class FileExtractor method doFileExtract.

/**
 * Builds the de-duplicated list of file descriptors referenced by the
 * non-DDL events of a row batch.
 * <p>
 * For every data media pair of an event's table that carries a usable
 * resolver configuration, the configured {@code FileResolver} is obtained and
 * asked for the files of that event. When the pipeline's file-detect
 * parameter is set, the collected list is additionally passed to
 * {@code doFileDetectCollector}.
 *
 * @param rowBatch the batch whose events are inspected
 * @return the collected file descriptors, without duplicates, in discovery order
 * @throws ExtractException if no data media pairs are found for an event's
 *         table or a configured resolver cannot be obtained
 */
private List<FileData> doFileExtract(RowBatch rowBatch) {
    List<FileData> collected = new ArrayList<FileData>();
    Pipeline pipeline = getPipeline(rowBatch.getIdentity().getPipelineId());
    for (EventData eventData : rowBatch.getDatas()) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }
        List<DataMediaPair> pairs = ConfigHelper.findDataMediaPairByMediaId(pipeline, eventData.getTableId());
        if (pairs == null) {
            throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId() + " dataMediaPair is null,please check");
        }
        for (DataMediaPair pair : pairs) {
            // Skip pairs without a resolver configuration or without a declared extension type.
            if (pair.getResolverData() == null || pair.getResolverData().getExtensionDataType() == null) {
                continue;
            }
            // Skip class-based resolvers that name no class.
            if (pair.getResolverData().getExtensionDataType().isClazz() && StringUtils.isBlank(pair.getResolverData().getClazzPath())) {
                continue;
            }
            // Skip source-based resolvers that carry no source text.
            if (pair.getResolverData().getExtensionDataType().isSource() && StringUtils.isBlank(pair.getResolverData().getSourceText())) {
                continue;
            }
            FileResolver resolver = extensionFactory.getExtension(FileResolver.class, pair.getResolverData());
            if (resolver == null) {
                throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId() + " the fileResolver className  = " + pair.getResolverData().getClazzPath() + " is null ,please check the class");
            }
            if (resolver instanceof RemoteDirectoryFetcherAware) {
                ((RemoteDirectoryFetcherAware) resolver).setRemoteDirectoryFetcher(arandaRemoteDirectoryFetcher);
            }
            // De-duplicate while keeping the order in which files were found.
            for (FileData candidate : getSingleRowFileInfos(pair.getId(), resolver, eventData)) {
                if (!collected.contains(candidate)) {
                    collected.add(candidate);
                }
            }
        }
    }
    // Optionally run the file detection step (original note: check for repeated image sync).
    if (pipeline.getParameters().getFileDetect()) {
        doFileDetectCollector(pipeline, collected);
    }
    return collected;
}
Also used : ExtractException(com.alibaba.otter.node.etl.extract.exceptions.ExtractException) RemoteDirectoryFetcherAware(com.alibaba.otter.shared.etl.extend.fileresolver.support.RemoteDirectoryFetcherAware) DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) ArrayList(java.util.ArrayList) FileResolver(com.alibaba.otter.shared.etl.extend.fileresolver.FileResolver) FileData(com.alibaba.otter.shared.etl.model.FileData) EventData(com.alibaba.otter.shared.etl.model.EventData) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Example 25 with EventData

use of com.alibaba.otter.shared.etl.model.EventData in project otter by alibaba.

the class GroupExtractor method extract.

/**
 * Applies the configured column groups to every non-DDL event of the batch.
 * <p>
 * The column groups of each data media pair are indexed by the id of the
 * pair's source; every event whose table id has an entry is passed to
 * {@code groupFilter} once per non-empty column group.
 *
 * @param dbBatch the batch to process; it and its row batch must not be null
 * @throws ExtractException declared by the overridden method
 */
@Override
public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    Assert.notNull(dbBatch.getRowBatch());
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());

    // key = table id, value = the column groups configured for that table
    Map<Long, List<ColumnGroup>> groupsByTableId = new HashMap<Long, List<ColumnGroup>>();
    for (DataMediaPair pair : pipeline.getPairs()) {
        List<ColumnGroup> configured = pair.getColumnGroups();
        if (CollectionUtils.isEmpty(configured)) {
            continue;
        }
        groupsByTableId.put(pair.getSource().getId(), configured);
    }

    for (EventData eventData : dbBatch.getRowBatch().getDatas()) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }
        List<ColumnGroup> tableGroups = groupsByTableId.get(eventData.getTableId());
        if (CollectionUtils.isEmpty(tableGroups)) {
            continue;
        }
        for (ColumnGroup group : tableGroups) {
            // Ignore missing groups and groups without any column pair.
            if (group == null || CollectionUtils.isEmpty(group.getColumnPairs())) {
                continue;
            }
            groupFilter(eventData, group);
        }
    }
}
Also used : DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) ColumnGroup(com.alibaba.otter.shared.common.model.config.data.ColumnGroup) EventData(com.alibaba.otter.shared.etl.model.EventData) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Aggregations

EventData (com.alibaba.otter.shared.etl.model.EventData) 48, ArrayList (java.util.ArrayList) 20, Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) 19, EventColumn (com.alibaba.otter.shared.etl.model.EventColumn) 18, DataMediaPair (com.alibaba.otter.shared.common.model.config.data.DataMediaPair) 16, Test (org.testng.annotations.Test) 16, BaseDbTest (com.alibaba.otter.node.etl.BaseDbTest) 15, RowBatch (com.alibaba.otter.shared.etl.model.RowBatch) 14, Identity (com.alibaba.otter.shared.etl.model.Identity) 9, MapMaker (com.google.common.collect.MapMaker) 9, RowKey (com.alibaba.otter.node.etl.load.loader.db.DbLoadMerger.RowKey) 8, DataMedia (com.alibaba.otter.shared.common.model.config.data.DataMedia) 8, DbBatch (com.alibaba.otter.shared.etl.model.DbBatch) 7, DbDialect (com.alibaba.otter.node.etl.common.db.dialect.DbDialect) 5, ExtractException (com.alibaba.otter.node.etl.extract.exceptions.ExtractException) 5, LoadException (com.alibaba.otter.node.etl.load.exception.LoadException) 4, FileData (com.alibaba.otter.shared.etl.model.FileData) 4, SelectException (com.alibaba.otter.node.etl.select.exceptions.SelectException) 3, Channel (com.alibaba.otter.shared.common.model.config.channel.Channel) 3, EventType (com.alibaba.otter.shared.etl.model.EventType) 3