Search in sources :

Example 1 with LoadException

use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.

the class DbLoadAction method load.

/**
 * Loads one row batch into the target database.
 *
 * <p>DDL batches are handed to {@code doDdl}; every other batch is split into weight buckets that
 * are loaded one bucket at a time, coordinated through the given controller. An interruption
 * restores the thread's interrupt flag and reports the error to the interceptor without throwing;
 * any other exception is reported to the interceptor and rethrown wrapped in a
 * {@link LoadException}.
 *
 * @param rowBatch batch to load; must not be null
 * @param controller weight controller used to sequence the buckets
 * @return the load context, which carries the records that were processed successfully
 */
public DbLoadContext load(RowBatch rowBatch, WeightController controller) {
    Assert.notNull(rowBatch);
    Identity identity = rowBatch.getIdentity();
    DbLoadContext context = buildContext(identity);
    try {
        // Hand the batch to the context, then read back what the context exposes as prepared data.
        context.setPrepareDatas(rowBatch.getDatas());
        List<EventData> events = context.getPrepareDatas();
        if (events == null || events.isEmpty()) {
            logger.info("##no eventdata for load, return");
            return context;
        }

        // DbBatchLoader has already grouped the data by media source and each source has its own
        // DbLoadAction, so the source of the first record is representative for the whole batch.
        context.setDataMediaSource(
            ConfigHelper.findDataMedia(context.getPipeline(), events.get(0).getTableId()).getSource());
        interceptor.prepare(context);
        // Re-read the prepared data after the interceptor ran (duplicate-entry filtering).
        events = context.getPrepareDatas();

        // DDL idempotence: keep one DDL per batch where possible, so that a failure or a rollback
        // only concerns that single statement.
        if (isDdlDatas(events)) {
            doDdl(context, events);
        } else {
            WeightBuckets<EventData> buckets = buildWeightBuckets(context, events);
            List<Long> weights = buckets.weights();
            // start() must be called even when there are no weights.
            controller.start(weights);
            if (CollectionUtils.isEmpty(events)) {
                logger.info("##no eventdata for load");
            }
            // Resize the thread pool according to the manager configuration.
            adjustPoolSize(context);
            // Refresh the runtime parameters.
            adjustConfig(context);

            for (Long weight : weights) {
                controller.await(weight.intValue());
                // All records that share this weight.
                List<EventData> bucketItems = buckets.getItems(weight);
                logger.debug("##start load for weight:" + weight);
                // Collapse repeated I/U/D operations on the same primary key into one record.
                List<EventData> mergedItems = DbLoadMerger.merge(bucketItems);
                // Group the merged records by I/U/D before loading them.
                DbLoadData loadData = new DbLoadData();
                doBefore(mergedItems, context, loadData);
                doLoad(context, loadData);
                controller.single(weight.intValue());
                logger.debug("##end load for weight:" + weight);
            }
        }
        interceptor.commit(context);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        interceptor.error(context);
    } catch (Exception e) {
        interceptor.error(context);
        throw new LoadException(e);
    }
    // The context holds the successfully processed records.
    return context;
}
Also used : DbLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext) Identity(com.alibaba.otter.shared.etl.model.Identity) EventData(com.alibaba.otter.shared.etl.model.EventData) DataIntegrityViolationException(org.springframework.dao.DataIntegrityViolationException) DataAccessException(org.springframework.dao.DataAccessException) SQLException(java.sql.SQLException) DeadlockLoserDataAccessException(org.springframework.dao.DeadlockLoserDataAccessException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException)

Example 2 with LoadException

use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.

the class DbLoadAction method doDdl.

/**
 * Executes the given DDL events one at a time, in order.
 *
 * <p>A statement counts as successful when it runs without throwing. The boolean returned by
 * {@link Statement#execute(String)} only tells whether the first result is a {@code ResultSet};
 * it is {@code false} for DDL and {@code USE} statements, so it must not be used as a success
 * flag.
 *
 * @param context load context; events that executed successfully are added to its processed list
 * @param eventDatas DDL events to execute
 * @throws LoadException if a statement fails and the pipeline is not configured to skip DDL errors
 */
private void doDdl(DbLoadContext context, List<EventData> eventDatas) {
    for (final EventData data : eventDatas) {
        DataMedia dataMedia = ConfigHelper.findDataMedia(context.getPipeline(), data.getTableId());
        final DbDialect dbDialect = dbDialectFactory.getDbDialect(context.getIdentity().getPipelineId(), (DbMediaSource) dataMedia.getSource());
        // Null-safe read: an unset flag means "do not skip", and must not raise an NPE inside the
        // catch block below (which would hide the real DDL failure).
        final boolean skipDdlException = Boolean.TRUE.equals(context.getPipeline().getParameters().getSkipDdlException());
        try {
            Boolean result = dbDialect.getJdbcTemplate().execute(new StatementCallback<Boolean>() {

                public Boolean doInStatement(Statement stmt) throws SQLException, DataAccessException {
                    if (dbDialect instanceof MysqlDialect && StringUtils.isNotEmpty(data.getDdlSchemaName())) {
                        // On MySQL, switch to the schema the DDL was executed in on the source.
                        // The name is back-quoted so that schemas named after a keyword
                        // (e.g. "Order") do not break the statement.
                        stmt.execute("use `" + data.getDdlSchemaName() + "`");
                    }
                    stmt.execute(data.getSql());
                    // Reaching this point without an SQLException means the DDL was applied.
                    return Boolean.TRUE;
                }
            });
            if (Boolean.TRUE.equals(result)) {
                // Record the statement as successfully processed.
                context.getProcessedDatas().add(data);
            } else {
                context.getFailedDatas().add(data);
            }
        } catch (Throwable e) {
            if (skipDdlException) {
                // Configured to skip DDL failures: log and continue with the next event.
                logger.warn("skip exception for ddl : {} , caused by {}", data, ExceptionUtils.getFullStackTrace(e));
            } else {
                throw new LoadException(e);
            }
        }
    }
}
Also used : MysqlDialect(com.alibaba.otter.node.etl.common.db.dialect.mysql.MysqlDialect) SQLException(java.sql.SQLException) DbDialect(com.alibaba.otter.node.etl.common.db.dialect.DbDialect) PreparedStatement(java.sql.PreparedStatement) Statement(java.sql.Statement) EventData(com.alibaba.otter.shared.etl.model.EventData) DataMedia(com.alibaba.otter.shared.common.model.config.data.DataMedia) DataAccessException(org.springframework.dao.DataAccessException) DeadlockLoserDataAccessException(org.springframework.dao.DeadlockLoserDataAccessException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException)

Example 3 with LoadException

use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.

the class DbLoadMerger method mergeUpdate.

/**
 * Merges an UPDATE event into the per-row merge result.
 *
 * <p>When the event carries old keys (a primary-key change) it is chained onto whatever is stored
 * under the old key, which handles cascaded changes such as 1->2 followed by 2->3. Otherwise it is
 * merged with whatever is already stored under its own key.
 *
 * @param eventData the UPDATE event to merge
 * @param result merge result keyed by row, updated in place
 * @throws LoadException if the event changes a primary key whose stored record is neither an
 *         INSERT nor an UPDATE
 */
private static void mergeUpdate(EventData eventData, Map<RowKey, EventData> result) {
    RowKey currentKey = new RowKey(eventData.getTableId(), eventData.getSchemaName(), eventData.getTableName(), eventData.getKeys());

    if (!CollectionUtils.isEmpty(eventData.getOldKeys())) {
        // Primary key changed: look for an earlier record stored under the old key.
        RowKey previousKey = new RowKey(eventData.getTableId(), eventData.getSchemaName(), eventData.getTableName(), eventData.getOldKeys());
        if (!result.containsKey(previousKey)) {
            // Nothing to chain onto.
            result.put(currentKey, eventData);
            return;
        }

        EventData previous = result.get(previousKey);
        eventData.setSize(previous.getSize() + eventData.getSize());
        EventType previousType = previous.getEventType();
        boolean followsInsert = previousType == EventType.INSERT;
        if (!followsInsert && previousType != EventType.UPDATE) {
            throw new LoadException("delete(has old pks) + update impossible happed!");
        }

        if (followsInsert) {
            // INSERT followed by UPDATE collapses into a single INSERT.
            eventData.setEventType(EventType.INSERT);
        }
        // Drop the record stored under the old primary key.
        result.remove(previousKey);
        // Carry over the columns the earlier record has and this one lacks.
        EventData merged = replaceColumnValue(eventData, previous);
        if (followsInsert) {
            // An INSERT record has no use for old keys.
            merged.getOldKeys().clear();
        }
        result.put(currentKey, merged);
        return;
    }

    // No primary-key change.
    if (!result.containsKey(currentKey)) {
        result.put(currentKey, eventData);
        return;
    }

    EventData previous = result.get(currentKey);
    EventType previousType = previous.getEventType();
    if (previousType == EventType.DELETE) {
        // Abnormal DELETE + UPDATE sequence: keep the UPDATE as it is.
        result.put(currentKey, eventData);
        return;
    }
    if (previousType == EventType.INSERT) {
        // INSERT followed by UPDATE collapses into a single INSERT.
        eventData.setEventType(EventType.INSERT);
    }
    if (previousType == EventType.INSERT || previousType == EventType.UPDATE) {
        // Also covers "1->2 followed by an update of 2": carry over the columns the earlier
        // record has and this one lacks.
        EventData merged = replaceColumnValue(eventData, previous);
        result.put(currentKey, merged);
    }
}
Also used : EventData(com.alibaba.otter.shared.etl.model.EventData) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException)

Example 4 with LoadException

use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.

the class FileLoadAction method moveFiles.

/**
 * Loads the given files concurrently, using a fail-fast strategy.
 *
 * <p>One {@code FileLoadWorker} is submitted per file. A worker reports a failure either by
 * returning a non-null exception or by throwing; in both cases the remaining unfinished tasks are
 * cancelled and the failure is rethrown.
 *
 * @param context file load context
 * @param fileDatas files to load
 * @param rootDir directory handed to each worker
 * @throws LoadException if any worker fails; a worker's own {@code LoadException} is rethrown
 *         as is, anything else is wrapped
 */
private void moveFiles(FileLoadContext context, List<FileData> fileDatas, File rootDir) {
    Exception exception = null;
    adjustPoolSize(context);
    ExecutorCompletionService<Exception> executorComplition = new ExecutorCompletionService<Exception>(executor);
    List<Future<Exception>> results = new ArrayList<Future<Exception>>();
    for (FileData fileData : fileDatas) {
        Future<Exception> future = executorComplition.submit(new FileLoadWorker(context, rootDir, fileData));
        results.add(future);
        // Fail fast: a task that is already done (e.g. it ran on the calling thread because the
        // pool uses CallerRunsPolicy) is checked right away.
        if (future.isDone()) {
            try {
                exception = future.get();
            } catch (Exception e) {
                exception = e;
            }
            if (exception != null) {
                for (Future<Exception> result : results) {
                    if (!result.isDone() && !result.isCancelled()) {
                        result.cancel(true);
                    }
                }
                throw exception instanceof LoadException ? (LoadException) exception : new LoadException(exception);
            }
        }
    }

    int resultSize = results.size();
    int cursor = 0;
    while (cursor < resultSize) {
        try {
            Future<Exception> result = executorComplition.take();
            exception = result.get();
        } catch (Exception e) {
            exception = e;
        }
        if (exception != null) {
            // Stop at the first failure, whether it was returned or thrown. Without this a later
            // successful result would overwrite the exception with null and the error would be
            // lost.
            break;
        }
        cursor++;
    }

    if (cursor != resultSize) {
        // A task failed: cancel everything that is still running.
        for (Future<Exception> future : results) {
            if (!future.isDone() && !future.isCancelled()) {
                future.cancel(true);
            }
        }
    }
    if (exception != null) {
        throw exception instanceof LoadException ? (LoadException) exception : new LoadException(exception);
    }
}
Also used : ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) FileData(com.alibaba.otter.shared.etl.model.FileData) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException) IOException(java.io.IOException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException)

Example 5 with LoadException

use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.

the class FileLoadAction method load.

/**
 * Loads one file batch from the given temporary directory.
 *
 * <p>The files are split into weight buckets that are handled one bucket at a time, coordinated
 * through the given controller; each bucket is either dry-run or actually moved, depending on the
 * pipeline's dry-run parameter. The temporary directory is deleted afterwards whether or not the
 * load succeeded. An interruption restores the thread's interrupt flag and returns the context
 * without throwing.
 *
 * @param fileBatch batch of files to load
 * @param rootDir temporary directory holding the files; must exist
 * @param controller weight controller used to sequence the buckets
 * @return the load context, which carries the records that were processed successfully
 * @throws LoadException if {@code rootDir} does not exist or the load fails
 */
public FileLoadContext load(FileBatch fileBatch, File rootDir, WeightController controller) {
    if (false == rootDir.exists()) {
        throw new LoadException(rootDir.getPath() + " is not exist");
    }
    FileLoadContext context = buildContext(fileBatch.getIdentity());
    context.setPrepareDatas(fileBatch.getFiles());
    boolean isDryRun = context.getPipeline().getParameters().isDryRun();
    try {
        WeightBuckets<FileData> buckets = buildWeightBuckets(fileBatch.getIdentity(), fileBatch.getFiles());
        List<Long> weights = buckets.weights();
        controller.start(weights);
        for (int i = 0; i < weights.size(); i++) {
            Long weight = weights.get(i);
            controller.await(weight.intValue());
            // Parameterized logging needs no level guard (the previous guard tested the INFO
            // level around a DEBUG statement).
            logger.debug("##start load for weight:{}\n", weight);
            // All files that share this weight.
            List<FileData> items = buckets.getItems(weight);
            if (isDryRun) {
                dryRun(context, items, rootDir);
            } else {
                moveFiles(context, items, rootDir);
            }
            controller.single(weight.intValue());
            logger.debug("##end load for weight:{}\n", weight);
        }
        if (dump || isDryRun) {
            // Set the per-pipeline MDC key around the dump and remove it afterwards.
            MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
            logger.info(FileloadDumper.dumpContext("successed", context));
            MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        if (dump || isDryRun) {
            MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
            logger.info(FileloadDumper.dumpContext("error", context));
            MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
        }
    } catch (Exception e) {
        if (dump || isDryRun) {
            MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
            logger.info(FileloadDumper.dumpContext("error", context));
            MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
        }
        throw new LoadException(e);
    } finally {
        // Delete the temporary directory whether or not the move succeeded.
        NioUtils.delete(rootDir, 3);
    }
    return context;
}
Also used : FileLoadContext(com.alibaba.otter.node.etl.load.loader.db.context.FileLoadContext) FileData(com.alibaba.otter.shared.etl.model.FileData) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException) IOException(java.io.IOException) LoadException(com.alibaba.otter.node.etl.load.exception.LoadException)

Aggregations

LoadException (com.alibaba.otter.node.etl.load.exception.LoadException)10 EventData (com.alibaba.otter.shared.etl.model.EventData)4 FileData (com.alibaba.otter.shared.etl.model.FileData)3 File (java.io.File)3 IOException (java.io.IOException)3 SQLException (java.sql.SQLException)3 ArrayList (java.util.ArrayList)3 Future (java.util.concurrent.Future)3 DataAccessException (org.springframework.dao.DataAccessException)3 DeadlockLoserDataAccessException (org.springframework.dao.DeadlockLoserDataAccessException)3 LoadCounter (com.alibaba.otter.node.etl.load.loader.LoadStatsTracker.LoadCounter)2 DbLoadContext (com.alibaba.otter.node.etl.load.loader.db.context.DbLoadContext)2 FileLoadContext (com.alibaba.otter.node.etl.load.loader.db.context.FileLoadContext)2 FileBatch (com.alibaba.otter.shared.etl.model.FileBatch)2 Identity (com.alibaba.otter.shared.etl.model.Identity)2 ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService)2 DataIntegrityViolationException (org.springframework.dao.DataIntegrityViolationException)2 BaseDbTest (com.alibaba.otter.node.etl.BaseDbTest)1 DbDialect (com.alibaba.otter.node.etl.common.db.dialect.DbDialect)1 MysqlDialect (com.alibaba.otter.node.etl.common.db.dialect.mysql.MysqlDialect)1