Search in sources :

Example 56 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class AbstractCollector method registerTaskCommunication.

/**
 * Registers a new, empty {@code Communication} for every task configuration in the list.
 *
 * <p>Each entry is stored in {@code taskCommunicationMap} under the task id read from
 * the configuration's {@code CoreConstant.TASK_ID} key.
 *
 * @param taskConfigurationList task configurations to register
 */
public void registerTaskCommunication(List<Configuration> taskConfigurationList) {
    for (Configuration singleTask : taskConfigurationList) {
        // Read the id first so a missing id fails before anything is stored.
        int id = singleTask.getInt(CoreConstant.TASK_ID);
        Communication freshCommunication = new Communication();
        this.taskCommunicationMap.put(id, freshCommunication);
    }
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) Communication(com.alibaba.datax.core.statistics.communication.Communication)

Example 57 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class TaskGroupContainer method start.

/**
 * Runs every task configured for this task group until all succeed or one fails.
 *
 * <p>The method reads its tuning values from {@code this.configuration}, registers the
 * task configurations with the container communicator, and then loops:
 * <ol>
 *   <li>inspect the communication of each task and handle finished ones
 *       (retry on failure when allowed, otherwise flag the group as failed);</li>
 *   <li>if a task failed or was killed, report and throw;</li>
 *   <li>start queued tasks while fewer than {@code channelNumber} are running;</li>
 *   <li>exit the loop when the queue is empty, all running tasks are done and the
 *       collected state is {@code SUCCEEDED};</li>
 *   <li>report the task group communication when the report interval has elapsed;</li>
 *   <li>sleep for the configured interval and repeat.</li>
 * </ol>
 *
 * <p>Any {@code Throwable} raised inside is recorded on the collected communication,
 * the state is set to {@code FAILED}, the communication is reported, and the error is
 * rethrown as a {@code DataXException} with {@code FrameworkErrorCode.RUNTIME_ERROR}.
 * The {@code finally} block logs VM and perf-trace summaries when the perf trace is
 * not in job mode.
 */
@Override
public void start() {
    try {
        /**
         * Interval between state checks; kept short so tasks can be dispatched
         * to their channels promptly.
         */
        int sleepIntervalInMillSec = this.configuration.getInt(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_SLEEPINTERVAL, 100);
        /**
         * Interval between status reports; somewhat longer to avoid a flood of reports.
         */
        long reportIntervalInMillSec = this.configuration.getLong(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_REPORTINTERVAL, 10000);
        /**
         * Report performance statistics once every 2 minutes.
         * NOTE(review): no code in this method corresponds to this comment.
         */
        // Number of channels, i.e. the maximum number of concurrently running tasks.
        int channelNumber = this.configuration.getInt(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL);
        int taskMaxRetryTimes = this.configuration.getInt(CoreConstant.DATAX_CORE_CONTAINER_TASK_FAILOVER_MAXRETRYTIMES, 1);
        long taskRetryIntervalInMsec = this.configuration.getLong(CoreConstant.DATAX_CORE_CONTAINER_TASK_FAILOVER_RETRYINTERVALINMSEC, 10000);
        long taskMaxWaitInMsec = this.configuration.getLong(CoreConstant.DATAX_CORE_CONTAINER_TASK_FAILOVER_MAXWAITINMSEC, 60000);
        List<Configuration> taskConfigs = this.configuration.getListConfiguration(CoreConstant.DATAX_JOB_CONTENT);
        if (LOG.isDebugEnabled()) {
            LOG.debug("taskGroup[{}]'s task configs[{}]", this.taskGroupId, JSON.toJSONString(taskConfigs));
        }
        int taskCountInThisTaskGroup = taskConfigs.size();
        LOG.info(String.format("taskGroupId=[%d] start [%d] channels for [%d] tasks.", this.taskGroupId, channelNumber, taskCountInThisTaskGroup));
        this.containerCommunicator.registerCommunication(taskConfigs);
        // taskId -> task configuration
        Map<Integer, Configuration> taskConfigMap = buildTaskConfigMap(taskConfigs);
        // Tasks waiting to be run
        List<Configuration> taskQueue = buildRemainTasks(taskConfigs);
        // taskId -> executor instance of the most recent failed attempt
        Map<Integer, TaskExecutor> taskFailedExecutorMap = new HashMap<Integer, TaskExecutor>();
        // Tasks currently running
        List<TaskExecutor> runTasks = new ArrayList<TaskExecutor>(channelNumber);
        // taskId -> task start time (milliseconds)
        Map<Integer, Long> taskStartTimeMap = new HashMap<Integer, Long>();
        // Starts at 0 so the first loop iteration reports immediately.
        long lastReportTimeStamp = 0;
        Communication lastTaskGroupContainerCommunication = new Communication();
        while (true) {
            // 1. Check the state of every task
            boolean failedOrKilled = false;
            Map<Integer, Communication> communicationMap = containerCommunicator.getCommunicationMap();
            for (Map.Entry<Integer, Communication> entry : communicationMap.entrySet()) {
                Integer taskId = entry.getKey();
                Communication taskCommunication = entry.getValue();
                if (!taskCommunication.isFinished()) {
                    continue;
                }
                TaskExecutor taskExecutor = removeTask(runTasks, taskId);
                // The task was removed from runTasks above, so remove it from the monitor as well
                taskMonitor.removeTask(taskId);
                // Failed: check whether the task supports failover and the retry count is below the limit
                if (taskCommunication.getState() == State.FAILED) {
                    taskFailedExecutorMap.put(taskId, taskExecutor);
                    if (taskExecutor.supportFailOver() && taskExecutor.getAttemptCount() < taskMaxRetryTimes) {
                        // Shut down the old executor
                        taskExecutor.shutdown();
                        // Reset the task's communication state
                        containerCommunicator.resetCommunication(taskId);
                        Configuration taskConfig = taskConfigMap.get(taskId);
                        // Put the task back into the queue
                        taskQueue.add(taskConfig);
                    } else {
                        failedOrKilled = true;
                        break;
                    }
                } else if (taskCommunication.getState() == State.KILLED) {
                    failedOrKilled = true;
                    break;
                } else if (taskCommunication.getState() == State.SUCCEEDED) {
                    Long taskStartTime = taskStartTimeMap.get(taskId);
                    // A null start time means this success was already recorded (the entry is removed below)
                    if (taskStartTime != null) {
                        Long usedTime = System.currentTimeMillis() - taskStartTime;
                        LOG.info("taskGroup[{}] taskId[{}] is successed, used[{}]ms", this.taskGroupId, taskId, usedTime);
                        // usedTime*1000*1000 converts ms to the ns recorded by PerfRecord. This is a simple
                        // registration used for printing the longest-running tasks, hence the dedicated static method.
                        PerfRecord.addPerfRecord(taskGroupId, taskId, PerfRecord.PHASE.TASK_TOTAL, taskStartTime, usedTime * 1000L * 1000L);
                        taskStartTimeMap.remove(taskId);
                        taskConfigMap.remove(taskId);
                    }
                }
            }
            // 2. If the overall state of the task executors in this task group is failed, report the error
            if (failedOrKilled) {
                lastTaskGroupContainerCommunication = reportTaskGroupCommunication(lastTaskGroupContainerCommunication, taskCountInThisTaskGroup);
                throw DataXException.asDataXException(FrameworkErrorCode.PLUGIN_RUNTIME_ERROR, lastTaskGroupContainerCommunication.getThrowable());
            }
            // 3. There are tasks not yet executed and the number of running tasks is below the channel limit
            Iterator<Configuration> iterator = taskQueue.iterator();
            while (iterator.hasNext() && runTasks.size() < channelNumber) {
                Configuration taskConfig = iterator.next();
                Integer taskId = taskConfig.getInt(CoreConstant.TASK_ID);
                int attemptCount = 1;
                TaskExecutor lastExecutor = taskFailedExecutorMap.get(taskId);
                if (lastExecutor != null) {
                    attemptCount = lastExecutor.getAttemptCount() + 1;
                    long now = System.currentTimeMillis();
                    long failedTime = lastExecutor.getTimeStamp();
                    if (now - failedTime < taskRetryIntervalInMsec) {
                        // Retry interval not yet elapsed; leave the task in the queue
                        continue;
                    }
                    if (!lastExecutor.isShutdown()) {
                        // The previously failed attempt has still not finished
                        if (now - failedTime > taskMaxWaitInMsec) {
                            markCommunicationFailed(taskId);
                            reportTaskGroupCommunication(lastTaskGroupContainerCommunication, taskCountInThisTaskGroup);
                            throw DataXException.asDataXException(CommonErrorCode.WAIT_TIME_EXCEED, "task failover等待超时");
                        } else {
                            // Try to shut it down again
                            lastExecutor.shutdown();
                            continue;
                        }
                    } else {
                        LOG.info("taskGroup[{}] taskId[{}] attemptCount[{}] has already shutdown", this.taskGroupId, taskId, lastExecutor.getAttemptCount());
                    }
                }
                // A clone is used when more than one attempt is allowed
                // (presumably so a retry starts from an unmodified configuration — TODO confirm).
                Configuration taskConfigForRun = taskMaxRetryTimes > 1 ? taskConfig.clone() : taskConfig;
                TaskExecutor taskExecutor = new TaskExecutor(taskConfigForRun, attemptCount);
                taskStartTimeMap.put(taskId, System.currentTimeMillis());
                taskExecutor.doStart();
                iterator.remove();
                runTasks.add(taskExecutor);
                // The task was added to runTasks above, so register it with the monitor.
                taskMonitor.registerTask(taskId, this.containerCommunicator.getCommunication(taskId));
                taskFailedExecutorMap.remove(taskId);
                LOG.info("taskGroup[{}] taskId[{}] attemptCount[{}] is started", this.taskGroupId, taskId, attemptCount);
            }
            // 4. Task queue is empty, executors have finished and the collected state is success ---> success
            if (taskQueue.isEmpty() && isAllTaskDone(runTasks) && containerCommunicator.collectState() == State.SUCCEEDED) {
                // Report once on success as well; otherwise the collected information would be
                // inaccurate when the tasks finish very quickly
                lastTaskGroupContainerCommunication = reportTaskGroupCommunication(lastTaskGroupContainerCommunication, taskCountInThisTaskGroup);
                LOG.info("taskGroup[{}] completed it's tasks.", this.taskGroupId);
                break;
            }
            // 5. If the report interval has elapsed, report immediately
            long now = System.currentTimeMillis();
            if (now - lastReportTimeStamp > reportIntervalInMillSec) {
                lastTaskGroupContainerCommunication = reportTaskGroupCommunication(lastTaskGroupContainerCommunication, taskCountInThisTaskGroup);
                lastReportTimeStamp = now;
                // taskMonitor checks the running tasks once every reportIntervalInMillSec
                for (TaskExecutor taskExecutor : runTasks) {
                    taskMonitor.report(taskExecutor.getTaskId(), this.containerCommunicator.getCommunication(taskExecutor.getTaskId()));
                }
            }
            Thread.sleep(sleepIntervalInMillSec);
        }
        // 6. Report one final time
        reportTaskGroupCommunication(lastTaskGroupContainerCommunication, taskCountInThisTaskGroup);
    } catch (Throwable e) {
        // Record the failure on the collected communication (keeping any throwable already set),
        // report it, then rethrow as a framework runtime error.
        Communication nowTaskGroupContainerCommunication = this.containerCommunicator.collect();
        if (nowTaskGroupContainerCommunication.getThrowable() == null) {
            nowTaskGroupContainerCommunication.setThrowable(e);
        }
        nowTaskGroupContainerCommunication.setState(State.FAILED);
        this.containerCommunicator.report(nowTaskGroupContainerCommunication);
        throw DataXException.asDataXException(FrameworkErrorCode.RUNTIME_ERROR, e);
    } finally {
        if (!PerfTrace.getInstance().isJob()) {
            // Finally, print the average CPU consumption and the GC statistics
            VMInfo vmInfo = VMInfo.getVmInfo();
            if (vmInfo != null) {
                vmInfo.getDelta(false);
                LOG.info(vmInfo.totalString());
            }
            LOG.info(PerfTrace.getInstance().summarizeNoException());
        }
    }
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) VMInfo(com.alibaba.datax.common.statistics.VMInfo) Communication(com.alibaba.datax.core.statistics.communication.Communication)

Example 58 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class TaskGroupContainer method buildTaskConfigMap.

/**
 * Indexes the given task configurations by their task id.
 *
 * @param configurations task configurations, each carrying a {@code CoreConstant.TASK_ID} value
 * @return a new map from task id to its configuration; a later entry replaces an
 *         earlier one with the same id
 */
private Map<Integer, Configuration> buildTaskConfigMap(List<Configuration> configurations) {
    final Map<Integer, Configuration> configsByTaskId = new HashMap<Integer, Configuration>();
    for (final Configuration each : configurations) {
        final int id = each.getInt(CoreConstant.TASK_ID);
        configsByTaskId.put(id, each);
    }
    return configsByTaskId;
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration)

Example 59 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class Engine method filterJobConfiguration.

/**
 * Renders the {@code "job"} section of a configuration with sensitive values masked.
 *
 * <p>The work is done on a clone of the job section: its {@code "content"} part is
 * passed through {@code filterSensitiveConfiguration}, written back, and the result
 * is returned via {@code beautify()}.
 *
 * @param configuration full configuration containing a {@code "job"} section
 * @return the beautified, filtered job configuration
 */
public static String filterJobConfiguration(final Configuration configuration) {
    final Configuration jobSection = configuration.getConfiguration("job");
    final Configuration sanitizedJob = jobSection.clone();

    // Take care to mask sensitive information before the configuration is rendered.
    final Configuration contentSection = sanitizedJob.getConfiguration("content");
    filterSensitiveConfiguration(contentSection);
    sanitizedJob.set("content", contentSection);

    return sanitizedJob.beautify();
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration)

Example 60 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class Engine method start.

/**
 * Boots the engine for the given configuration and starts the matching container.
 *
 * <p>Steps performed: bind column-cast and plugin-loader settings, decide between job
 * mode and task-group mode from {@code CoreConstant.DATAX_CORE_CONTAINER_MODEL}, build
 * the corresponding container, initialise {@code PerfTrace}, and call
 * {@code container.start()}.
 *
 * <p>The priority passed to {@code PerfTrace} is parsed from the
 * {@code SKYNET_PRIORITY} environment variable; when it is unset or not an integer
 * the priority stays 0 and a warning showing the actual value is logged.
 *
 * @param allConf the complete configuration; in job mode its
 *                {@code CoreConstant.DATAX_CORE_CONTAINER_JOB_MODE} entry is set here
 */
/* check job model (job/task) first */
public void start(Configuration allConf) {
    // Bind the column conversion settings
    ColumnCast.bind(allConf);
    /**
     * Initialise the PluginLoader so that the various plugin configurations can be obtained
     */
    LoadUtil.bind(allConf);
    // Anything other than "taskGroup" (case-insensitive) is treated as job mode.
    boolean isJob = !("taskGroup".equalsIgnoreCase(allConf.getString(CoreConstant.DATAX_CORE_CONTAINER_MODEL)));
    // JobContainer sets and adjusts this value itself after scheduling
    int channelNumber = 0;
    AbstractContainer container;
    long instanceId;
    int taskGroupId = -1;
    if (isJob) {
        allConf.set(CoreConstant.DATAX_CORE_CONTAINER_JOB_MODE, RUNTIME_MODE);
        container = new JobContainer(allConf);
        instanceId = allConf.getLong(CoreConstant.DATAX_CORE_CONTAINER_JOB_ID, 0);
    } else {
        container = new TaskGroupContainer(allConf);
        instanceId = allConf.getLong(CoreConstant.DATAX_CORE_CONTAINER_JOB_ID);
        taskGroupId = allConf.getInt(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_ID);
        channelNumber = allConf.getInt(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL);
    }
    // perfTrace is enabled by default
    boolean traceEnable = allConf.getBool(CoreConstant.DATAX_CORE_CONTAINER_TRACE_ENABLE, true);
    boolean perfReportEnable = allConf.getBool(CoreConstant.DATAX_CORE_REPORT_DATAX_PERFLOG, true);
    // datax shell tasks in standalone mode do not report
    if (instanceId == -1) {
        perfReportEnable = false;
    }
    int priority = 0;
    // Read the variable once so the warning below shows exactly the value that failed to parse.
    final String rawPriority = System.getenv("SKYNET_PRIORITY");
    try {
        // Integer.parseInt(null) also throws NumberFormatException, so an unset variable lands in the catch.
        priority = Integer.parseInt(rawPriority);
    } catch (NumberFormatException e) {
        LOG.warn("priority set to 0, because NumberFormatException, the value is: " + rawPriority);
    }
    Configuration jobInfoConfig = allConf.getConfiguration(CoreConstant.DATAX_JOB_JOBINFO);
    // Initialise PerfTrace
    PerfTrace perfTrace = PerfTrace.getInstance(isJob, instanceId, taskGroupId, priority, traceEnable);
    perfTrace.setJobInfo(jobInfoConfig, perfReportEnable, channelNumber);
    container.start();
}
Also used : JobContainer(com.alibaba.datax.core.job.JobContainer) TaskGroupContainer(com.alibaba.datax.core.taskgroup.TaskGroupContainer) Configuration(com.alibaba.datax.common.util.Configuration) PerfTrace(com.alibaba.datax.common.statistics.PerfTrace)

Aggregations

Configuration (com.alibaba.datax.common.util.Configuration)82 ArrayList (java.util.ArrayList)27 Test (org.junit.Test)19 Communication (com.alibaba.datax.core.statistics.communication.Communication)13 DataXException (com.alibaba.datax.common.exception.DataXException)9 Method (java.lang.reflect.Method)8 Record (com.alibaba.datax.common.element.Record)7 JobContainer (com.alibaba.datax.core.job.JobContainer)6 IOException (java.io.IOException)5 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)5 LongColumn (com.alibaba.datax.common.element.LongColumn)4 TaskPluginCollector (com.alibaba.datax.common.plugin.TaskPluginCollector)4 TaskGroupContainer (com.alibaba.datax.core.taskgroup.TaskGroupContainer)4 Channel (com.alibaba.datax.core.transport.channel.Channel)4 MemoryChannel (com.alibaba.datax.core.transport.channel.memory.MemoryChannel)4 DefaultRecord (com.alibaba.datax.core.transport.record.DefaultRecord)4 File (java.io.File)4 HashSet (java.util.HashSet)3 List (java.util.List)3 VMInfo (com.alibaba.datax.common.statistics.VMInfo)2