use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class AbstractCollector method registerTaskCommunication.
public void registerTaskCommunication(List<Configuration> taskConfigurationList) {
for (Configuration taskConfig : taskConfigurationList) {
int taskId = taskConfig.getInt(CoreConstant.TASK_ID);
this.taskCommunicationMap.put(taskId, new Communication());
}
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class TaskGroupContainer method start.
@Override
public void start() {
try {
/**
* 状态check时间间隔,较短,可以把任务及时分发到对应channel中
*/
int sleepIntervalInMillSec = this.configuration.getInt(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_SLEEPINTERVAL, 100);
/**
* 状态汇报时间间隔,稍长,避免大量汇报
*/
long reportIntervalInMillSec = this.configuration.getLong(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_REPORTINTERVAL, 10000);
/**
* 2分钟汇报一次性能统计
*/
// 获取channel数目
int channelNumber = this.configuration.getInt(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL);
int taskMaxRetryTimes = this.configuration.getInt(CoreConstant.DATAX_CORE_CONTAINER_TASK_FAILOVER_MAXRETRYTIMES, 1);
long taskRetryIntervalInMsec = this.configuration.getLong(CoreConstant.DATAX_CORE_CONTAINER_TASK_FAILOVER_RETRYINTERVALINMSEC, 10000);
long taskMaxWaitInMsec = this.configuration.getLong(CoreConstant.DATAX_CORE_CONTAINER_TASK_FAILOVER_MAXWAITINMSEC, 60000);
List<Configuration> taskConfigs = this.configuration.getListConfiguration(CoreConstant.DATAX_JOB_CONTENT);
if (LOG.isDebugEnabled()) {
LOG.debug("taskGroup[{}]'s task configs[{}]", this.taskGroupId, JSON.toJSONString(taskConfigs));
}
int taskCountInThisTaskGroup = taskConfigs.size();
LOG.info(String.format("taskGroupId=[%d] start [%d] channels for [%d] tasks.", this.taskGroupId, channelNumber, taskCountInThisTaskGroup));
this.containerCommunicator.registerCommunication(taskConfigs);
//taskId与task配置
Map<Integer, Configuration> taskConfigMap = buildTaskConfigMap(taskConfigs);
//待运行task列表
List<Configuration> taskQueue = buildRemainTasks(taskConfigs);
//taskId与上次失败实例
Map<Integer, TaskExecutor> taskFailedExecutorMap = new HashMap<Integer, TaskExecutor>();
//正在运行task
List<TaskExecutor> runTasks = new ArrayList<TaskExecutor>(channelNumber);
//任务开始时间
Map<Integer, Long> taskStartTimeMap = new HashMap<Integer, Long>();
long lastReportTimeStamp = 0;
Communication lastTaskGroupContainerCommunication = new Communication();
while (true) {
//1.判断task状态
boolean failedOrKilled = false;
Map<Integer, Communication> communicationMap = containerCommunicator.getCommunicationMap();
for (Map.Entry<Integer, Communication> entry : communicationMap.entrySet()) {
Integer taskId = entry.getKey();
Communication taskCommunication = entry.getValue();
if (!taskCommunication.isFinished()) {
continue;
}
TaskExecutor taskExecutor = removeTask(runTasks, taskId);
//上面从runTasks里移除了,因此对应在monitor里移除
taskMonitor.removeTask(taskId);
//失败,看task是否支持failover,重试次数未超过最大限制
if (taskCommunication.getState() == State.FAILED) {
taskFailedExecutorMap.put(taskId, taskExecutor);
if (taskExecutor.supportFailOver() && taskExecutor.getAttemptCount() < taskMaxRetryTimes) {
//关闭老的executor
taskExecutor.shutdown();
//将task的状态重置
containerCommunicator.resetCommunication(taskId);
Configuration taskConfig = taskConfigMap.get(taskId);
//重新加入任务列表
taskQueue.add(taskConfig);
} else {
failedOrKilled = true;
break;
}
} else if (taskCommunication.getState() == State.KILLED) {
failedOrKilled = true;
break;
} else if (taskCommunication.getState() == State.SUCCEEDED) {
Long taskStartTime = taskStartTimeMap.get(taskId);
if (taskStartTime != null) {
Long usedTime = System.currentTimeMillis() - taskStartTime;
LOG.info("taskGroup[{}] taskId[{}] is successed, used[{}]ms", this.taskGroupId, taskId, usedTime);
//usedTime*1000*1000 转换成PerfRecord记录的ns,这里主要是简单登记,进行最长任务的打印。因此增加特定静态方法
PerfRecord.addPerfRecord(taskGroupId, taskId, PerfRecord.PHASE.TASK_TOTAL, taskStartTime, usedTime * 1000L * 1000L);
taskStartTimeMap.remove(taskId);
taskConfigMap.remove(taskId);
}
}
}
// 2.发现该taskGroup下taskExecutor的总状态失败则汇报错误
if (failedOrKilled) {
lastTaskGroupContainerCommunication = reportTaskGroupCommunication(lastTaskGroupContainerCommunication, taskCountInThisTaskGroup);
throw DataXException.asDataXException(FrameworkErrorCode.PLUGIN_RUNTIME_ERROR, lastTaskGroupContainerCommunication.getThrowable());
}
//3.有任务未执行,且正在运行的任务数小于最大通道限制
Iterator<Configuration> iterator = taskQueue.iterator();
while (iterator.hasNext() && runTasks.size() < channelNumber) {
Configuration taskConfig = iterator.next();
Integer taskId = taskConfig.getInt(CoreConstant.TASK_ID);
int attemptCount = 1;
TaskExecutor lastExecutor = taskFailedExecutorMap.get(taskId);
if (lastExecutor != null) {
attemptCount = lastExecutor.getAttemptCount() + 1;
long now = System.currentTimeMillis();
long failedTime = lastExecutor.getTimeStamp();
if (now - failedTime < taskRetryIntervalInMsec) {
//未到等待时间,继续留在队列
continue;
}
if (!lastExecutor.isShutdown()) {
//上次失败的task仍未结束
if (now - failedTime > taskMaxWaitInMsec) {
markCommunicationFailed(taskId);
reportTaskGroupCommunication(lastTaskGroupContainerCommunication, taskCountInThisTaskGroup);
throw DataXException.asDataXException(CommonErrorCode.WAIT_TIME_EXCEED, "task failover等待超时");
} else {
//再次尝试关闭
lastExecutor.shutdown();
continue;
}
} else {
LOG.info("taskGroup[{}] taskId[{}] attemptCount[{}] has already shutdown", this.taskGroupId, taskId, lastExecutor.getAttemptCount());
}
}
Configuration taskConfigForRun = taskMaxRetryTimes > 1 ? taskConfig.clone() : taskConfig;
TaskExecutor taskExecutor = new TaskExecutor(taskConfigForRun, attemptCount);
taskStartTimeMap.put(taskId, System.currentTimeMillis());
taskExecutor.doStart();
iterator.remove();
runTasks.add(taskExecutor);
//上面,增加task到runTasks列表,因此在monitor里注册。
taskMonitor.registerTask(taskId, this.containerCommunicator.getCommunication(taskId));
taskFailedExecutorMap.remove(taskId);
LOG.info("taskGroup[{}] taskId[{}] attemptCount[{}] is started", this.taskGroupId, taskId, attemptCount);
}
//4.任务列表为空,executor已结束, 搜集状态为success--->成功
if (taskQueue.isEmpty() && isAllTaskDone(runTasks) && containerCommunicator.collectState() == State.SUCCEEDED) {
// 成功的情况下,也需要汇报一次。否则在任务结束非常快的情况下,采集的信息将会不准确
lastTaskGroupContainerCommunication = reportTaskGroupCommunication(lastTaskGroupContainerCommunication, taskCountInThisTaskGroup);
LOG.info("taskGroup[{}] completed it's tasks.", this.taskGroupId);
break;
}
// 5.如果当前时间已经超出汇报时间的interval,那么我们需要马上汇报
long now = System.currentTimeMillis();
if (now - lastReportTimeStamp > reportIntervalInMillSec) {
lastTaskGroupContainerCommunication = reportTaskGroupCommunication(lastTaskGroupContainerCommunication, taskCountInThisTaskGroup);
lastReportTimeStamp = now;
//taskMonitor对于正在运行的task,每reportIntervalInMillSec进行检查
for (TaskExecutor taskExecutor : runTasks) {
taskMonitor.report(taskExecutor.getTaskId(), this.containerCommunicator.getCommunication(taskExecutor.getTaskId()));
}
}
Thread.sleep(sleepIntervalInMillSec);
}
//6.最后还要汇报一次
reportTaskGroupCommunication(lastTaskGroupContainerCommunication, taskCountInThisTaskGroup);
} catch (Throwable e) {
Communication nowTaskGroupContainerCommunication = this.containerCommunicator.collect();
if (nowTaskGroupContainerCommunication.getThrowable() == null) {
nowTaskGroupContainerCommunication.setThrowable(e);
}
nowTaskGroupContainerCommunication.setState(State.FAILED);
this.containerCommunicator.report(nowTaskGroupContainerCommunication);
throw DataXException.asDataXException(FrameworkErrorCode.RUNTIME_ERROR, e);
} finally {
if (!PerfTrace.getInstance().isJob()) {
//最后打印cpu的平均消耗,GC的统计
VMInfo vmInfo = VMInfo.getVmInfo();
if (vmInfo != null) {
vmInfo.getDelta(false);
LOG.info(vmInfo.totalString());
}
LOG.info(PerfTrace.getInstance().summarizeNoException());
}
}
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class TaskGroupContainer method buildTaskConfigMap.
private Map<Integer, Configuration> buildTaskConfigMap(List<Configuration> configurations) {
Map<Integer, Configuration> map = new HashMap<Integer, Configuration>();
for (Configuration taskConfig : configurations) {
int taskId = taskConfig.getInt(CoreConstant.TASK_ID);
map.put(taskId, taskConfig);
}
return map;
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class Engine method filterJobConfiguration.
// 注意屏蔽敏感信息
public static String filterJobConfiguration(final Configuration configuration) {
Configuration jobConfWithSetting = configuration.getConfiguration("job").clone();
Configuration jobContent = jobConfWithSetting.getConfiguration("content");
filterSensitiveConfiguration(jobContent);
jobConfWithSetting.set("content", jobContent);
return jobConfWithSetting.beautify();
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class Engine method start.
/* check job model (job/task) first */
public void start(Configuration allConf) {
// 绑定column转换信息
ColumnCast.bind(allConf);
/**
* 初始化PluginLoader,可以获取各种插件配置
*/
LoadUtil.bind(allConf);
boolean isJob = !("taskGroup".equalsIgnoreCase(allConf.getString(CoreConstant.DATAX_CORE_CONTAINER_MODEL)));
//JobContainer会在schedule后再行进行设置和调整值
int channelNumber = 0;
AbstractContainer container;
long instanceId;
int taskGroupId = -1;
if (isJob) {
allConf.set(CoreConstant.DATAX_CORE_CONTAINER_JOB_MODE, RUNTIME_MODE);
container = new JobContainer(allConf);
instanceId = allConf.getLong(CoreConstant.DATAX_CORE_CONTAINER_JOB_ID, 0);
} else {
container = new TaskGroupContainer(allConf);
instanceId = allConf.getLong(CoreConstant.DATAX_CORE_CONTAINER_JOB_ID);
taskGroupId = allConf.getInt(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_ID);
channelNumber = allConf.getInt(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL);
}
//缺省打开perfTrace
boolean traceEnable = allConf.getBool(CoreConstant.DATAX_CORE_CONTAINER_TRACE_ENABLE, true);
boolean perfReportEnable = allConf.getBool(CoreConstant.DATAX_CORE_REPORT_DATAX_PERFLOG, true);
//standlone模式的datax shell任务不进行汇报
if (instanceId == -1) {
perfReportEnable = false;
}
int priority = 0;
try {
priority = Integer.parseInt(System.getenv("SKYNET_PRIORITY"));
} catch (NumberFormatException e) {
LOG.warn("prioriy set to 0, because NumberFormatException, the value is: " + System.getProperty("PROIORY"));
}
Configuration jobInfoConfig = allConf.getConfiguration(CoreConstant.DATAX_JOB_JOBINFO);
//初始化PerfTrace
PerfTrace perfTrace = PerfTrace.getInstance(isJob, instanceId, taskGroupId, priority, traceEnable);
perfTrace.setJobInfo(jobInfoConfig, perfReportEnable, channelNumber);
container.start();
}
Aggregations