use of backtype.storm.generated.TaskHeartbeat in project jstorm by alibaba.
the class TaskHeartbeatUpdater method process.
/**
 * Records the heartbeat carried by {@code input} for its source task and, when the
 * tuple originates from this updater's own task ({@code taskId}), flushes every
 * known task heartbeat through {@code setTaskHeatbeat} in bounded batches.
 *
 * @param input tuple whose source task identifies the reporting task and whose
 *              first value is that task's uptime (cast to {@code Integer})
 */
public void process(Tuple input) {
    int reportingTask = input.getSourceTask();
    int reportedUptime = (Integer) input.getValue(0);

    // Refresh (or create) the heartbeat record of the reporting task.
    TaskHeartbeat heartbeat = taskHbMap.get(reportingTask);
    if (heartbeat != null) {
        heartbeat.set_time(TimeUtils.current_time_secs());
        heartbeat.set_uptime(reportedUptime);
    } else {
        heartbeat = new TaskHeartbeat(TimeUtils.current_time_secs(), reportedUptime);
        taskHbMap.put(reportingTask, heartbeat);
    }

    // Only a tuple coming from this very task triggers a flush of all heartbeats.
    if (reportingTask != taskId) {
        return;
    }

    // The same info object and backing map are reused for every batch; the map is
    // emptied after each hand-off so that a batch never exceeds MAX_NUM_TASK_HB_SEND.
    TopologyTaskHbInfo batchInfo = new TopologyTaskHbInfo(topologyId, taskId);
    Map<Integer, TaskHeartbeat> batch = new ConcurrentHashMap<Integer, TaskHeartbeat>();
    batchInfo.set_taskHbs(batch);

    int batched = 0;
    for (Entry<Integer, TaskHeartbeat> known : taskHbMap.entrySet()) {
        batch.put(known.getKey(), known.getValue());
        if (++batched >= MAX_NUM_TASK_HB_SEND) {
            setTaskHeatbeat(batchInfo);
            batch.clear();
            batched = 0;
        }
    }

    // Hand off whatever is left over from the last, partially filled batch.
    if (!batch.isEmpty()) {
        setTaskHeatbeat(batchInfo);
    }
}
use of backtype.storm.generated.TaskHeartbeat in project jstorm by alibaba.
the class ServiceHandler method getTopologyInfo.
/**
 * Builds the {@link TopologyInfo} of one topology: its summary, one summary per
 * component, one summary per task (status, uptime, host/port, errors) and the
 * topology-level metrics.
 *
 * <p>Task status is "Starting" while no heartbeat is known for the task, otherwise
 * "ACTIVE" or "INACTIVE" according to {@code NimbusUtils.isTaskDead}. Task errors are
 * attached only when {@code Cluster.is_topology_exist_error} reports an error for the
 * topology. Task, stream and netty metrics are returned as empty placeholders.
 *
 * @param topologyId id of the topology to describe
 * @return the assembled TopologyInfo
 * @throws TException a {@code NotAliveException} when the topology has no StormBase
 *                    or no assignment; any other failure is wrapped in a
 *                    {@code TException} that carries the original cause
 */
@Override
public TopologyInfo getTopologyInfo(String topologyId) throws TException {
    long start = System.nanoTime();
    StormClusterState stormClusterState = data.getStormClusterState();
    try {
        // get topology's StormBase
        StormBase base = stormClusterState.storm_base(topologyId, null);
        if (base == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }
        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }

        TopologyTaskHbInfo topologyTaskHbInfo = data.getTasksHeartbeat().get(topologyId);
        Map<Integer, TaskHeartbeat> taskHbMap = null;
        if (topologyTaskHbInfo != null) {
            taskHbMap = topologyTaskHbInfo.get_taskHbs();
        }

        Map<Integer, TaskInfo> taskInfoMap = Cluster.get_all_taskInfo(stormClusterState, topologyId);
        Map<Integer, String> taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, taskInfoMap);
        Map<Integer, String> taskToType = Cluster.get_all_task_type(stormClusterState, topologyId, taskInfoMap);

        String errorString;
        if (Cluster.is_topology_exist_error(stormClusterState, topologyId)) {
            errorString = "Y";
        } else {
            errorString = "";
        }

        TopologySummary topologySummary = new TopologySummary();
        topologySummary.set_id(topologyId);
        topologySummary.set_name(base.getStormName());
        topologySummary.set_uptimeSecs(TimeUtils.time_delta(base.getLanchTimeSecs()));
        topologySummary.set_status(base.getStatusString());
        topologySummary.set_numTasks(NimbusUtils.getTopologyTaskNum(assignment));
        topologySummary.set_numWorkers(assignment.getWorkers().size());
        topologySummary.set_errorInfo(errorString);

        // One summary per component, built from the task -> component mapping.
        Map<String, ComponentSummary> componentSummaryMap = new HashMap<>();
        HashMap<String, List<Integer>> componentToTasks = JStormUtils.reverse_map(taskToComponent);
        for (Entry<String, List<Integer>> entry : componentToTasks.entrySet()) {
            String name = entry.getKey();
            List<Integer> taskIds = entry.getValue();
            if (taskIds == null || taskIds.size() == 0) {
                LOG.warn("No task of component " + name);
                continue;
            }
            ComponentSummary componentSummary = new ComponentSummary();
            componentSummaryMap.put(name, componentSummary);
            componentSummary.set_name(name);
            componentSummary.set_type(taskToType.get(taskIds.get(0)));
            componentSummary.set_parallel(taskIds.size());
            componentSummary.set_taskIds(taskIds);
        }

        // One summary per task, kept sorted by task id.
        Map<Integer, TaskSummary> taskSummaryMap = new TreeMap<>();
        Map<Integer, List<TaskError>> taskErrors = Cluster.get_all_task_errors(stormClusterState, topologyId);
        for (Integer taskId : taskInfoMap.keySet()) {
            TaskSummary taskSummary = new TaskSummary();
            taskSummaryMap.put(taskId, taskSummary);
            taskSummary.set_taskId(taskId);

            TaskHeartbeat hb = (taskHbMap == null) ? null : taskHbMap.get(taskId);
            if (hb == null) {
                // No heartbeat known yet for this task.
                taskSummary.set_status("Starting");
                taskSummary.set_uptime(0);
            } else {
                boolean isInactive = NimbusUtils.isTaskDead(data, topologyId, taskId);
                taskSummary.set_status(isInactive ? "INACTIVE" : "ACTIVE");
                taskSummary.set_uptime(hb.get_uptime());
            }

            // Errors are only collected when the topology is flagged as having errors.
            if (StringUtils.isBlank(errorString)) {
                continue;
            }
            List<TaskError> taskErrorList = (taskErrors == null) ? null : taskErrors.get(taskId);
            if (taskErrorList == null || taskErrorList.isEmpty()) {
                continue;
            }
            // The owning component may have no summary (e.g. unknown task -> component
            // mapping); the task-level error is still reported in that case instead of
            // failing the whole call with a NullPointerException.
            ComponentSummary owningComponent = componentSummaryMap.get(taskToComponent.get(taskId));
            for (TaskError taskError : taskErrorList) {
                ErrorInfo errorInfo = new ErrorInfo(taskError.getError(), taskError.getTimSecs(), taskError.getLevel(), taskError.getCode());
                taskSummary.add_to_errors(errorInfo);
                if (owningComponent != null) {
                    owningComponent.add_to_errors(errorInfo);
                }
            }
        }

        // Fill in host/port of every task from the worker it is assigned to.
        for (ResourceWorkerSlot workerSlot : assignment.getWorkers()) {
            String hostname = workerSlot.getHostname();
            int port = workerSlot.getPort();
            for (Integer taskId : workerSlot.getTasks()) {
                TaskSummary taskSummary = taskSummaryMap.get(taskId);
                if (taskSummary == null) {
                    // The assignment references a task that has no task info entry;
                    // skip it rather than abort the whole request.
                    LOG.warn("No task info of task " + taskId + " assigned to " + hostname + ":" + port);
                    continue;
                }
                taskSummary.set_host(hostname);
                taskSummary.set_port(port);
            }
        }

        TopologyInfo topologyInfo = new TopologyInfo();
        topologyInfo.set_topology(topologySummary);
        topologyInfo.set_components(JStormUtils.mk_list(componentSummaryMap.values()));
        topologyInfo.set_tasks(JStormUtils.mk_list(taskSummaryMap.values()));

        // return topology metric & component metric only
        List<MetricInfo> tpMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.TOPOLOGY);
        List<MetricInfo> compMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.COMPONENT);
        List<MetricInfo> workerMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.WORKER);
        List<MetricInfo> compStreamMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.COMPONENT_STREAM);

        // Task, stream and netty metrics are always empty placeholders here.
        MetricInfo taskMetric = MetricUtils.mkMetricInfo();
        MetricInfo streamMetric = MetricUtils.mkMetricInfo();
        MetricInfo nettyMetric = MetricUtils.mkMetricInfo();

        MetricInfo tpMetric, compMetric, compStreamMetric, workerMetric;
        if (tpMetricList == null || tpMetricList.size() == 0) {
            tpMetric = MetricUtils.mkMetricInfo();
        } else {
            // get the last min topology metric
            tpMetric = tpMetricList.get(tpMetricList.size() - 1);
        }
        if (compMetricList == null || compMetricList.size() == 0) {
            compMetric = MetricUtils.mkMetricInfo();
        } else {
            compMetric = compMetricList.get(0);
        }
        if (compStreamMetricList == null || compStreamMetricList.size() == 0) {
            compStreamMetric = MetricUtils.mkMetricInfo();
        } else {
            compStreamMetric = compStreamMetricList.get(0);
        }
        if (workerMetricList == null || workerMetricList.size() == 0) {
            workerMetric = MetricUtils.mkMetricInfo();
        } else {
            workerMetric = workerMetricList.get(0);
        }

        TopologyMetric topologyMetrics = new TopologyMetric(tpMetric, compMetric, workerMetric, taskMetric, streamMetric, nettyMetric);
        topologyMetrics.set_compStreamMetric(compStreamMetric);
        topologyInfo.set_metrics(topologyMetrics);
        return topologyInfo;
    } catch (TException e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        // Keep the original exception as the cause so callers can diagnose the failure.
        throw new TException("Failed to get topologyInfo " + topologyId, e);
    } finally {
        // Always record the call latency, whether the call succeeded or failed.
        long end = System.nanoTime();
        SimpleJStormMetric.updateNimbusHistogram("getTopologyInfo", (end - start) / TimeUtils.NS_PER_US);
    }
}
use of backtype.storm.generated.TaskHeartbeat in project jstorm by alibaba.
the class NimbusUtils method isTaskDead.
/**
 * Decides whether a task of a topology should be considered dead, based on the
 * heartbeat reported for it and on nimbus' own heartbeat cache entry for the task.
 *
 * <p>As a side effect the cache entry is created or refreshed whenever a new
 * report time is observed.
 *
 * @param data       nimbus state holding the reported heartbeats, the heartbeat cache
 *                   and the configuration
 * @param topologyId id of the topology the task belongs to
 * @param taskId     id of the task to check
 * @return {@code true} when the task is considered dead, {@code false} otherwise
 */
public static boolean isTaskDead(NimbusData data, String topologyId, Integer taskId) {
    final String taskDesc = " topology:" + topologyId + ",task id:" + taskId;

    // Heartbeats as reported for the topology (may be absent altogether).
    TopologyTaskHbInfo reportedInfo = data.getTasksHeartbeat().get(topologyId);
    Map<Integer, TaskHeartbeat> reportedHbs = (reportedInfo == null) ? null : reportedInfo.get_taskHbs();
    Integer lastReportSecs = null;
    if (reportedHbs != null) {
        TaskHeartbeat reported = reportedHbs.get(taskId);
        if (reported != null) {
            lastReportSecs = reported.get_time();
        }
    }

    Map<Integer, TkHbCacheTime> hbCache = data.getTaskHeartbeatsCache(topologyId, true);
    TkHbCacheTime cached = hbCache.get(taskId);
    if (cached == null) {
        LOG.debug("No task heartbeat cache " + taskDesc);
        // A null reportedInfo implies a null reportedHbs, so one check covers both.
        if (reportedHbs == null) {
            LOG.info("No task heartbeat was reported for " + taskDesc);
            return true;
        }
        // First sighting of the task: seed the cache from the reported heartbeat.
        TkHbCacheTime seeded = new TkHbCacheTime();
        seeded.update(reportedHbs.get(taskId));
        hbCache.put(taskId, seeded);
        return false;
    }

    if (lastReportSecs == null || lastReportSecs < cached.getTaskAssignedTime()) {
        LOG.debug("No task heartbeat was reported for " + taskDesc);
        // Task hasn't finished init: allow it NIMBUS_TASK_LAUNCH_SECS since assignment
        // (per the original note this defaults to 4 min).
        int currentSecs = TimeUtils.current_time_secs();
        int assignedSecs = cached.getTaskAssignedTime();
        int launchTimeoutSecs = JStormUtils.parseInt(data.getConf().get(Config.NIMBUS_TASK_LAUNCH_SECS));
        boolean initTimedOut = currentSecs - assignedSecs > launchTimeoutSecs;
        if (initTimedOut) {
            LOG.info(taskDesc + " failed to init ");
        }
        return initTimedOut;
    }

    // From here on a report time exists and the task has finished initialization.
    int cachedNimbusSecs = cached.getNimbusTime();
    int cachedReportSecs = cached.getTaskReportedTime();
    int currentSecs = TimeUtils.current_time_secs();

    if (cachedNimbusSecs == 0) {
        // Cache entry never stamped before: record the first observation.
        cached.setNimbusTime(currentSecs);
        cached.setTaskReportedTime(lastReportSecs);
        LOG.info("Update task heartbeat to nimbus cache " + taskDesc);
        return false;
    }

    if (cachedReportSecs != lastReportSecs) {
        // A newer report arrived since the last check: refresh the cache.
        cached.setNimbusTime(currentSecs);
        cached.setTaskReportedTime(lastReportSecs);
        LOG.debug(taskDesc + ",nimbusTime " + currentSecs + ",zkReport:" + lastReportSecs + ",report:" + cachedReportSecs);
        return false;
    }

    // Report time unchanged since the last check: see whether it has been stale too long.
    Integer timeoutSecs = data.getTopologyTaskTimeout().get(topologyId);
    if (timeoutSecs == null) {
        // Fall back to the cluster-wide setting (per the original note, 2 min by default).
        timeoutSecs = JStormUtils.parseInt(data.getConf().get(Config.NIMBUS_TASK_TIMEOUT_SECS));
    }
    if (taskId == reportedInfo.get_topologyMasterId()) {
        // The topology master gets only half of the timeout.
        timeoutSecs = (timeoutSecs / 2);
    }

    if (currentSecs - cachedNimbusSecs <= timeoutSecs) {
        return false;
    }

    // task is dead
    Date lastSeen = new Date(cachedNimbusSecs * 1000L);
    LOG.debug(taskDesc + " last task time is " + cachedNimbusSecs + ":" + lastSeen + ",current " + currentSecs + ":" + new Date(currentSecs * 1000L));
    return true;
}
use of backtype.storm.generated.TaskHeartbeat in project jstorm by alibaba.
the class MonitorRunnable method run.
/**
 * Checks every active topology for dead tasks, reports them, triggers the monitor
 * transition for topologies that need reassignment and writes the topology's task
 * heartbeat info through {@code clusterState.topology_heartbeat}.
 *
 * <p>When the topology master itself is dead, all tasks of its worker are treated as
 * dead instead of the individually detected ones. When the topology master is alive
 * but has not yet reported an uptime of at least NIMBUS_TASK_LAUNCH_SECS, only that
 * topology is skipped for this round; the remaining topologies are still checked.
 *
 * <p>Todo: when one topology is being reassigned, the topology should skip check
 */
@Override
public void run() {
    StormClusterState clusterState = data.getStormClusterState();
    try {
        // Note: need first check Assignments
        List<String> activeTopologies = clusterState.assignments(null);
        if (activeTopologies == null) {
            LOG.info("Failed to get active topologies");
            return;
        }
        for (String topologyId : activeTopologies) {
            if (clusterState.storm_base(topologyId, null) == null) {
                continue;
            }
            LOG.debug("Check tasks of topology " + topologyId);
            // Note that we don't check /ZK-dir/taskbeats/topologyId to get task ids
            Set<Integer> taskIds = clusterState.task_ids(topologyId);
            if (taskIds == null) {
                LOG.info("Failed to get task ids of " + topologyId);
                continue;
            }
            Assignment assignment = clusterState.assignment_info(topologyId, null);

            Set<Integer> deadTasks = new HashSet<>();
            boolean needReassign = false;
            for (Integer task : taskIds) {
                boolean isTaskDead = NimbusUtils.isTaskDead(data, topologyId, task);
                if (isTaskDead) {
                    deadTasks.add(task);
                    needReassign = true;
                }
            }

            TopologyTaskHbInfo topologyHbInfo = data.getTasksHeartbeat().get(topologyId);
            if (needReassign) {
                if (topologyHbInfo != null) {
                    int topologyMasterId = topologyHbInfo.get_topologyMasterId();
                    if (deadTasks.contains(topologyMasterId)) {
                        // Topology master is dead: replace the dead set by the tasks of
                        // the topology master's worker.
                        deadTasks.clear();
                        if (assignment != null) {
                            ResourceWorkerSlot resource = assignment.getWorkerByTaskId(topologyMasterId);
                            if (resource != null)
                                deadTasks.addAll(resource.getTasks());
                            else
                                deadTasks.add(topologyMasterId);
                        }
                    } else {
                        Map<Integer, TaskHeartbeat> taskHbs = topologyHbInfo.get_taskHbs();
                        int launchTime = JStormUtils.parseInt(data.getConf().get(Config.NIMBUS_TASK_LAUNCH_SECS));
                        if (taskHbs == null || taskHbs.get(topologyMasterId) == null || taskHbs.get(topologyMasterId).get_uptime() < launchTime) {
                            // Topology master has not been up long enough yet: skip this
                            // topology only (no transition, no heartbeat write) and carry
                            // on with the remaining topologies.
                            continue;
                        }
                    }

                    Map<Integer, ResourceWorkerSlot> deadTaskWorkers = new HashMap<>();
                    for (Integer task : deadTasks) {
                        LOG.info("Found " + topologyId + ", taskId:" + task + " is dead");
                        ResourceWorkerSlot resource = null;
                        if (assignment != null)
                            resource = assignment.getWorkerByTaskId(task);
                        if (resource != null) {
                            deadTaskWorkers.put(task, resource);
                        }
                    }

                    Map<ResourceWorkerSlot, List<Integer>> workersDeadTasks = JStormUtils.reverse_map(deadTaskWorkers);
                    for (Map.Entry<ResourceWorkerSlot, List<Integer>> entry : workersDeadTasks.entrySet()) {
                        ResourceWorkerSlot resource = entry.getKey();
                        List<Integer> workerDeadTasks = entry.getValue();
                        if (workerDeadTasks.isEmpty()) {
                            continue;
                        }
                        // we only report one task per worker; the message lists all of them
                        Integer task = workerDeadTasks.get(0);
                        Date now = new Date();
                        String nowStr = TimeFormat.getSecond(now);
                        String errorInfo = "Task-" + workerDeadTasks.toString() + " is dead on " + resource.getHostname() + ":" + resource.getPort() + ", " + nowStr;
                        LOG.info(errorInfo);
                        clusterState.report_task_error(topologyId, task, errorInfo, ErrorConstants.ERROR, ErrorConstants.CODE_TASK_DEAD, ErrorConstants.DURATION_SECS_TASK_DEAD);
                    }

                    if (deadTaskWorkers.size() > 0) {
                        // notify jstorm monitor
                        TaskDeadEvent.pushEvent(topologyId, deadTaskWorkers);
                    }
                }
                NimbusUtils.transition(data, topologyId, false, StatusType.monitor);
            }

            if (topologyHbInfo != null) {
                try {
                    clusterState.topology_heartbeat(topologyId, topologyHbInfo);
                } catch (Exception e) {
                    LOG.error("Failed to update task heartbeat info to ZK for " + topologyId, e);
                }
            }
        }
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
    }
}
use of backtype.storm.generated.TaskHeartbeat in project jstorm by alibaba.
the class ServiceHandler method updateTaskHeartbeat.
/**
 * Merges the task heartbeats carried by {@code taskHbs} into the heartbeat info that
 * nimbus keeps for the same topology, creating that info (and its heartbeat map) on
 * first use. Existing entries for the same task id are overwritten.
 *
 * @param taskHbs heartbeat info of one topology; its heartbeat map may be null, in
 *                which case nothing is merged
 * @throws TException declared by the service interface
 */
@Override
public void updateTaskHeartbeat(TopologyTaskHbInfo taskHbs) throws TException {
    String topologyId = taskHbs.get_topologyId();
    Integer masterTaskId = taskHbs.get_topologyMasterId();

    // Look up, or lazily register, nimbus' own record for this topology.
    TopologyTaskHbInfo stored = data.getTasksHeartbeat().get(topologyId);
    if (stored == null) {
        stored = new TopologyTaskHbInfo(topologyId, masterTaskId);
        data.getTasksHeartbeat().put(topologyId, stored);
    }

    Map<Integer, TaskHeartbeat> storedHbs = stored.get_taskHbs();
    if (storedHbs == null) {
        storedHbs = new ConcurrentHashMap<>();
        stored.set_taskHbs(storedHbs);
    }

    // Copy every incoming heartbeat over the stored one for the same task.
    Map<Integer, TaskHeartbeat> incomingHbs = taskHbs.get_taskHbs();
    if (incomingHbs != null) {
        storedHbs.putAll(incomingHbs);
    }
}
Aggregations