Use of com.alibaba.jstorm.cluster.StormClusterState in the jstorm project by alibaba.
From the class MonitorRunnable, method run.
/**
 * Runs one monitoring pass over every topology that currently has an assignment.
 *
 * <p>For each topology the pass collects the tasks that
 * {@code NimbusUtils.isTaskDead} reports as dead, records a task error for every
 * dead task whose worker slot is known, pushes a {@code TaskDeadEvent}, triggers a
 * {@code StatusType.monitor} transition, and finally hands the topology's heartbeat
 * info to {@code clusterState.topology_heartbeat}.
 *
 * <p>Any exception thrown during the pass is logged and ends the pass; it is not
 * propagated to the caller.
 *
 * <p>TODO: when a topology is being reassigned, the check should be skipped for it.
 */
@Override
public void run() {
    StormClusterState clusterState = data.getStormClusterState();
    try {
        // Attention: assignments must be checked first.
        List<String> activeTopologies = clusterState.assignments(null);
        if (activeTopologies == null) {
            LOG.info("Failed to get active topologies");
            return;
        }

        for (String topologyid : activeTopologies) {
            // Skip topologies for which storm_base returns nothing.
            if (clusterState.storm_base(topologyid, null) == null) {
                continue;
            }
            LOG.debug("Check tasks " + topologyid);

            // Attention: task ids are deliberately NOT read from
            // /ZK-dir/taskbeats/topologyid here.
            Set<Integer> taskIds = clusterState.task_ids(topologyid);
            if (taskIds == null) {
                LOG.info("Failed to get task ids of " + topologyid);
                continue;
            }

            Assignment assignment = clusterState.assignment_info(topologyid, null);

            Set<Integer> deadTasks = new HashSet<>();
            for (Integer task : taskIds) {
                if (NimbusUtils.isTaskDead(data, topologyid, task)) {
                    deadTasks.add(task);
                }
            }
            boolean needReassign = !deadTasks.isEmpty();

            TopologyTaskHbInfo topologyHbInfo = data.getTasksHeartbeat().get(topologyid);
            if (needReassign) {
                if (topologyHbInfo != null) {
                    int topologyMasterId = topologyHbInfo.get_topologyMasterId();
                    if (deadTasks.contains(topologyMasterId)) {
                        // The topology master itself is dead: replace the dead set with
                        // every task of the master's worker (or just the master when its
                        // worker slot cannot be resolved).
                        // NOTE(review): when assignment is null the dead set stays empty
                        // but the transition below still fires -- confirm this is intended.
                        deadTasks.clear();
                        if (assignment != null) {
                            ResourceWorkerSlot masterWorker = assignment.getWorkerByTaskId(topologyMasterId);
                            if (masterWorker != null) {
                                deadTasks.addAll(masterWorker.getTasks());
                            } else {
                                deadTasks.add(topologyMasterId);
                            }
                        }
                    } else {
                        Map<Integer, TaskHeartbeat> taskHbs = topologyHbInfo.get_taskHbs();
                        int launchTime = JStormUtils.parseInt(data.getConf().get(Config.NIMBUS_TASK_LAUNCH_SECS));
                        TaskHeartbeat masterHb = (taskHbs == null) ? null : taskHbs.get(topologyMasterId);
                        if (masterHb == null || masterHb.get_uptime() < launchTime) {
                            // The master has no heartbeat yet, or has been up for less than
                            // the configured launch window (presumably still starting up).
                            // Skip only THIS topology; the remaining topologies must still
                            // be checked, so this is a continue rather than a return.
                            continue;
                        }
                    }

                    Map<Integer, ResourceWorkerSlot> deadTaskWorkers = new HashMap<>();
                    for (Integer task : deadTasks) {
                        LOG.info("Found " + topologyid + ",taskid:" + task + " is dead");
                        ResourceWorkerSlot resource = null;
                        if (assignment != null) {
                            resource = assignment.getWorkerByTaskId(task);
                        }
                        if (resource != null) {
                            deadTaskWorkers.put(task, resource);
                            Date now = new Date();
                            String nowStr = TimeFormat.getSecond(now);
                            String errorInfo = "Task-" + task + " is dead on " + resource.getHostname() + ":" + resource.getPort() + ", " + nowStr;
                            LOG.info(errorInfo);
                            clusterState.report_task_error(topologyid, task, errorInfo, ErrorConstants.ERROR, ErrorConstants.CODE_TASK_DEAD, ErrorConstants.DURATION_SECS_TASK_DEAD);
                        }
                    }
                    if (deadTaskWorkers.size() > 0) {
                        // notify jstorm monitor
                        TaskDeadEvent.pushEvent(topologyid, deadTaskWorkers);
                    }
                }
                NimbusUtils.transition(data, topologyid, false, StatusType.monitor);
            }

            if (topologyHbInfo != null) {
                // Best effort: a failed heartbeat write must not stop the pass.
                try {
                    clusterState.topology_heartbeat(topologyid, topologyHbInfo);
                } catch (Exception e) {
                    LOG.error("Failed to update task heartbeat info to ZK for " + topologyid, e);
                }
            }
        }
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
    }
}
Use of com.alibaba.jstorm.cluster.StormClusterState in the jstorm project by alibaba.
From the class JStormUtils, method reportError.
/**
 * Reports an error message for the current task through the cluster state
 * obtained from the given topology context.
 *
 * <p>Reporting is best effort: a failure is logged at WARN level (with its cause)
 * and is not propagated to the caller.
 *
 * @param topologyContext context supplying the cluster state, topology id and task id
 * @param errorMessge error text passed to {@code report_task_error}
 */
public static void reportError(TopologyContext topologyContext, String errorMessge) {
    StormClusterState zkCluster = topologyContext.getZkCluster();
    String topologyId = topologyContext.getTopologyId();
    int taskId = topologyContext.getThisTaskId();
    try {
        zkCluster.report_task_error(topologyId, taskId, errorMessge);
    } catch (Exception e) {
        // Keep the cause and identify the task so the failure can be diagnosed.
        LOG.warn("Failed to report Error for topology " + topologyId + ", task " + taskId, e);
    }
}
Aggregations