Search in sources :

Example 31 with StormClusterState

use of com.alibaba.jstorm.cluster.StormClusterState in project jstorm by alibaba.

the class MonitorRunnable method run.

/**
 * Runs one monitor pass over every topology that currently has an assignment.
 *
 * <p>For each topology the pass collects the tasks that {@code NimbusUtils.isTaskDead}
 * reports as dead. If any are found it reports a task error for each dead task that can be
 * mapped to a worker, pushes a {@code TaskDeadEvent}, and requests a
 * {@code StatusType.monitor} transition. Finally the topology's heartbeat info, when
 * present, is written through the cluster state.
 *
 * <p>Any exception escaping the loop is logged and ends the pass; nothing is rethrown.
 *
 * <p>TODO: when a topology is being reassigned, it should be skipped by this check.
 */
@Override
public void run() {
    StormClusterState clusterState = data.getStormClusterState();
    try {
        // Attention: assignments must be checked first.
        List<String> active_topologys = clusterState.assignments(null);
        if (active_topologys == null) {
            LOG.info("Failed to get active topologies");
            return;
        }
        for (String topologyid : active_topologys) {
            // Skip topologies for which no storm base entry is returned.
            if (clusterState.storm_base(topologyid, null) == null) {
                continue;
            }
            LOG.debug("Check tasks " + topologyid);
            // Attention: do not read /ZK-dir/taskbeats/topologyid to get the task ids here.
            Set<Integer> taskIds = clusterState.task_ids(topologyid);
            if (taskIds == null) {
                LOG.info("Failed to get task ids of " + topologyid);
                continue;
            }
            Assignment assignment = clusterState.assignment_info(topologyid, null);

            Set<Integer> deadTasks = new HashSet<Integer>();
            for (Integer task : taskIds) {
                if (NimbusUtils.isTaskDead(data, topologyid, task)) {
                    deadTasks.add(task);
                }
            }

            TopologyTaskHbInfo topologyHbInfo = data.getTasksHeartbeat().get(topologyid);
            if (!deadTasks.isEmpty()) {
                if (topologyHbInfo != null) {
                    int topologyMasterId = topologyHbInfo.get_topologyMasterId();
                    if (deadTasks.contains(topologyMasterId)) {
                        // The topology master itself is dead: narrow the dead set to the
                        // tasks of the master's worker (or just the master when no worker
                        // is found for it).
                        deadTasks.clear();
                        if (assignment != null) {
                            ResourceWorkerSlot resource = assignment.getWorkerByTaskId(topologyMasterId);
                            if (resource != null) {
                                deadTasks.addAll(resource.getTasks());
                            } else {
                                deadTasks.add(topologyMasterId);
                            }
                        }
                    } else {
                        Map<Integer, TaskHeartbeat> taskHbs = topologyHbInfo.get_taskHbs();
                        int launchTime = JStormUtils.parseInt(data.getConf().get(Config.NIMBUS_TASK_LAUNCH_SECS));
                        TaskHeartbeat masterHb = (taskHbs == null) ? null : taskHbs.get(topologyMasterId);
                        if (masterHb == null || masterHb.get_uptime() < launchTime) {
                            // The topology master has no heartbeat yet, or its uptime is
                            // below the configured launch time. Skip only this topology
                            // (no transition, no heartbeat write) and keep checking the
                            // remaining ones; returning here would abandon every topology
                            // that follows in the list.
                            continue;
                        }
                    }

                    Map<Integer, ResourceWorkerSlot> deadTaskWorkers = new HashMap<>();
                    for (Integer task : deadTasks) {
                        LOG.info("Found " + topologyid + ",taskid:" + task + " is dead");
                        ResourceWorkerSlot resource = null;
                        if (assignment != null) {
                            resource = assignment.getWorkerByTaskId(task);
                        }
                        if (resource != null) {
                            deadTaskWorkers.put(task, resource);
                            Date now = new Date();
                            String nowStr = TimeFormat.getSecond(now);
                            String errorInfo = "Task-" + task + " is dead on " + resource.getHostname() + ":" + resource.getPort() + ", " + nowStr;
                            LOG.info(errorInfo);
                            clusterState.report_task_error(topologyid, task, errorInfo, ErrorConstants.ERROR, ErrorConstants.CODE_TASK_DEAD, ErrorConstants.DURATION_SECS_TASK_DEAD);
                        }
                    }
                    if (deadTaskWorkers.size() > 0) {
                        // notify jstorm monitor
                        TaskDeadEvent.pushEvent(topologyid, deadTaskWorkers);
                    }
                }
                // Requested whenever dead tasks were detected, even without heartbeat info.
                NimbusUtils.transition(data, topologyid, false, StatusType.monitor);
            }

            if (topologyHbInfo != null) {
                try {
                    clusterState.topology_heartbeat(topologyid, topologyHbInfo);
                } catch (Exception e) {
                    // A failed heartbeat write for one topology must not stop the pass.
                    LOG.error("Failed to update task heartbeat info to ZK for " + topologyid, e);
                }
            }
        }
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
    }
}
Also used : HashMap(java.util.HashMap) TopologyTaskHbInfo(backtype.storm.generated.TopologyTaskHbInfo) Date(java.util.Date) TaskHeartbeat(backtype.storm.generated.TaskHeartbeat) StormClusterState(com.alibaba.jstorm.cluster.StormClusterState) HashSet(java.util.HashSet) ResourceWorkerSlot(com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot)

Example 32 with StormClusterState

use of com.alibaba.jstorm.cluster.StormClusterState in project jstorm by alibaba.

the class JStormUtils method reportError.

/**
 * Reports an error message for the current task via the cluster state obtained from the
 * given topology context.
 *
 * <p>This is best-effort: any exception thrown while reporting is logged at WARN level
 * (with its stack trace) and not propagated to the caller.
 *
 * @param topologyContext context supplying the cluster state, topology id and task id
 * @param errorMessge the error text to report
 */
public static void reportError(TopologyContext topologyContext, String errorMessge) {
    StormClusterState zkCluster = topologyContext.getZkCluster();
    String topologyId = topologyContext.getTopologyId();
    int taskId = topologyContext.getThisTaskId();
    try {
        zkCluster.report_task_error(topologyId, taskId, errorMessge);
    } catch (Exception e) {
        // Keep the cause in the log; swallowing it hides why the report failed.
        LOG.warn("Failed to report Error", e);
    }
}
Also used : StormClusterState(com.alibaba.jstorm.cluster.StormClusterState) ExecuteException(org.apache.commons.exec.ExecuteException) TException(org.apache.thrift.TException) IOException(java.io.IOException)

Aggregations

StormClusterState (com.alibaba.jstorm.cluster.StormClusterState)32 IOException (java.io.IOException)12 NotAliveException (backtype.storm.generated.NotAliveException)10 FailedAssignTopologyException (com.alibaba.jstorm.utils.FailedAssignTopologyException)10 TException (org.apache.thrift.TException)10 InvalidParameterException (java.security.InvalidParameterException)9 HashMap (java.util.HashMap)9 AlreadyAliveException (backtype.storm.generated.AlreadyAliveException)8 InvalidTopologyException (backtype.storm.generated.InvalidTopologyException)8 KeyAlreadyExistsException (backtype.storm.generated.KeyAlreadyExistsException)8 KeyNotFoundException (backtype.storm.generated.KeyNotFoundException)8 TopologyAssignException (backtype.storm.generated.TopologyAssignException)8 FileNotFoundException (java.io.FileNotFoundException)8 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)8 LocalFsBlobStore (com.alibaba.jstorm.blobstore.LocalFsBlobStore)7 Assignment (com.alibaba.jstorm.schedule.Assignment)7 ResourceWorkerSlot (com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot)7 Map (java.util.Map)7 TreeMap (java.util.TreeMap)7 BlobStore (com.alibaba.jstorm.blobstore.BlobStore)5