Search in sources :

Example 1 with TaskError

use of com.alibaba.jstorm.task.error.TaskError in project jstorm by alibaba.

the class StormZkClusterState method task_errors.

/**
 * Collects the errors stored under the error path of one task.
 *
 * <p>Every child node of the task's error path is read; children whose stored
 * object is {@code null} are skipped. The result is sorted ascending by
 * {@code getTimSecs()}.
 *
 * @param topologyId id of the topology that owns the task
 * @param taskId     id of the task whose errors are requested
 * @return the task's errors ordered by timestamp; empty when the error path does not exist
 * @throws Exception if reading from the cluster state fails
 */
@Override
public List<TaskError> task_errors(String topologyId, int taskId) throws Exception {
    final String taskErrorDir = Cluster.taskerror_path(topologyId, taskId);
    final List<TaskError> result = new ArrayList<>();
    if (cluster_state.node_existed(taskErrorDir, false)) {
        for (String childName : cluster_state.get_children(taskErrorDir, false)) {
            Object stored = getObject(taskErrorDir + Cluster.ZK_SEPERATOR + childName, false);
            if (stored == null) {
                continue;
            }
            result.add((TaskError) stored);
        }
        // order oldest first
        Collections.sort(result, new Comparator<TaskError>() {

            @Override
            public int compare(TaskError left, TaskError right) {
                return left.getTimSecs() > right.getTimSecs()
                        ? 1
                        : (left.getTimSecs() < right.getTimSecs() ? -1 : 0);
            }
        });
    }
    return result;
}
Also used : TaskError(com.alibaba.jstorm.task.error.TaskError) ArrayList(java.util.ArrayList)

Example 2 with TaskError

use of com.alibaba.jstorm.task.error.TaskError in project jstorm by alibaba.

the class ServiceHandler method getTopologyInfo.

/**
 * Builds the {@link TopologyInfo} of a running topology: the topology summary,
 * one summary per component and per task (with the task errors attached when the
 * topology is flagged as having errors), the host/port of each assigned task and
 * the cached topology, component, component-stream and worker metrics.
 *
 * <p>The elapsed time of every call is recorded in the "getTopologyInfo" nimbus
 * histogram, whether the call succeeds or fails.
 *
 * @param topologyId id of the topology to describe
 * @return the populated TopologyInfo
 * @throws TException a {@code NotAliveException} is raised when no StormBase or no
 *         Assignment exists for the topology; a {@code TException} raised while
 *         collecting the data is logged and rethrown unchanged; any other exception
 *         is wrapped in a {@code TException} that keeps the original exception as
 *         its cause
 */
@Override
public TopologyInfo getTopologyInfo(String topologyId) throws TException {
    long start = System.nanoTime();
    StormClusterState stormClusterState = data.getStormClusterState();
    try {
        // get topology's StormBase
        StormBase base = stormClusterState.storm_base(topologyId, null);
        if (base == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }
        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }
        TopologyTaskHbInfo topologyTaskHbInfo = data.getTasksHeartbeat().get(topologyId);
        Map<Integer, TaskHeartbeat> taskHbMap = null;
        if (topologyTaskHbInfo != null) {
            taskHbMap = topologyTaskHbInfo.get_taskHbs();
        }
        Map<Integer, TaskInfo> taskInfoMap = Cluster.get_all_taskInfo(stormClusterState, topologyId);
        Map<Integer, String> taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, taskInfoMap);
        Map<Integer, String> taskToType = Cluster.get_all_task_type(stormClusterState, topologyId, taskInfoMap);
        // "Y" marks a topology that has errors, an empty string one that has none
        String errorString;
        if (Cluster.is_topology_exist_error(stormClusterState, topologyId)) {
            errorString = "Y";
        } else {
            errorString = "";
        }
        TopologySummary topologySummary = new TopologySummary();
        topologySummary.set_id(topologyId);
        topologySummary.set_name(base.getStormName());
        topologySummary.set_uptimeSecs(TimeUtils.time_delta(base.getLanchTimeSecs()));
        topologySummary.set_status(base.getStatusString());
        topologySummary.set_numTasks(NimbusUtils.getTopologyTaskNum(assignment));
        topologySummary.set_numWorkers(assignment.getWorkers().size());
        topologySummary.set_errorInfo(errorString);

        // one ComponentSummary per component that owns at least one task
        Map<String, ComponentSummary> componentSummaryMap = new HashMap<>();
        HashMap<String, List<Integer>> componentToTasks = JStormUtils.reverse_map(taskToComponent);
        for (Entry<String, List<Integer>> entry : componentToTasks.entrySet()) {
            String name = entry.getKey();
            List<Integer> taskIds = entry.getValue();
            if (taskIds == null || taskIds.size() == 0) {
                LOG.warn("No task of component " + name);
                continue;
            }
            ComponentSummary componentSummary = new ComponentSummary();
            componentSummaryMap.put(name, componentSummary);
            componentSummary.set_name(name);
            // the component type is taken from the first task of the component
            componentSummary.set_type(taskToType.get(taskIds.get(0)));
            componentSummary.set_parallel(taskIds.size());
            componentSummary.set_taskIds(taskIds);
        }

        // one TaskSummary per known task, kept sorted by task id
        Map<Integer, TaskSummary> taskSummaryMap = new TreeMap<>();
        Map<Integer, List<TaskError>> taskErrors = Cluster.get_all_task_errors(stormClusterState, topologyId);
        for (Integer taskId : taskInfoMap.keySet()) {
            TaskSummary taskSummary = new TaskSummary();
            taskSummaryMap.put(taskId, taskSummary);
            taskSummary.set_taskId(taskId);
            if (taskHbMap == null) {
                taskSummary.set_status("Starting");
                taskSummary.set_uptime(0);
            } else {
                TaskHeartbeat hb = taskHbMap.get(taskId);
                if (hb == null) {
                    taskSummary.set_status("Starting");
                    taskSummary.set_uptime(0);
                } else {
                    boolean isInactive = NimbusUtils.isTaskDead(data, topologyId, taskId);
                    if (isInactive) {
                        taskSummary.set_status("INACTIVE");
                    } else {
                        taskSummary.set_status("ACTIVE");
                    }
                    taskSummary.set_uptime(hb.get_uptime());
                }
            }
            // errors are only attached when the topology is flagged as having errors
            if (StringUtils.isBlank(errorString)) {
                continue;
            }
            List<TaskError> taskErrorList = taskErrors.get(taskId);
            if (taskErrorList == null || taskErrorList.isEmpty()) {
                continue;
            }
            // the owning component is the same for every error of this task, so look it up once
            String component = taskToComponent.get(taskId);
            ComponentSummary componentSummary = componentSummaryMap.get(component);
            if (componentSummary == null) {
                // keep the errors on the task instead of failing the whole request with an NPE
                LOG.warn("No component summary of " + component + " for task " + taskId + " of " + topologyId);
            }
            for (TaskError taskError : taskErrorList) {
                ErrorInfo errorInfo = new ErrorInfo(taskError.getError(), taskError.getTimSecs(), taskError.getLevel(), taskError.getCode());
                taskSummary.add_to_errors(errorInfo);
                if (componentSummary != null) {
                    componentSummary.add_to_errors(errorInfo);
                }
            }
        }

        // fill in where each assigned task runs
        for (ResourceWorkerSlot workerSlot : assignment.getWorkers()) {
            String hostname = workerSlot.getHostname();
            int port = workerSlot.getPort();
            for (Integer taskId : workerSlot.getTasks()) {
                TaskSummary taskSummary = taskSummaryMap.get(taskId);
                if (taskSummary == null) {
                    // the assignment names a task that is absent from taskInfoMap; skip it rather than throw an NPE
                    LOG.warn("Task " + taskId + " of " + topologyId + " is assigned to " + hostname + ":" + port + " but has no task info");
                    continue;
                }
                taskSummary.set_host(hostname);
                taskSummary.set_port(port);
            }
        }
        TopologyInfo topologyInfo = new TopologyInfo();
        topologyInfo.set_topology(topologySummary);
        topologyInfo.set_components(JStormUtils.mk_list(componentSummaryMap.values()));
        topologyInfo.set_tasks(JStormUtils.mk_list(taskSummaryMap.values()));

        // return topology metric & component metric only;
        // task, stream and netty metrics are sent as freshly created MetricInfo objects
        List<MetricInfo> tpMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.TOPOLOGY);
        List<MetricInfo> compMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.COMPONENT);
        List<MetricInfo> workerMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.WORKER);
        List<MetricInfo> compStreamMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.COMPONENT_STREAM);
        MetricInfo taskMetric = MetricUtils.mkMetricInfo();
        MetricInfo streamMetric = MetricUtils.mkMetricInfo();
        MetricInfo nettyMetric = MetricUtils.mkMetricInfo();
        MetricInfo tpMetric, compMetric, compStreamMetric, workerMetric;
        if (tpMetricList == null || tpMetricList.size() == 0) {
            tpMetric = MetricUtils.mkMetricInfo();
        } else {
            // get the last min topology metric (the last element of the list)
            tpMetric = tpMetricList.get(tpMetricList.size() - 1);
        }
        if (compMetricList == null || compMetricList.size() == 0) {
            compMetric = MetricUtils.mkMetricInfo();
        } else {
            compMetric = compMetricList.get(0);
        }
        if (compStreamMetricList == null || compStreamMetricList.size() == 0) {
            compStreamMetric = MetricUtils.mkMetricInfo();
        } else {
            compStreamMetric = compStreamMetricList.get(0);
        }
        if (workerMetricList == null || workerMetricList.size() == 0) {
            workerMetric = MetricUtils.mkMetricInfo();
        } else {
            workerMetric = workerMetricList.get(0);
        }
        TopologyMetric topologyMetrics = new TopologyMetric(tpMetric, compMetric, workerMetric, taskMetric, streamMetric, nettyMetric);
        topologyMetrics.set_compStreamMetric(compStreamMetric);
        topologyInfo.set_metrics(topologyMetrics);
        return topologyInfo;
    } catch (TException e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        // keep the original exception as the cause so the failure stays diagnosable
        throw new TException("Failed to get topologyInfo" + topologyId, e);
    } finally {
        long end = System.nanoTime();
        SimpleJStormMetric.updateNimbusHistogram("getTopologyInfo", (end - start) / TimeUtils.NS_PER_US);
    }
}
Also used : TException(org.apache.thrift.TException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) StormBase(com.alibaba.jstorm.cluster.StormBase) ComponentSummary(backtype.storm.generated.ComponentSummary) Assignment(com.alibaba.jstorm.schedule.Assignment) TaskInfo(com.alibaba.jstorm.task.TaskInfo) NotAliveException(backtype.storm.generated.NotAliveException) ArrayList(java.util.ArrayList) List(java.util.List) TopologySummary(backtype.storm.generated.TopologySummary) ResourceWorkerSlot(com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot) TopologyTaskHbInfo(backtype.storm.generated.TopologyTaskHbInfo) ErrorInfo(backtype.storm.generated.ErrorInfo) TaskError(com.alibaba.jstorm.task.error.TaskError) TopologyMetric(backtype.storm.generated.TopologyMetric) TreeMap(java.util.TreeMap) FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) KeyNotFoundException(backtype.storm.generated.KeyNotFoundException) TException(org.apache.thrift.TException) IOException(java.io.IOException) AlreadyAliveException(backtype.storm.generated.AlreadyAliveException) TopologyAssignException(backtype.storm.generated.TopologyAssignException) FileNotFoundException(java.io.FileNotFoundException) NotAliveException(backtype.storm.generated.NotAliveException) InvalidTopologyException(backtype.storm.generated.InvalidTopologyException) KeyAlreadyExistsException(backtype.storm.generated.KeyAlreadyExistsException) TaskHeartbeat(backtype.storm.generated.TaskHeartbeat) StormClusterState(com.alibaba.jstorm.cluster.StormClusterState) MetricInfo(backtype.storm.generated.MetricInfo) TaskSummary(backtype.storm.generated.TaskSummary) TopologyInfo(backtype.storm.generated.TopologyInfo)

Example 3 with TaskError

use of com.alibaba.jstorm.task.error.TaskError in project jstorm by alibaba.

the class JStormHelper method checkError.

/**
 * Looks up the task errors of a topology and calls {@code Assert.fail} for the
 * first one whose level equals {@code ErrorConstants.ERROR} or
 * {@code ErrorConstants.FATAL}.
 *
 * <p>The nimbus client is initialised with the storm config read by
 * {@code Utils.readStormConfig()} overlaid with {@code conf}, and is always
 * cleaned up before the method returns.
 *
 * @param conf         configuration entries that override the storm config; also passed to {@code getTaskErrors}
 * @param topologyName name of the topology to check
 * @throws Exception if the topology id or the task errors cannot be retrieved
 */
public static void checkError(Map conf, String topologyName) throws Exception {
    NimbusClientWrapper nimbus = new NimbusClientWrapper();
    try {
        Map mergedConf = Utils.readStormConfig();
        mergedConf.putAll(conf);
        nimbus.init(mergedConf);
        String topologyId = nimbus.getClient().getTopologyId(topologyName);
        Map<Integer, List<TaskError>> errorsByTask = getTaskErrors(topologyId, conf);
        for (Entry<Integer, List<TaskError>> perTask : errorsByTask.entrySet()) {
            Integer taskId = perTask.getKey();
            for (TaskError taskError : perTask.getValue()) {
                boolean severe = ErrorConstants.ERROR.equals(taskError.getLevel())
                        || ErrorConstants.FATAL.equals(taskError.getLevel());
                if (severe) {
                    Assert.fail(taskId + " occur error:" + taskError.getError());
                }
            }
        }
    } finally {
        nimbus.cleanup();
    }
}
Also used : NimbusClientWrapper(backtype.storm.utils.NimbusClientWrapper) TaskError(com.alibaba.jstorm.task.error.TaskError) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with TaskError

use of com.alibaba.jstorm.task.error.TaskError in project jstorm by alibaba.

the class StormZkClusterStateTest method testReport_task_error.

/**
 * Reports one task error, checks that every error node stored for the task
 * equals the expected TaskError, then removes the task's errors and checks that
 * no error timestamps remain.
 */
@Test
public void testReport_task_error() throws Exception {
    final String topologyId = "topology_id_1";
    final int taskId = 101;
    final String message = "task is dead";

    // NOTE(review): the expected timestamp is taken before the report call; presumably
    // both land in the same second - confirm how TaskError equality treats the timestamp
    TaskError expected = new TaskError(message, ErrorConstants.ERROR, ErrorConstants.CODE_TASK_DEAD, TimeUtils.current_time_secs());
    stormClusterState.report_task_error(topologyId, taskId, message, ErrorConstants.ERROR, ErrorConstants.CODE_TASK_DEAD);

    String taskErrorDir = Cluster.taskerror_path(topologyId, taskId);
    // the result is not asserted on; the call only has to complete
    Map lastErrTimes = stormClusterState.topo_lastErr_time(topologyId);

    // NOTE(review): when no timestamps are returned this loop asserts nothing
    List<String> errorTimes = stormClusterState.task_error_time(topologyId, taskId);
    for (String timestamp : errorTimes) {
        Object stored = stormClusterState.getObject(taskErrorDir + Cluster.ZK_SEPERATOR + timestamp, false);
        assertEquals(expected, stored);
    }

    stormClusterState.remove_task_error(topologyId, taskId);
    errorTimes = stormClusterState.task_error_time(topologyId, taskId);
    assertEquals(0, errorTimes.size());
}
Also used : TaskError(com.alibaba.jstorm.task.error.TaskError) Map(java.util.Map) LocalClusterMap(backtype.storm.LocalClusterMap) Test(org.junit.Test)

Example 5 with TaskError

use of com.alibaba.jstorm.task.error.TaskError in project jstorm by alibaba.

the class StormZkClusterState method report_task_error.

/**
 * Stores a task error under the task's error path, keyed by the current time in seconds.
 *
 * <p>Existing error nodes are scanned in the order returned by the cluster state:
 * <ul>
 *   <li>a node whose stored object is {@code null} is deleted;</li>
 *   <li>the first node whose error text equals {@code error}, or starts with
 *       {@code tag} when {@code tag} is not {@code null}, is deleted and replaced by
 *       the new error, and the scan stops;</li>
 *   <li>if no node matches, the nodes with the smallest numeric names are deleted
 *       until fewer than three remain, and the new error is then written.</li>
 * </ul>
 * In every case the topology's last-error info is updated at the end.
 *
 * @param topology_id   id of the topology that owns the task
 * @param task_id       id of the task reporting the error
 * @param error         error text
 * @param error_level   error level passed to the TaskError
 * @param error_code    error code passed to the TaskError
 * @param duration_secs duration passed to the TaskError and to the last-error info
 * @param tag           optional prefix identifying an older error to replace; may be {@code null}
 * @throws Exception if a cluster state operation fails
 */
@Override
public void report_task_error(String topology_id, int task_id, String error, String error_level, int error_code, int duration_secs, String tag) throws Exception {
    final String taskErrorDir = Cluster.taskerror_path(topology_id, task_id);
    try {
        cluster_state.mkdirs(taskErrorDir);
    } catch (NodeExistsException ignored) {
        // the error directory is already present
    }

    final int nowSecs = TimeUtils.current_time_secs();
    final String newNodePath = taskErrorDir + Cluster.ZK_SEPERATOR + nowSecs;
    final TaskError newError = new TaskError(error, error_level, error_code, nowSecs, duration_secs);

    // numeric names of the non-matching error nodes seen during the scan
    List<Integer> seenTimestamps = new ArrayList<>();
    boolean replaced = false;
    for (String childName : cluster_state.get_children(taskErrorDir, false)) {
        String childPath = taskErrorDir + Cluster.ZK_SEPERATOR + childName;
        Object stored = getObject(childPath, false);
        if (stored == null) {
            deleteObject(childPath);
            continue;
        }
        String storedText = ((TaskError) stored).getError();
        // replace the old one if needed
        if (storedText.equals(error) || (tag != null && storedText.startsWith(tag))) {
            cluster_state.delete_node(childPath);
            setObject(newNodePath, newError);
            removeLastErrInfoDuration(topology_id, newError.getDurationSecs());
            replaced = true;
            break;
        }
        seenTimestamps.add(Integer.parseInt(childName));
    }

    if (replaced) {
        setLastErrInfo(topology_id, duration_secs, nowSecs);
        return;
    }

    // nothing was replaced: drop the smallest timestamps until fewer than three are left
    Collections.sort(seenTimestamps);
    while (seenTimestamps.size() >= 3) {
        deleteObject(taskErrorDir + Cluster.ZK_SEPERATOR + seenTimestamps.remove(0));
    }
    setObject(newNodePath, newError);
    setLastErrInfo(topology_id, duration_secs, nowSecs);
}
Also used : NodeExistsException(org.apache.zookeeper.KeeperException.NodeExistsException) ArrayList(java.util.ArrayList) TaskError(com.alibaba.jstorm.task.error.TaskError)

Aggregations

TaskError (com.alibaba.jstorm.task.error.TaskError)5 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 LocalClusterMap (backtype.storm.LocalClusterMap)1 AlreadyAliveException (backtype.storm.generated.AlreadyAliveException)1 ComponentSummary (backtype.storm.generated.ComponentSummary)1 ErrorInfo (backtype.storm.generated.ErrorInfo)1 InvalidTopologyException (backtype.storm.generated.InvalidTopologyException)1 KeyAlreadyExistsException (backtype.storm.generated.KeyAlreadyExistsException)1 KeyNotFoundException (backtype.storm.generated.KeyNotFoundException)1 MetricInfo (backtype.storm.generated.MetricInfo)1 NotAliveException (backtype.storm.generated.NotAliveException)1 TaskHeartbeat (backtype.storm.generated.TaskHeartbeat)1 TaskSummary (backtype.storm.generated.TaskSummary)1 TopologyAssignException (backtype.storm.generated.TopologyAssignException)1 TopologyInfo (backtype.storm.generated.TopologyInfo)1 TopologyMetric (backtype.storm.generated.TopologyMetric)1 TopologySummary (backtype.storm.generated.TopologySummary)1