Use of com.alibaba.jstorm.task.error.TaskError in project jstorm by alibaba.
The class StormZkClusterState, method task_errors:
@Override
public List<TaskError> task_errors(String topologyId, int taskId) throws Exception {
    List<TaskError> errors = new ArrayList<>();
    String path = Cluster.taskerror_path(topologyId, taskId);
    if (!cluster_state.node_existed(path, false)) {
        return errors;
    }
    List<String> children = cluster_state.get_children(path, false);
    for (String str : children) {
        Object obj = getObject(path + Cluster.ZK_SEPERATOR + str, false);
        if (obj != null) {
            TaskError error = (TaskError) obj;
            errors.add(error);
        }
    }
    Collections.sort(errors, new Comparator<TaskError>() {
        @Override
        public int compare(TaskError o1, TaskError o2) {
            if (o1.getTimSecs() > o2.getTimSecs()) {
                return 1;
            }
            if (o1.getTimSecs() < o2.getTimSecs()) {
                return -1;
            }
            return 0;
        }
    });
    return errors;
}
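For reference, a caller holding an already-initialized StormClusterState handle (the variable name zkCluster below is hypothetical) can read back a task's errors using nothing beyond the task_errors method and the TaskError getters shown above; this is a minimal sketch, not code from the repository:

// Minimal sketch, not repository code: print a task's recorded errors in time order.
// Assumes `zkCluster` is an initialized StormClusterState and the topology/task ids exist.
List<TaskError> taskErrors = zkCluster.task_errors("topology_id_1", 101);
for (TaskError err : taskErrors) {
    System.out.println(err.getTimSecs() + " [" + err.getLevel() + "] " + err.getError());
}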
Use of com.alibaba.jstorm.task.error.TaskError in project jstorm by alibaba.
The class ServiceHandler, method getTopologyInfo:
/**
 * Get TopologyInfo; it contains all of a topology's runtime data.
 *
 * @return TopologyInfo
 */
@Override
public TopologyInfo getTopologyInfo(String topologyId) throws TException {
    long start = System.nanoTime();
    StormClusterState stormClusterState = data.getStormClusterState();
    try {
        // get topology's StormBase
        StormBase base = stormClusterState.storm_base(topologyId, null);
        if (base == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }
        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }
        TopologyTaskHbInfo topologyTaskHbInfo = data.getTasksHeartbeat().get(topologyId);
        Map<Integer, TaskHeartbeat> taskHbMap = null;
        if (topologyTaskHbInfo != null)
            taskHbMap = topologyTaskHbInfo.get_taskHbs();
        Map<Integer, TaskInfo> taskInfoMap = Cluster.get_all_taskInfo(stormClusterState, topologyId);
        Map<Integer, String> taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, taskInfoMap);
        Map<Integer, String> taskToType = Cluster.get_all_task_type(stormClusterState, topologyId, taskInfoMap);
        String errorString;
        if (Cluster.is_topology_exist_error(stormClusterState, topologyId)) {
            errorString = "Y";
        } else {
            errorString = "";
        }
        TopologySummary topologySummary = new TopologySummary();
        topologySummary.set_id(topologyId);
        topologySummary.set_name(base.getStormName());
        topologySummary.set_uptimeSecs(TimeUtils.time_delta(base.getLanchTimeSecs()));
        topologySummary.set_status(base.getStatusString());
        topologySummary.set_numTasks(NimbusUtils.getTopologyTaskNum(assignment));
        topologySummary.set_numWorkers(assignment.getWorkers().size());
        topologySummary.set_errorInfo(errorString);
        Map<String, ComponentSummary> componentSummaryMap = new HashMap<>();
        HashMap<String, List<Integer>> componentToTasks = JStormUtils.reverse_map(taskToComponent);
        for (Entry<String, List<Integer>> entry : componentToTasks.entrySet()) {
            String name = entry.getKey();
            List<Integer> taskIds = entry.getValue();
            if (taskIds == null || taskIds.size() == 0) {
                LOG.warn("No task of component " + name);
                continue;
            }
            ComponentSummary componentSummary = new ComponentSummary();
            componentSummaryMap.put(name, componentSummary);
            componentSummary.set_name(name);
            componentSummary.set_type(taskToType.get(taskIds.get(0)));
            componentSummary.set_parallel(taskIds.size());
            componentSummary.set_taskIds(taskIds);
        }
        Map<Integer, TaskSummary> taskSummaryMap = new TreeMap<>();
        Map<Integer, List<TaskError>> taskErrors = Cluster.get_all_task_errors(stormClusterState, topologyId);
        for (Integer taskId : taskInfoMap.keySet()) {
            TaskSummary taskSummary = new TaskSummary();
            taskSummaryMap.put(taskId, taskSummary);
            taskSummary.set_taskId(taskId);
            if (taskHbMap == null) {
                taskSummary.set_status("Starting");
                taskSummary.set_uptime(0);
            } else {
                TaskHeartbeat hb = taskHbMap.get(taskId);
                if (hb == null) {
                    taskSummary.set_status("Starting");
                    taskSummary.set_uptime(0);
                } else {
                    boolean isInactive = NimbusUtils.isTaskDead(data, topologyId, taskId);
                    if (isInactive)
                        taskSummary.set_status("INACTIVE");
                    else
                        taskSummary.set_status("ACTIVE");
                    taskSummary.set_uptime(hb.get_uptime());
                }
            }
            if (StringUtils.isBlank(errorString)) {
                continue;
            }
            List<TaskError> taskErrorList = taskErrors.get(taskId);
            if (taskErrorList != null && taskErrorList.size() != 0) {
                for (TaskError taskError : taskErrorList) {
                    ErrorInfo errorInfo = new ErrorInfo(taskError.getError(), taskError.getTimSecs(), taskError.getLevel(), taskError.getCode());
                    taskSummary.add_to_errors(errorInfo);
                    String component = taskToComponent.get(taskId);
                    componentSummaryMap.get(component).add_to_errors(errorInfo);
                }
            }
        }
        for (ResourceWorkerSlot workerSlot : assignment.getWorkers()) {
            String hostname = workerSlot.getHostname();
            int port = workerSlot.getPort();
            for (Integer taskId : workerSlot.getTasks()) {
                TaskSummary taskSummary = taskSummaryMap.get(taskId);
                taskSummary.set_host(hostname);
                taskSummary.set_port(port);
            }
        }
        TopologyInfo topologyInfo = new TopologyInfo();
        topologyInfo.set_topology(topologySummary);
        topologyInfo.set_components(JStormUtils.mk_list(componentSummaryMap.values()));
        topologyInfo.set_tasks(JStormUtils.mk_list(taskSummaryMap.values()));
        // return topology metric & component metric only
        List<MetricInfo> tpMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.TOPOLOGY);
        List<MetricInfo> compMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.COMPONENT);
        List<MetricInfo> workerMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.WORKER);
        List<MetricInfo> compStreamMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.COMPONENT_STREAM);
        MetricInfo taskMetric = MetricUtils.mkMetricInfo();
        MetricInfo streamMetric = MetricUtils.mkMetricInfo();
        MetricInfo nettyMetric = MetricUtils.mkMetricInfo();
        MetricInfo tpMetric, compMetric, compStreamMetric, workerMetric;
        if (tpMetricList == null || tpMetricList.size() == 0) {
            tpMetric = MetricUtils.mkMetricInfo();
        } else {
            // get the last min topology metric
            tpMetric = tpMetricList.get(tpMetricList.size() - 1);
        }
        if (compMetricList == null || compMetricList.size() == 0) {
            compMetric = MetricUtils.mkMetricInfo();
        } else {
            compMetric = compMetricList.get(0);
        }
        if (compStreamMetricList == null || compStreamMetricList.size() == 0) {
            compStreamMetric = MetricUtils.mkMetricInfo();
        } else {
            compStreamMetric = compStreamMetricList.get(0);
        }
        if (workerMetricList == null || workerMetricList.size() == 0) {
            workerMetric = MetricUtils.mkMetricInfo();
        } else {
            workerMetric = workerMetricList.get(0);
        }
        TopologyMetric topologyMetrics = new TopologyMetric(tpMetric, compMetric, workerMetric, taskMetric, streamMetric, nettyMetric);
        topologyMetrics.set_compStreamMetric(compStreamMetric);
        topologyInfo.set_metrics(topologyMetrics);
        return topologyInfo;
    } catch (TException e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        throw new TException("Failed to get topologyInfo " + topologyId);
    } finally {
        long end = System.nanoTime();
        SimpleJStormMetric.updateNimbusHistogram("getTopologyInfo", (end - start) / TimeUtils.NS_PER_US);
    }
}
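From the client side, the handler above is reachable through the same NimbusClientWrapper pattern that checkError uses further down this page. The sketch below assumes a populated conf map and a running topology; the get_* accessors are the Thrift-generated counterparts of the set_* calls used in the handler and are assumed here rather than taken from this page:

// Hedged client-side sketch: fetch and print a topology summary.
Map clusterConf = Utils.readStormConfig();
clusterConf.putAll(conf);
NimbusClientWrapper client = new NimbusClientWrapper();
try {
    client.init(clusterConf);
    String topologyId = client.getClient().getTopologyId("my-topology");   // topology name is illustrative
    TopologyInfo info = client.getClient().getTopologyInfo(topologyId);
    TopologySummary summary = info.get_topology();                         // Thrift getter, assumed
    System.out.println(summary.get_name() + ": " + summary.get_status()
            + ", tasks=" + summary.get_numTasks()
            + ", workers=" + summary.get_numWorkers());
} finally {
    client.cleanup();
}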
Use of com.alibaba.jstorm.task.error.TaskError in project jstorm by alibaba.
The class JStormHelper, method checkError:
public static void checkError(Map conf, String topologyName) throws Exception {
    NimbusClientWrapper client = new NimbusClientWrapper();
    try {
        Map clusterConf = Utils.readStormConfig();
        clusterConf.putAll(conf);
        client.init(clusterConf);
        String topologyId = client.getClient().getTopologyId(topologyName);
        Map<Integer, List<TaskError>> errors = getTaskErrors(topologyId, conf);
        for (Entry<Integer, List<TaskError>> entry : errors.entrySet()) {
            Integer taskId = entry.getKey();
            List<TaskError> errorList = entry.getValue();
            for (TaskError error : errorList) {
                if (ErrorConstants.ERROR.equals(error.getLevel())) {
                    Assert.fail(taskId + " occur error:" + error.getError());
                } else if (ErrorConstants.FATAL.equals(error.getLevel())) {
                    Assert.fail(taskId + " occur error:" + error.getError());
                }
            }
        }
    } finally {
        client.cleanup();
    }
}
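A typical call site is an integration test that submits a topology and then asserts that no ERROR- or FATAL-level TaskError has been reported; a hypothetical one-liner, with the conf map and topology name as placeholders:

// Hypothetical call site: fail the test if the running topology reported any
// ERROR- or FATAL-level task error.
JStormHelper.checkError(topologyConf, "sequence-test");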
Use of com.alibaba.jstorm.task.error.TaskError in project jstorm by alibaba.
The class StormZkClusterStateTest, method testReport_task_error:
@Test
public void testReport_task_error() throws Exception {
    String topology_id = "topology_id_1";
    int task_id = 101;
    TaskError expected = new TaskError("task is dead", ErrorConstants.ERROR, ErrorConstants.CODE_TASK_DEAD, TimeUtils.current_time_secs());
    stormClusterState.report_task_error(topology_id, task_id, "task is dead", ErrorConstants.ERROR, ErrorConstants.CODE_TASK_DEAD);
    String path = Cluster.taskerror_path(topology_id, task_id);
    Map report_time = stormClusterState.topo_lastErr_time(topology_id);
    List<String> err_time = stormClusterState.task_error_time(topology_id, task_id);
    for (String time : err_time) {
        String errPath = path + Cluster.ZK_SEPERATOR + time;
        Object obj = stormClusterState.getObject(errPath, false);
        assertEquals(expected, obj);
    }
    stormClusterState.remove_task_error(topology_id, task_id);
    err_time = stormClusterState.task_error_time(topology_id, task_id);
    assertEquals(0, err_time.size());
}
Use of com.alibaba.jstorm.task.error.TaskError in project jstorm by alibaba.
The class StormZkClusterState, method report_task_error:
@Override
public void report_task_error(String topology_id, int task_id, String error, String error_level, int error_code, int duration_secs, String tag) throws Exception {
    boolean found = false;
    String path = Cluster.taskerror_path(topology_id, task_id);
    try {
        cluster_state.mkdirs(path);
    } catch (NodeExistsException ignored) {
    }
    List<Integer> children = new ArrayList<>();
    int timeSecs = TimeUtils.current_time_secs();
    String timestampPath = path + Cluster.ZK_SEPERATOR + timeSecs;
    TaskError taskError = new TaskError(error, error_level, error_code, timeSecs, duration_secs);
    for (String str : cluster_state.get_children(path, false)) {
        String errorPath = path + Cluster.ZK_SEPERATOR + str;
        Object obj = getObject(errorPath, false);
        if (obj == null) {
            deleteObject(errorPath);
            continue;
        }
        TaskError errorInfo = (TaskError) obj;
        // replace the old one if needed
        if (errorInfo.getError().equals(error) || (tag != null && errorInfo.getError().startsWith(tag))) {
            cluster_state.delete_node(errorPath);
            setObject(timestampPath, taskError);
            removeLastErrInfoDuration(topology_id, taskError.getDurationSecs());
            found = true;
            break;
        }
        children.add(Integer.parseInt(str));
    }
    if (!found) {
        Collections.sort(children);
        while (children.size() >= 3) {
            deleteObject(path + Cluster.ZK_SEPERATOR + children.remove(0));
        }
        setObject(timestampPath, taskError);
    }
    setLastErrInfo(topology_id, duration_secs, timeSecs);
}
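Two design points are visible above: an existing entry is replaced when its message equals the new error or starts with the given tag, and otherwise the oldest entries are pruned so that at most three error nodes remain per task, which keeps the ZooKeeper footprint per task small. A hedged call sketch, using only constants that appear elsewhere on this page (the message, duration, and tag values are illustrative):

// Minimal sketch: report an error for task 101 that replaces any previous entry
// starting with the "task is dead" tag and is retained for duration_secs = 3600.
stormClusterState.report_task_error("topology_id_1", 101, "task is dead: no heartbeat",
        ErrorConstants.ERROR, ErrorConstants.CODE_TASK_DEAD, 3600, "task is dead");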