use of com.alibaba.jstorm.cluster.StormClusterState in project jstorm by alibaba.
the class ServiceHandler method getTopologyInfo.
/**
 * Get TopologyInfo, which contains all data about the running status of a topology:
 * the topology summary, per-component summaries, per-task summaries (status, uptime,
 * host/port, errors) and the cached topology/component/worker metrics.
 *
 * @param topologyId id of the topology to describe
 * @return TopologyInfo
 * @throws NotAliveException if no StormBase or no Assignment is found for the topology
 * @throws TException        if any other failure occurs while collecting the data; the
 *                           original exception is attached as the cause
 */
@Override
public TopologyInfo getTopologyInfo(String topologyId) throws NotAliveException, TException {
    long start = System.nanoTime();
    StormClusterState stormClusterState = data.getStormClusterState();
    try {
        // get topology's StormBase
        StormBase base = stormClusterState.storm_base(topologyId, null);
        if (base == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }
        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }

        // heartbeats may be absent when no task of the topology has reported yet
        TopologyTaskHbInfo topologyTaskHbInfo = data.getTasksHeartbeat().get(topologyId);
        Map<Integer, TaskHeartbeat> taskHbMap = null;
        if (topologyTaskHbInfo != null) {
            taskHbMap = topologyTaskHbInfo.get_taskHbs();
        }

        Map<Integer, TaskInfo> taskInfoMap = Cluster.get_all_taskInfo(stormClusterState, topologyId);
        Map<Integer, String> taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, taskInfoMap);
        Map<Integer, String> taskToType = Cluster.get_all_task_type(stormClusterState, topologyId, taskInfoMap);

        // the summary only carries a flag ("Y" or empty), the details go to tasks/components
        boolean topologyHasError = Cluster.is_topology_exist_error(stormClusterState, topologyId);
        String errorString = topologyHasError ? "Y" : "";

        TopologySummary topologySummary = new TopologySummary();
        topologySummary.set_id(topologyId);
        topologySummary.set_name(base.getStormName());
        topologySummary.set_uptimeSecs(TimeUtils.time_delta(base.getLanchTimeSecs()));
        topologySummary.set_status(base.getStatusString());
        topologySummary.set_numTasks(NimbusUtils.getTopologyTaskNum(assignment));
        topologySummary.set_numWorkers(assignment.getWorkers().size());
        topologySummary.set_errorInfo(errorString);

        // one ComponentSummary per component, built from the reversed task -> component map
        Map<String, ComponentSummary> componentSummaryMap = new HashMap<String, ComponentSummary>();
        HashMap<String, List<Integer>> componentToTasks = JStormUtils.reverse_map(taskToComponent);
        for (Entry<String, List<Integer>> entry : componentToTasks.entrySet()) {
            String name = entry.getKey();
            List<Integer> taskIds = entry.getValue();
            if (taskIds == null || taskIds.size() == 0) {
                LOG.warn("No task of component " + name);
                continue;
            }
            ComponentSummary componentSummary = new ComponentSummary();
            componentSummaryMap.put(name, componentSummary);
            componentSummary.set_name(name);
            // the type is looked up through the first task of the component
            componentSummary.set_type(taskToType.get(taskIds.get(0)));
            componentSummary.set_parallel(taskIds.size());
            componentSummary.set_taskIds(taskIds);
        }

        // one TaskSummary per task, sorted by task id
        Map<Integer, TaskSummary> taskSummaryMap = new TreeMap<Integer, TaskSummary>();
        Map<Integer, List<TaskError>> taskErrors = Cluster.get_all_task_errors(stormClusterState, topologyId);
        for (Integer taskId : taskInfoMap.keySet()) {
            TaskSummary taskSummary = new TaskSummary();
            taskSummaryMap.put(taskId, taskSummary);
            taskSummary.set_taskId(taskId);

            // a task without any heartbeat is reported as "Starting" with zero uptime
            TaskHeartbeat hb = (taskHbMap == null) ? null : taskHbMap.get(taskId);
            if (hb == null) {
                taskSummary.set_status("Starting");
                taskSummary.set_uptime(0);
            } else {
                boolean isInactive = NimbusUtils.isTaskDead(data, topologyId, taskId);
                taskSummary.set_status(isInactive ? "INACTIVE" : "ACTIVE");
                taskSummary.set_uptime(hb.get_uptime());
            }

            // task errors are only attached when the topology is flagged as having errors
            if (!topologyHasError) {
                continue;
            }
            List<TaskError> taskErrorList = taskErrors.get(taskId);
            if (taskErrorList == null || taskErrorList.size() == 0) {
                continue;
            }
            // guard against a task whose component has no summary instead of failing the whole call
            ComponentSummary owner = componentSummaryMap.get(taskToComponent.get(taskId));
            if (owner == null) {
                LOG.warn("No component summary for task " + taskId + " of topology " + topologyId);
            }
            for (TaskError taskError : taskErrorList) {
                ErrorInfo errorInfo = new ErrorInfo(taskError.getError(), taskError.getTimSecs(), taskError.getLevel(), taskError.getCode());
                taskSummary.add_to_errors(errorInfo);
                if (owner != null) {
                    owner.add_to_errors(errorInfo);
                }
            }
        }

        // fill in host/port of every task from the worker slots of the assignment
        for (ResourceWorkerSlot workerSlot : assignment.getWorkers()) {
            String hostname = workerSlot.getHostname();
            int port = workerSlot.getPort();
            for (Integer taskId : workerSlot.getTasks()) {
                TaskSummary taskSummary = taskSummaryMap.get(taskId);
                if (taskSummary == null) {
                    // the assignment references a task that has no task info; skip it rather than fail
                    LOG.warn("Task " + taskId + " of topology " + topologyId + " is assigned but has no task info");
                    continue;
                }
                taskSummary.set_host(hostname);
                taskSummary.set_port(port);
            }
        }

        TopologyInfo topologyInfo = new TopologyInfo();
        topologyInfo.set_topology(topologySummary);
        topologyInfo.set_components(JStormUtils.mk_list(componentSummaryMap.values()));
        topologyInfo.set_tasks(JStormUtils.mk_list(taskSummaryMap.values()));

        // return topology metric & component metric only (plus worker metric);
        // task, stream and netty metrics are returned as empty MetricInfo objects
        List<MetricInfo> tpMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.TOPOLOGY);
        List<MetricInfo> compMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.COMPONENT);
        List<MetricInfo> workerMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.WORKER);
        MetricInfo taskMetric = MetricUtils.mkMetricInfo();
        MetricInfo streamMetric = MetricUtils.mkMetricInfo();
        MetricInfo nettyMetric = MetricUtils.mkMetricInfo();
        MetricInfo tpMetric, compMetric, workerMetric;
        if (tpMetricList == null || tpMetricList.size() == 0) {
            tpMetric = MetricUtils.mkMetricInfo();
        } else {
            // get the last min topology metric
            tpMetric = tpMetricList.get(tpMetricList.size() - 1);
        }
        // NOTE(review): component and worker metrics take the FIRST list element while the
        // topology metric takes the LAST one - kept as in the original, confirm this is intended
        if (compMetricList == null || compMetricList.size() == 0) {
            compMetric = MetricUtils.mkMetricInfo();
        } else {
            compMetric = compMetricList.get(0);
        }
        if (workerMetricList == null || workerMetricList.size() == 0) {
            workerMetric = MetricUtils.mkMetricInfo();
        } else {
            workerMetric = workerMetricList.get(0);
        }
        TopologyMetric topologyMetrics = new TopologyMetric(tpMetric, compMetric, workerMetric, taskMetric, streamMetric, nettyMetric);
        topologyInfo.set_metrics(topologyMetrics);
        return topologyInfo;
    } catch (TException e) {
        // thrift exceptions are rethrown unchanged
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        // keep the root cause so the failure can be diagnosed from the stack trace
        throw new TException("Failed to get topologyInfo " + topologyId, e);
    } finally {
        // record the call latency regardless of the outcome
        long end = System.nanoTime();
        SimpleJStormMetric.updateNimbusHistogram("getTopologyInfo", (end - start) / TimeUtils.NS_PER_US);
    }
}
use of com.alibaba.jstorm.cluster.StormClusterState in project jstorm by alibaba.
the class ServiceHandler method getSupervisorWorkersByHostOrId.
/**
 * Gets supervisor workers by host or supervisor id; note that the id takes priority over
 * the host when both are given.
 *
 * @param host supervisor host name (or ip), used only when {@code id} is blank
 * @param id   supervisor id
 * @return the supervisor summary, its workers (sorted by port) and the worker metrics
 *         keyed by worker slot name
 * @throws TException if neither host nor id is given, if no matching supervisor is found,
 *                    or if any other failure occurs (wrapped as the cause)
 */
private SupervisorWorkers getSupervisorWorkersByHostOrId(String host, String id) throws TException {
    long start = System.nanoTime();
    if (StringUtils.isBlank(id) && StringUtils.isBlank(host)) {
        throw new TException("Must specify host or supervisor id!");
    }
    try {
        StormClusterState stormClusterState = data.getStormClusterState();
        // all supervisors
        Map<String, SupervisorInfo> supervisorInfos = Cluster.get_all_SupervisorInfo(stormClusterState, null);
        SupervisorInfo supervisorInfo = null;
        String ip;
        if (!StringUtils.isBlank(id)) {
            supervisorInfo = supervisorInfos.get(id);
            // check before dereferencing, otherwise an unknown id ends up as a NullPointerException
            if (supervisorInfo == null) {
                throw new TException("unknown supervisor id:" + id);
            }
            host = supervisorInfo.getHostName();
            ip = NetWorkUtils.host2Ip(host);
        } else {
            ip = NetWorkUtils.host2Ip(host);
            // a supervisor may be registered under either its host name or its ip
            for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) {
                SupervisorInfo info = entry.getValue();
                if (info.getHostName().equals(host) || info.getHostName().equals(ip)) {
                    id = entry.getKey();
                    supervisorInfo = info;
                    break;
                }
            }
            if (supervisorInfo == null) {
                // the id is blank on this path, so report the host that was searched for
                throw new TException("unknown supervisor host:" + host);
            }
        }

        Map<String, Assignment> assignments = Cluster.get_all_assignment(stormClusterState, null);
        Map<Integer, WorkerSummary> portWorkerSummarys = new TreeMap<Integer, WorkerSummary>();
        int usedSlotNumber = 0;
        // per-topology cache so the task -> component map is fetched once per topology
        Map<String, Map<Integer, String>> topologyTaskToComponent = new HashMap<String, Map<Integer, String>>();
        Map<String, MetricInfo> metricInfoMap = new HashMap<String, MetricInfo>();
        for (Entry<String, Assignment> entry : assignments.entrySet()) {
            String topologyId = entry.getKey();
            Assignment assignment = entry.getValue();
            Set<ResourceWorkerSlot> workers = assignment.getWorkers();
            for (ResourceWorkerSlot worker : workers) {
                // only workers placed on the requested supervisor are of interest
                if (!id.equals(worker.getNodeId())) {
                    continue;
                }
                usedSlotNumber++;
                Integer port = worker.getPort();
                WorkerSummary workerSummary = portWorkerSummarys.get(port);
                if (workerSummary == null) {
                    workerSummary = new WorkerSummary();
                    workerSummary.set_port(port);
                    workerSummary.set_topology(topologyId);
                    workerSummary.set_tasks(new ArrayList<TaskComponent>());
                    portWorkerSummarys.put(port, workerSummary);
                }
                Map<Integer, String> taskToComponent = topologyTaskToComponent.get(topologyId);
                if (taskToComponent == null) {
                    taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, null);
                    topologyTaskToComponent.put(topologyId, taskToComponent);
                }
                // worker uptime is derived from the earliest start time among its tasks
                int earliest = TimeUtils.current_time_secs();
                for (Integer taskId : worker.getTasks()) {
                    TaskComponent taskComponent = new TaskComponent();
                    taskComponent.set_component(taskToComponent.get(taskId));
                    taskComponent.set_taskId(taskId);
                    Integer startTime = assignment.getTaskStartTimeSecs().get(taskId);
                    if (startTime != null && startTime < earliest) {
                        earliest = startTime;
                    }
                    workerSummary.add_to_tasks(taskComponent);
                }
                workerSummary.set_uptime(TimeUtils.time_delta(earliest));

                String workerSlotName = getWorkerSlotName(ip, port);
                List<MetricInfo> workerMetricInfoList = this.data.getMetricCache().getMetricData(topologyId, MetaType.WORKER);
                // the metric cache may have nothing for this topology
                if (workerMetricInfoList != null && !workerMetricInfoList.isEmpty()) {
                    MetricInfo workerMetricInfo = workerMetricInfoList.get(0);
                    // remove metrics whose name does not contain this supervisor's ip
                    // NOTE(review): this removes entries from the MetricInfo object handed out by the
                    // metric cache; if the cache returns its own instance rather than a copy, the
                    // cached data is trimmed as well - confirm against MetricCache
                    for (Iterator<String> itr = workerMetricInfo.get_metrics().keySet().iterator(); itr.hasNext(); ) {
                        String metricName = itr.next();
                        if (!metricName.contains(ip)) {
                            itr.remove();
                        }
                    }
                    metricInfoMap.put(workerSlotName, workerMetricInfo);
                }
            }
        }

        List<WorkerSummary> workerList = new ArrayList<WorkerSummary>(portWorkerSummarys.values());
        Map<String, Integer> supervisorToUsedSlotNum = new HashMap<String, Integer>();
        supervisorToUsedSlotNum.put(id, usedSlotNumber);
        SupervisorSummary supervisorSummary = NimbusUtils.mkSupervisorSummary(supervisorInfo, id, supervisorToUsedSlotNum);
        return new SupervisorWorkers(supervisorSummary, workerList, metricInfoMap);
    } catch (TException e) {
        LOG.info("Failed to get supervisor workers ", e);
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to get supervisor workers ", e);
        throw new TException(e);
    } finally {
        // record the call latency regardless of the outcome
        long end = System.nanoTime();
        SimpleJStormMetric.updateNimbusHistogram("getSupervisorWorkers", (end - start) / TimeUtils.NS_PER_US);
    }
}
use of com.alibaba.jstorm.cluster.StormClusterState in project jstorm by alibaba.
the class TopologyAssign method backupAssignment.
/**
 * Backs up a topology's assignment to ZK, together with the component -> sorted task ids
 * mapping of that topology. Any failure is logged as a warning and not propagated.
 *
 * @param assignment the assignment to back up
 * @param event      the assign event providing the topology id and name
 * @@@ Question Do we need to do backup operation every time?
 */
public void backupAssignment(Assignment assignment, TopologyAssignEvent event) {
    final String topologyId = event.getTopologyId();
    final String topologyName = event.getTopologyName();
    try {
        StormClusterState clusterState = nimbusData.getStormClusterState();
        // one little problem, get tasks twice when assign one topology
        Map<Integer, String> taskToComponent = Cluster.get_all_task_component(clusterState, topologyId, null);
        Map<String, List<Integer>> componentToTasks = JStormUtils.reverse_map(taskToComponent);
        // store the task ids of every component in ascending order
        for (List<Integer> taskIds : componentToTasks.values()) {
            Collections.sort(taskIds);
        }
        // the backup is stored under the topology name, not the topology id
        clusterState.backup_assignment(topologyName, new AssignmentBak(componentToTasks, assignment));
    } catch (Exception e) {
        LOG.warn("Failed to backup " + topologyId + " assignment " + assignment, e);
    }
}
use of com.alibaba.jstorm.cluster.StormClusterState in project jstorm by alibaba.
the class ServiceHandler method updateTopology.
/**
 * Updates a running topology: optionally replaces its jar, merges {@code updateConf} into the
 * stored topology configuration, writes the merged configuration back to the blob store and
 * then triggers the {@code update_topology} status transition.
 *
 * @param name             topology name
 * @param uploadedLocation location of the uploaded jar; when {@code null} the jar is not updated
 * @param updateConf       JSON string with the configuration entries to merge
 * @throws NotAliveException if the topology is not alive / no topology id exists for the name
 * @throws TException        if any other failure occurs; the original exception is attached
 *                           as the cause
 */
@Override
public void updateTopology(String name, String uploadedLocation, String updateConf) throws NotAliveException, InvalidTopologyException, TException {
    try {
        // firstly update jar and conf
        checkTopologyActive(data, name, true);
        StormClusterState stormClusterState = data.getStormClusterState();
        String topologyId = Cluster.get_topology_id(stormClusterState, name);
        if (topologyId == null) {
            throw new NotAliveException(name);
        }
        BlobStore blobStore = data.getBlobStore();
        NimbusInfo nimbusInfo = data.getNimbusHostPortInfo();
        if (uploadedLocation != null) {
            setupJar(uploadedLocation, topologyId, blobStore, stormClusterState, nimbusInfo, true);
        }

        // merge the requested changes over the stored topology configuration
        Map topoConf = StormConfig.read_nimbus_topology_conf(topologyId, blobStore);
        Map<Object, Object> config = (Map<Object, Object>) JStormUtils.from_json(updateConf);
        topoConf.putAll(config);
        String confKey = StormConfig.master_stormconf_key(topologyId);
        BlobStoreUtils.updateBlob(blobStore, confKey, Utils.serialize(topoConf));
        // for the local-fs blob store the new blob version is additionally registered in the cluster state
        if (blobStore instanceof LocalFsBlobStore) {
            stormClusterState.setup_blobstore(confKey, nimbusInfo, BlobStoreUtils.getVersionForKey(confKey, nimbusInfo, conf));
        }

        // only the changed entries are handed to the status transition
        NimbusUtils.transitionName(data, name, true, StatusType.update_topology, config);
        LOG.info("update topology " + name + " successfully");
        notifyTopologyActionListener(name, "updateTopology");
    } catch (NotAliveException e) {
        String errMsg = "Error, no this topology " + name;
        LOG.error(errMsg, e);
        throw new NotAliveException(errMsg);
    } catch (Exception e) {
        String errMsg = "Failed to update topology " + name;
        LOG.error(errMsg, e);
        // keep the root cause attached instead of dropping it
        throw new TException(errMsg, e);
    }
}
use of com.alibaba.jstorm.cluster.StormClusterState in project jstorm by alibaba.
the class FollowerRunnable method run.
/**
 * Follower loop. While {@code state} is true it sleeps for {@code sleepTime} and then:
 * tries to become leader when ZK reports no leader; promotes this nimbus (unregistering its
 * slave entries and running the leader callback) when {@code isLeader} accepts the ZK leader
 * host but the local flag is not set yet; halts the process when the local flag says leader
 * but {@code isLeader} rejects the ZK leader host; otherwise refreshes the slave registration
 * (after a blob sync for the local-fs blob store).
 */
@Override
public void run() {
    LOG.info("Follower Thread starts!");
    while (state) {
        StormClusterState clusterState = data.getStormClusterState();
        try {
            Thread.sleep(sleepTime);

            // nobody is leader: compete for leadership and start the next round
            if (!clusterState.leader_existed()) {
                this.tryToBeLeader(data.getConf());
                continue;
            }

            String leaderHost = clusterState.get_leader_host();
            if (isLeader(leaderHost)) {
                // first round after winning the election: switch from slave to leader
                if (!data.isLeader()) {
                    clusterState.unregister_nimbus_host(hostPort);
                    clusterState.unregister_nimbus_detail(hostPort);
                    data.setLeader(true);
                    leaderCallback.execute();
                }
                continue;
            }

            // ZK names another leader while this nimbus still believes it is the leader
            if (data.isLeader()) {
                LOG.info("New ZK master is " + leaderHost);
                JStormUtils.halt_process(1, "Lose ZK master node, halt process");
                return;
            }

            // here the nimbus is not leader
            if (data.getBlobStore() instanceof LocalFsBlobStore) {
                blobSync();
            }
            clusterState.update_nimbus_slave(hostPort, data.uptime());
            update_nimbus_detail();
        } catch (InterruptedException e) {
            // interruption is ignored here; the loop condition decides whether to go on
            continue;
        } catch (Exception e) {
            // errors raised after the loop was asked to stop are not reported
            if (state) {
                LOG.error("Unknow exception ", e);
            }
        }
    }
    LOG.info("Follower Thread has closed!");
}
Aggregations