Use of backtype.storm.generated.NotAliveException in project jstorm by alibaba.
The class ClusterInfoBolt, method getClusterInfo.
/**
 * Fetches the cluster summary once and reports worker-slot and TPS metrics.
 *
 * <p>Worker counts are summed over all supervisors and the free/total slot
 * figures are sent under the "ClusterMonitor" name. The TPS of every topology
 * is obtained via {@code getTopologyTPS} and added to the cluster total; a
 * per-topology metric is sent for every topology whose name does not start
 * with "ClusterMonitor".
 *
 * <p>A {@code TException} triggers {@code initClient(configMap)} and is logged
 * as an error; a {@code NotAliveException} is logged as a warning. Neither is
 * rethrown.
 *
 * @param client client used to query the cluster summary and topology TPS
 */
private void getClusterInfo(Client client) {
    try {
        ClusterSummary cluster = client.getClusterInfo();

        // Slot usage, summed across every supervisor.
        int total = 0;
        int used = 0;
        List<SupervisorSummary> supervisors = cluster.get_supervisors();
        for (SupervisorSummary supervisor : supervisors) {
            total += supervisor.get_num_workers();
            used += supervisor.get_num_used_workers();
        }
        int free = total - used;
        LOGGER.info("cluster totalWorkers = " + total + ", usedWorkers = " + used + ", freeWorkers = " + free);
        HttpCatClient.sendMetric("ClusterMonitor", "freeSlots", "avg", String.valueOf(free));
        HttpCatClient.sendMetric("ClusterMonitor", "totalSlots", "avg", String.valueOf(total));

        // Emit TPS: every topology counts toward the cluster total, but the
        // monitor's own topologies do not get an individual metric.
        long emitTotal = 0L;
        List<TopologySummary> topologies = cluster.get_topologies();
        for (TopologySummary topology : topologies) {
            long tps = getTopologyTPS(topology, client);
            emitTotal += tps;
            String name = topology.get_name();
            if (!name.startsWith("ClusterMonitor")) {
                HttpCatClient.sendMetric(name, name + "-TPS", "avg", String.valueOf(tps));
            }
        }
        HttpCatClient.sendMetric("ClusterMonitor", "ClusterEmitTPS", "avg", String.valueOf(emitTotal));
    } catch (TException e) {
        // Re-create the client first, then record the failure.
        initClient(configMap);
        LOGGER.error("get client info error.", e);
    } catch (NotAliveException nae) {
        LOGGER.warn("topology is dead.", nae);
    }
}
Use of backtype.storm.generated.NotAliveException in project jstorm by alibaba.
The class ServiceHandler, method killTopologyWithOpts.
/**
 * Kills the topology with the given name.
 *
 * <p>Checks that the topology is active, resolves its id, transitions it to
 * {@code StatusType.kill} (passing the wait seconds from {@code options} when
 * set, otherwise {@code null}), pushes a {@code KillTopologyEvent} and
 * notifies the topology action listeners.
 *
 * @param topologyName name of the topology to kill
 * @param options      kill options; the wait time is read only if it is set
 * @throws NotAliveException if the topology is reported as not alive
 * @throws TException        if any other failure occurs; the original
 *                           exception is attached as the cause
 */
@Override
public void killTopologyWithOpts(String topologyName, KillOptions options) throws TException, NotAliveException {
    try {
        checkTopologyActive(data, topologyName, true);
        String topologyId = getTopologyId(topologyName);
        Integer wait_amt = null;
        if (options.is_set_wait_secs()) {
            wait_amt = options.get_wait_secs();
        }
        NimbusUtils.transitionName(data, topologyName, true, StatusType.kill, wait_amt);
        KillTopologyEvent.pushEvent(topologyId);
        notifyTopologyActionListener(topologyName, "killTopology");
    } catch (NotAliveException e) {
        String errMsg = "KillTopology Error, no this topology " + topologyName;
        LOG.error(errMsg, e);
        throw new NotAliveException(errMsg);
    } catch (Exception e) {
        String errMsg = "Failed to kill topology " + topologyName;
        LOG.error(errMsg, e);
        // Keep the root cause attached so in-process callers can inspect it.
        throw new TException(errMsg, e);
    }
}
Use of backtype.storm.generated.NotAliveException in project jstorm by alibaba.
The class ServiceHandler, method deactivate.
/**
 * Sets the topology status to inactive.
 *
 * <p>Transitions the named topology with {@code StatusType.inactivate} and
 * then notifies the topology action listeners.
 *
 * @param topologyName name of the topology to deactivate
 * @throws NotAliveException if the topology is reported as not alive
 * @throws TException        if any other failure occurs; the original
 *                           exception is attached as the cause
 */
@Override
public void deactivate(String topologyName) throws TException, NotAliveException {
    try {
        NimbusUtils.transitionName(data, topologyName, true, StatusType.inactivate);
        notifyTopologyActionListener(topologyName, "inactivate");
    } catch (NotAliveException e) {
        String errMsg = "Deactivate Error, no this topology " + topologyName;
        LOG.error(errMsg, e);
        throw new NotAliveException(errMsg);
    } catch (Exception e) {
        String errMsg = "Failed to deactivate topology " + topologyName;
        LOG.error(errMsg, e);
        // Keep the root cause attached so in-process callers can inspect it.
        throw new TException(errMsg, e);
    }
}
Use of backtype.storm.generated.NotAliveException in project jstorm by alibaba.
The class ServiceHandler, method submitTopologyWithOpts.
/**
 * Submits one topology.
 *
 * <p>Steps, in order: validate the topology name and the JSON configuration;
 * check whether a topology of that name is active; when hot deploy is enabled,
 * kill the existing topology and wait (bounded by the task cleanup timeout)
 * for its semaphore; reserve a new topology id under the {@code data} lock;
 * normalize and validate the topology; set up code, ZK task info and the error
 * directory; make the assignment; push the start event and notify listeners.
 * The pending-submit entry is always removed and the elapsed time recorded in
 * the {@code finally} block.
 *
 * @param topologyName        String: topology name
 * @param uploadedJarLocation String: already uploaded jar path
 * @param jsonConf            String: all topology configuration serialized
 *                            as JSON
 * @param topology            StormTopology: topology object
 * @param options             submit options; the initial status is read
 *                            from it when making the assignment
 * @return the id generated for the submitted topology
 * @throws AlreadyAliveException    if the topology already exists or a
 *                                  submission with the same name is pending
 * @throws InvalidTopologyException if the name or configuration is invalid,
 *                                  or topology validation fails
 * @throws TopologyAssignException  if assignment or any other submission
 *                                  step fails
 * @throws TException               if the liveness check or the hot-deploy
 *                                  kill fails
 */
@SuppressWarnings("unchecked")
@Override
public String submitTopologyWithOpts(String topologyName, String uploadedJarLocation, String jsonConf, StormTopology topology, SubmitOptions options) throws AlreadyAliveException, InvalidTopologyException, TopologyAssignException, TException {
    LOG.info("Receive " + topologyName + ", uploadedJarLocation:" + uploadedJarLocation);
    long start = System.nanoTime();
    // check that the topology name is valid
    if (!Common.charValidate(topologyName)) {
        throw new InvalidTopologyException(topologyName + " is not a valid topology name");
    }
    Map<Object, Object> serializedConf = (Map<Object, Object>) JStormUtils.from_json(jsonConf);
    if (serializedConf == null) {
        LOG.warn("Failed to serialized Configuration");
        throw new InvalidTopologyException("Failed to serialize topology configuration");
    }
    Common.confValidate(serializedConf, data.getConf());
    boolean enableDeploy = ConfigExtension.getTopologyHotDeplogyEnable(serializedConf);
    try {
        checkTopologyActive(data, topologyName, enableDeploy);
    } catch (AlreadyAliveException e) {
        LOG.info(topologyName + " already exists ");
        throw e;
    } catch (NotAliveException e) {
        LOG.info(topologyName + " is not alive ");
        throw e;
    } catch (Throwable e) {
        LOG.info("Failed to check whether topology is alive or not", e);
        throw new TException(e);
    }
    if (enableDeploy) {
        LOG.info("deploy the topology");
        try {
            StormClusterState stormClusterState = data.getStormClusterState();
            String topologyId = Cluster.get_topology_id(stormClusterState, topologyName);
            if (topologyId == null) {
                throw new NotAliveException(topologyName);
            }
            LOG.info("start kill the old topology {}", topologyId);
            // Nimbus conf overlaid with the old topology's own conf, if any.
            Map oldConf = new HashMap();
            oldConf.putAll(conf);
            Map killedStormConf = StormConfig.read_nimbus_topology_conf(topologyId, data.getBlobStore());
            if (killedStormConf != null) {
                oldConf.putAll(killedStormConf);
            }
            NimbusUtils.transitionName(data, topologyName, true, StatusType.kill, 0);
            KillTopologyEvent.pushEvent(topologyId);
            notifyTopologyActionListener(topologyName, "killTopology");
            // wait until all workers are killed
            final long timeoutSeconds = ConfigExtension.getTaskCleanupTimeoutSec(oldConf);
            ConcurrentHashMap<String, Semaphore> topologyIdtoSem = data.getTopologyIdtoSem();
            // containsKey, not contains: ConcurrentHashMap.contains(Object)
            // tests VALUES, so it could never match a topology id.
            if (!topologyIdtoSem.containsKey(topologyId)) {
                topologyIdtoSem.putIfAbsent(topologyId, new Semaphore(0));
            }
            Semaphore semaphore = topologyIdtoSem.get(topologyId);
            if (semaphore != null) {
                semaphore.tryAcquire(timeoutSeconds, TimeUnit.SECONDS);
                // Remove by key; passing the semaphore itself never matched
                // an entry and left the map growing.
                topologyIdtoSem.remove(topologyId);
            }
            LOG.info("success kill the old topology {}", topologyId);
        } catch (Exception e) {
            String errMsg = "Failed to kill topology " + topologyName;
            LOG.error(errMsg, e);
            // Keep the root cause attached so in-process callers can inspect it.
            throw new TException(errMsg, e);
        }
    }
    String topologyId = null;
    synchronized (data) {
        // avoid the same topology being submitted more than once at the same time
        Set<String> pendingTopologies = data.getPendingSubmitTopologies().buildMap().keySet();
        for (String cachTopologyId : pendingTopologies) {
            if (cachTopologyId.contains(topologyName + "-"))
                throw new AlreadyAliveException(topologyName + " were submitted");
        }
        int counter = data.getSubmittedCount().incrementAndGet();
        topologyId = Common.topologyNameToId(topologyName, counter);
        data.getPendingSubmitTopologies().put(topologyId, null);
    }
    try {
        serializedConf.put(Config.TOPOLOGY_ID, topologyId);
        serializedConf.put(Config.TOPOLOGY_NAME, topologyName);
        Map<Object, Object> stormConf;
        stormConf = NimbusUtils.normalizeConf(conf, serializedConf, topology);
        LOG.info("Normalized configuration:" + stormConf);
        Map<Object, Object> totalStormConf = new HashMap<Object, Object>(conf);
        totalStormConf.putAll(stormConf);
        StormTopology normalizedTopology = NimbusUtils.normalizeTopology(stormConf, topology, true);
        // this validates the structure of the topology
        Common.validate_basic(normalizedTopology, totalStormConf, topologyId);
        // no need to generate the real topology here, so Common.system_topology is skipped
        StormClusterState stormClusterState = data.getStormClusterState();
        // create /local-dir/nimbus/topologyId/xxxx files
        setupStormCode(conf, topologyId, uploadedJarLocation, stormConf, normalizedTopology);
        // wait for blob replication before activating the topology
        waitForDesiredCodeReplication(conf, topologyId);
        // generate TaskInfo for every bolt or spout in ZK
        // /ZK/tasks/topologyId/xxx
        setupZkTaskInfo(conf, topologyId, stormClusterState);
        // create the topology error directory
        String path = Cluster.taskerror_storm_root(topologyId);
        stormClusterState.mkdir(path);
        // make assignments for the topology
        LOG.info("Submit for " + topologyName + " with conf " + serializedConf);
        makeAssignment(topologyName, topologyId, options.get_initial_status());
        // push start event after startup
        double metricsSampleRate = ConfigExtension.getMetricSampleRate(stormConf);
        StartTopologyEvent.pushEvent(topologyId, metricsSampleRate);
        notifyTopologyActionListener(topologyName, "submitTopology");
    } catch (FailedAssignTopologyException e) {
        StringBuilder sb = new StringBuilder();
        sb.append("Fail to sumbit topology, Root cause:");
        if (e.getMessage() == null) {
            sb.append("submit timeout");
        } else {
            sb.append(e.getMessage());
        }
        sb.append("\n\n");
        sb.append("topologyId:" + topologyId);
        sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
        LOG.error(sb.toString(), e);
        throw new TopologyAssignException(sb.toString());
    } catch (InvalidParameterException e) {
        StringBuilder sb = new StringBuilder();
        sb.append("Fail to sumbit topology ");
        sb.append(e.getMessage());
        sb.append(", cause:" + e.getCause());
        sb.append("\n\n");
        sb.append("topologyId:" + topologyId);
        sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
        LOG.error(sb.toString(), e);
        throw new InvalidParameterException(sb.toString());
    } catch (InvalidTopologyException e) {
        LOG.error("Topology is invalid. " + e.get_msg());
        throw e;
    } catch (Throwable e) {
        StringBuilder sb = new StringBuilder();
        sb.append("Fail to sumbit topology ");
        sb.append(e.getMessage());
        sb.append(", cause:" + e.getCause());
        sb.append("\n\n");
        sb.append("topologyId:" + topologyId);
        sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
        LOG.error(sb.toString(), e);
        throw new TopologyAssignException(sb.toString());
    } finally {
        // whether or not the assignment succeeded, the topology id is no
        // longer pending, so remove it from the pending-submit topologies
        data.getPendingSubmitTopologies().remove(topologyId);
        // NOTE(review): the divisor is named NS_PER_US but the log line below
        // labels the value "ms" — confirm the intended unit.
        double spend = (System.nanoTime() - start) / TimeUtils.NS_PER_US;
        SimpleJStormMetric.updateNimbusHistogram("submitTopologyWithOpts", spend);
        LOG.info("submitTopologyWithOpts {} costs {}ms", topologyName, spend);
    }
    return topologyId;
}
Use of backtype.storm.generated.NotAliveException in project jstorm by alibaba.
The class ServiceHandler, method getTopologyInfo.
/**
 * Gets the TopologyInfo, which contains the running status data of the
 * topology: its summary, per-component summaries, per-task summaries
 * (status, uptime, host/port, errors) and topology/component/worker metrics.
 *
 * @param topologyId id of the topology to describe
 * @return TopologyInfo
 * @throws NotAliveException if no StormBase or no assignment exists for the
 *                           topology id
 * @throws TException        if any other failure occurs; the original
 *                           exception is attached as the cause
 */
@Override
public TopologyInfo getTopologyInfo(String topologyId) throws NotAliveException, TException {
    long start = System.nanoTime();
    StormClusterState stormClusterState = data.getStormClusterState();
    try {
        // get topology's StormBase
        StormBase base = stormClusterState.storm_base(topologyId, null);
        if (base == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }
        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }
        TopologyTaskHbInfo topologyTaskHbInfo = data.getTasksHeartbeat().get(topologyId);
        Map<Integer, TaskHeartbeat> taskHbMap = null;
        if (topologyTaskHbInfo != null)
            taskHbMap = topologyTaskHbInfo.get_taskHbs();
        Map<Integer, TaskInfo> taskInfoMap = Cluster.get_all_taskInfo(stormClusterState, topologyId);
        Map<Integer, String> taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, taskInfoMap);
        Map<Integer, String> taskToType = Cluster.get_all_task_type(stormClusterState, topologyId, taskInfoMap);
        // "Y" marks that the topology has errors; blank means none
        String errorString;
        if (Cluster.is_topology_exist_error(stormClusterState, topologyId)) {
            errorString = "Y";
        } else {
            errorString = "";
        }
        TopologySummary topologySummary = new TopologySummary();
        topologySummary.set_id(topologyId);
        topologySummary.set_name(base.getStormName());
        topologySummary.set_uptimeSecs(TimeUtils.time_delta(base.getLanchTimeSecs()));
        topologySummary.set_status(base.getStatusString());
        topologySummary.set_numTasks(NimbusUtils.getTopologyTaskNum(assignment));
        topologySummary.set_numWorkers(assignment.getWorkers().size());
        topologySummary.set_errorInfo(errorString);
        // one ComponentSummary per component that has at least one task
        Map<String, ComponentSummary> componentSummaryMap = new HashMap<String, ComponentSummary>();
        HashMap<String, List<Integer>> componentToTasks = JStormUtils.reverse_map(taskToComponent);
        for (Entry<String, List<Integer>> entry : componentToTasks.entrySet()) {
            String name = entry.getKey();
            List<Integer> taskIds = entry.getValue();
            if (taskIds == null || taskIds.size() == 0) {
                LOG.warn("No task of component " + name);
                continue;
            }
            ComponentSummary componentSummary = new ComponentSummary();
            componentSummaryMap.put(name, componentSummary);
            componentSummary.set_name(name);
            // the component type is taken from its first task
            componentSummary.set_type(taskToType.get(taskIds.get(0)));
            componentSummary.set_parallel(taskIds.size());
            componentSummary.set_taskIds(taskIds);
        }
        Map<Integer, TaskSummary> taskSummaryMap = new TreeMap<Integer, TaskSummary>();
        Map<Integer, List<TaskError>> taskErrors = Cluster.get_all_task_errors(stormClusterState, topologyId);
        for (Integer taskId : taskInfoMap.keySet()) {
            TaskSummary taskSummary = new TaskSummary();
            taskSummaryMap.put(taskId, taskSummary);
            taskSummary.set_taskId(taskId);
            if (taskHbMap == null) {
                taskSummary.set_status("Starting");
                taskSummary.set_uptime(0);
            } else {
                TaskHeartbeat hb = taskHbMap.get(taskId);
                if (hb == null) {
                    taskSummary.set_status("Starting");
                    taskSummary.set_uptime(0);
                } else {
                    boolean isInactive = NimbusUtils.isTaskDead(data, topologyId, taskId);
                    if (isInactive)
                        taskSummary.set_status("INACTIVE");
                    else
                        taskSummary.set_status("ACTIVE");
                    taskSummary.set_uptime(hb.get_uptime());
                }
            }
            // no topology-level error flag, so skip the per-task error lookup
            if (StringUtils.isBlank(errorString)) {
                continue;
            }
            List<TaskError> taskErrorList = taskErrors.get(taskId);
            if (taskErrorList != null && taskErrorList.size() != 0) {
                for (TaskError taskError : taskErrorList) {
                    ErrorInfo errorInfo = new ErrorInfo(taskError.getError(), taskError.getTimSecs(), taskError.getLevel(), taskError.getCode());
                    taskSummary.add_to_errors(errorInfo);
                    String component = taskToComponent.get(taskId);
                    componentSummaryMap.get(component).add_to_errors(errorInfo);
                }
            }
        }
        // fill in host/port of each task from the worker it is assigned to
        for (ResourceWorkerSlot workerSlot : assignment.getWorkers()) {
            String hostname = workerSlot.getHostname();
            int port = workerSlot.getPort();
            for (Integer taskId : workerSlot.getTasks()) {
                TaskSummary taskSummary = taskSummaryMap.get(taskId);
                if (taskSummary == null) {
                    // The assignment lists a task that has no TaskInfo entry;
                    // skip it instead of failing the whole request with an NPE.
                    LOG.warn("No task info of task " + taskId + " in topology " + topologyId);
                    continue;
                }
                taskSummary.set_host(hostname);
                taskSummary.set_port(port);
            }
        }
        TopologyInfo topologyInfo = new TopologyInfo();
        topologyInfo.set_topology(topologySummary);
        topologyInfo.set_components(JStormUtils.mk_list(componentSummaryMap.values()));
        topologyInfo.set_tasks(JStormUtils.mk_list(taskSummaryMap.values()));
        // return topology, component and worker metrics only; task, stream
        // and netty metrics are left as empty MetricInfo objects
        List<MetricInfo> tpMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.TOPOLOGY);
        List<MetricInfo> compMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.COMPONENT);
        List<MetricInfo> workerMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.WORKER);
        MetricInfo taskMetric = MetricUtils.mkMetricInfo();
        MetricInfo streamMetric = MetricUtils.mkMetricInfo();
        MetricInfo nettyMetric = MetricUtils.mkMetricInfo();
        MetricInfo tpMetric, compMetric, workerMetric;
        if (tpMetricList == null || tpMetricList.size() == 0) {
            tpMetric = MetricUtils.mkMetricInfo();
        } else {
            // get the last min topology metric
            tpMetric = tpMetricList.get(tpMetricList.size() - 1);
        }
        // NOTE(review): component and worker metrics take the FIRST list
        // element while the topology metric takes the LAST — confirm intended.
        if (compMetricList == null || compMetricList.size() == 0) {
            compMetric = MetricUtils.mkMetricInfo();
        } else {
            compMetric = compMetricList.get(0);
        }
        if (workerMetricList == null || workerMetricList.size() == 0) {
            workerMetric = MetricUtils.mkMetricInfo();
        } else {
            workerMetric = workerMetricList.get(0);
        }
        TopologyMetric topologyMetrics = new TopologyMetric(tpMetric, compMetric, workerMetric, taskMetric, streamMetric, nettyMetric);
        topologyInfo.set_metrics(topologyMetrics);
        return topologyInfo;
    } catch (TException e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        // Message now separates the id with a space (matching the log line)
        // and the root cause is kept attached.
        throw new TException("Failed to get topologyInfo " + topologyId, e);
    } finally {
        long end = System.nanoTime();
        SimpleJStormMetric.updateNimbusHistogram("getTopologyInfo", (end - start) / TimeUtils.NS_PER_US);
    }
}
Aggregations