Search in sources :

Example 6 with StormClusterState

use of com.alibaba.jstorm.cluster.StormClusterState in project jstorm by alibaba.

the class ServiceHandler method getTopologyInfo.

/**
 * Collects the running status of one topology into a {@link TopologyInfo}:
 * a topology summary, one summary per component, one summary per task
 * (status, uptime, host/port, errors) and the cached metrics.
 *
 * @param topologyId id of the topology to describe
 * @return the assembled TopologyInfo
 * @throws NotAliveException if no StormBase or no Assignment is stored for the topology
 *                           (rethrown unchanged as long as it is a TException subtype,
 *                           which is the case for Thrift-generated exceptions — confirm)
 * @throws TException        on any other failure; the original exception is attached as cause
 */
@Override
public TopologyInfo getTopologyInfo(String topologyId) throws NotAliveException, TException {
    long start = System.nanoTime();
    StormClusterState stormClusterState = data.getStormClusterState();
    try {
        // get topology's StormBase
        StormBase base = stormClusterState.storm_base(topologyId, null);
        if (base == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }
        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }

        // heartbeats may be missing entirely, e.g. before any task has reported
        TopologyTaskHbInfo topologyTaskHbInfo = data.getTasksHeartbeat().get(topologyId);
        Map<Integer, TaskHeartbeat> taskHbMap = null;
        if (topologyTaskHbInfo != null) {
            taskHbMap = topologyTaskHbInfo.get_taskHbs();
        }

        Map<Integer, TaskInfo> taskInfoMap = Cluster.get_all_taskInfo(stormClusterState, topologyId);
        Map<Integer, String> taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, taskInfoMap);
        Map<Integer, String> taskToType = Cluster.get_all_task_type(stormClusterState, topologyId, taskInfoMap);

        // "Y" flags that the topology has errors; blank means none
        String errorString;
        if (Cluster.is_topology_exist_error(stormClusterState, topologyId)) {
            errorString = "Y";
        } else {
            errorString = "";
        }

        TopologySummary topologySummary = new TopologySummary();
        topologySummary.set_id(topologyId);
        topologySummary.set_name(base.getStormName());
        topologySummary.set_uptimeSecs(TimeUtils.time_delta(base.getLanchTimeSecs()));
        topologySummary.set_status(base.getStatusString());
        topologySummary.set_numTasks(NimbusUtils.getTopologyTaskNum(assignment));
        topologySummary.set_numWorkers(assignment.getWorkers().size());
        topologySummary.set_errorInfo(errorString);

        // one ComponentSummary per component that owns at least one task
        Map<String, ComponentSummary> componentSummaryMap = new HashMap<String, ComponentSummary>();
        HashMap<String, List<Integer>> componentToTasks = JStormUtils.reverse_map(taskToComponent);
        for (Entry<String, List<Integer>> entry : componentToTasks.entrySet()) {
            String name = entry.getKey();
            List<Integer> taskIds = entry.getValue();
            if (taskIds == null || taskIds.isEmpty()) {
                LOG.warn("No task of component " + name);
                continue;
            }
            ComponentSummary componentSummary = new ComponentSummary();
            componentSummaryMap.put(name, componentSummary);
            componentSummary.set_name(name);
            // the component type is read from its first task
            componentSummary.set_type(taskToType.get(taskIds.get(0)));
            componentSummary.set_parallel(taskIds.size());
            componentSummary.set_taskIds(taskIds);
        }

        // one TaskSummary per known task, ordered by task id
        Map<Integer, TaskSummary> taskSummaryMap = new TreeMap<Integer, TaskSummary>();
        Map<Integer, List<TaskError>> taskErrors = Cluster.get_all_task_errors(stormClusterState, topologyId);
        for (Integer taskId : taskInfoMap.keySet()) {
            TaskSummary taskSummary = new TaskSummary();
            taskSummaryMap.put(taskId, taskSummary);
            taskSummary.set_taskId(taskId);

            TaskHeartbeat hb = (taskHbMap == null) ? null : taskHbMap.get(taskId);
            if (hb == null) {
                // no heartbeat recorded for this task yet
                taskSummary.set_status("Starting");
                taskSummary.set_uptime(0);
            } else {
                boolean isInactive = NimbusUtils.isTaskDead(data, topologyId, taskId);
                taskSummary.set_status(isInactive ? "INACTIVE" : "ACTIVE");
                taskSummary.set_uptime(hb.get_uptime());
            }

            // errors are attached only when the topology is flagged as having errors
            if (StringUtils.isBlank(errorString)) {
                continue;
            }
            List<TaskError> taskErrorList = taskErrors.get(taskId);
            if (taskErrorList == null || taskErrorList.isEmpty()) {
                continue;
            }
            // The owning component may have no summary (unknown component, or one skipped
            // above); the task-level error is still recorded in that case.
            ComponentSummary owner = componentSummaryMap.get(taskToComponent.get(taskId));
            for (TaskError taskError : taskErrorList) {
                ErrorInfo errorInfo = new ErrorInfo(taskError.getError(), taskError.getTimSecs(), taskError.getLevel(), taskError.getCode());
                taskSummary.add_to_errors(errorInfo);
                if (owner != null) {
                    owner.add_to_errors(errorInfo);
                }
            }
        }

        // fill in host/port of every task from the worker it is assigned to
        for (ResourceWorkerSlot workerSlot : assignment.getWorkers()) {
            String hostname = workerSlot.getHostname();
            int port = workerSlot.getPort();
            for (Integer taskId : workerSlot.getTasks()) {
                TaskSummary taskSummary = taskSummaryMap.get(taskId);
                if (taskSummary == null) {
                    // assignment references a task that has no task info; skip instead of failing the whole call
                    LOG.warn("No task info of task " + taskId + " assigned to " + hostname + ":" + port + " in topology " + topologyId);
                    continue;
                }
                taskSummary.set_host(hostname);
                taskSummary.set_port(port);
            }
        }

        TopologyInfo topologyInfo = new TopologyInfo();
        topologyInfo.set_topology(topologySummary);
        topologyInfo.set_components(JStormUtils.mk_list(componentSummaryMap.values()));
        topologyInfo.set_tasks(JStormUtils.mk_list(taskSummaryMap.values()));

        // Only topology, component and worker metrics come from the cache;
        // task, stream and netty metrics are returned as freshly made MetricInfo objects.
        List<MetricInfo> tpMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.TOPOLOGY);
        List<MetricInfo> compMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.COMPONENT);
        List<MetricInfo> workerMetricList = data.getMetricCache().getMetricData(topologyId, MetaType.WORKER);
        MetricInfo taskMetric = MetricUtils.mkMetricInfo();
        MetricInfo streamMetric = MetricUtils.mkMetricInfo();
        MetricInfo nettyMetric = MetricUtils.mkMetricInfo();
        MetricInfo tpMetric, compMetric, workerMetric;
        if (tpMetricList == null || tpMetricList.isEmpty()) {
            tpMetric = MetricUtils.mkMetricInfo();
        } else {
            // get the last min topology metric
            tpMetric = tpMetricList.get(tpMetricList.size() - 1);
        }
        // NOTE(review): component and worker metrics take the FIRST list element while the
        // topology metric takes the LAST — confirm this asymmetry is intended.
        if (compMetricList == null || compMetricList.isEmpty()) {
            compMetric = MetricUtils.mkMetricInfo();
        } else {
            compMetric = compMetricList.get(0);
        }
        if (workerMetricList == null || workerMetricList.isEmpty()) {
            workerMetric = MetricUtils.mkMetricInfo();
        } else {
            workerMetric = workerMetricList.get(0);
        }
        TopologyMetric topologyMetrics = new TopologyMetric(tpMetric, compMetric, workerMetric, taskMetric, streamMetric, nettyMetric);
        topologyInfo.set_metrics(topologyMetrics);
        return topologyInfo;
    } catch (TException e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        // keep the root cause so callers and upstream logging can see what actually failed
        throw new TException("Failed to get topologyInfo " + topologyId, e);
    } finally {
        // record the call duration (nanoseconds scaled by NS_PER_US) in the nimbus histogram
        long end = System.nanoTime();
        SimpleJStormMetric.updateNimbusHistogram("getTopologyInfo", (end - start) / TimeUtils.NS_PER_US);
    }
}
Also used : TException(org.apache.thrift.TException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) StormBase(com.alibaba.jstorm.cluster.StormBase) ComponentSummary(backtype.storm.generated.ComponentSummary) Assignment(com.alibaba.jstorm.schedule.Assignment) TaskInfo(com.alibaba.jstorm.task.TaskInfo) NotAliveException(backtype.storm.generated.NotAliveException) ArrayList(java.util.ArrayList) List(java.util.List) TopologySummary(backtype.storm.generated.TopologySummary) ResourceWorkerSlot(com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot) TopologyTaskHbInfo(backtype.storm.generated.TopologyTaskHbInfo) ErrorInfo(backtype.storm.generated.ErrorInfo) TaskError(com.alibaba.jstorm.task.error.TaskError) TopologyMetric(backtype.storm.generated.TopologyMetric) TreeMap(java.util.TreeMap) InvalidParameterException(java.security.InvalidParameterException) FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) KeyNotFoundException(backtype.storm.generated.KeyNotFoundException) TException(org.apache.thrift.TException) IOException(java.io.IOException) AlreadyAliveException(backtype.storm.generated.AlreadyAliveException) TopologyAssignException(backtype.storm.generated.TopologyAssignException) FileNotFoundException(java.io.FileNotFoundException) NotAliveException(backtype.storm.generated.NotAliveException) InvalidTopologyException(backtype.storm.generated.InvalidTopologyException) KeyAlreadyExistsException(backtype.storm.generated.KeyAlreadyExistsException) TaskHeartbeat(backtype.storm.generated.TaskHeartbeat) StormClusterState(com.alibaba.jstorm.cluster.StormClusterState) MetricInfo(backtype.storm.generated.MetricInfo) TaskSummary(backtype.storm.generated.TaskSummary) TopologyInfo(backtype.storm.generated.TopologyInfo)

Example 7 with StormClusterState

use of com.alibaba.jstorm.cluster.StormClusterState in project jstorm by alibaba.

the class ServiceHandler method getSupervisorWorkersByHostOrId.

/**
 * Gets the workers running on one supervisor, looked up by supervisor id or by host.
 * When both are given the id takes priority and the host is replaced by the
 * host name registered for that supervisor.
 *
 * @param host host name (or IP) of the supervisor; used only when {@code id} is blank
 * @param id   supervisor id
 * @return the supervisor summary, its workers ordered by port, and worker metrics keyed by worker slot name
 * @throws TException if neither host nor id is given, if no matching supervisor is found,
 *                    or if any other failure occurs (wrapped as the cause)
 */
private SupervisorWorkers getSupervisorWorkersByHostOrId(String host, String id) throws TException {
    long start = System.nanoTime();
    if (StringUtils.isBlank(id) && StringUtils.isBlank(host)) {
        throw new TException("Must specify host or supervisor id!");
    }
    try {
        StormClusterState stormClusterState = data.getStormClusterState();
        // all supervisors
        Map<String, SupervisorInfo> supervisorInfos = Cluster.get_all_SupervisorInfo(stormClusterState, null);
        SupervisorInfo supervisorInfo = null;
        String ip;
        if (!StringUtils.isBlank(id)) {
            supervisorInfo = supervisorInfos.get(id);
            // check before dereferencing, otherwise an unknown id surfaces as a wrapped NullPointerException
            if (supervisorInfo == null) {
                throw new TException("unknown supervisor id:" + id);
            }
            host = supervisorInfo.getHostName();
            ip = NetWorkUtils.host2Ip(host);
        } else {
            ip = NetWorkUtils.host2Ip(host);
            // first supervisor registered under either the given host or its resolved IP wins
            for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) {
                SupervisorInfo info = entry.getValue();
                if (info.getHostName().equals(host) || info.getHostName().equals(ip)) {
                    id = entry.getKey();
                    supervisorInfo = info;
                    break;
                }
            }
            if (supervisorInfo == null) {
                throw new TException("unknown supervisor host:" + host);
            }
        }

        Map<String, Assignment> assignments = Cluster.get_all_assignment(stormClusterState, null);
        Map<Integer, WorkerSummary> portWorkerSummarys = new TreeMap<Integer, WorkerSummary>();
        int usedSlotNumber = 0;
        // per-topology cache so the task->component mapping is fetched at most once per topology
        Map<String, Map<Integer, String>> topologyTaskToComponent = new HashMap<String, Map<Integer, String>>();
        Map<String, MetricInfo> metricInfoMap = new HashMap<String, MetricInfo>();
        for (Entry<String, Assignment> entry : assignments.entrySet()) {
            String topologyId = entry.getKey();
            Assignment assignment = entry.getValue();
            Set<ResourceWorkerSlot> workers = assignment.getWorkers();
            for (ResourceWorkerSlot worker : workers) {
                // only workers placed on the requested supervisor are of interest
                if (!id.equals(worker.getNodeId())) {
                    continue;
                }
                usedSlotNumber++;
                Integer port = worker.getPort();
                WorkerSummary workerSummary = portWorkerSummarys.get(port);
                if (workerSummary == null) {
                    workerSummary = new WorkerSummary();
                    workerSummary.set_port(port);
                    workerSummary.set_topology(topologyId);
                    workerSummary.set_tasks(new ArrayList<TaskComponent>());
                    portWorkerSummarys.put(port, workerSummary);
                }
                Map<Integer, String> taskToComponent = topologyTaskToComponent.get(topologyId);
                if (taskToComponent == null) {
                    taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, null);
                    topologyTaskToComponent.put(topologyId, taskToComponent);
                }
                // worker uptime is measured from the earliest start time among its tasks
                int earliest = TimeUtils.current_time_secs();
                for (Integer taskId : worker.getTasks()) {
                    TaskComponent taskComponent = new TaskComponent();
                    taskComponent.set_component(taskToComponent.get(taskId));
                    taskComponent.set_taskId(taskId);
                    Integer startTime = assignment.getTaskStartTimeSecs().get(taskId);
                    if (startTime != null && startTime < earliest) {
                        earliest = startTime;
                    }
                    workerSummary.add_to_tasks(taskComponent);
                }
                workerSummary.set_uptime(TimeUtils.time_delta(earliest));

                String workerSlotName = getWorkerSlotName(ip, port);
                List<MetricInfo> workerMetricInfoList = this.data.getMetricCache().getMetricData(topologyId, MetaType.WORKER);
                // the cache may have nothing for this topology; treat null like an empty list
                if (workerMetricInfoList != null && !workerMetricInfoList.isEmpty()) {
                    MetricInfo workerMetricInfo = workerMetricInfoList.get(0);
                    // remove metrics that don't belong to current worker
                    // NOTE(review): this removes entries in place from the MetricInfo returned by the
                    // cache and matches on the supervisor IP only, not on the worker slot name —
                    // confirm the cache hands out a copy and that per-IP filtering is intended.
                    for (Iterator<String> itr = workerMetricInfo.get_metrics().keySet().iterator(); itr.hasNext(); ) {
                        String metricName = itr.next();
                        if (!metricName.contains(ip)) {
                            itr.remove();
                        }
                    }
                    metricInfoMap.put(workerSlotName, workerMetricInfo);
                }
            }
        }

        List<WorkerSummary> workerList = new ArrayList<WorkerSummary>(portWorkerSummarys.values());
        Map<String, Integer> supervisorToUsedSlotNum = new HashMap<String, Integer>();
        supervisorToUsedSlotNum.put(id, usedSlotNumber);
        SupervisorSummary supervisorSummary = NimbusUtils.mkSupervisorSummary(supervisorInfo, id, supervisorToUsedSlotNum);
        return new SupervisorWorkers(supervisorSummary, workerList, metricInfoMap);
    } catch (TException e) {
        LOG.info("Failed to get SupervisorWorkers ", e);
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to get SupervisorWorkers ", e);
        throw new TException(e);
    } finally {
        // record the call duration (nanoseconds scaled by NS_PER_US) in the nimbus histogram
        long end = System.nanoTime();
        SimpleJStormMetric.updateNimbusHistogram("getSupervisorWorkers", (end - start) / TimeUtils.NS_PER_US);
    }
}
Also used : TException(org.apache.thrift.TException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SupervisorSummary(backtype.storm.generated.SupervisorSummary) SupervisorInfo(com.alibaba.jstorm.daemon.supervisor.SupervisorInfo) Assignment(com.alibaba.jstorm.schedule.Assignment) SupervisorWorkers(backtype.storm.generated.SupervisorWorkers) ResourceWorkerSlot(com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot) TaskComponent(backtype.storm.generated.TaskComponent) TreeMap(java.util.TreeMap) InvalidParameterException(java.security.InvalidParameterException) FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) KeyNotFoundException(backtype.storm.generated.KeyNotFoundException) TException(org.apache.thrift.TException) IOException(java.io.IOException) AlreadyAliveException(backtype.storm.generated.AlreadyAliveException) TopologyAssignException(backtype.storm.generated.TopologyAssignException) FileNotFoundException(java.io.FileNotFoundException) NotAliveException(backtype.storm.generated.NotAliveException) InvalidTopologyException(backtype.storm.generated.InvalidTopologyException) KeyAlreadyExistsException(backtype.storm.generated.KeyAlreadyExistsException) StormClusterState(com.alibaba.jstorm.cluster.StormClusterState) WorkerSummary(backtype.storm.generated.WorkerSummary) MetricInfo(backtype.storm.generated.MetricInfo) Map(java.util.Map) TreeMap(java.util.TreeMap) TimeCacheMap(com.alibaba.jstorm.utils.TimeCacheMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 8 with StormClusterState

use of com.alibaba.jstorm.cluster.StormClusterState in project jstorm by alibaba.

the class TopologyAssign method backupAssignment.

/**
 * Stores a backup of a topology's assignment through the cluster state.
 * <p>
 * The backup pairs the assignment with a component-to-task-ids map whose id
 * lists are sorted ascending. The backup is written under the topology name.
 * Any failure is logged at warn level and not propagated.
 * <p>
 * Open question carried over from the original author: does this backup need
 * to happen on every assignment?
 *
 * @param assignment the assignment to back up
 * @param event      the assign event supplying the topology id and name
 */
public void backupAssignment(Assignment assignment, TopologyAssignEvent event) {
    String topologyId = event.getTopologyId();
    String topologyName = event.getTopologyName();
    try {
        StormClusterState clusterState = nimbusData.getStormClusterState();
        // one little problem, get tasks twice when assign one topology
        Map<Integer, String> taskToComponent = Cluster.get_all_task_component(clusterState, topologyId, null);
        Map<String, List<Integer>> componentToTaskIds = JStormUtils.reverse_map(taskToComponent);
        // sort each component's task ids in place
        for (List<Integer> taskIds : componentToTaskIds.values()) {
            Collections.sort(taskIds);
        }
        zkBackup:
        {
            AssignmentBak backup = new AssignmentBak(componentToTaskIds, assignment);
            clusterState.backup_assignment(topologyName, backup);
        }
    } catch (Exception e) {
        LOG.warn("Failed to backup " + topologyId + " assignment " + assignment, e);
    }
}
Also used : StormClusterState(com.alibaba.jstorm.cluster.StormClusterState) AssignmentBak(com.alibaba.jstorm.schedule.AssignmentBak) ArrayList(java.util.ArrayList) List(java.util.List) FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) IOException(java.io.IOException)

Example 9 with StormClusterState

use of com.alibaba.jstorm.cluster.StormClusterState in project jstorm by alibaba.

the class ServiceHandler method updateTopology.

/**
 * Updates a running topology: optionally replaces its jar, merges the supplied
 * configuration into the stored topology configuration, writes the merged
 * configuration back to the blob store and then triggers the
 * {@code update_topology} status transition.
 *
 * @param name             name of the topology to update
 * @param uploadedLocation location of the uploaded jar, or {@code null} to leave the jar untouched
 * @param updateConf       JSON text of the configuration entries to merge into the topology configuration
 * @throws NotAliveException if no topology id is found for {@code name}
 * @throws TException        on any other failure; the original exception is attached as cause
 */
@Override
public void updateTopology(String name, String uploadedLocation, String updateConf) throws NotAliveException, InvalidTopologyException, TException {
    try {
        //firstly update jar and conf
        checkTopologyActive(data, name, true);
        StormClusterState stormClusterState = data.getStormClusterState();
        String topologyId = Cluster.get_topology_id(stormClusterState, name);
        if (topologyId == null) {
            throw new NotAliveException(name);
        }
        BlobStore blobStore = data.getBlobStore();
        NimbusInfo nimbusInfo = data.getNimbusHostPortInfo();
        if (uploadedLocation != null) {
            setupJar(uploadedLocation, topologyId, blobStore, stormClusterState, nimbusInfo, true);
        }

        // merge the requested changes over the stored topology configuration
        Map topoConf = StormConfig.read_nimbus_topology_conf(topologyId, blobStore);
        Map<Object, Object> config = (Map<Object, Object>) JStormUtils.from_json(updateConf);
        topoConf.putAll(config);
        String confKey = StormConfig.master_stormconf_key(topologyId);
        BlobStoreUtils.updateBlob(blobStore, confKey, Utils.serialize(topoConf));
        // for the local-filesystem blob store the new blob version is also registered in the cluster state
        if (blobStore instanceof LocalFsBlobStore) {
            stormClusterState.setup_blobstore(confKey, nimbusInfo, BlobStoreUtils.getVersionForKey(confKey, nimbusInfo, conf));
        }

        NimbusUtils.transitionName(data, name, true, StatusType.update_topology, config);
        LOG.info("update topology " + name + " successfully");
        notifyTopologyActionListener(name, "updateTopology");
    } catch (NotAliveException e) {
        String errMsg = "Error, no this topology " + name;
        LOG.error(errMsg, e);
        throw new NotAliveException(errMsg);
    } catch (Exception e) {
        String errMsg = "Failed to update topology " + name;
        LOG.error(errMsg, e);
        // keep the root cause instead of discarding it
        throw new TException(errMsg, e);
    }
}
Also used : TException(org.apache.thrift.TException) StormClusterState(com.alibaba.jstorm.cluster.StormClusterState) LocalFsBlobStore(com.alibaba.jstorm.blobstore.LocalFsBlobStore) NotAliveException(backtype.storm.generated.NotAliveException) Map(java.util.Map) TreeMap(java.util.TreeMap) TimeCacheMap(com.alibaba.jstorm.utils.TimeCacheMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) BlobStore(com.alibaba.jstorm.blobstore.BlobStore) LocalFsBlobStore(com.alibaba.jstorm.blobstore.LocalFsBlobStore) InvalidParameterException(java.security.InvalidParameterException) FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) KeyNotFoundException(backtype.storm.generated.KeyNotFoundException) TException(org.apache.thrift.TException) IOException(java.io.IOException) AlreadyAliveException(backtype.storm.generated.AlreadyAliveException) TopologyAssignException(backtype.storm.generated.TopologyAssignException) FileNotFoundException(java.io.FileNotFoundException) NotAliveException(backtype.storm.generated.NotAliveException) InvalidTopologyException(backtype.storm.generated.InvalidTopologyException) KeyAlreadyExistsException(backtype.storm.generated.KeyAlreadyExistsException) NimbusInfo(backtype.storm.nimbus.NimbusInfo)

Example 10 with StormClusterState

use of com.alibaba.jstorm.cluster.StormClusterState in project jstorm by alibaba.

the class FollowerRunnable method run.

/**
 * Follower loop, repeated while {@code state} is true. Each round sleeps for
 * {@code sleepTime} and then does exactly one of the following:
 * <ul>
 *   <li>no leader exists: try to become leader;</li>
 *   <li>this node is the recorded leader: on first notice, unregister its
 *       follower entries, mark itself leader and run the leader callback;</li>
 *   <li>another node is the recorded leader but this node still believes it is
 *       leader: halt the process;</li>
 *   <li>otherwise (plain follower): sync blobs when the blob store is a
 *       {@code LocalFsBlobStore}, then refresh the slave and detail records.</li>
 * </ul>
 * An interrupt during the round just starts the next round; any other
 * exception is logged while the loop is still meant to run.
 */
@Override
public void run() {
    LOG.info("Follower Thread starts!");
    while (state) {
        StormClusterState clusterState = data.getStormClusterState();
        try {
            Thread.sleep(sleepTime);
            if (!clusterState.leader_existed()) {
                this.tryToBeLeader(data.getConf());
            } else {
                String leaderHost = clusterState.get_leader_host();
                if (isLeader(leaderHost)) {
                    if (!data.isLeader()) {
                        clusterState.unregister_nimbus_host(hostPort);
                        clusterState.unregister_nimbus_detail(hostPort);
                        data.setLeader(true);
                        leaderCallback.execute();
                    }
                } else if (data.isLeader()) {
                    LOG.info("New ZK master is " + leaderHost);
                    JStormUtils.halt_process(1, "Lose ZK master node, halt process");
                    return;
                } else {
                    // here the nimbus is not leader
                    if (data.getBlobStore() instanceof LocalFsBlobStore) {
                        blobSync();
                    }
                    clusterState.update_nimbus_slave(hostPort, data.uptime());
                    update_nimbus_detail();
                }
            }
        } catch (InterruptedException ignored) {
            // deliberately ignored: the loop condition decides whether to go on
        } catch (Exception e) {
            if (state) {
                LOG.error("Unknow exception ", e);
            }
        }
    }
    LOG.info("Follower Thread has closed!");
}
Also used : StormClusterState(com.alibaba.jstorm.cluster.StormClusterState) LocalFsBlobStore(com.alibaba.jstorm.blobstore.LocalFsBlobStore)

Aggregations

StormClusterState (com.alibaba.jstorm.cluster.StormClusterState)32 IOException (java.io.IOException)12 NotAliveException (backtype.storm.generated.NotAliveException)10 FailedAssignTopologyException (com.alibaba.jstorm.utils.FailedAssignTopologyException)10 TException (org.apache.thrift.TException)10 InvalidParameterException (java.security.InvalidParameterException)9 HashMap (java.util.HashMap)9 AlreadyAliveException (backtype.storm.generated.AlreadyAliveException)8 InvalidTopologyException (backtype.storm.generated.InvalidTopologyException)8 KeyAlreadyExistsException (backtype.storm.generated.KeyAlreadyExistsException)8 KeyNotFoundException (backtype.storm.generated.KeyNotFoundException)8 TopologyAssignException (backtype.storm.generated.TopologyAssignException)8 FileNotFoundException (java.io.FileNotFoundException)8 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)8 LocalFsBlobStore (com.alibaba.jstorm.blobstore.LocalFsBlobStore)7 Assignment (com.alibaba.jstorm.schedule.Assignment)7 ResourceWorkerSlot (com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot)7 Map (java.util.Map)7 TreeMap (java.util.TreeMap)7 BlobStore (com.alibaba.jstorm.blobstore.BlobStore)5