Use of com.alibaba.jstorm.schedule.Assignment in project jstorm by alibaba.
Class TopologyAssign, method getFreeSlots.
/**
 * Computes the free slots of every supervisor by removing, from each supervisor's
 * available worker ports, the ports already taken by workers of existing assignments.
 *
 * @param supervisorInfos   supervisors keyed by node id; their available worker ports are updated in place
 * @param stormClusterState cluster state from which all current assignments are loaded
 * @throws Exception propagated from loading the assignments
 */
public static void getFreeSlots(Map<String, SupervisorInfo> supervisorInfos, StormClusterState stormClusterState) throws Exception {
    Map<String, Assignment> allAssignments = Cluster.get_all_assignment(stormClusterState, null);
    for (Assignment current : allAssignments.values()) {
        for (ResourceWorkerSlot slot : current.getWorkers()) {
            SupervisorInfo owner = supervisorInfos.get(slot.getNodeId());
            // No entry means the supervisor is dead, so there is no port list to update.
            if (owner != null) {
                owner.getAvailableWorkerPorts().remove(slot.getPort());
            }
        }
    }
}
Use of com.alibaba.jstorm.schedule.Assignment in project jstorm by alibaba.
Class TopologyAssign, method prepareTopologyAssign.
/**
 * Builds the {@link TopologyAssignContext} used to assign the topology named by the event.
 *
 * <p>The context is filled in the following order:
 * <ol>
 *   <li>topology id and topology master task id (read from the task heartbeats);</li>
 *   <li>the raw topology and the merged configuration (topology entries override nimbus entries);</li>
 *   <li>the supervisors, with their available worker ports reset to all worker ports;</li>
 *   <li>the task-to-component mapping and the full task id set;</li>
 *   <li>dead and unstopped task ids, computed only when an assignment already exists;</li>
 *   <li>the assign type (new, rebalance or monitor), old assignment and unstopped workers.</li>
 * </ol>
 *
 * @param event the assign event carrying the topology id, name and scratch/reassign flags
 * @return the populated assign context
 * @throws FailedAssignTopologyException if no supervisor is left after the heartbeat check
 * @throws IOException                   if no task ids can be found for the topology
 * @throws Exception                     propagated from the cluster-state and blob-store lookups
 */
protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event) throws Exception {
    TopologyAssignContext ret = new TopologyAssignContext();
    String topologyId = event.getTopologyId();
    ret.setTopologyId(topologyId);

    // NOTE(review): assumes a heartbeat entry exists for this topology; a missing entry would NPE here — confirm.
    int topoMasterId = nimbusData.getTasksHeartbeat().get(topologyId).get_topologyMasterId();
    ret.setTopologyMasterTaskId(topoMasterId);
    LOG.info("prepareTopologyAssign, topoMasterId={}", topoMasterId);

    Map<Object, Object> nimbusConf = nimbusData.getConf();
    Map<Object, Object> topologyConf = StormConfig.read_nimbus_topology_conf(topologyId, nimbusData.getBlobStore());
    StormTopology rawTopology = StormConfig.read_nimbus_topology_code(topologyId, nimbusData.getBlobStore());
    ret.setRawTopology(rawTopology);

    // Topology settings are applied last so they override the nimbus defaults.
    Map stormConf = new HashMap();
    stormConf.putAll(nimbusConf);
    stormConf.putAll(topologyConf);
    ret.setStormConf(stormConf);

    StormClusterState stormClusterState = nimbusData.getStormClusterState();
    // get all running supervisor, don't need callback to watch supervisor
    Map<String, SupervisorInfo> supInfos = Cluster.get_all_SupervisorInfo(stormClusterState, null);
    // init all AvailableWorkerPorts
    for (Entry<String, SupervisorInfo> supInfo : supInfos.entrySet()) {
        SupervisorInfo supervisor = supInfo.getValue();
        if (supervisor != null) {
            supervisor.setAvailableWorkerPorts(supervisor.getWorkerPorts());
        }
    }
    // Presumably prunes supervisors without a recent heartbeat from supInfos — verify against the helper.
    getAliveSupervsByHb(supInfos, nimbusConf);
    if (supInfos.isEmpty()) {
        throw new FailedAssignTopologyException("Failed to make assignment " + topologyId + ", due to no alive supervisor");
    }

    Map<Integer, String> taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, null);
    // Validate the map itself before dereferencing it: keySet() never returns null,
    // so a null check on the key set could never catch a missing task mapping.
    if (taskToComponent == null || taskToComponent.isEmpty()) {
        String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId;
        LOG.warn(errMsg);
        throw new IOException(errMsg);
    }
    ret.setTaskToComponent(taskToComponent);
    // get taskids /ZK/tasks/topologyId
    Set<Integer> allTaskIds = taskToComponent.keySet();
    ret.setAllTaskIds(allTaskIds);

    Set<Integer> aliveTasks = new HashSet<Integer>();
    // unstoppedTasks are tasks which are alive on no supervisor's(dead)
    // machine
    Set<Integer> unstoppedTasks = new HashSet<Integer>();
    Set<Integer> deadTasks = new HashSet<Integer>();

    Assignment existingAssignment = stormClusterState.assignment_info(topologyId, null);
    if (existingAssignment != null) {
        aliveTasks = getAliveTasks(topologyId, allTaskIds);
        /*
         * Check if the topology master task is alive first since all task
         * heartbeat info is reported by topology master.
         * If master is dead, do reassignment for topology master first.
         */
        if (!aliveTasks.contains(topoMasterId)) {
            // NOTE(review): assumes the master task is part of the existing assignment;
            // a null worker would NPE on the next line — confirm.
            ResourceWorkerSlot worker = existingAssignment.getWorkerByTaskId(topoMasterId);
            // Only the tasks on the master's worker are marked dead; every task outside
            // that worker is treated as alive.
            deadTasks.addAll(worker.getTasks());
            Set<Integer> tempSet = new HashSet<Integer>(allTaskIds);
            tempSet.removeAll(deadTasks);
            aliveTasks.addAll(tempSet);
            aliveTasks.removeAll(deadTasks);
        } else {
            // Master is alive: every task that is not reported alive is dead.
            deadTasks.addAll(allTaskIds);
            deadTasks.removeAll(aliveTasks);
        }
        unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment);
    }
    ret.setDeadTaskIds(deadTasks);
    ret.setUnstoppedTaskIds(unstoppedTasks);

    // Step 2: get all slots resource, free slots/ alive slots/ unstopped
    // slots
    getFreeSlots(supInfos, stormClusterState);
    ret.setCluster(supInfos);

    if (existingAssignment == null) {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW);
        // Best effort: a failure to read the backup assignment must not fail the new assignment.
        try {
            AssignmentBak lastAssignment = stormClusterState.assignment_bak(event.getTopologyName());
            if (lastAssignment != null) {
                ret.setOldAssignment(lastAssignment.getAssignment());
            }
        } catch (Exception e) {
            LOG.warn("Fail to get old assignment", e);
        }
    } else {
        ret.setOldAssignment(existingAssignment);
        if (event.isScratch()) {
            ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE);
            ret.setIsReassign(event.isReassign());
            Set<ResourceWorkerSlot> unstoppedWorkers = getUnstoppedWorkers(unstoppedTasks, existingAssignment);
            ret.setUnstoppedWorkers(unstoppedWorkers);
        } else {
            ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR);
            Set<ResourceWorkerSlot> unstoppedWorkers = getUnstoppedWorkers(aliveTasks, existingAssignment);
            ret.setUnstoppedWorkers(unstoppedWorkers);
        }
    }
    return ret;
}
Use of com.alibaba.jstorm.schedule.Assignment in project jstorm by alibaba.
Class TopologyAssign, method mkAssignment.
/**
 * Core nimbus routine: computes the worker slots for a topology and, when any
 * were produced, stores the resulting assignment in the cluster state.
 *
 * <p>In local mode the slots come from {@code mkLocalAssignment}; otherwise the
 * default scheduler assigns the tasks.
 *
 * @param event the assign event identifying the topology
 * @return the stored assignment, or {@code null} when no worker slots were produced
 * @throws Exception propagated from context preparation, scheduling or the cluster-state update
 */
public Assignment mkAssignment(TopologyAssignEvent event) throws Exception {
    String topologyId = event.getTopologyId();
    LOG.info("Determining assignment for " + topologyId);

    TopologyAssignContext context = prepareTopologyAssign(event);

    Set<ResourceWorkerSlot> workerSlots;
    if (StormConfig.local_mode(nimbusData.getConf())) {
        workerSlots = mkLocalAssignment(context);
    } else {
        IToplogyScheduler defaultScheduler = schedulers.get(DEFAULT_SCHEDULER_NAME);
        workerSlots = defaultScheduler.assignTasks(context);
    }

    // Nothing was scheduled: leave the cluster state untouched.
    if (workerSlots == null || workerSlots.isEmpty()) {
        return null;
    }

    Map<String, String> nodeHost = getTopologyNodeHost(context.getCluster(), context.getOldAssignment(), workerSlots);
    Map<Integer, Integer> startTimes = getTaskStartTimes(context, nimbusData, topologyId, context.getOldAssignment(), workerSlots);
    String codeDir = (String) nimbusData.getConf().get(Config.STORM_LOCAL_DIR);

    Assignment result = new Assignment(codeDir, workerSlots, nodeHost, startTimes);
    // Scale events are tagged so the assignment carries the ScaleTopology type.
    if (event.isScaleTopology()) {
        result.setAssignmentType(Assignment.AssignmentType.ScaleTopology);
    }

    nimbusData.getStormClusterState().set_assignment(topologyId, result);

    // Refresh the start time recorded in the task heartbeats.
    NimbusUtils.updateTaskHbStartTime(nimbusData, result, topologyId);

    // TODO: update metrics information in ZK on rebalance or reassignment
    // (ASSIGN_TYPE_REBALANCE / ASSIGN_TYPE_MONITOR) via NimbusUtils.updateMetricsInfo;
    // currently the metrics monitor status is only updated when a topology is created.
    NimbusUtils.updateTopologyTaskTimeout(nimbusData, topologyId);

    LOG.info("Successfully make assignment for topology id " + topologyId + ": " + result);
    return result;
}
Use of com.alibaba.jstorm.schedule.Assignment in project jstorm by alibaba.
Class RefreshEvent, method doRefreshTopologies.
/**
 * Refreshes the metric contexts of all topologies and syncs metric meta from the local cache.
 *
 * <p>The method works in three phases:
 * <ol>
 *   <li>registers a metric context for every system topology that has none yet;</li>
 *   <li>for every assignment in the cluster state, creates a missing metric context or
 *       updates the task number and worker set of the existing one;</li>
 *   <li>pushes a remove event for every non-system topology that has a metric context
 *       but no assignment any more.</li>
 * </ol>
 *
 * <p>If the assignments cannot be processed, the failure is logged with its cause and the
 * removal phase is skipped.
 */
@SuppressWarnings("unchecked")
private void doRefreshTopologies() {
    for (String topology : JStormMetrics.SYS_TOPOLOGIES) {
        if (!context.getTopologyMetricContexts().containsKey(topology)) {
            LOG.info("adding {} to metric context.", topology);
            Map conf = new HashMap();
            if (topology.equals(JStormMetrics.CLUSTER_METRIC_KEY)) {
                //there's no need to consider sample rate when cluster metrics merge
                conf.put(ConfigExtension.TOPOLOGY_METRIC_SAMPLE_RATE, 1.0);
            }
            Set<ResourceWorkerSlot> workerSlot = Sets.newHashSet(new ResourceWorkerSlot());
            TopologyMetricContext metricContext = new TopologyMetricContext(topology, workerSlot, conf);
            // putIfAbsent keeps a context that was registered concurrently; the syncs below
            // therefore re-read the context that is actually stored.
            context.getTopologyMetricContexts().putIfAbsent(topology, metricContext);
            syncMetaFromCache(topology, context.getTopologyMetricContexts().get(topology));
            syncMetaFromRemote(topology, context.getTopologyMetricContexts().get(topology));
        }
    }

    Map<String, Assignment> assignMap;
    try {
        assignMap = Cluster.get_all_assignment(context.getStormClusterState(), null);
        for (Entry<String, Assignment> entry : assignMap.entrySet()) {
            String topologyId = entry.getKey();
            Assignment assignment = entry.getValue();
            TopologyMetricContext metricContext = context.getTopologyMetricContexts().get(topologyId);
            if (metricContext == null) {
                metricContext = new TopologyMetricContext(assignment.getWorkers());
                metricContext.setTaskNum(NimbusUtils.getTopologyTaskNum(assignment));
                syncMetaFromCache(topologyId, metricContext);
                LOG.info("adding {} to metric context.", topologyId);
                context.getTopologyMetricContexts().put(topologyId, metricContext);
            } else {
                boolean modify = false;
                if (metricContext.getTaskNum() != NimbusUtils.getTopologyTaskNum(assignment)) {
                    modify = true;
                    metricContext.setTaskNum(NimbusUtils.getTopologyTaskNum(assignment));
                }
                if (!assignment.getWorkers().equals(metricContext.getWorkerSet())) {
                    modify = true;
                    metricContext.setWorkerSet(assignment.getWorkers());
                }
                // we may need to sync meta when task num/workers change:
                // the flag is cleared whenever either of them was modified
                metricContext.setSyncMeta(!modify);
            }
        }
    } catch (Exception e1) {
        // Pass the exception to the logger so the cause and stack trace are not lost.
        LOG.warn("Failed to get assignments", e1);
        return;
    }

    // Collect first, then push events, so the key set is not touched while iterating it.
    List<String> removing = new ArrayList<>();
    for (String topologyId : context.getTopologyMetricContexts().keySet()) {
        if (!JStormMetrics.SYS_TOPOLOGY_SET.contains(topologyId) && !assignMap.containsKey(topologyId)) {
            removing.add(topologyId);
        }
    }
    for (String topologyId : removing) {
        LOG.info("removing topology:{}", topologyId);
        RemoveTopologyEvent.pushEvent(topologyId);
    }
}
Use of com.alibaba.jstorm.schedule.Assignment in project jstorm by alibaba.
Class SyncSupervisorEvent, method run.
/**
 * Runs one supervisor synchronization pass.
 *
 * <p>The pass loads the locally cached assignments and versions, refreshes them (or clears
 * them when the machine check status is panic or error), derives this supervisor's
 * per-port assignments, persists them, downloads or removes topology code as needed,
 * runs the process sync and finally stores the refreshed cache in the local state.
 * Any exception raised during the pass is logged and not rethrown. When the machine
 * check status is panic, the supervisor process is halted at the end.
 */
@Override
public void run() {
    LOG.debug("Synchronizing supervisor, interval seconds:" + TimeUtils.time_delta(lastTime));
    lastTime = TimeUtils.current_time_secs();

    // Copy the check status once so that the whole pass works on the same value.
    MachineCheckStatus checkStatus = new MachineCheckStatus();
    checkStatus.SetType(heartbeat.getCheckStatus().getType());

    try {
        RunnableCallback zkCallback = new EventManagerZkPusher(this, syncSupEventManager);

        // Step 1: load the cached assignment versions and assignments, then refresh them.
        Map<String, Integer> cachedVersions = (Map<String, Integer>) localState.get(Common.LS_LOCAL_ZK_ASSIGNMENT_VERSION);
        if (cachedVersions == null) {
            cachedVersions = new HashMap<String, Integer>();
        }
        Map<String, Assignment> cachedAssignments = (Map<String, Assignment>) localState.get(Common.LS_LOCAl_ZK_ASSIGNMENTS);
        if (cachedAssignments == null) {
            cachedAssignments = new HashMap<String, Assignment>();
        }
        LOG.debug("get local assignments " + cachedAssignments);
        LOG.debug("get local assignments version " + cachedVersions);

        boolean machineUnusable = checkStatus.getType().equals(MachineCheckStatus.StatusType.panic)
                || checkStatus.getType().equals(MachineCheckStatus.StatusType.error);
        if (!machineUnusable) {
            getAllAssignments(cachedVersions, cachedAssignments, zkCallback);
        } else {
            // On panic or error status, drop every assignment so that all workers get killed.
            cachedVersions.clear();
            cachedAssignments.clear();
            LOG.warn("Supervisor Machine Check Status :" + checkStatus.getType() + ", so kill all workers.");
        }
        LOG.debug("Get all assignments " + cachedAssignments);

        // Step 2: list the topology ids found under STORM-LOCAL-DIR/supervisor/stormdist/.
        List<String> downloadedTopologyIds = StormConfig.get_supervisor_toplogy_list(conf);
        LOG.debug("Downloaded storm ids: " + downloadedTopologyIds);

        // Step 3: derive <port, LocalAssignment> for this supervisor from the assignments.
        Map<Integer, LocalAssignment> portAssignments = getLocalAssign(stormClusterState, supervisorId, cachedAssignments);

        // Step 4: remember the previous local assignment, then write the new one to LocalState.
        Map<Integer, LocalAssignment> previousAssignments;
        try {
            LOG.debug("Writing local assignment " + portAssignments);
            previousAssignments = (Map<Integer, LocalAssignment>) localState.get(Common.LS_LOCAL_ASSIGNMENTS);
            if (previousAssignments == null) {
                previousAssignments = new HashMap<Integer, LocalAssignment>();
            }
            localState.put(Common.LS_LOCAL_ASSIGNMENTS, portAssignments);
        } catch (IOException e) {
            LOG.error("put LS_LOCAL_ASSIGNMENTS " + portAssignments + " of localState failed");
            throw e;
        }

        // Step 5: work out which topologies have to be reloaded.
        Set<String> topologiesToUpdate = getUpdateTopologys(previousAssignments, portAssignments, cachedAssignments);
        Set<String> topologiesToRedownload = getNeedReDownloadTopologys(previousAssignments);
        if (topologiesToRedownload != null) {
            topologiesToUpdate.addAll(topologiesToRedownload);
        }

        // Step 6: download topology code.
        Map<String, String> codeLocations = getTopologyCodeLocations(cachedAssignments, supervisorId);
        // Collects the topologies whose binary could not be downloaded from nimbus.
        Set<String> failedDownloads = new HashSet<String>();
        downloadTopology(codeLocations, downloadedTopologyIds, topologiesToUpdate, cachedAssignments, failedDownloads);

        // Step 7: remove downloaded topologies that are no longer needed.
        removeUselessTopology(codeLocations, downloadedTopologyIds);

        // Step 8: run the process sync directly instead of queueing it
        // (previously: processEventManager.add(syncProcesses)).
        syncProcesses.run(portAssignments, failedDownloads);

        // Everything went fine, so allow the supervisor heartbeat to be updated.
        heartbeat.updateHbTrigger(true);

        try {
            // Persist the refreshed versions and assignments in the local state.
            localState.put(Common.LS_LOCAL_ZK_ASSIGNMENT_VERSION, cachedVersions);
            localState.put(Common.LS_LOCAl_ZK_ASSIGNMENTS, cachedAssignments);
        } catch (IOException e) {
            LOG.error("put LS_LOCAL_ZK_ASSIGNMENT_VERSION&&LS_LOCAl_ZK_ASSIGNMENTS failed");
            throw e;
        }
    } catch (Exception e) {
        // Deliberately not rethrown: a failed pass is only logged.
        LOG.error("Failed to Sync Supervisor", e);
    }

    if (checkStatus.getType().equals(MachineCheckStatus.StatusType.panic)) {
        // Panic status shuts the supervisor itself down.
        JStormUtils.halt_process(0, "Supervisor Machine Check Status : Panic , !!!!shutdown!!!!");
    }
}
Aggregations