Use of com.alibaba.jstorm.schedule.AssignmentBak in project jstorm by alibaba.
From class TopologyAssign, method backupAssignment:
/**
 * Backup topology assignment to ZK.
 *
 * TODO: Do we need to do the backup operation every time?
 */
public void backupAssignment(Assignment assignment, TopologyAssignEvent event) {
    String topologyId = event.getTopologyId();
    String topologyName = event.getTopologyName();
    try {
        StormClusterState zkClusterState = nimbusData.getStormClusterState();
        // one minor issue: tasks are fetched twice when assigning a topology
        Map<Integer, String> tasks = Cluster.get_all_task_component(zkClusterState, topologyId, null);
        Map<String, List<Integer>> componentTasks = JStormUtils.reverse_map(tasks);
        for (Entry<String, List<Integer>> entry : componentTasks.entrySet()) {
            List<Integer> keys = entry.getValue();
            Collections.sort(keys);
        }
        AssignmentBak assignmentBak = new AssignmentBak(componentTasks, assignment);
        zkClusterState.backup_assignment(topologyName, assignmentBak);
    } catch (Exception e) {
        LOG.warn("Failed to backup " + topologyId + " assignment " + assignment, e);
    }
}
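The key data transformation here is JStormUtils.reverse_map, which inverts the taskId -> component mapping read from ZooKeeper into a component -> task-id-list mapping that is stored in the AssignmentBak. A minimal, self-contained sketch of that inversion, using only the JDK (the reverseMap helper below is illustrative, not the actual JStormUtils implementation):

import java.util.*;

public class ReverseMapSketch {
    // Illustrative stand-in for JStormUtils.reverse_map: invert taskId -> component
    // into component -> sorted list of task ids, as backupAssignment does before
    // constructing the AssignmentBak.
    static Map<String, List<Integer>> reverseMap(Map<Integer, String> taskToComponent) {
        Map<String, List<Integer>> componentTasks = new HashMap<>();
        for (Map.Entry<Integer, String> e : taskToComponent.entrySet()) {
            componentTasks.computeIfAbsent(e.getValue(), k -> new ArrayList<>()).add(e.getKey());
        }
        // backupAssignment sorts each component's task list before backing it up
        for (List<Integer> tasks : componentTasks.values()) {
            Collections.sort(tasks);
        }
        return componentTasks;
    }

    public static void main(String[] args) {
        Map<Integer, String> taskToComponent = new HashMap<>();
        taskToComponent.put(3, "spout");
        taskToComponent.put(1, "bolt-a");
        taskToComponent.put(2, "spout");
        // e.g. {spout=[2, 3], bolt-a=[1]} (HashMap iteration order is not guaranteed)
        System.out.println(reverseMap(taskToComponent));
    }
}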
Use of com.alibaba.jstorm.schedule.AssignmentBak in project jstorm by alibaba.
From class TopologyAssign, method prepareTopologyAssign:
protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event) throws Exception {
    TopologyAssignContext ret = new TopologyAssignContext();
    String topologyId = event.getTopologyId();
    ret.setTopologyId(topologyId);
    int topoMasterId = nimbusData.getTasksHeartbeat().get(topologyId).get_topologyMasterId();
    ret.setTopologyMasterTaskId(topoMasterId);
    LOG.info("prepareTopologyAssign, topoMasterId={}", topoMasterId);
    Map<Object, Object> nimbusConf = nimbusData.getConf();
    Map<Object, Object> topologyConf = StormConfig.read_nimbus_topology_conf(topologyId, nimbusData.getBlobStore());
    StormTopology rawTopology = StormConfig.read_nimbus_topology_code(topologyId, nimbusData.getBlobStore());
    ret.setRawTopology(rawTopology);
    Map stormConf = new HashMap();
    LOG.info("GET RESERVE_WORKERS from ={}", Utils.readDefaultConfig());
    LOG.info("RESERVE_WORKERS ={}", Utils.readDefaultConfig().get(Config.RESERVE_WORKERS));
    stormConf.put(Config.RESERVE_WORKERS, Utils.readDefaultConfig().get(Config.RESERVE_WORKERS));
    stormConf.putAll(nimbusConf);
    stormConf.putAll(topologyConf);
    ret.setStormConf(stormConf);
    StormClusterState stormClusterState = nimbusData.getStormClusterState();
    // get all running supervisors; no callback is needed to watch supervisors
    Map<String, SupervisorInfo> supInfos = Cluster.get_all_SupervisorInfo(stormClusterState, null);
    // init all available worker ports
    for (Entry<String, SupervisorInfo> supInfo : supInfos.entrySet()) {
        SupervisorInfo supervisor = supInfo.getValue();
        if (supervisor != null)
            supervisor.setAvailableWorkerPorts(supervisor.getWorkerPorts());
    }
    getAliveSupervsByHb(supInfos, nimbusConf);
    if (supInfos.size() == 0) {
        throw new FailedAssignTopologyException("Failed to make assignment " + topologyId + ", due to no alive supervisor");
    }
    Map<Integer, String> taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, null);
    ret.setTaskToComponent(taskToComponent);
    // get task ids from /ZK/tasks/topologyId
    Set<Integer> allTaskIds = taskToComponent.keySet();
    if (allTaskIds.size() == 0) {
        String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId;
        LOG.warn(errMsg);
        throw new IOException(errMsg);
    }
    ret.setAllTaskIds(allTaskIds);
    Set<Integer> aliveTasks = new HashSet<>();
    // unstoppedTasks are tasks that are still alive but located on a dead supervisor's machine
    Set<Integer> unstoppedTasks = new HashSet<>();
    Set<Integer> deadTasks = new HashSet<>();
    Set<ResourceWorkerSlot> unstoppedWorkers;
    Assignment existingAssignment = stormClusterState.assignment_info(topologyId, null);
    if (existingAssignment != null) {
        /*
         * Check whether the topology master task is alive first, since all task
         * heartbeat info is reported by the topology master.
         * If the master is dead, reassign the topology master first.
         */
        if (NimbusUtils.isTaskDead(nimbusData, topologyId, topoMasterId)) {
            ResourceWorkerSlot tmWorker = existingAssignment.getWorkerByTaskId(topoMasterId);
            deadTasks.addAll(tmWorker.getTasks());
        } else {
            deadTasks = getDeadTasks(topologyId, allTaskIds, existingAssignment.getWorkers());
        }
        aliveTasks.addAll(allTaskIds);
        aliveTasks.removeAll(deadTasks);
        unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment);
    }
    ret.setDeadTaskIds(deadTasks);
    ret.setUnstoppedTaskIds(unstoppedTasks);
    // Step 2: get all slot resources: free slots / alive slots / unstopped slots
    getFreeSlots(supInfos, stormClusterState);
    ret.setCluster(supInfos);
    if (existingAssignment == null) {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW);
        try {
            AssignmentBak lastAssignment = stormClusterState.assignment_bak(event.getTopologyName());
            if (lastAssignment != null) {
                ret.setOldAssignment(lastAssignment.getAssignment());
            }
        } catch (Exception e) {
            LOG.warn("Fail to get old assignment", e);
        }
    } else {
        ret.setOldAssignment(existingAssignment);
        if (event.isScratch()) {
            ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE);
            ret.setIsReassign(event.isReassign());
            unstoppedWorkers = getUnstoppedWorkers(unstoppedTasks, existingAssignment);
            ret.setUnstoppedWorkers(unstoppedWorkers);
        } else {
            ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR);
            unstoppedWorkers = getUnstoppedWorkers(aliveTasks, existingAssignment);
            ret.setUnstoppedWorkers(unstoppedWorkers);
        }
    }
    return ret;
}
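One detail worth noting is how the effective stormConf is layered: the RESERVE_WORKERS value from the default config is inserted first, then nimbusConf and topologyConf are merged on top, so later putAll calls override earlier keys and topology-level settings take precedence over nimbus-level ones. A minimal sketch of that precedence using plain JDK maps (the keys and values below are made up for illustration and are not the actual JStorm config constants):

import java.util.HashMap;
import java.util.Map;

public class ConfLayeringSketch {
    public static void main(String[] args) {
        // Hypothetical values, for illustration only.
        Map<Object, Object> defaults = new HashMap<>();
        defaults.put("reserve.workers", 2);

        Map<Object, Object> nimbusConf = new HashMap<>();
        nimbusConf.put("topology.workers", 4);
        nimbusConf.put("topology.acker.executors", 1);

        Map<Object, Object> topologyConf = new HashMap<>();
        topologyConf.put("topology.workers", 8); // overrides the nimbus-level value

        Map<Object, Object> stormConf = new HashMap<>();
        stormConf.put("reserve.workers", defaults.get("reserve.workers"));
        stormConf.putAll(nimbusConf);   // nimbus defaults next
        stormConf.putAll(topologyConf); // topology settings win on key collisions
        // topology.workers is now 8, topology.acker.executors is 1, reserve.workers is 2
        System.out.println(stormConf);
    }
}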