use of com.alibaba.jstorm.utils.FailedAssignTopologyException in project jstorm by alibaba.
the class ServiceHandler method submitTopologyWithOpts.
/**
 * Submits one topology.
 *
 * <p>Steps, in order: validate the name and the JSON configuration, check
 * whether a topology with this name is already active, optionally kill the
 * running topology first (hot deploy), reserve a topology id, write code and
 * task info, make the assignment and push the start event.
 *
 * @param topologyName        topology name
 * @param uploadedJarLocation path of the already uploaded jar
 * @param jsonConf            the whole topology configuration serialized as JSON
 * @param topology            the topology object
 * @param options             submit options; only the initial status is read here
 * @return the id generated for the submitted topology
 * @throws AlreadyAliveException    if the active check reports it, or a topology
 *                                  with the same name is pending submission
 * @throws InvalidTopologyException if the name is invalid, the configuration
 *                                  cannot be parsed, or validation rejects the topology
 * @throws TopologyAssignException  if the assignment or any other submit step fails
 * @throws TException               if the active check fails unexpectedly or the
 *                                  old topology cannot be killed during hot deploy
 */
@SuppressWarnings("unchecked")
@Override
public String submitTopologyWithOpts(String topologyName, String uploadedJarLocation, String jsonConf, StormTopology topology, SubmitOptions options) throws AlreadyAliveException, InvalidTopologyException, TopologyAssignException, TException {
    LOG.info("Receive " + topologyName + ", uploadedJarLocation:" + uploadedJarLocation);
    long start = System.nanoTime();
    // check topologyname is valid
    if (!Common.charValidate(topologyName)) {
        throw new InvalidTopologyException(topologyName + " is not a valid topology name");
    }
    Map<Object, Object> serializedConf = (Map<Object, Object>) JStormUtils.from_json(jsonConf);
    if (serializedConf == null) {
        LOG.warn("Failed to serialized Configuration");
        throw new InvalidTopologyException("Failed to serialize topology configuration");
    }
    Common.confValidate(serializedConf, data.getConf());
    boolean enableDeploy = ConfigExtension.getTopologyHotDeplogyEnable(serializedConf);
    try {
        checkTopologyActive(data, topologyName, enableDeploy);
    } catch (AlreadyAliveException e) {
        LOG.info(topologyName + " already exists ");
        throw e;
    } catch (NotAliveException e) {
        LOG.info(topologyName + " is not alive ");
        throw e;
    } catch (Throwable e) {
        LOG.info("Failed to check whether topology is alive or not", e);
        throw new TException(e);
    }
    if (enableDeploy) {
        LOG.info("deploy the topology");
        try {
            StormClusterState stormClusterState = data.getStormClusterState();
            String topologyId = Cluster.get_topology_id(stormClusterState, topologyName);
            if (topologyId == null) {
                throw new NotAliveException(topologyName);
            }
            LOG.info("start kill the old topology {}", topologyId);
            // nimbus conf overlaid with the conf of the topology being killed
            Map oldConf = new HashMap();
            oldConf.putAll(conf);
            Map killedStormConf = StormConfig.read_nimbus_topology_conf(topologyId, data.getBlobStore());
            if (killedStormConf != null) {
                oldConf.putAll(killedStormConf);
            }
            NimbusUtils.transitionName(data, topologyName, true, StatusType.kill, 0);
            KillTopologyEvent.pushEvent(topologyId);
            notifyTopologyActionListener(topologyName, "killTopology");
            // wait all workers' are killed
            final long timeoutSeconds = ConfigExtension.getTaskCleanupTimeoutSec(oldConf);
            ConcurrentHashMap<String, Semaphore> topologyIdtoSem = data.getTopologyIdtoSem();
            // putIfAbsent is already a no-op when the key exists, so no separate
            // presence check is needed (ConcurrentHashMap.contains tests VALUES, not keys).
            topologyIdtoSem.putIfAbsent(topologyId, new Semaphore(0));
            Semaphore semaphore = topologyIdtoSem.get(topologyId);
            if (semaphore != null) {
                // blocks until a permit is released elsewhere or the timeout elapses
                semaphore.tryAcquire(timeoutSeconds, TimeUnit.SECONDS);
                // the map is keyed by topology id, so remove by id to avoid leaking the entry
                topologyIdtoSem.remove(topologyId);
            }
            LOG.info("success kill the old topology {}", topologyId);
        } catch (Exception e) {
            String errMsg = "Failed to kill topology " + topologyName;
            LOG.error(errMsg, e);
            // keep the root cause attached for callers and logs
            throw new TException(errMsg, e);
        }
    }
    String topologyId = null;
    synchronized (data) {
        // avoid same topologies from being submitted at the same time
        Set<String> pendingTopologies = data.getPendingSubmitTopologies().buildMap().keySet();
        for (String cachTopologyId : pendingTopologies) {
            if (cachTopologyId.contains(topologyName + "-"))
                throw new AlreadyAliveException(topologyName + " were submitted");
        }
        int counter = data.getSubmittedCount().incrementAndGet();
        topologyId = Common.topologyNameToId(topologyName, counter);
        data.getPendingSubmitTopologies().put(topologyId, null);
    }
    try {
        serializedConf.put(Config.TOPOLOGY_ID, topologyId);
        serializedConf.put(Config.TOPOLOGY_NAME, topologyName);
        Map<Object, Object> stormConf;
        stormConf = NimbusUtils.normalizeConf(conf, serializedConf, topology);
        LOG.info("Normalized configuration:" + stormConf);
        Map<Object, Object> totalStormConf = new HashMap<Object, Object>(conf);
        totalStormConf.putAll(stormConf);
        StormTopology normalizedTopology = NimbusUtils.normalizeTopology(stormConf, topology, true);
        // this validates the structure of the topology
        Common.validate_basic(normalizedTopology, totalStormConf, topologyId);
        // don't need generate real topology, so skip Common.system_topology
        // Common.system_topology(totalStormConf, topology);
        StormClusterState stormClusterState = data.getStormClusterState();
        // create /local-dir/nimbus/topologyId/xxxx files
        setupStormCode(conf, topologyId, uploadedJarLocation, stormConf, normalizedTopology);
        // wait for blob replication before activate topology
        waitForDesiredCodeReplication(conf, topologyId);
        // generate TaskInfo for every bolt or spout in ZK
        // /ZK/tasks/topoologyId/xxx
        setupZkTaskInfo(conf, topologyId, stormClusterState);
        // mkdir topology error directory
        String path = Cluster.taskerror_storm_root(topologyId);
        stormClusterState.mkdir(path);
        // make assignments for a topology
        LOG.info("Submit for " + topologyName + " with conf " + serializedConf);
        makeAssignment(topologyName, topologyId, options.get_initial_status());
        // push start event after startup
        double metricsSampleRate = ConfigExtension.getMetricSampleRate(stormConf);
        StartTopologyEvent.pushEvent(topologyId, metricsSampleRate);
        notifyTopologyActionListener(topologyName, "submitTopology");
    } catch (FailedAssignTopologyException e) {
        StringBuilder sb = new StringBuilder();
        sb.append("Fail to sumbit topology, Root cause:");
        if (e.getMessage() == null) {
            sb.append("submit timeout");
        } else {
            sb.append(e.getMessage());
        }
        sb.append("\n\n");
        sb.append("topologyId:" + topologyId);
        sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
        LOG.error(sb.toString(), e);
        throw new TopologyAssignException(sb.toString());
    } catch (InvalidParameterException e) {
        StringBuilder sb = new StringBuilder();
        sb.append("Fail to sumbit topology ");
        sb.append(e.getMessage());
        sb.append(", cause:" + e.getCause());
        sb.append("\n\n");
        sb.append("topologyId:" + topologyId);
        sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
        LOG.error(sb.toString(), e);
        throw new InvalidParameterException(sb.toString());
    } catch (InvalidTopologyException e) {
        LOG.error("Topology is invalid. " + e.get_msg());
        throw e;
    } catch (Throwable e) {
        StringBuilder sb = new StringBuilder();
        sb.append("Fail to sumbit topology ");
        sb.append(e.getMessage());
        sb.append(", cause:" + e.getCause());
        sb.append("\n\n");
        sb.append("topologyId:" + topologyId);
        sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
        LOG.error(sb.toString(), e);
        throw new TopologyAssignException(sb.toString());
    } finally {
        // the submission attempt is over (success or failure), so release the
        // pending-submit reservation taken in the synchronized block above
        data.getPendingSubmitTopologies().remove(topologyId);
        // NOTE(review): elapsed nanos are divided by TimeUtils.NS_PER_US while the
        // log message says "ms" - confirm the intended unit.
        double spend = (System.nanoTime() - start) / TimeUtils.NS_PER_US;
        SimpleJStormMetric.updateNimbusHistogram("submitTopologyWithOpts", spend);
        LOG.info("submitTopologyWithOpts {} costs {}ms", topologyName, spend);
    }
    return topologyId;
}
use of com.alibaba.jstorm.utils.FailedAssignTopologyException in project jstorm by alibaba.
the class TaskScheduler method assignForTopologyMaster.
/**
 * Places the topology master task on a worker that has no tasks yet.
 *
 * <p>Among the supervisors of the workers still tracked in the task context,
 * supervisors with more workers are preferred, to avoid the balance problem
 * when the remaining workers are assigned afterwards.
 *
 * @throws FailedAssignTopologyException if no task-free worker is found
 */
private void assignForTopologyMaster() {
    int topologyMasterTaskId = context.getTopologyMasterTaskId();
    ResourceWorkerSlot chosenWorker = null;
    int largestSupervisorSize = 0;
    for (ResourceWorkerSlot candidateSlot : taskContext.getWorkerToTaskNum().keySet()) {
        List<ResourceWorkerSlot> supervisorWorkers = taskContext.getSupervisorToWorker().get(candidateSlot.getNodeId());
        // only supervisors strictly larger than the best one so far are of interest
        if (supervisorWorkers == null || supervisorWorkers.size() <= largestSupervisorSize) {
            continue;
        }
        for (ResourceWorkerSlot candidate : supervisorWorkers) {
            Set<Integer> assignedTasks = candidate.getTasks();
            boolean hasTasks = assignedTasks != null && assignedTasks.size() != 0;
            if (hasTasks) {
                continue;
            }
            // first task-free worker on this supervisor wins
            chosenWorker = candidate;
            largestSupervisorSize = supervisorWorkers.size();
            break;
        }
    }
    if (chosenWorker == null) {
        throw new FailedAssignTopologyException("there's no enough workers for the assignment of topology master");
    }
    updateAssignedTasksOfWorker(topologyMasterTaskId, chosenWorker);
    // the chosen worker is taken out of the pool used for the other tasks
    taskContext.getWorkerToTaskNum().remove(chosenWorker);
    assignments.add(chosenWorker);
    tasks.remove(topologyMasterTaskId);
    workerNum--;
    LOG.info("assignForTopologyMaster, assignments=" + assignments);
}
use of com.alibaba.jstorm.utils.FailedAssignTopologyException in project jstorm by alibaba.
the class WorkerScheduler method getAvailableWorkers.
/**
 * Builds the list of worker slots to use for an assignment.
 *
 * <p>Workers are collected in this order: user-defined assignments, then
 * workers of the old assignment (either matched through
 * {@code getRightWorkers} or, for a non-reassign rebalance, copied by
 * port/hostname/node id), then empty slots up to {@code allocWorkerNum}.
 * Finally the workers are placed on supervisors and given memory/cpu settings.
 *
 * @param context        assignment context (storm conf, old workers, cluster)
 * @param needAssign     ids of the tasks that need to be assigned
 * @param allocWorkerNum number of workers to allocate
 * @return the workers to use for the assignment
 * @throws FailedAssignTopologyException if fewer than {@code allocWorkerNum}
 *         workers are available, or user-defined/old assignments already
 *         require more than {@code allocWorkerNum} workers
 */
public List<ResourceWorkerSlot> getAvailableWorkers(DefaultTopologyAssignContext context, Set<Integer> needAssign, int allocWorkerNum) {
    int workersNum = getAvailableWorkersNum(context);
    if (workersNum < allocWorkerNum) {
        throw new FailedAssignTopologyException("there's no enough worker. allocWorkerNum=" + allocWorkerNum + ", availableWorkerNum=" + workersNum);
    }
    workersNum = allocWorkerNum;
    List<ResourceWorkerSlot> assignedWorkers = new ArrayList<ResourceWorkerSlot>();
    // userdefine assignments, but dont't try to use custom scheduling for
    // TM bolts now.
    getRightWorkers(context, needAssign, assignedWorkers, workersNum, getUserDefineWorkers(context, ConfigExtension.getUserDefineAssignment(context.getStormConf())));
    // old assignments
    if (ConfigExtension.isUseOldAssignment(context.getStormConf())) {
        getRightWorkers(context, needAssign, assignedWorkers, workersNum, context.getOldWorkers());
    } else if (context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_REBALANCE && !context.isReassign()) {
        // reuse the location (port/host/node) of at most workersNum old workers
        int cnt = 0;
        for (ResourceWorkerSlot worker : context.getOldWorkers()) {
            if (cnt >= workersNum) {
                break;
            }
            ResourceWorkerSlot resFreeWorker = new ResourceWorkerSlot();
            resFreeWorker.setPort(worker.getPort());
            resFreeWorker.setHostname(worker.getHostname());
            resFreeWorker.setNodeId(worker.getNodeId());
            assignedWorkers.add(resFreeWorker);
            cnt++;
        }
    }
    LOG.info("Get workers from user define and old assignments: " + assignedWorkers);
    int restWokerNum = workersNum - assignedWorkers.size();
    if (restWokerNum < 0) {
        throw new FailedAssignTopologyException("Too much workers are needed for user define or old assignments. workersNum=" + workersNum + ", assignedWokersNum=" + assignedWorkers.size());
    }
    // fill up with empty slots; their location is decided below
    for (int i = 0; i < restWokerNum; i++) {
        assignedWorkers.add(new ResourceWorkerSlot());
    }
    // isolation supervisors, when configured, take precedence over the whole cluster
    List<SupervisorInfo> isolationSupervisors = this.getIsolationSupervisors(context);
    if (isolationSupervisors.size() != 0) {
        putAllWorkerToSupervisor(assignedWorkers, getResAvailSupervisors(isolationSupervisors));
    } else {
        putAllWorkerToSupervisor(assignedWorkers, getResAvailSupervisors(context.getCluster()));
    }
    this.setAllWorkerMemAndCpu(context.getStormConf(), assignedWorkers);
    LOG.info("Assigned workers=" + assignedWorkers);
    return assignedWorkers;
}
use of com.alibaba.jstorm.utils.FailedAssignTopologyException in project jstorm by alibaba.
the class TopologyAssign method mkLocalAssignment.
/**
 * Makes the assignment for a cluster that consists of exactly one supervisor:
 * all tasks of the topology are put into a single worker on the first
 * available port of that supervisor.
 *
 * @param context assignment context providing the cluster and all task ids
 * @return a set containing the single worker that holds every task
 * @throws RuntimeException              if the cluster does not contain exactly one supervisor
 * @throws FailedAssignTopologyException if the supervisor has no available worker port
 */
private static Set<ResourceWorkerSlot> mkLocalAssignment(TopologyAssignContext context) throws Exception {
    Map<String, SupervisorInfo> cluster = context.getCluster();
    if (cluster.size() != 1) {
        throw new RuntimeException("Local assignment requires exactly one supervisor, but found " + cluster.size());
    }
    // exactly one entry is guaranteed by the size check above
    Entry<String, SupervisorInfo> onlyEntry = cluster.entrySet().iterator().next();
    String supervisorId = onlyEntry.getKey();
    SupervisorInfo localSupervisor = onlyEntry.getValue();
    int port = -1;
    if (localSupervisor.getAvailableWorkerPorts().iterator().hasNext()) {
        port = localSupervisor.getAvailableWorkerPorts().iterator().next();
    } else {
        LOG.info(" amount of worker's ports is not enough");
        throw new FailedAssignTopologyException("Failed to make assignment " + ", due to no enough ports");
    }
    ResourceWorkerSlot worker = new ResourceWorkerSlot(supervisorId, port);
    worker.setTasks(new HashSet<Integer>(context.getAllTaskIds()));
    worker.setHostname(localSupervisor.getHostName());
    Set<ResourceWorkerSlot> result = new HashSet<ResourceWorkerSlot>();
    result.add(worker);
    return result;
}
use of com.alibaba.jstorm.utils.FailedAssignTopologyException in project jstorm by alibaba.
the class TopologyAssign method prepareTopologyAssign.
/**
 * Collects everything needed to assign a topology into a
 * {@link TopologyAssignContext}: merged configuration, raw topology, alive
 * supervisors with their free slots, task-to-component mapping, dead and
 * unstopped tasks, the old assignment and the assign type
 * (new / rebalance / monitor).
 *
 * @param event the assign event carrying topology id, name and scratch/reassign flags
 * @return the populated assignment context
 * @throws FailedAssignTopologyException if no supervisor is alive
 * @throws IOException                   if no task ids are found for the topology
 * @throws Exception                     if reading configuration, code or cluster state fails
 */
protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event) throws Exception {
    TopologyAssignContext ret = new TopologyAssignContext();
    String topologyId = event.getTopologyId();
    ret.setTopologyId(topologyId);
    int topoMasterId = nimbusData.getTasksHeartbeat().get(topologyId).get_topologyMasterId();
    ret.setTopologyMasterTaskId(topoMasterId);
    LOG.info("prepareTopologyAssign, topoMasterId={}", topoMasterId);
    Map<Object, Object> nimbusConf = nimbusData.getConf();
    Map<Object, Object> topologyConf = StormConfig.read_nimbus_topology_conf(topologyId, nimbusData.getBlobStore());
    StormTopology rawTopology = StormConfig.read_nimbus_topology_code(topologyId, nimbusData.getBlobStore());
    ret.setRawTopology(rawTopology);
    // topology conf overrides nimbus conf
    Map stormConf = new HashMap();
    stormConf.putAll(nimbusConf);
    stormConf.putAll(topologyConf);
    ret.setStormConf(stormConf);
    StormClusterState stormClusterState = nimbusData.getStormClusterState();
    // get all running supervisor, don't need callback to watch supervisor
    Map<String, SupervisorInfo> supInfos = Cluster.get_all_SupervisorInfo(stormClusterState, null);
    // init all AvailableWorkerPorts
    for (Entry<String, SupervisorInfo> supInfo : supInfos.entrySet()) {
        SupervisorInfo supervisor = supInfo.getValue();
        if (supervisor != null)
            supervisor.setAvailableWorkerPorts(supervisor.getWorkerPorts());
    }
    getAliveSupervsByHb(supInfos, nimbusConf);
    if (supInfos.size() == 0) {
        throw new FailedAssignTopologyException("Failed to make assignment " + topologyId + ", due to no alive supervisor");
    }
    Map<Integer, String> taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, null);
    // get taskids /ZK/tasks/topologyId
    // Guard the map itself: keySet() never returns null, so a missing mapping
    // must be detected before it is dereferenced.
    if (taskToComponent == null || taskToComponent.isEmpty()) {
        String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId;
        LOG.warn(errMsg);
        throw new IOException(errMsg);
    }
    ret.setTaskToComponent(taskToComponent);
    Set<Integer> allTaskIds = taskToComponent.keySet();
    ret.setAllTaskIds(allTaskIds);
    Set<Integer> aliveTasks = new HashSet<Integer>();
    // unstoppedTasks are tasks which are alive on no supervisor's(dead)
    // machine
    Set<Integer> unstoppedTasks = new HashSet<Integer>();
    Set<Integer> deadTasks = new HashSet<Integer>();
    Set<ResourceWorkerSlot> unstoppedWorkers = new HashSet<ResourceWorkerSlot>();
    Assignment existingAssignment = stormClusterState.assignment_info(topologyId, null);
    if (existingAssignment != null) {
        aliveTasks = getAliveTasks(topologyId, allTaskIds);
        /*
         * Check if the topology master task is alive first since all task
         * heartbeat info is reported by topology master.
         * If master is dead, do reassignment for topology master first.
         */
        if (!aliveTasks.contains(topoMasterId)) {
            // NOTE(review): getWorkerByTaskId may return null if the master task is
            // missing from the existing assignment - confirm and guard if so.
            ResourceWorkerSlot worker = existingAssignment.getWorkerByTaskId(topoMasterId);
            // only the tasks of the master's worker are treated as dead;
            // every other task is treated as alive
            deadTasks.addAll(worker.getTasks());
            Set<Integer> tempSet = new HashSet<Integer>(allTaskIds);
            tempSet.removeAll(deadTasks);
            aliveTasks.addAll(tempSet);
            aliveTasks.removeAll(deadTasks);
        } else {
            deadTasks.addAll(allTaskIds);
            deadTasks.removeAll(aliveTasks);
        }
        unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment);
    }
    ret.setDeadTaskIds(deadTasks);
    ret.setUnstoppedTaskIds(unstoppedTasks);
    // Step 2: get all slots resource, free slots/ alive slots/ unstopped
    // slots
    getFreeSlots(supInfos, stormClusterState);
    ret.setCluster(supInfos);
    if (existingAssignment == null) {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW);
        try {
            AssignmentBak lastAssignment = stormClusterState.assignment_bak(event.getTopologyName());
            if (lastAssignment != null) {
                ret.setOldAssignment(lastAssignment.getAssignment());
            }
        } catch (Exception e) {
            // best effort: a missing backup only means there is no old assignment to reuse
            LOG.warn("Fail to get old assignment", e);
        }
    } else {
        ret.setOldAssignment(existingAssignment);
        if (event.isScratch()) {
            ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE);
            ret.setIsReassign(event.isReassign());
            unstoppedWorkers = getUnstoppedWorkers(unstoppedTasks, existingAssignment);
            ret.setUnstoppedWorkers(unstoppedWorkers);
        } else {
            ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR);
            unstoppedWorkers = getUnstoppedWorkers(aliveTasks, existingAssignment);
            ret.setUnstoppedWorkers(unstoppedWorkers);
        }
    }
    return ret;
}
Aggregations