Search in sources :

Example 1 with FailedAssignTopologyException

use of com.alibaba.jstorm.utils.FailedAssignTopologyException in project jstorm by alibaba.

the class ServiceHandler method submitTopologyWithOpts.

/**
 * Submits one topology: validates the name and configuration, optionally kills a
 * running topology of the same name first (hot deploy), reserves a new topology id,
 * writes code and task info, and triggers the assignment.
 *
 * @param topologyName        String: topology name
 * @param uploadedJarLocation String: already uploaded jar path
 * @param jsonConf            String: the topology configuration serialized as JSON
 * @param topology            StormTopology: topology Object
 * @param options             SubmitOptions: supplies the initial status passed to the assignment
 * @return the id generated for the submitted topology
 * @throws AlreadyAliveException    if the liveness check reports the topology as alive, or a
 *                                  topology with this name is already pending submission
 * @throws InvalidTopologyException if the name is invalid, the configuration cannot be
 *                                  deserialized, or validation rejects the topology
 * @throws TopologyAssignException  if assignment fails or any other error occurs during setup
 * @throws TException               if the liveness check or the hot-deploy kill step fails
 */
@SuppressWarnings("unchecked")
@Override
public String submitTopologyWithOpts(String topologyName, String uploadedJarLocation, String jsonConf, StormTopology topology, SubmitOptions options) throws AlreadyAliveException, InvalidTopologyException, TopologyAssignException, TException {
    LOG.info("Receive " + topologyName + ", uploadedJarLocation:" + uploadedJarLocation);
    long start = System.nanoTime();
    //check topologyname is valid
    if (!Common.charValidate(topologyName)) {
        throw new InvalidTopologyException(topologyName + " is not a valid topology name");
    }
    Map<Object, Object> serializedConf = (Map<Object, Object>) JStormUtils.from_json(jsonConf);
    if (serializedConf == null) {
        LOG.warn("Failed to serialized Configuration");
        throw new InvalidTopologyException("Failed to serialize topology configuration");
    }
    Common.confValidate(serializedConf, data.getConf());
    boolean enableDeploy = ConfigExtension.getTopologyHotDeplogyEnable(serializedConf);
    try {
        checkTopologyActive(data, topologyName, enableDeploy);
    } catch (AlreadyAliveException e) {
        LOG.info(topologyName + " already exists ");
        throw e;
    } catch (NotAliveException e) {
        LOG.info(topologyName + " is not alive ");
        throw e;
    } catch (Throwable e) {
        LOG.info("Failed to check whether topology is alive or not", e);
        throw new TException(e);
    }
    if (enableDeploy) {
        LOG.info("deploy the topology");
        try {
            StormClusterState stormClusterState = data.getStormClusterState();
            String topologyId = Cluster.get_topology_id(stormClusterState, topologyName);
            if (topologyId == null) {
                throw new NotAliveException(topologyName);
            }
            LOG.info("start kill the old  topology {}", topologyId);
            // effective conf of the old topology: nimbus conf overlaid with its own conf
            Map oldConf = new HashMap();
            oldConf.putAll(conf);
            Map killedStormConf = StormConfig.read_nimbus_topology_conf(topologyId, data.getBlobStore());
            if (killedStormConf != null) {
                oldConf.putAll(killedStormConf);
            }
            NimbusUtils.transitionName(data, topologyName, true, StatusType.kill, 0);
            KillTopologyEvent.pushEvent(topologyId);
            notifyTopologyActionListener(topologyName, "killTopology");
            //wait all workers' are killed
            final long timeoutSeconds = ConfigExtension.getTaskCleanupTimeoutSec(oldConf);
            ConcurrentHashMap<String, Semaphore> topologyIdtoSem = data.getTopologyIdtoSem();
            // putIfAbsent is already an atomic key-based check-and-insert. The former
            // contains(...) pre-check tested VALUES (legacy Hashtable semantics), not keys.
            topologyIdtoSem.putIfAbsent(topologyId, new Semaphore(0));
            Semaphore semaphore = topologyIdtoSem.get(topologyId);
            if (semaphore != null) {
                boolean released = false;
                try {
                    released = semaphore.tryAcquire(timeoutSeconds, TimeUnit.SECONDS);
                } finally {
                    // Remove by KEY (the old code passed the semaphore as the key, so the
                    // entry was never removed). The two-argument form only removes the
                    // mapping if it still points at our semaphore.
                    topologyIdtoSem.remove(topologyId, semaphore);
                }
                if (!released) {
                    LOG.warn("Timed out after {}s waiting for workers of topology {} to be killed", timeoutSeconds, topologyId);
                }
            }
            LOG.info("success kill the old topology {}", topologyId);
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // restore the interrupt flag so callers up the stack can observe it
                Thread.currentThread().interrupt();
            }
            String errMsg = "Failed to kill topology " + topologyName;
            LOG.error(errMsg, e);
            // keep the root cause attached to the exception sent back to the caller
            throw new TException(errMsg, e);
        }
    }
    String topologyId = null;
    synchronized (data) {
        // avoid same topologies from being submitted at the same time
        Set<String> pendingTopologies = data.getPendingSubmitTopologies().buildMap().keySet();
        for (String cachTopologyId : pendingTopologies) {
            // NOTE(review): substring match; a pending id that merely contains
            // "<name>-" somewhere would also be rejected - confirm the id format.
            if (cachTopologyId.contains(topologyName + "-"))
                throw new AlreadyAliveException(topologyName + "  were submitted");
        }
        int counter = data.getSubmittedCount().incrementAndGet();
        topologyId = Common.topologyNameToId(topologyName, counter);
        data.getPendingSubmitTopologies().put(topologyId, null);
    }
    try {
        serializedConf.put(Config.TOPOLOGY_ID, topologyId);
        serializedConf.put(Config.TOPOLOGY_NAME, topologyName);
        Map<Object, Object> stormConf;
        stormConf = NimbusUtils.normalizeConf(conf, serializedConf, topology);
        LOG.info("Normalized configuration:" + stormConf);
        Map<Object, Object> totalStormConf = new HashMap<Object, Object>(conf);
        totalStormConf.putAll(stormConf);
        StormTopology normalizedTopology = NimbusUtils.normalizeTopology(stormConf, topology, true);
        // this validates the structure of the topology
        Common.validate_basic(normalizedTopology, totalStormConf, topologyId);
        // don't need generate real topology, so skip Common.system_topology
        // Common.system_topology(totalStormConf, topology);
        StormClusterState stormClusterState = data.getStormClusterState();
        // create /local-dir/nimbus/topologyId/xxxx files
        setupStormCode(conf, topologyId, uploadedJarLocation, stormConf, normalizedTopology);
        // wait for blob replication before activate topology
        waitForDesiredCodeReplication(conf, topologyId);
        // generate TaskInfo for every bolt or spout in ZK
        // /ZK/tasks/topoologyId/xxx
        setupZkTaskInfo(conf, topologyId, stormClusterState);
        //mkdir topology error directory
        String path = Cluster.taskerror_storm_root(topologyId);
        stormClusterState.mkdir(path);
        // make assignments for a topology
        LOG.info("Submit for " + topologyName + " with conf " + serializedConf);
        makeAssignment(topologyName, topologyId, options.get_initial_status());
        // push start event after startup
        double metricsSampleRate = ConfigExtension.getMetricSampleRate(stormConf);
        StartTopologyEvent.pushEvent(topologyId, metricsSampleRate);
        notifyTopologyActionListener(topologyName, "submitTopology");
    } catch (FailedAssignTopologyException e) {
        StringBuilder sb = new StringBuilder();
        sb.append("Fail to sumbit topology, Root cause:");
        if (e.getMessage() == null) {
            sb.append("submit timeout");
        } else {
            sb.append(e.getMessage());
        }
        sb.append("\n\n");
        sb.append("topologyId:" + topologyId);
        sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
        LOG.error(sb.toString(), e);
        throw new TopologyAssignException(sb.toString());
    } catch (InvalidParameterException e) {
        StringBuilder sb = new StringBuilder();
        sb.append("Fail to sumbit topology ");
        sb.append(e.getMessage());
        sb.append(", cause:" + e.getCause());
        sb.append("\n\n");
        sb.append("topologyId:" + topologyId);
        sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
        LOG.error(sb.toString(), e);
        throw new InvalidParameterException(sb.toString());
    } catch (InvalidTopologyException e) {
        LOG.error("Topology is invalid. " + e.get_msg());
        throw e;
    } catch (Throwable e) {
        StringBuilder sb = new StringBuilder();
        sb.append("Fail to sumbit topology ");
        sb.append(e.getMessage());
        sb.append(", cause:" + e.getCause());
        sb.append("\n\n");
        sb.append("topologyId:" + topologyId);
        sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
        LOG.error(sb.toString(), e);
        throw new TopologyAssignException(sb.toString());
    } finally {
        // the submission attempt is over (successfully or not), so drop the id
        // from the pending-submit set
        data.getPendingSubmitTopologies().remove(topologyId);
        // NOTE(review): the divisor is named NS_PER_US while the log text says "ms" -
        // confirm which unit is intended.
        double spend = (System.nanoTime() - start) / TimeUtils.NS_PER_US;
        SimpleJStormMetric.updateNimbusHistogram("submitTopologyWithOpts", spend);
        LOG.info("submitTopologyWithOpts {} costs {}ms", topologyName, spend);
    }
    return topologyId;
}
Also used : TException(org.apache.thrift.TException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) InvalidTopologyException(backtype.storm.generated.InvalidTopologyException) StormTopology(backtype.storm.generated.StormTopology) AlreadyAliveException(backtype.storm.generated.AlreadyAliveException) Semaphore(java.util.concurrent.Semaphore) InvalidParameterException(java.security.InvalidParameterException) FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) KeyNotFoundException(backtype.storm.generated.KeyNotFoundException) TException(org.apache.thrift.TException) IOException(java.io.IOException) AlreadyAliveException(backtype.storm.generated.AlreadyAliveException) TopologyAssignException(backtype.storm.generated.TopologyAssignException) FileNotFoundException(java.io.FileNotFoundException) NotAliveException(backtype.storm.generated.NotAliveException) InvalidTopologyException(backtype.storm.generated.InvalidTopologyException) KeyAlreadyExistsException(backtype.storm.generated.KeyAlreadyExistsException) InvalidParameterException(java.security.InvalidParameterException) StormClusterState(com.alibaba.jstorm.cluster.StormClusterState) NotAliveException(backtype.storm.generated.NotAliveException) TopologyAssignException(backtype.storm.generated.TopologyAssignException) Map(java.util.Map) TreeMap(java.util.TreeMap) TimeCacheMap(com.alibaba.jstorm.utils.TimeCacheMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 2 with FailedAssignTopologyException

use of com.alibaba.jstorm.utils.FailedAssignTopologyException in project jstorm by alibaba.

the class TaskScheduler method assignForTopologyMaster.

/**
 * Places the topology master task on a worker that currently has no tasks.
 * Candidate supervisors are visited via the workers in the worker-to-task-count map;
 * a supervisor is only considered when it hosts strictly more workers than the one
 * chosen so far, which steers the master towards the most populated supervisor.
 *
 * @throws FailedAssignTopologyException if no task-free worker can be found
 */
private void assignForTopologyMaster() {
    final int masterTaskId = context.getTopologyMasterTaskId();
    // Prefer a supervisor with many workers so the later assignment of the
    // remaining workers stays balanced.
    ResourceWorkerSlot chosen = null;
    int largestSupervisorSize = 0;
    for (ResourceWorkerSlot candidate : taskContext.getWorkerToTaskNum().keySet()) {
        List<ResourceWorkerSlot> siblings = taskContext.getSupervisorToWorker().get(candidate.getNodeId());
        if (siblings == null || siblings.size() <= largestSupervisorSize) {
            continue;
        }
        for (ResourceWorkerSlot sibling : siblings) {
            Set<Integer> assignedTasks = sibling.getTasks();
            boolean isFree = assignedTasks == null || assignedTasks.isEmpty();
            if (isFree) {
                chosen = sibling;
                largestSupervisorSize = siblings.size();
                break;
            }
        }
    }
    if (chosen == null) {
        throw new FailedAssignTopologyException("there's no enough workers for the assignment of topology master");
    }
    updateAssignedTasksOfWorker(masterTaskId, chosen);
    // the chosen worker is now taken: drop it from the candidates and record it
    taskContext.getWorkerToTaskNum().remove(chosen);
    assignments.add(chosen);
    tasks.remove(masterTaskId);
    workerNum--;
    LOG.info("assignForTopologyMaster, assignments=" + assignments);
}
Also used : FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException)

Example 3 with FailedAssignTopologyException

use of com.alibaba.jstorm.utils.FailedAssignTopologyException in project jstorm by alibaba.

the class WorkerScheduler method getAvailableWorkers.

/**
 * Builds the list of worker slots for an assignment: first slots taken from the
 * user-defined assignment, then from old workers (depending on configuration and
 * assign type), then padded with empty slots up to {@code allocWorkerNum}. The slots
 * are finally bound to supervisors and given memory/CPU settings.
 *
 * @param context        assignment context supplying configuration, old workers and cluster
 * @param needAssign     task ids that still need to be assigned
 * @param allocWorkerNum number of workers to allocate
 * @return the assigned worker slots
 * @throws FailedAssignTopologyException if fewer workers are available than requested, or
 *                                       the pre-selected workers exceed the requested number
 */
public List<ResourceWorkerSlot> getAvailableWorkers(DefaultTopologyAssignContext context, Set<Integer> needAssign, int allocWorkerNum) {
    final int availableNum = getAvailableWorkersNum(context);
    if (availableNum < allocWorkerNum) {
        throw new FailedAssignTopologyException("there's no enough worker. allocWorkerNum=" + allocWorkerNum + ", availableWorkerNum=" + availableNum);
    }
    final int workersNum = allocWorkerNum;
    List<ResourceWorkerSlot> assignedWorkers = new ArrayList<ResourceWorkerSlot>();

    // user-defined assignments first; custom scheduling is not applied to TM bolts for now
    getRightWorkers(context, needAssign, assignedWorkers, workersNum,
            getUserDefineWorkers(context, ConfigExtension.getUserDefineAssignment(context.getStormConf())));

    // then fall back on the old assignment
    if (ConfigExtension.isUseOldAssignment(context.getStormConf())) {
        getRightWorkers(context, needAssign, assignedWorkers, workersNum, context.getOldWorkers());
    } else if (context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_REBALANCE && !context.isReassign()) {
        // rebalance without reassign: reuse the location (port/host/node) of up to
        // workersNum old workers, but as fresh, task-less slots
        int reused = 0;
        for (ResourceWorkerSlot oldWorker : context.getOldWorkers()) {
            if (reused >= workersNum) {
                break;
            }
            ResourceWorkerSlot freshSlot = new ResourceWorkerSlot();
            freshSlot.setPort(oldWorker.getPort());
            freshSlot.setHostname(oldWorker.getHostname());
            freshSlot.setNodeId(oldWorker.getNodeId());
            assignedWorkers.add(freshSlot);
            reused++;
        }
    }

    // count the TM bolts among the tasks to assign
    // NOTE(review): this counter is not read anywhere in the visible method body.
    int workersForSingleTM = 0;
    if (context.getAssignSingleWorkerForTM()) {
        for (Integer taskId : needAssign) {
            String componentName = context.getTaskToComponent().get(taskId);
            if (componentName.equals(Common.TOPOLOGY_MASTER_COMPONENT_ID)) {
                workersForSingleTM += 1;
            }
        }
    }

    LOG.info("Get workers from user define and old assignments: " + assignedWorkers);
    final int missing = workersNum - assignedWorkers.size();
    if (missing < 0) {
        throw new FailedAssignTopologyException("Too much workers are needed for user define or old assignments. workersNum=" + workersNum + ", assignedWokersNum=" + assignedWorkers.size());
    }
    // pad with empty slots until the requested count is reached
    int added = 0;
    while (added < missing) {
        assignedWorkers.add(new ResourceWorkerSlot());
        added++;
    }

    // isolation supervisors take precedence over the whole cluster when configured
    List<SupervisorInfo> isolationSupervisors = this.getIsolationSupervisors(context);
    if (isolationSupervisors.size() == 0) {
        putAllWorkerToSupervisor(assignedWorkers, getResAvailSupervisors(context.getCluster()));
    } else {
        putAllWorkerToSupervisor(assignedWorkers, getResAvailSupervisors(isolationSupervisors));
    }
    this.setAllWorkerMemAndCpu(context.getStormConf(), assignedWorkers);
    LOG.info("Assigned workers=" + assignedWorkers);
    return assignedWorkers;
}
Also used : FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) SupervisorInfo(com.alibaba.jstorm.daemon.supervisor.SupervisorInfo)

Example 4 with FailedAssignTopologyException

use of com.alibaba.jstorm.utils.FailedAssignTopologyException in project jstorm by alibaba.

the class TopologyAssign method mkLocalAssignment.

/**
 * Builds the assignment for a cluster that consists of exactly one supervisor:
 * all task ids of the topology are placed on a single worker that uses the first
 * available port of that supervisor.
 *
 * @param context assignment context supplying the cluster map and all task ids
 * @return a set containing the single worker slot that hosts every task
 * @throws RuntimeException              if the cluster does not contain exactly one supervisor
 * @throws FailedAssignTopologyException if the supervisor has no available worker port
 */
private static Set<ResourceWorkerSlot> mkLocalAssignment(TopologyAssignContext context) throws Exception {
    Set<ResourceWorkerSlot> result = new HashSet<ResourceWorkerSlot>();
    Map<String, SupervisorInfo> cluster = context.getCluster();
    if (cluster.size() != 1) {
        // say what went wrong instead of throwing a message-less exception
        throw new RuntimeException("Local assignment requires exactly one supervisor, but found " + cluster.size());
    }
    // the map holds exactly one entry, so this loop just unpacks it
    SupervisorInfo localSupervisor = null;
    String supervisorId = null;
    for (Entry<String, SupervisorInfo> entry : cluster.entrySet()) {
        supervisorId = entry.getKey();
        localSupervisor = entry.getValue();
    }
    int port = -1;
    if (localSupervisor.getAvailableWorkerPorts().iterator().hasNext()) {
        port = localSupervisor.getAvailableWorkerPorts().iterator().next();
    } else {
        LOG.info(" amount of worker's ports is not enough");
        throw new FailedAssignTopologyException("Failed to make assignment " + ", due to no enough ports");
    }
    ResourceWorkerSlot worker = new ResourceWorkerSlot(supervisorId, port);
    // copy the task ids so the worker owns its own set
    worker.setTasks(new HashSet<Integer>(context.getAllTaskIds()));
    worker.setHostname(localSupervisor.getHostName());
    result.add(worker);
    return result;
}
Also used : FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) SupervisorInfo(com.alibaba.jstorm.daemon.supervisor.SupervisorInfo) HashSet(java.util.HashSet) ResourceWorkerSlot(com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot)

Example 5 with FailedAssignTopologyException

use of com.alibaba.jstorm.utils.FailedAssignTopologyException in project jstorm by alibaba.

the class TopologyAssign method prepareTopologyAssign.

/**
 * Collects everything the scheduler needs for one assignment round into a
 * {@link TopologyAssignContext}: topology id and master task id, merged configuration,
 * raw topology, alive supervisors, task-to-component mapping, dead/unstopped tasks,
 * the old assignment and the assign type (new, rebalance or monitor).
 *
 * @param event the assign event naming the topology and carrying the scratch/reassign flags
 * @return the populated assignment context
 * @throws FailedAssignTopologyException if no supervisor is left after the heartbeat check
 * @throws IOException                   if no task ids are found for the topology
 * @throws Exception                     any other failure raised by the helpers called here
 */
protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event) throws Exception {
    TopologyAssignContext ret = new TopologyAssignContext();
    String topologyId = event.getTopologyId();
    ret.setTopologyId(topologyId);
    int topoMasterId = nimbusData.getTasksHeartbeat().get(topologyId).get_topologyMasterId();
    ret.setTopologyMasterTaskId(topoMasterId);
    LOG.info("prepareTopologyAssign, topoMasterId={}", topoMasterId);
    Map<Object, Object> nimbusConf = nimbusData.getConf();
    Map<Object, Object> topologyConf = StormConfig.read_nimbus_topology_conf(topologyId, nimbusData.getBlobStore());
    StormTopology rawTopology = StormConfig.read_nimbus_topology_code(topologyId, nimbusData.getBlobStore());
    ret.setRawTopology(rawTopology);
    // effective conf: nimbus conf overlaid with the topology's own conf
    Map stormConf = new HashMap();
    stormConf.putAll(nimbusConf);
    stormConf.putAll(topologyConf);
    ret.setStormConf(stormConf);
    StormClusterState stormClusterState = nimbusData.getStormClusterState();
    // get all running supervisor, don't need callback to watch supervisor
    Map<String, SupervisorInfo> supInfos = Cluster.get_all_SupervisorInfo(stormClusterState, null);
    // init all AvailableWorkerPorts
    for (Entry<String, SupervisorInfo> supInfo : supInfos.entrySet()) {
        SupervisorInfo supervisor = supInfo.getValue();
        if (supervisor != null)
            supervisor.setAvailableWorkerPorts(supervisor.getWorkerPorts());
    }
    getAliveSupervsByHb(supInfos, nimbusConf);
    if (supInfos.size() == 0) {
        throw new FailedAssignTopologyException("Failed to make assignment " + topologyId + ", due to no alive supervisor");
    }
    Map<Integer, String> taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, null);
    // Guard the map itself: Map.keySet() never returns null, so checking only the
    // key set could not catch a missing mapping (a null map raised an NPE instead).
    if (taskToComponent == null || taskToComponent.isEmpty()) {
        String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId;
        LOG.warn(errMsg);
        throw new IOException(errMsg);
    }
    ret.setTaskToComponent(taskToComponent);
    // get taskids /ZK/tasks/topologyId
    Set<Integer> allTaskIds = taskToComponent.keySet();
    ret.setAllTaskIds(allTaskIds);
    Set<Integer> aliveTasks = new HashSet<Integer>();
    // unstoppedTasks are tasks which are alive on no supervisor's(dead)
    // machine
    Set<Integer> unstoppedTasks = new HashSet<Integer>();
    Set<Integer> deadTasks = new HashSet<Integer>();
    Set<ResourceWorkerSlot> unstoppedWorkers = new HashSet<ResourceWorkerSlot>();
    Assignment existingAssignment = stormClusterState.assignment_info(topologyId, null);
    if (existingAssignment != null) {
        aliveTasks = getAliveTasks(topologyId, allTaskIds);
        /*
         * Check if the topology master task is alive first since all task
         * heartbeat info is reported by topology master.
         * If master is dead, do reassignment for topology master first.
         */
        if (aliveTasks.contains(topoMasterId) == false) {
            // NOTE(review): assumes the old assignment still has a worker for the
            // master task; a null result here would raise an NPE - confirm.
            ResourceWorkerSlot worker = existingAssignment.getWorkerByTaskId(topoMasterId);
            // only the tasks on the master's worker count as dead; all others as alive
            deadTasks.addAll(worker.getTasks());
            Set<Integer> tempSet = new HashSet<Integer>(allTaskIds);
            tempSet.removeAll(deadTasks);
            aliveTasks.addAll(tempSet);
            aliveTasks.removeAll(deadTasks);
        } else {
            // master is alive: dead tasks are all tasks minus the alive ones
            deadTasks.addAll(allTaskIds);
            deadTasks.removeAll(aliveTasks);
        }
        unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment);
    }
    ret.setDeadTaskIds(deadTasks);
    ret.setUnstoppedTaskIds(unstoppedTasks);
    // Step 2: get all slots resource, free slots/ alive slots/ unstopped
    // slots
    getFreeSlots(supInfos, stormClusterState);
    ret.setCluster(supInfos);
    if (existingAssignment == null) {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW);
        try {
            // best effort: a backup of the last assignment is optional
            AssignmentBak lastAssignment = stormClusterState.assignment_bak(event.getTopologyName());
            if (lastAssignment != null) {
                ret.setOldAssignment(lastAssignment.getAssignment());
            }
        } catch (Exception e) {
            LOG.warn("Fail to get old assignment", e);
        }
    } else {
        ret.setOldAssignment(existingAssignment);
        if (event.isScratch()) {
            ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE);
            ret.setIsReassign(event.isReassign());
            unstoppedWorkers = getUnstoppedWorkers(unstoppedTasks, existingAssignment);
            ret.setUnstoppedWorkers(unstoppedWorkers);
        } else {
            ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR);
            unstoppedWorkers = getUnstoppedWorkers(aliveTasks, existingAssignment);
            ret.setUnstoppedWorkers(unstoppedWorkers);
        }
    }
    return ret;
}
Also used : AssignmentBak(com.alibaba.jstorm.schedule.AssignmentBak) TopologyAssignContext(com.alibaba.jstorm.schedule.TopologyAssignContext) HashMap(java.util.HashMap) FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) StormTopology(backtype.storm.generated.StormTopology) IOException(java.io.IOException) SupervisorInfo(com.alibaba.jstorm.daemon.supervisor.SupervisorInfo) FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) IOException(java.io.IOException) Assignment(com.alibaba.jstorm.schedule.Assignment) StormClusterState(com.alibaba.jstorm.cluster.StormClusterState) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) HashSet(java.util.HashSet) ResourceWorkerSlot(com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot)

Aggregations

FailedAssignTopologyException (com.alibaba.jstorm.utils.FailedAssignTopologyException)6 SupervisorInfo (com.alibaba.jstorm.daemon.supervisor.SupervisorInfo)3 StormTopology (backtype.storm.generated.StormTopology)2 StormClusterState (com.alibaba.jstorm.cluster.StormClusterState)2 ResourceWorkerSlot (com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot)2 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 Map (java.util.Map)2 TreeMap (java.util.TreeMap)2 AlreadyAliveException (backtype.storm.generated.AlreadyAliveException)1 InvalidTopologyException (backtype.storm.generated.InvalidTopologyException)1 KeyAlreadyExistsException (backtype.storm.generated.KeyAlreadyExistsException)1 KeyNotFoundException (backtype.storm.generated.KeyNotFoundException)1 NotAliveException (backtype.storm.generated.NotAliveException)1 TopologyAssignException (backtype.storm.generated.TopologyAssignException)1 Assignment (com.alibaba.jstorm.schedule.Assignment)1 AssignmentBak (com.alibaba.jstorm.schedule.AssignmentBak)1 TopologyAssignContext (com.alibaba.jstorm.schedule.TopologyAssignContext)1 TimeCacheMap (com.alibaba.jstorm.utils.TimeCacheMap)1