Search in sources :

Example 16 with Assignment

use of com.alibaba.jstorm.schedule.Assignment in project jstorm by alibaba.

the class TopologyAssign method getFreeSlots.

/**
 * Removes the worker ports already claimed by existing assignments from each
 * supervisor's available-port collection, leaving only the free slots.
 *
 * <p>Every assignment returned by {@code Cluster.get_all_assignment} is scanned. For each
 * worker whose node id has an entry in {@code supervisorInfos}, the worker's port is removed
 * from that supervisor's available worker ports. Workers whose node id has no entry in the
 * map are skipped.
 *
 * @param supervisorInfos   supervisor infos looked up by worker node id; their
 *                          available worker ports are modified in place
 * @param stormClusterState cluster state handle the assignments are read from
 * @throws Exception if the assignments cannot be read
 */
public static void getFreeSlots(Map<String, SupervisorInfo> supervisorInfos, StormClusterState stormClusterState) throws Exception {
    Map<String, Assignment> assignments = Cluster.get_all_assignment(stormClusterState, null);
    // Only the assignments themselves matter here; the topology ids (map keys) are not needed.
    for (Assignment assignment : assignments.values()) {
        for (ResourceWorkerSlot worker : assignment.getWorkers()) {
            SupervisorInfo supervisorInfo = supervisorInfos.get(worker.getNodeId());
            if (supervisorInfo == null) {
                // the supervisor is dead
                continue;
            }
            supervisorInfo.getAvailableWorkerPorts().remove(worker.getPort());
        }
    }
}
Also used : Assignment(com.alibaba.jstorm.schedule.Assignment) SupervisorInfo(com.alibaba.jstorm.daemon.supervisor.SupervisorInfo) ResourceWorkerSlot(com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot)

Example 17 with Assignment

use of com.alibaba.jstorm.schedule.Assignment in project jstorm by alibaba.

the class TopologyAssign method prepareTopologyAssign.

/**
 * Collects everything the scheduler needs to (re)assign a topology and packs it into a
 * {@link TopologyAssignContext}.
 *
 * <p>The context receives: the topology id and topology master task id, the raw topology,
 * the merged configuration (nimbus conf overlaid with the topology conf), the supervisors
 * left after {@code getAliveSupervsByHb} with their free worker ports, the task-to-component
 * map, the dead and unstopped task ids, the previous assignment (if any) and the assign type
 * (NEW, REBALANCE or MONITOR).
 *
 * @param event the assign event; supplies the topology id/name and the scratch/reassign flags
 * @return the populated assignment context
 * @throws FailedAssignTopologyException if no supervisor is left after the heartbeat filter
 * @throws IOException if no task ids are registered for the topology
 * @throws Exception if reading configuration, topology code or cluster state fails
 */
protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event) throws Exception {
    TopologyAssignContext ret = new TopologyAssignContext();
    String topologyId = event.getTopologyId();
    ret.setTopologyId(topologyId);
    // NOTE(review): assumes a heartbeat entry exists for topologyId; a missing entry would NPE here.
    int topoMasterId = nimbusData.getTasksHeartbeat().get(topologyId).get_topologyMasterId();
    ret.setTopologyMasterTaskId(topoMasterId);
    LOG.info("prepareTopologyAssign, topoMasterId={}", topoMasterId);

    Map<Object, Object> nimbusConf = nimbusData.getConf();
    Map<Object, Object> topologyConf = StormConfig.read_nimbus_topology_conf(topologyId, nimbusData.getBlobStore());
    StormTopology rawTopology = StormConfig.read_nimbus_topology_code(topologyId, nimbusData.getBlobStore());
    ret.setRawTopology(rawTopology);

    // Topology-level settings are applied last so they override nimbus-level ones.
    Map stormConf = new HashMap();
    stormConf.putAll(nimbusConf);
    stormConf.putAll(topologyConf);
    ret.setStormConf(stormConf);

    StormClusterState stormClusterState = nimbusData.getStormClusterState();
    // get all running supervisor, don't need callback to watch supervisor
    Map<String, SupervisorInfo> supInfos = Cluster.get_all_SupervisorInfo(stormClusterState, null);
    // init all AvailableWorkerPorts
    for (Entry<String, SupervisorInfo> supInfo : supInfos.entrySet()) {
        SupervisorInfo supervisor = supInfo.getValue();
        if (supervisor != null) {
            supervisor.setAvailableWorkerPorts(supervisor.getWorkerPorts());
        }
    }
    getAliveSupervsByHb(supInfos, nimbusConf);
    if (supInfos.isEmpty()) {
        throw new FailedAssignTopologyException("Failed to make assignment " + topologyId + ", due to no alive supervisor");
    }

    Map<Integer, String> taskToComponent = Cluster.get_all_task_component(stormClusterState, topologyId, null);
    // Validate the map itself before touching keySet(): keySet() never returns null, so a
    // null check on the key set cannot detect a missing task map and would NPE instead of
    // raising the intended IOException.
    if (taskToComponent == null || taskToComponent.isEmpty()) {
        String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId;
        LOG.warn(errMsg);
        throw new IOException(errMsg);
    }
    ret.setTaskToComponent(taskToComponent);
    // get taskids /ZK/tasks/topologyId
    Set<Integer> allTaskIds = taskToComponent.keySet();
    ret.setAllTaskIds(allTaskIds);

    Set<Integer> aliveTasks = new HashSet<Integer>();
    // unstoppedTasks are tasks which are alive on no supervisor's(dead)
    // machine
    Set<Integer> unstoppedTasks = new HashSet<Integer>();
    Set<Integer> deadTasks = new HashSet<Integer>();
    Assignment existingAssignment = stormClusterState.assignment_info(topologyId, null);
    if (existingAssignment != null) {
        aliveTasks = getAliveTasks(topologyId, allTaskIds);
        /*
         * Check if the topology master task is alive first since all task
         * heartbeat info is reported by topology master.
         * If master is dead, do reassignment for topology master first.
         */
        if (!aliveTasks.contains(topoMasterId)) {
            // NOTE(review): assumes the master task is present in the existing assignment;
            // a null worker here would NPE - confirm getWorkerByTaskId's contract.
            ResourceWorkerSlot worker = existingAssignment.getWorkerByTaskId(topoMasterId);
            deadTasks.addAll(worker.getTasks());
            // Only the master's worker is treated as dead; every other task counts as alive.
            Set<Integer> tempSet = new HashSet<Integer>(allTaskIds);
            tempSet.removeAll(deadTasks);
            aliveTasks.addAll(tempSet);
            aliveTasks.removeAll(deadTasks);
        } else {
            deadTasks.addAll(allTaskIds);
            deadTasks.removeAll(aliveTasks);
        }
        unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment);
    }
    ret.setDeadTaskIds(deadTasks);
    ret.setUnstoppedTaskIds(unstoppedTasks);

    // Step 2: get all slots resource, free slots/ alive slots/ unstopped
    // slots
    getFreeSlots(supInfos, stormClusterState);
    ret.setCluster(supInfos);

    if (existingAssignment == null) {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW);
        // Best effort: a backed-up assignment is optional, so a failure to read it is only logged.
        try {
            AssignmentBak lastAssignment = stormClusterState.assignment_bak(event.getTopologyName());
            if (lastAssignment != null) {
                ret.setOldAssignment(lastAssignment.getAssignment());
            }
        } catch (Exception e) {
            LOG.warn("Fail to get old assignment", e);
        }
    } else {
        ret.setOldAssignment(existingAssignment);
        if (event.isScratch()) {
            ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE);
            ret.setIsReassign(event.isReassign());
            // Rebalance: only workers of the unstopped tasks are reported as unstopped.
            Set<ResourceWorkerSlot> unstoppedWorkers = getUnstoppedWorkers(unstoppedTasks, existingAssignment);
            ret.setUnstoppedWorkers(unstoppedWorkers);
        } else {
            ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR);
            // Monitor: workers of all alive tasks are reported as unstopped.
            Set<ResourceWorkerSlot> unstoppedWorkers = getUnstoppedWorkers(aliveTasks, existingAssignment);
            ret.setUnstoppedWorkers(unstoppedWorkers);
        }
    }
    return ret;
}
Also used : AssignmentBak(com.alibaba.jstorm.schedule.AssignmentBak) TopologyAssignContext(com.alibaba.jstorm.schedule.TopologyAssignContext) HashMap(java.util.HashMap) FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) StormTopology(backtype.storm.generated.StormTopology) IOException(java.io.IOException) SupervisorInfo(com.alibaba.jstorm.daemon.supervisor.SupervisorInfo) FailedAssignTopologyException(com.alibaba.jstorm.utils.FailedAssignTopologyException) IOException(java.io.IOException) Assignment(com.alibaba.jstorm.schedule.Assignment) StormClusterState(com.alibaba.jstorm.cluster.StormClusterState) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) HashSet(java.util.HashSet) ResourceWorkerSlot(com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot)

Example 18 with Assignment

use of com.alibaba.jstorm.schedule.Assignment in project jstorm by alibaba.

the class TopologyAssign method mkAssignment.

/**
 * Computes the assignment for a topology and publishes it to the cluster state.
 *
 * <p>The assign context is prepared first, then the worker slots are produced either by the
 * local-mode assigner or by the default scheduler. When at least one slot is produced, an
 * {@link Assignment} is built, stored through {@code set_assignment}, and the task heartbeat
 * start times and the topology task timeout are refreshed.
 *
 * @param event the assign event describing the topology to assign
 * @return the newly stored assignment, or {@code null} when no worker slots were produced
 * @throws Exception if preparing the context, scheduling or updating cluster state fails
 */
public Assignment mkAssignment(TopologyAssignEvent event) throws Exception {
    String topologyId = event.getTopologyId();
    LOG.info("Determining assignment for " + topologyId);
    TopologyAssignContext context = prepareTopologyAssign(event);

    Set<ResourceWorkerSlot> workerSlots;
    if (StormConfig.local_mode(nimbusData.getConf())) {
        workerSlots = mkLocalAssignment(context);
    } else {
        IToplogyScheduler scheduler = schedulers.get(DEFAULT_SCHEDULER_NAME);
        workerSlots = scheduler.assignTasks(context);
    }

    // Nothing was scheduled: there is no assignment to build or publish.
    if (workerSlots == null || workerSlots.isEmpty()) {
        return null;
    }

    Map<String, String> nodeHost = getTopologyNodeHost(context.getCluster(), context.getOldAssignment(), workerSlots);
    Map<Integer, Integer> startTimes = getTaskStartTimes(context, nimbusData, topologyId, context.getOldAssignment(), workerSlots);
    String codeDir = (String) nimbusData.getConf().get(Config.STORM_LOCAL_DIR);
    Assignment result = new Assignment(codeDir, workerSlots, nodeHost, startTimes);

    // Tag the assignment when the event is a topology scale request.
    if (event.isScaleTopology()) {
        result.setAssignmentType(Assignment.AssignmentType.ScaleTopology);
    }

    nimbusData.getStormClusterState().set_assignment(topologyId, result);

    // update task heartbeat's start time
    NimbusUtils.updateTaskHbStartTime(nimbusData, result, topologyId);

    // @@@ TODO
    // Update metrics information in ZK when rebalance or reassignment
    // Only update metrics monitor status when creating topology
    // if (context.getAssignType() ==
    // TopologyAssignContext.ASSIGN_TYPE_REBALANCE
    // || context.getAssignType() ==
    // TopologyAssignContext.ASSIGN_TYPE_MONITOR)
    // NimbusUtils.updateMetricsInfo(nimbusData, topologyId, assignment);
    NimbusUtils.updateTopologyTaskTimeout(nimbusData, topologyId);

    LOG.info("Successfully make assignment for topology id " + topologyId + ": " + result);
    return result;
}
Also used : Assignment(com.alibaba.jstorm.schedule.Assignment) StormClusterState(com.alibaba.jstorm.cluster.StormClusterState) IToplogyScheduler(com.alibaba.jstorm.schedule.IToplogyScheduler) TopologyAssignContext(com.alibaba.jstorm.schedule.TopologyAssignContext) ResourceWorkerSlot(com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot)

Example 19 with Assignment

use of com.alibaba.jstorm.schedule.Assignment in project jstorm by alibaba.

the class RefreshEvent method doRefreshTopologies.

/**
 * Refreshes the per-topology metric contexts and syncs metric meta from the local cache.
 *
 * <p>Three passes are made:
 * <ol>
 *   <li>every entry of {@code JStormMetrics.SYS_TOPOLOGIES} that has no metric context yet
 *       gets one registered, followed by a meta sync from cache and from remote;</li>
 *   <li>every assignment in the cluster state either gets a new metric context or has the
 *       task number and worker set of its existing context brought up to date;</li>
 *   <li>a {@code RemoveTopologyEvent} is pushed for every metric context that is neither in
 *       {@code JStormMetrics.SYS_TOPOLOGY_SET} nor among the current assignments.</li>
 * </ol>
 *
 * <p>If the second pass throws, the failure is logged and the method returns without
 * running the third pass.
 */
@SuppressWarnings("unchecked")
private void doRefreshTopologies() {
    for (String topology : JStormMetrics.SYS_TOPOLOGIES) {
        if (!context.getTopologyMetricContexts().containsKey(topology)) {
            LOG.info("adding {} to metric context.", topology);
            Map conf = new HashMap();
            if (topology.equals(JStormMetrics.CLUSTER_METRIC_KEY)) {
                //there's no need to consider sample rate when cluster metrics merge
                conf.put(ConfigExtension.TOPOLOGY_METRIC_SAMPLE_RATE, 1.0);
            }
            Set<ResourceWorkerSlot> workerSlot = Sets.newHashSet(new ResourceWorkerSlot());
            TopologyMetricContext metricContext = new TopologyMetricContext(topology, workerSlot, conf);
            context.getTopologyMetricContexts().putIfAbsent(topology, metricContext);
            // Re-read from the map so the syncs run against whichever context is actually registered.
            syncMetaFromCache(topology, context.getTopologyMetricContexts().get(topology));
            syncMetaFromRemote(topology, context.getTopologyMetricContexts().get(topology));
        }
    }

    Map<String, Assignment> assignMap;
    try {
        assignMap = Cluster.get_all_assignment(context.getStormClusterState(), null);
        for (Entry<String, Assignment> entry : assignMap.entrySet()) {
            String topologyId = entry.getKey();
            Assignment assignment = entry.getValue();
            TopologyMetricContext metricContext = context.getTopologyMetricContexts().get(topologyId);
            if (metricContext == null) {
                metricContext = new TopologyMetricContext(assignment.getWorkers());
                metricContext.setTaskNum(NimbusUtils.getTopologyTaskNum(assignment));
                syncMetaFromCache(topologyId, metricContext);
                LOG.info("adding {} to metric context.", topologyId);
                context.getTopologyMetricContexts().put(topologyId, metricContext);
            } else {
                boolean modify = false;
                int taskNum = NimbusUtils.getTopologyTaskNum(assignment);
                if (metricContext.getTaskNum() != taskNum) {
                    modify = true;
                    metricContext.setTaskNum(taskNum);
                }
                if (!assignment.getWorkers().equals(metricContext.getWorkerSet())) {
                    modify = true;
                    metricContext.setWorkerSet(assignment.getWorkers());
                }
                // we may need to sync meta when task num/workers change
                metricContext.setSyncMeta(!modify);
            }
        }
    } catch (Exception e1) {
        // The try block covers both fetching and applying the assignments, so keep the
        // cause in the log to tell which step actually failed.
        LOG.warn("Failed to get assignments", e1);
        return;
    }

    // Collect first, then push events, so the key set is not iterated while events are pushed.
    List<String> removing = new ArrayList<>();
    for (String topologyId : context.getTopologyMetricContexts().keySet()) {
        if (!JStormMetrics.SYS_TOPOLOGY_SET.contains(topologyId) && !assignMap.containsKey(topologyId)) {
            removing.add(topologyId);
        }
    }
    for (String topologyId : removing) {
        LOG.info("removing topology:{}", topologyId);
        RemoveTopologyEvent.pushEvent(topologyId);
    }
}
Also used : HashMap(java.util.HashMap) TopologyMetricContext(com.alibaba.jstorm.metric.TopologyMetricContext) ArrayList(java.util.ArrayList) Assignment(com.alibaba.jstorm.schedule.Assignment) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) Map(java.util.Map) ResourceWorkerSlot(com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot)

Example 20 with Assignment

use of com.alibaba.jstorm.schedule.Assignment in project jstorm by alibaba.

the class SyncSupervisorEvent method run.

/**
 * Runs one supervisor synchronization pass.
 *
 * <p>The pass loads the cached assignments and their versions from {@code localState},
 * refreshes them through {@code getAllAssignments} (or clears them when the machine check
 * status is panic or error), derives the per-port local assignments, stores them, downloads
 * the topology code that is needed, removes topologies that are no longer needed, runs
 * {@code syncProcesses}, and finally persists the assignment cache back to
 * {@code localState}.
 *
 * <p>Any exception raised during the pass is logged and not rethrown. After the pass, a
 * panic machine check status halts the process.
 */
@Override
public void run() {
    LOG.debug("Synchronizing supervisor, interval seconds:" + TimeUtils.time_delta(lastTime));
    lastTime = TimeUtils.current_time_secs();
    // Snapshot the check status so the same value is used for the whole run of this sync pass
    MachineCheckStatus checkStatus = new MachineCheckStatus();
    checkStatus.SetType(heartbeat.getCheckStatus().getType());
    try {
        RunnableCallback syncCallback = new EventManagerZkPusher(this, syncSupEventManager);
        // Load the cached assignment versions; start from an empty map when nothing is cached
        Map<String, Integer> assignmentVersion = (Map<String, Integer>) localState.get(Common.LS_LOCAL_ZK_ASSIGNMENT_VERSION);
        if (assignmentVersion == null) {
            assignmentVersion = new HashMap<String, Integer>();
        }
        // Load the cached assignments; start from an empty map when nothing is cached
        Map<String, Assignment> assignments = (Map<String, Assignment>) localState.get(Common.LS_LOCAl_ZK_ASSIGNMENTS);
        if (assignments == null) {
            assignments = new HashMap<String, Assignment>();
        }
        LOG.debug("get local assignments  " + assignments);
        LOG.debug("get local assignments version " + assignmentVersion);
        if (checkStatus.getType().equals(MachineCheckStatus.StatusType.panic) || checkStatus.getType().equals(MachineCheckStatus.StatusType.error)) {
            // if status is panic or error, clear all assignments so that all workers get killed
            assignmentVersion.clear();
            assignments.clear();
            LOG.warn("Supervisor Machine Check Status :" + checkStatus.getType() + ", so kill all workers.");
        } else {
            getAllAssignments(assignmentVersion, assignments, syncCallback);
        }
        LOG.debug("Get all assignments " + assignments);
        /**
         * Step 2: get topologyIds list from STORM-LOCAL-DIR/supervisor/stormdist/
         */
        List<String> downloadedTopologyIds = StormConfig.get_supervisor_toplogy_list(conf);
        LOG.debug("Downloaded storm ids: " + downloadedTopologyIds);
        /**
         * Step 3: get <port,LocalAssignments> from ZK local node's assignment
         */
        Map<Integer, LocalAssignment> zkAssignment;
        zkAssignment = getLocalAssign(stormClusterState, supervisorId, assignments);
        Map<Integer, LocalAssignment> localAssignment;
        /**
         * Step 4: write local assignment to LocalState
         */
        try {
            LOG.debug("Writing local assignment " + zkAssignment);
            // Read the previously stored local assignment BEFORE overwriting it; it is
            // compared against the new one in Step 5
            localAssignment = (Map<Integer, LocalAssignment>) localState.get(Common.LS_LOCAL_ASSIGNMENTS);
            if (localAssignment == null) {
                localAssignment = new HashMap<Integer, LocalAssignment>();
            }
            localState.put(Common.LS_LOCAL_ASSIGNMENTS, zkAssignment);
        } catch (IOException e) {
            LOG.error("put LS_LOCAL_ASSIGNMENTS " + zkAssignment + " of localState failed");
            throw e;
        }
        /**
         * Step 5: get the topologies that need to be reloaded
         */
        Set<String> updateTopologys;
        updateTopologys = getUpdateTopologys(localAssignment, zkAssignment, assignments);
        Set<String> reDownloadTopologys = getNeedReDownloadTopologys(localAssignment);
        if (reDownloadTopologys != null) {
            updateTopologys.addAll(reDownloadTopologys);
        }
        /**
         * Step 6: download code from ZK
         */
        Map<String, String> topologyCodes = getTopologyCodeLocations(assignments, supervisorId);
        // downloadFailedTopologyIds collects topologies whose binary could not be downloaded from nimbus
        Set<String> downloadFailedTopologyIds = new HashSet<String>();
        downloadTopology(topologyCodes, downloadedTopologyIds, updateTopologys, assignments, downloadFailedTopologyIds);
        /**
         * Step 7: remove any downloaded useless topology
         */
        removeUselessTopology(topologyCodes, downloadedTopologyIds);
        /**
         * Step 8: run syncProcesses directly (it used to be pushed as an event)
         */
        // processEventManager.add(syncProcesses);
        syncProcesses.run(zkAssignment, downloadFailedTopologyIds);
        // If everything is OK, set the trigger to update heartbeat of
        // supervisor
        heartbeat.updateHbTrigger(true);
        try {
            // update localState with the refreshed assignment cache
            localState.put(Common.LS_LOCAL_ZK_ASSIGNMENT_VERSION, assignmentVersion);
            localState.put(Common.LS_LOCAl_ZK_ASSIGNMENTS, assignments);
        } catch (IOException e) {
            LOG.error("put LS_LOCAL_ZK_ASSIGNMENT_VERSION&&LS_LOCAl_ZK_ASSIGNMENTS  failed");
            throw e;
        }
    } catch (Exception e) {
        // Failures are logged and not rethrown, so the method still reaches the panic check below
        LOG.error("Failed to Sync Supervisor", e);
    // throw new RuntimeException(e);
    }
    if (checkStatus.getType().equals(MachineCheckStatus.StatusType.panic)) {
        // if status is panic, halt the supervisor process
        JStormUtils.halt_process(0, "Supervisor Machine Check Status : Panic , !!!!shutdown!!!!");
    }
}
Also used : RunnableCallback(com.alibaba.jstorm.callback.RunnableCallback) IOException(java.io.IOException) TException(org.apache.thrift.TException) IOException(java.io.IOException) FileExistsException(org.apache.commons.io.FileExistsException) LocalAssignment(com.alibaba.jstorm.daemon.worker.LocalAssignment) Assignment(com.alibaba.jstorm.schedule.Assignment) LocalAssignment(com.alibaba.jstorm.daemon.worker.LocalAssignment) EventManagerZkPusher(com.alibaba.jstorm.event.EventManagerZkPusher)

Aggregations

Assignment (com.alibaba.jstorm.schedule.Assignment)24 ResourceWorkerSlot (com.alibaba.jstorm.schedule.default_assign.ResourceWorkerSlot)11 HashMap (java.util.HashMap)8 StormClusterState (com.alibaba.jstorm.cluster.StormClusterState)7 SupervisorInfo (com.alibaba.jstorm.daemon.supervisor.SupervisorInfo)6 IOException (java.io.IOException)6 LocalAssignment (com.alibaba.jstorm.daemon.worker.LocalAssignment)5 FailedAssignTopologyException (com.alibaba.jstorm.utils.FailedAssignTopologyException)5 FileNotFoundException (java.io.FileNotFoundException)5 ArrayList (java.util.ArrayList)5 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)5 TException (org.apache.thrift.TException)5 AlreadyAliveException (backtype.storm.generated.AlreadyAliveException)4 InvalidTopologyException (backtype.storm.generated.InvalidTopologyException)4 KeyAlreadyExistsException (backtype.storm.generated.KeyAlreadyExistsException)4 KeyNotFoundException (backtype.storm.generated.KeyNotFoundException)4 NotAliveException (backtype.storm.generated.NotAliveException)4 TopologyAssignException (backtype.storm.generated.TopologyAssignException)4 InvalidParameterException (java.security.InvalidParameterException)4 SupervisorSummary (backtype.storm.generated.SupervisorSummary)3