Search in sources :

Example 6 with ResourceAssignment

use of org.apache.helix.model.ResourceAssignment in project helix by apache.

the class JobRebalancer method computeResourceMapping.

/**
 * Computes the task-partition to instance/state mapping for one job and returns it as a
 * ResourceAssignment, updating workflowCtx and jobCtx as it goes.
 *
 * Flow visible in this listing:
 *  - reconcile the job state kept in workflowCtx with the workflow target state and the timeouts;
 *  - fail the job and return an empty assignment when the job has no task partitions;
 *  - walk the previous instance to task assignments and react to each task's current state;
 *  - handle the failure threshold, FAILING, completed and TIMING_OUT cases, each of which returns early;
 *  - otherwise hand out new tasks per instance, throttled by job and participant limits.
 *
 * NOTE(review): this listing is an extracted excerpt. Closing braces, several statements and the
 * tail of some expressions (for example the second argument of every
 * "new PartitionAssignment(instance," call) are missing, so it does not compile as shown.
 * Restore it from the upstream JobRebalancer source before relying on any detail.
 *
 * @param jobResource name of the job resource being rebalanced
 * @param workflowConfig configuration of the owning workflow; supplies the target state
 * @param jobCfg job configuration (timeout, attempt limits, failure threshold, concurrency)
 * @param prevTaskToInstanceStateAssignment assignment produced by the previous run
 * @param liveInstances instances considered when reading the previous assignment and when
 *        computing the target assignment
 * @param currStateOutput current, pending and requested states reported for the job's partitions
 * @param workflowCtx workflow context; job states are read from and written to it
 * @param jobCtx job context; per-partition state, assignee and start time are written to it
 * @param partitionsToDropFromIs not referenced anywhere in the visible excerpt
 * @param cache cluster data cache (ideal states, instance/cluster config, active task counts)
 * @return the new assignment built from the collected partition assignments
 */
private ResourceAssignment computeResourceMapping(String jobResource, WorkflowConfig workflowConfig, JobConfig jobCfg, ResourceAssignment prevTaskToInstanceStateAssignment, Collection<String> liveInstances, CurrentStateOutput currStateOutput, WorkflowContext workflowCtx, JobContext jobCtx, Set<Integer> partitionsToDropFromIs, ClusterDataCache cache) {
    TargetState jobTgtState = workflowConfig.getTargetState();
    TaskState jobState = workflowCtx.getJobState(jobResource);
    TaskState workflowState = workflowCtx.getWorkflowState();
    // A running job starts timing out when its own timeout has elapsed or its workflow is TIMED_OUT.
    if (jobState == TaskState.IN_PROGRESS && (isTimeout(jobCtx.getStartTime(), jobCfg.getTimeout()) || TaskState.TIMED_OUT.equals(workflowState))) {
        jobState = TaskState.TIMING_OUT;
        workflowCtx.setJobState(jobResource, TaskState.TIMING_OUT);
    } else if (jobState != TaskState.TIMING_OUT && jobState != TaskState.FAILING) {
        // Update running status in workflow context
        if (jobTgtState == TargetState.STOP) {
            if (checkJobStopped(jobCtx)) {
                workflowCtx.setJobState(jobResource, TaskState.STOPPED);
            } else {
                workflowCtx.setJobState(jobResource, TaskState.STOPPING);
            // Workflow has been stopped if all in progress jobs are stopped
            if (isWorkflowStopped(workflowCtx, workflowConfig)) {
            } else {
        } else {
            workflowCtx.setJobState(jobResource, TaskState.IN_PROGRESS);
            // Workflow is in progress if any task is in progress
    // Used to keep track of tasks that have already been assigned to instances.
    Set<Integer> assignedPartitions = new HashSet<Integer>();
    // Used to keep track of tasks that have failed, but whose failure is acceptable
    Set<Integer> skippedPartitions = new HashSet<Integer>();
    // Keeps a mapping of (partition) -> (instance, state)
    Map<Integer, PartitionAssignment> paMap = new TreeMap<Integer, PartitionAssignment>();
    Set<String> excludedInstances = getExcludedInstances(jobResource, workflowConfig, cache);
    // Process all the current assignments of tasks.
    TaskAssignmentCalculator taskAssignmentCal = getAssignmentCalulator(jobCfg);
    Set<Integer> allPartitions = taskAssignmentCal.getAllTaskPartitions(jobCfg, jobCtx, workflowConfig, workflowCtx, cache.getIdealStates());
    if (allPartitions == null || allPartitions.isEmpty()) {
        // Empty target partitions, mark the job as FAILED.
        // NOTE(review): failureMsg is built but never used in the visible excerpt.
        String failureMsg = "Empty task partition mapping for job " + jobResource + ", marked the job as FAILED!";;
        failJob(jobResource, workflowCtx, jobCtx, workflowConfig, cache.getJobConfigMap());
        markAllPartitionsError(jobCtx, TaskPartitionState.ERROR, false);
        return new ResourceAssignment(jobResource);
    Map<String, SortedSet<Integer>> prevInstanceToTaskAssignments = getPrevInstanceToTaskAssignments(liveInstances, prevTaskToInstanceStateAssignment, allPartitions);
    // One timestamp is taken here and reused below for scheduling and for the non-ready check.
    long currentTime = System.currentTimeMillis();
    LOG.debug("All partitions: " + allPartitions + " taskAssignment: " + prevInstanceToTaskAssignments + " excludedInstances: " + excludedInstances);
    // Iterate through all instances
    for (String instance : prevInstanceToTaskAssignments.keySet()) {
        if (excludedInstances.contains(instance)) {
        Set<Integer> pSet = prevInstanceToTaskAssignments.get(instance);
        // Used to keep track of partitions that are in one of the final states: COMPLETED, TIMED_OUT,
        // TASK_ERROR, ERROR.
        Set<Integer> donePartitions = new TreeSet<Integer>();
        for (int pId : pSet) {
            final String pName = pName(jobResource, pId);
            TaskPartitionState currState = updateJobContextAndGetTaskCurrentState(currStateOutput, jobResource, pId, pName, instance, jobCtx);
            // Check for pending state transitions on this (partition, instance).
            Message pendingMessage = currStateOutput.getPendingState(jobResource, new Partition(pName), instance);
            if (pendingMessage != null && !pendingMessage.getToState().equals( {
                processTaskWithPendingMessage(prevTaskToInstanceStateAssignment, pId, pName, instance, pendingMessage, jobState, currState, paMap, assignedPartitions);
            // Process any requested state transitions.
            String requestedStateStr = currStateOutput.getRequestedState(jobResource, new Partition(pName), instance);
            if (requestedStateStr != null && !requestedStateStr.isEmpty()) {
                TaskPartitionState requestedState = TaskPartitionState.valueOf(requestedStateStr);
                if (requestedState.equals(currState)) {
                    LOG.warn(String.format("Requested state %s is the same as the current state for instance %s.", requestedState, instance));
                paMap.put(pId, new PartitionAssignment(instance,;
                LOG.debug(String.format("Instance %s requested a state transition to %s for partition %s.", instance, requestedState, pName));
            // Decide the next step for this task from the state it currently reports.
            switch(currState) {
                case RUNNING:
                        TaskPartitionState nextState = TaskPartitionState.RUNNING;
                        if (jobState == TaskState.TIMING_OUT) {
                            nextState = TaskPartitionState.TASK_ABORTED;
                        } else if (jobTgtState == TargetState.STOP) {
                            nextState = TaskPartitionState.STOPPED;
                        paMap.put(pId, new PartitionAssignment(instance,;
                        LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, nextState, instance));
                case STOPPED:
                        TaskPartitionState nextState;
                        if (jobTgtState == TargetState.START) {
                            nextState = TaskPartitionState.RUNNING;
                        } else {
                            nextState = TaskPartitionState.STOPPED;
                        paMap.put(pId, new PartitionAssignment(instance,;
                        LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, nextState, instance));
                case COMPLETED:
                        // The task has completed on this partition. Mark as such in the context object.
                        LOG.debug(String.format("Task partition %s has completed with state %s. Marking as such in rebalancer context.", pName, currState));
                        markPartitionCompleted(jobCtx, pId);
                case TIMED_OUT:
                case TASK_ERROR:
                case TASK_ABORTED:
                case ERROR:
                        // The task may be rescheduled on a different instance.
                        LOG.debug(String.format("Task partition %s has error state %s with msg %s. Marking as such in rebalancer context.", pName, currState, jobCtx.getPartitionInfo(pId)));
                        markPartitionError(jobCtx, pId, currState, true);
                        // After all tasks are aborted, they will be dropped, because of job timeout.
                        if (jobState != TaskState.TIMED_OUT && jobState != TaskState.TIMING_OUT) {
                            if (jobCtx.getPartitionNumAttempts(pId) >= jobCfg.getMaxAttemptsPerTask() || currState.equals(TaskPartitionState.TASK_ABORTED) || currState.equals(TaskPartitionState.ERROR)) {
                                LOG.debug("skippedPartitions:" + skippedPartitions);
                            } else {
                                // Mark the task to be started at some later time (if enabled)
                                markPartitionDelayed(jobCfg, jobCtx, pId);
                case INIT:
                case DROPPED:
                        // currState in [INIT, DROPPED]. Do nothing, the partition is eligible to be reassigned.
                        LOG.debug(String.format("Task partition %s has state %s. It will be dropped from the current ideal state.", pName, currState));
                    throw new AssertionError("Unknown enum symbol: " + currState);
        // Remove the set of task partitions that are completed or in one of the error states.
    addGiveupPartitions(skippedPartitions, jobCtx, allPartitions, jobCfg);
    // More skipped partitions than the failure threshold allows: fail the job right away if it has
    // already finished, otherwise move it to FAILING.
    if (jobState == TaskState.IN_PROGRESS && skippedPartitions.size() > jobCfg.getFailureThreshold()) {
        if (isJobFinished(jobCtx, jobResource, currStateOutput)) {
            failJob(jobResource, workflowCtx, jobCtx, workflowConfig, cache.getJobConfigMap());
            return buildEmptyAssignment(jobResource, currStateOutput);
        workflowCtx.setJobState(jobResource, TaskState.FAILING);
        // Drop all assigned but not given-up tasks
        for (int pId : jobCtx.getPartitionSet()) {
            String instance = jobCtx.getAssignedParticipant(pId);
            if (jobCtx.getPartitionState(pId) != null && !isTaskGivenup(jobCtx, jobCfg, pId)) {
                paMap.put(pId, new PartitionAssignment(instance,;
            Partition partition = new Partition(pName(jobResource, pId));
            Message pendingMessage = currStateOutput.getPendingState(jobResource, partition, instance);
            // so that Helix will cancel the transition.
            if (jobCtx.getPartitionState(pId) == TaskPartitionState.INIT && pendingMessage != null) {
                paMap.put(pId, new PartitionAssignment(instance,;
        return toResourceAssignment(jobResource, paMap);
    // A job that was already FAILING is failed for good once isJobFinished reports it finished.
    if (jobState == TaskState.FAILING && isJobFinished(jobCtx, jobResource, currStateOutput)) {
        failJob(jobResource, workflowCtx, jobCtx, workflowConfig, cache.getJobConfigMap());
        return buildEmptyAssignment(jobResource, currStateOutput);
    // Completed job: record completion, report the run time to the status monitor and clean up.
    if (isJobComplete(jobCtx, allPartitions, jobCfg)) {
        markJobComplete(jobResource, jobCtx, workflowConfig, workflowCtx, cache.getJobConfigMap());
        _clusterStatusMonitor.updateJobCounters(jobCfg, TaskState.COMPLETED, jobCtx.getFinishTime() - jobCtx.getStartTime());
        TaskUtil.cleanupJobIdealStateExtView(_manager.getHelixDataAccessor(), jobResource);
        return buildEmptyAssignment(jobResource, currStateOutput);
    // can be dropped(note that Helix doesn't track whether the drop is success or not).
    if (jobState == TaskState.TIMING_OUT && isJobFinished(jobCtx, jobResource, currStateOutput)) {
        workflowCtx.setJobState(jobResource, TaskState.TIMED_OUT);
        // Mark all INIT task to TASK_ABORTED
        for (int pId : jobCtx.getPartitionSet()) {
            if (jobCtx.getPartitionState(pId) == TaskPartitionState.INIT) {
                jobCtx.setPartitionState(pId, TaskPartitionState.TASK_ABORTED);
        _clusterStatusMonitor.updateJobCounters(jobCfg, TaskState.TIMED_OUT);
        TaskUtil.cleanupJobIdealStateExtView(_manager.getHelixDataAccessor(), jobResource);
        return buildEmptyAssignment(jobResource, currStateOutput);
    // For delayed tasks, trigger a rebalance event for the closest upcoming ready time
    scheduleForNextTask(jobResource, jobCtx, currentTime);
    // Make additional task assignments if needed.
    if (jobState != TaskState.TIMING_OUT && jobState != TaskState.TIMED_OUT && jobTgtState == TargetState.START) {
        // Contains the set of task partitions that must be excluded from consideration when making
        // any new assignments.
        // This includes all completed, failed, delayed, and already assigned partitions.
        Set<Integer> excludeSet = Sets.newTreeSet(assignedPartitions);
        addCompletedTasks(excludeSet, jobCtx, allPartitions);
        addGiveupPartitions(excludeSet, jobCtx, allPartitions, jobCfg);
        excludeSet.addAll(getNonReadyPartitions(jobCtx, currentTime));
        // Get instance->[partition, ...] mappings for the target resource.
        Map<String, SortedSet<Integer>> tgtPartitionAssignments = taskAssignmentCal.getTaskAssignment(currStateOutput, prevTaskToInstanceStateAssignment, liveInstances, jobCfg, jobCtx, workflowConfig, workflowCtx, allPartitions, cache.getIdealStates());
        if (!isGenericTaskJob(jobCfg) || jobCfg.isRebalanceRunningTask()) {
            dropRebalancedRunningTasks(tgtPartitionAssignments, prevInstanceToTaskAssignments, paMap, jobCtx);
        for (Map.Entry<String, SortedSet<Integer>> entry : prevInstanceToTaskAssignments.entrySet()) {
            String instance = entry.getKey();
            if (!tgtPartitionAssignments.containsKey(instance) || excludedInstances.contains(instance)) {
            // 1. throttled by job configuration
            // Contains the set of task partitions currently assigned to the instance.
            Set<Integer> pSet = entry.getValue();
            int jobCfgLimitation = jobCfg.getNumConcurrentTasksPerInstance() - pSet.size();
            // 2. throttled by participant capacity
            int participantCapacity = cache.getInstanceConfigMap().get(instance).getMaxConcurrentTask();
            // Fall back to the cluster-wide limit when the instance does not set its own.
            if (participantCapacity == InstanceConfig.MAX_CONCURRENT_TASK_NOT_SET) {
                participantCapacity = cache.getClusterConfig().getMaxConcurrentTaskPerInstance();
            int participantLimitation = participantCapacity - cache.getParticipantActiveTaskCount(instance);
            // New tasks to be assigned
            int numToAssign = Math.min(jobCfgLimitation, participantLimitation);
            LOG.debug(String.format("Throttle tasks to be assigned to instance %s using limitation: Job Concurrent Task(%d), " + "Participant Max Task(%d). Remaining capacity %d.", instance, jobCfgLimitation, participantCapacity, numToAssign));
            if (numToAssign > 0) {
                Set<Integer> throttledSet = new HashSet<Integer>();
                List<Integer> nextPartitions = getNextPartitions(tgtPartitionAssignments.get(instance), excludeSet, throttledSet, numToAssign);
                for (Integer pId : nextPartitions) {
                    String pName = pName(jobResource, pId);
                    paMap.put(pId, new PartitionAssignment(instance,;
                    jobCtx.setAssignedParticipant(pId, instance);
                    jobCtx.setPartitionState(pId, TaskPartitionState.INIT);
                    jobCtx.setPartitionStartTime(pId, System.currentTimeMillis());
                    LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, TaskPartitionState.RUNNING, instance));
                // Add the tasks just handed out to the cached active task count of this participant.
                cache.setParticipantActiveTaskCount(instance, cache.getParticipantActiveTaskCount(instance) + nextPartitions.size());
                if (!throttledSet.isEmpty()) {
                    LOG.debug(throttledSet.size() + "tasks are ready but throttled when assigned to participant.");
    return toResourceAssignment(jobResource, paMap);
Also used : Partition(org.apache.helix.model.Partition) Message(org.apache.helix.model.Message) TreeMap(java.util.TreeMap) SortedSet(java.util.SortedSet) ResourceAssignment(org.apache.helix.model.ResourceAssignment) TreeSet(java.util.TreeSet) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) TreeMap(java.util.TreeMap) HashSet(java.util.HashSet)

Example 7 with ResourceAssignment

use of org.apache.helix.model.ResourceAssignment in project helix by apache.

the class DeprecatedTaskRebalancer method emptyAssignment.

private static ResourceAssignment emptyAssignment(String name, CurrentStateOutput currStateOutput) {
    ResourceAssignment assignment = new ResourceAssignment(name);
    Set<Partition> partitions = currStateOutput.getCurrentStateMappedPartitions(name);
    for (Partition partition : partitions) {
        Map<String, String> currentStateMap = currStateOutput.getCurrentStateMap(name, partition);
        Map<String, String> replicaMap = Maps.newHashMap();
        for (String instanceName : currentStateMap.keySet()) {
            replicaMap.put(instanceName, HelixDefinedState.DROPPED.toString());
        assignment.addReplicaMap(partition, replicaMap);
    return assignment;
Also used : Partition(org.apache.helix.model.Partition) ResourceAssignment(org.apache.helix.model.ResourceAssignment)

Example 8 with ResourceAssignment

use of org.apache.helix.model.ResourceAssignment in project helix by apache.

the class DeprecatedTaskRebalancer method computeBestPossiblePartitionState.

/**
 * Computes the assignment for the job named by {@code resource.getResourceName()}.
 *
 * The method returns {@code emptyAssignment(...)} early when the job or workflow configuration is
 * null, when the ancestor / parallel-job check says the job must wait, when the workflow target
 * state is DELETE, when the workflow or the job has passed its expiry, when the job is already
 * FAILED or COMPLETED, or when scheduleIfNotReady reports that it is not ready. Otherwise it
 * delegates to computeResourceMapping, removes the partitions collected in partitionsToDrop from
 * the ideal state record and writes it back, updates the job and workflow contexts, and stores
 * the new assignment as the previous assignment for the next run.
 *
 * NOTE(review): this listing is an extracted excerpt. Closing braces are missing, the two
 * ancestor-state branches have no visible body (so notStartedCount and inCompleteCount are never
 * changed in the visible text), and several statements start with a bare string literal where a
 * call prefix appears to have been lost. It does not compile as shown.
 *
 * @param clusterData cluster data cache used to look up configs and contexts and to persist them
 * @param taskIs ideal state of the job; map fields of dropped partitions are removed from it
 * @param resource the resource (job) being rebalanced
 * @param currStateOutput current state passed to emptyAssignment and computeResourceMapping
 * @return the newly computed assignment, or an emptyAssignment result on any early exit
 */
public ResourceAssignment computeBestPossiblePartitionState(ClusterDataCache clusterData, IdealState taskIs, Resource resource, CurrentStateOutput currStateOutput) {
    final String resourceName = resource.getResourceName();
    LOG.debug("Computer Best Partition for resource: " + resourceName);
    // Fetch job configuration
    JobConfig jobCfg = (JobConfig) clusterData.getResourceConfig(resourceName);
    if (jobCfg == null) {
        LOG.debug("Job configuration is NULL for " + resourceName);
        return emptyAssignment(resourceName, currStateOutput);
    String workflowResource = jobCfg.getWorkflow();
    // Fetch workflow configuration and context
    WorkflowConfig workflowCfg = clusterData.getWorkflowConfig(workflowResource);
    if (workflowCfg == null) {
        LOG.debug("Workflow configuration is NULL for " + resourceName);
        return emptyAssignment(resourceName, currStateOutput);
    WorkflowContext workflowCtx = clusterData.getWorkflowContext(workflowResource);
    // Initialize workflow context if needed
    if (workflowCtx == null) {
        workflowCtx = new WorkflowContext(new ZNRecord(TaskUtil.WORKFLOW_CONTEXT_KW));
        workflowCtx.setName(workflowResource);"Workflow context for " + resourceName + " created!");
    // check ancestor job status
    int notStartedCount = 0;
    int inCompleteCount = 0;
    for (String ancestor : workflowCfg.getJobDag().getAncestors(resourceName)) {
        TaskState jobState = workflowCtx.getJobState(ancestor);
        // NOTE(review): both branch bodies are absent from this excerpt; presumably they increment
        // notStartedCount and inCompleteCount respectively - confirm against the upstream source.
        if (jobState == null || jobState == TaskState.NOT_STARTED) {
        } else if (jobState == TaskState.IN_PROGRESS || jobState == TaskState.STOPPED) {
    if (notStartedCount > 0 || (workflowCfg.isJobQueue() && inCompleteCount >= workflowCfg.getParallelJobs())) {
        LOG.debug("Job is not ready to be scheduled due to pending dependent jobs " + resourceName);
        return emptyAssignment(resourceName, currStateOutput);
    // Clean up if workflow marked for deletion
    TargetState targetState = workflowCfg.getTargetState();
    if (targetState == TargetState.DELETE) {"Workflow is marked as deleted " + workflowResource + " cleaning up the workflow context.");
        cleanup(_manager, resourceName, workflowCfg, workflowResource);
        return emptyAssignment(resourceName, currStateOutput);
    // Check if this workflow has been finished past its expiry.
    if (workflowCtx.getFinishTime() != WorkflowContext.UNFINISHED && workflowCtx.getFinishTime() + workflowCfg.getExpiry() <= System.currentTimeMillis()) {"Workflow " + workflowResource + " is completed and passed expiry time, cleaning up the workflow context.");
        markForDeletion(_manager, workflowResource);
        cleanup(_manager, resourceName, workflowCfg, workflowResource);
        return emptyAssignment(resourceName, currStateOutput);
    // Fetch any existing context information from the property store.
    JobContext jobCtx = clusterData.getJobContext(resourceName);
    if (jobCtx == null) {
        jobCtx = new JobContext(new ZNRecord(TaskUtil.TASK_CONTEXT_KW));
    // Check for expired jobs for non-terminable workflows
    long jobFinishTime = jobCtx.getFinishTime();
    if (!workflowCfg.isTerminable() && jobFinishTime != WorkflowContext.UNFINISHED && jobFinishTime + workflowCfg.getExpiry() <= System.currentTimeMillis()) {"Job " + resourceName + " is completed and passed expiry time, cleaning up the job context.");
        cleanup(_manager, resourceName, workflowCfg, workflowResource);
        return emptyAssignment(resourceName, currStateOutput);
    // The job is already in a final state (completed/failed).
    if (workflowCtx.getJobState(resourceName) == TaskState.FAILED || workflowCtx.getJobState(resourceName) == TaskState.COMPLETED) {
        LOG.debug("Job " + resourceName + " is failed or already completed.");
        return emptyAssignment(resourceName, currStateOutput);
    // Check for readiness, and stop processing if it's not ready
    boolean isReady = scheduleIfNotReady(workflowCfg, workflowCtx, workflowResource, resourceName, clusterData);
    if (!isReady) {
        LOG.debug("Job " + resourceName + " is not ready to be scheduled.");
        return emptyAssignment(resourceName, currStateOutput);
    // Grab the old assignment, or an empty one if it doesn't exist
    ResourceAssignment prevAssignment = getPrevResourceAssignment(_manager, resourceName);
    if (prevAssignment == null) {
        prevAssignment = new ResourceAssignment(resourceName);
    // Will contain the list of partitions that must be explicitly dropped from the ideal state that
    // is stored in zk.
    // Fetch the previous resource assignment from the property store. This is required because of
    // HELIX-230.
    Set<Integer> partitionsToDrop = new TreeSet<Integer>();
    ResourceAssignment newAssignment = computeResourceMapping(resourceName, workflowCfg, jobCfg, prevAssignment, clusterData.getLiveInstances().keySet(), currStateOutput, workflowCtx, jobCtx, partitionsToDrop, clusterData);
    // Remove the dropped partitions from the ideal state record, then write the ideal state back.
    if (!partitionsToDrop.isEmpty()) {
        for (Integer pId : partitionsToDrop) {
            taskIs.getRecord().getMapFields().remove(pName(resourceName, pId));
        HelixDataAccessor accessor = _manager.getHelixDataAccessor();
        PropertyKey propertyKey = accessor.keyBuilder().idealStates(resourceName);
        accessor.setProperty(propertyKey, taskIs);
    // Update Workflow and Job context in data cache and ZK.
    clusterData.updateJobContext(resourceName, jobCtx, _manager.getHelixDataAccessor());
    clusterData.updateWorkflowContext(workflowResource, workflowCtx, _manager.getHelixDataAccessor());
    // Remember this assignment so the next run can read it back as the previous assignment.
    setPrevResourceAssignment(_manager, resourceName, newAssignment);
    LOG.debug("Job " + resourceName + " new assignment " + Arrays.toString(newAssignment.getMappedPartitions().toArray()));
    return newAssignment;
Also used : ResourceAssignment(org.apache.helix.model.ResourceAssignment) HelixDataAccessor(org.apache.helix.HelixDataAccessor) TreeSet(java.util.TreeSet) ZNRecord(org.apache.helix.ZNRecord) PropertyKey(org.apache.helix.PropertyKey)

Example 9 with ResourceAssignment

use of org.apache.helix.model.ResourceAssignment in project helix by apache.

the class JobRebalancer method computeBestPossiblePartitionState.

/**
 * Computes the assignment for the job named by {@code resource.getResourceName()}.
 *
 * The method returns {@code buildEmptyAssignment(...)} early when the job configuration, workflow
 * configuration or workflow context is null, when the workflow target state is neither START nor
 * STOP, when the workflow or job is already FAILED or COMPLETED (the job ideal state and external
 * view are cleaned up first), or when the workflow or the job is not ready to be scheduled.
 * Otherwise it picks the enabled live instances (restricted to the job's instance group tag when
 * one is set), delegates to computeResourceMapping, removes the partitions collected in
 * partitionsToDrop from the ideal state, updates the job and workflow contexts, and stores the
 * new assignment as the previous assignment for the next run.
 *
 * NOTE(review): this listing is an extracted excerpt. Closing braces are missing and several
 * statements start with a bare string literal where a call prefix appears to have been lost, so
 * it does not compile as shown.
 *
 * @param clusterData cluster data cache used to look up configs and contexts and to persist them
 * @param taskIs ideal state passed by the caller; it is overwritten from clusterData before use
 * @param resource the resource (job) being rebalanced
 * @param currStateOutput current state passed to buildEmptyAssignment and computeResourceMapping
 * @return the newly computed assignment, or a buildEmptyAssignment result on any early exit
 */
public ResourceAssignment computeBestPossiblePartitionState(ClusterDataCache clusterData, IdealState taskIs, Resource resource, CurrentStateOutput currStateOutput) {
    final String jobName = resource.getResourceName();
    LOG.debug("Computer Best Partition for job: " + jobName);
    // Fetch job configuration
    JobConfig jobCfg = clusterData.getJobConfig(jobName);
    if (jobCfg == null) {
        LOG.error("Job configuration is NULL for " + jobName);
        return buildEmptyAssignment(jobName, currStateOutput);
    String workflowResource = jobCfg.getWorkflow();
    // Fetch workflow configuration and context
    WorkflowConfig workflowCfg = clusterData.getWorkflowConfig(workflowResource);
    if (workflowCfg == null) {
        LOG.error("Workflow configuration is NULL for " + jobName);
        return buildEmptyAssignment(jobName, currStateOutput);
    WorkflowContext workflowCtx = clusterData.getWorkflowContext(workflowResource);
    if (workflowCtx == null) {
        LOG.error("Workflow context is NULL for " + jobName);
        return buildEmptyAssignment(jobName, currStateOutput);
    // Only workflows whose target state is START or STOP are processed any further.
    TargetState targetState = workflowCfg.getTargetState();
    if (targetState != TargetState.START && targetState != TargetState.STOP) {"Target state is " + + " for workflow " + workflowResource + ".Stop scheduling job " + jobName);
        return buildEmptyAssignment(jobName, currStateOutput);
    // Stop current run of the job if workflow or job is already in final state (failed or completed)
    TaskState workflowState = workflowCtx.getWorkflowState();
    TaskState jobState = workflowCtx.getJobState(jobName);
    // The job is already in a final state (completed/failed).
    if (workflowState == TaskState.FAILED || workflowState == TaskState.COMPLETED || jobState == TaskState.FAILED || jobState == TaskState.COMPLETED) {"Workflow %s or job %s is already failed or completed, workflow state (%s), job state (%s), clean up job IS.", workflowResource, jobName, workflowState, jobState));
        TaskUtil.cleanupJobIdealStateExtView(_manager.getHelixDataAccessor(), jobName);
        return buildEmptyAssignment(jobName, currStateOutput);
    if (!isWorkflowReadyForSchedule(workflowCfg)) {"Job is not ready to be run since workflow is not ready " + jobName);
        return buildEmptyAssignment(jobName, currStateOutput);
    // A job that has not started yet must also pass the ready-to-schedule check.
    if (!isJobStarted(jobName, workflowCtx) && !isJobReadyToSchedule(jobName, workflowCfg, workflowCtx, getInCompleteJobCount(workflowCfg, workflowCtx), clusterData.getJobConfigMap())) {"Job is not ready to run " + jobName);
        return buildEmptyAssignment(jobName, currStateOutput);
    // Fetch any existing context information from the property store.
    JobContext jobCtx = clusterData.getJobContext(jobName);
    if (jobCtx == null) {
        jobCtx = new JobContext(new ZNRecord(TaskUtil.TASK_CONTEXT_KW));
        workflowCtx.setJobState(jobName, TaskState.IN_PROGRESS);
    if (!TaskState.TIMED_OUT.equals(workflowCtx.getJobState(jobName))) {
        scheduleRebalanceForTimeout(jobCfg.getJobId(), jobCtx.getStartTime(), jobCfg.getTimeout());
    // Grab the old assignment, or an empty one if it doesn't exist
    ResourceAssignment prevAssignment = getPrevResourceAssignment(jobName);
    if (prevAssignment == null) {
        prevAssignment = new ResourceAssignment(jobName);
    // Will contain the list of partitions that must be explicitly dropped from the ideal state that
    // is stored in zk.
    // Fetch the previous resource assignment from the property store. This is required because of
    // HELIX-230.
    // Use every enabled live instance unless the job names an instance group tag.
    Set<String> liveInstances = jobCfg.getInstanceGroupTag() == null ? clusterData.getEnabledLiveInstances() : clusterData.getEnabledLiveInstancesWithTag(jobCfg.getInstanceGroupTag());
    if (liveInstances.isEmpty()) {
        LOG.error("No available instance found for job!");
    Set<Integer> partitionsToDrop = new TreeSet<Integer>();
    ResourceAssignment newAssignment = computeResourceMapping(jobName, workflowCfg, jobCfg, prevAssignment, liveInstances, currStateOutput, workflowCtx, jobCtx, partitionsToDrop, clusterData);
    HelixDataAccessor accessor = _manager.getHelixDataAccessor();
    PropertyKey propertyKey = accessor.keyBuilder().idealStates(jobName);
    // The taskIs argument is replaced by the ideal state held in the cache, which may be null.
    taskIs = clusterData.getIdealState(jobName);
    if (!partitionsToDrop.isEmpty() && taskIs != null) {
        for (Integer pId : partitionsToDrop) {
            taskIs.getRecord().getMapFields().remove(pName(jobName, pId));
        accessor.setProperty(propertyKey, taskIs);
    // Update Workflow and Job context in data cache and ZK.
    clusterData.updateJobContext(jobName, jobCtx, _manager.getHelixDataAccessor());
    clusterData.updateWorkflowContext(workflowResource, workflowCtx, _manager.getHelixDataAccessor());
    // Remember this assignment so the next run can read it back as the previous assignment.
    setPrevResourceAssignment(jobName, newAssignment);
    LOG.debug("Job " + jobName + " new assignment " + Arrays.toString(newAssignment.getMappedPartitions().toArray()));
    return newAssignment;
Also used : ResourceAssignment(org.apache.helix.model.ResourceAssignment) HelixDataAccessor(org.apache.helix.HelixDataAccessor) TreeSet(java.util.TreeSet) ZNRecord(org.apache.helix.ZNRecord) PropertyKey(org.apache.helix.PropertyKey)

Example 10 with ResourceAssignment

use of org.apache.helix.model.ResourceAssignment in project helix by apache.

the class JobRebalancer method toResourceAssignment.

private ResourceAssignment toResourceAssignment(String jobResource, Map<Integer, PartitionAssignment> paMap) {
    // Construct a ResourceAssignment object from the map of partition assignments.
    ResourceAssignment ra = new ResourceAssignment(jobResource);
    for (Map.Entry<Integer, PartitionAssignment> e : paMap.entrySet()) {
        PartitionAssignment pa = e.getValue();
        ra.addReplicaMap(new Partition(pName(jobResource, e.getKey())), ImmutableMap.of(pa._instance, pa._state));
    return ra;
Also used : Partition(org.apache.helix.model.Partition) ResourceAssignment(org.apache.helix.model.ResourceAssignment) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) TreeMap(java.util.TreeMap)


ResourceAssignment (org.apache.helix.model.ResourceAssignment)11 Partition (org.apache.helix.model.Partition)8 TreeSet (java.util.TreeSet)4 ImmutableMap (com.google.common.collect.ImmutableMap) HashMap (java.util.HashMap)3 Map (java.util.Map)3 TreeMap (java.util.TreeMap)3 StateModelDefinition (org.apache.helix.model.StateModelDefinition)3 HashSet (java.util.HashSet)2 SortedSet (java.util.SortedSet)2 HelixDataAccessor (org.apache.helix.HelixDataAccessor)2 PropertyKey (org.apache.helix.PropertyKey)2 ZNRecord (org.apache.helix.ZNRecord)2 Message (org.apache.helix.model.Message)2 BiMap (com.google.common.collect.BiMap) HashBiMap (com.google.common.collect.HashBiMap) ClusterConfig (org.apache.helix.model.ClusterConfig)1 Test (org.testng.annotations.Test)1