Search in sources :

Example 1 with ResourceAssignment

use of org.apache.helix.model.ResourceAssignment in project helix by apache.

the class AbstractRebalancer method computeBestPossiblePartitionState.

/**
 * Computes the best possible state for every partition of the given resource.
 * <p>
 * This is the default implementation; subclasses should re-implement this method if their logic
 * for generating the best-possible map of a partition differs from the default one here.
 * <p>
 * For each partition, a preference list is obtained via {@code getPreferenceList} and combined
 * with the partition's current state map and its disabled instances by
 * {@code computeBestPossibleStateForPartition}.
 *
 * @param cache
 *          cluster data cache; supplies the state model definition, the live instances and the
 *          instances disabled for each partition
 * @param idealState
 *          ideal state of the resource; supplies the state model definition reference and is
 *          passed on to the per-partition computation
 * @param resource
 *          the resource whose partitions are processed
 * @param currentStateOutput
 *          Provides the current state and pending state transitions for all partitions
 * @return a {@link ResourceAssignment} named after the resource that holds one replica map per
 *         partition of the resource
 */
@Override
public ResourceAssignment computeBestPossiblePartitionState(ClusterDataCache cache, IdealState idealState, Resource resource, CurrentStateOutput currentStateOutput) {
    final String resourceName = resource.getResourceName();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Processing resource:" + resourceName);
    }
    String stateModelDefName = idealState.getStateModelDefRef();
    StateModelDefinition stateModelDef = cache.getStateModelDef(stateModelDefName);
    ResourceAssignment partitionMapping = new ResourceAssignment(resourceName);
    // The set of live instances does not depend on the partition, so it is looked up (and wrapped)
    // once here instead of twice on every loop iteration.
    final Set<String> liveInstances = cache.getLiveInstances().keySet();
    final Set<String> readOnlyLiveInstances = Collections.unmodifiableSet(liveInstances);
    for (Partition partition : resource.getPartitions()) {
        Map<String, String> currentStateMap = currentStateOutput.getCurrentStateMap(resourceName, partition);
        Set<String> disabledInstancesForPartition = cache.getDisabledInstancesForPartition(resourceName, partition.toString());
        // The preference list only gets a read-only view of the live instances.
        List<String> preferenceList = getPreferenceList(partition, idealState, readOnlyLiveInstances);
        Map<String, String> bestStateForPartition = computeBestPossibleStateForPartition(liveInstances, stateModelDef, preferenceList, currentStateMap, disabledInstancesForPartition, idealState);
        partitionMapping.addReplicaMap(partition, bestStateForPartition);
    }
    return partitionMapping;
}
Also used : Partition(org.apache.helix.model.Partition) ResourceAssignment(org.apache.helix.model.ResourceAssignment) StateModelDefinition(org.apache.helix.model.StateModelDefinition)

Example 2 with ResourceAssignment

use of org.apache.helix.model.ResourceAssignment in project helix by apache.

the class CustomRebalancer method computeBestPossiblePartitionState.

/**
 * Returns the best-possible partition mapping of a resource, reusing the mapping stored in the
 * cluster data cache when one exists and computing (then caching) it otherwise.
 *
 * @param cache cluster data cache; also holds the cached resource assignments
 * @param idealState ideal state of the resource; supplies the per-partition instance state maps
 * @param resource the resource whose partitions are processed
 * @param currentStateOutput provides the current state map of each partition
 * @return the cached mapping if present, otherwise a newly computed mapping
 */
@Override
public ResourceAssignment computeBestPossiblePartitionState(ClusterDataCache cache, IdealState idealState, Resource resource, CurrentStateOutput currentStateOutput) {
    final String resourceName = resource.getResourceName();

    // A BestPossible mapping already cached for this resource is returned as-is, without
    // recomputation. ClusterDataCache clears the cached mapping whenever the cluster state changes
    // in a way that could affect the BestPossible state.
    final ResourceAssignment cachedMapping = cache.getCachedResourceAssignment(resourceName);
    if (cachedMapping != null) {
        return cachedMapping;
    }

    LOG.info("Computing BestPossibleMapping for " + resourceName);
    final StateModelDefinition stateModelDef = cache.getStateModelDef(idealState.getStateModelDefRef());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Processing resource:" + resourceName);
    }

    final ResourceAssignment freshMapping = new ResourceAssignment(resourceName);
    for (Partition partition : resource.getPartitions()) {
        Map<String, String> currentStates = currentStateOutput.getCurrentStateMap(resourceName, partition);
        Set<String> disabledInstances = cache.getDisabledInstancesForPartition(resourceName, partition.toString());
        Map<String, String> idealStates = idealState.getInstanceStateMap(partition.getPartitionName());
        freshMapping.addReplicaMap(partition, computeCustomizedBestStateForPartition(cache, stateModelDef, idealStates, currentStates, disabledInstances, idealState.isEnabled()));
    }

    // Remember the result so that later calls can take the early return above.
    cache.setCachedResourceAssignment(resourceName, freshMapping);
    return freshMapping;
}
Also used : Partition(org.apache.helix.model.Partition) ResourceAssignment(org.apache.helix.model.ResourceAssignment) StateModelDefinition(org.apache.helix.model.StateModelDefinition)

Example 3 with ResourceAssignment

use of org.apache.helix.model.ResourceAssignment in project helix by apache.

the class DelayedAutoRebalancer method computeBestPossiblePartitionState.

/**
 * Computes the best possible state for every partition of the given resource.
 * This is the default implementation; subclasses should re-implement this method if their logic
 * for generating the best-possible map of a partition differs from the default one here.
 * <p>
 * The preference list of each partition is built from the active instances returned by
 * {@code getActiveInstances}, while the per-partition state computation itself is given the
 * live instances.
 *
 * @param cache cluster data cache; supplies enabled/live instances, cluster config, instance
 *          offline times, instance configs and the state model definition
 * @param idealState ideal state of the resource
 * @param resource the resource whose partitions are processed
 * @param currentStateOutput Provides the current state and pending state transitions for all partitions
 * @return a {@link ResourceAssignment} named after the resource that holds one replica map per
 *         partition of the resource
 */
@Override
public ResourceAssignment computeBestPossiblePartitionState(ClusterDataCache cache, IdealState idealState, Resource resource, CurrentStateOutput currentStateOutput) {
    final String resourceName = resource.getResourceName();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Processing resource:" + resourceName);
    }

    final Set<String> enabledInstances = cache.getEnabledInstances();
    final Set<String> liveInstances = cache.getLiveInstances().keySet();
    final ClusterConfig clusterConfig = cache.getClusterConfig();
    final long rebalanceDelay = getRebalanceDelay(idealState, clusterConfig);
    final Set<String> activeInstances = getActiveInstances(
        enabledInstances,
        idealState,
        liveInstances,
        cache.getInstanceOfflineTimeMap(),
        cache.getLiveInstances().keySet(),
        cache.getInstanceConfigMap(),
        rebalanceDelay,
        clusterConfig);

    final StateModelDefinition stateModelDef = cache.getStateModelDef(idealState.getStateModelDefRef());
    final ResourceAssignment result = new ResourceAssignment(resourceName);
    for (Partition partition : resource.getPartitions()) {
        Map<String, String> currentStates = currentStateOutput.getCurrentStateMap(resourceName, partition);
        Set<String> disabledInstances = cache.getDisabledInstancesForPartition(resourceName, partition.toString());
        List<String> preferenceList = getPreferenceList(partition, idealState, activeInstances);
        result.addReplicaMap(partition, computeBestPossibleStateForPartition(liveInstances, stateModelDef, preferenceList, currentStates, disabledInstances, idealState));
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Best possible mapping for resource  " + resourceName + ": " + result);
    }
    return result;
}
Also used : Partition(org.apache.helix.model.Partition) ResourceAssignment(org.apache.helix.model.ResourceAssignment) StateModelDefinition(org.apache.helix.model.StateModelDefinition) ClusterConfig(org.apache.helix.model.ClusterConfig)

Example 4 with ResourceAssignment

use of org.apache.helix.model.ResourceAssignment in project helix by apache.

the class TestJobStateOnCreation method testJobStateOnCreation.

/**
 * Registers a single-job workflow in the cache, runs the workflow rebalancer once and checks that
 * every job state recorded in the resulting workflow context is {@code NOT_STARTED}.
 */
@Test
public void testJobStateOnCreation() {
    // Describe a workflow that contains exactly one job.
    Workflow.Builder workflowBuilder = new Workflow.Builder(WORKFLOW_NAME);
    JobConfig.Builder jobBuilder = new JobConfig.Builder()
        .setCommand(MockTask.TASK_COMMAND)
        .setTargetResource(WORKFLOW_NAME)
        .setTargetPartitionStates(Sets.newHashSet("SLAVE", "MASTER"))
        .setJobCommandConfigMap(WorkflowGenerator.DEFAULT_COMMAND_CONFIG);
    final String jobName = "job";
    workflowBuilder = workflowBuilder.addJob(jobName, jobBuilder);

    // Merge the job's record into the workflow config and publish both configs through the cache.
    WorkflowConfig workflowConfig = workflowBuilder.build().getWorkflowConfig();
    JobConfig jobConfig = jobBuilder.build();
    workflowConfig.getRecord().merge(jobConfig.getRecord());
    _cache.getJobConfigMap().put(WORKFLOW_NAME + "_" + jobName, jobConfig);
    _cache.getWorkflowConfigMap().put(WORKFLOW_NAME, workflowConfig);

    WorkflowRebalancer rebalancer = new WorkflowRebalancer();
    rebalancer.init(_manager);
    // The returned assignment is not inspected; the assertions below only read the workflow context.
    ResourceAssignment assignment = rebalancer.computeBestPossiblePartitionState(_cache, _idealState, _resource, _currStateOutput);

    Map<String, TaskState> jobStates = _cache.getWorkflowContext(WORKFLOW_NAME).getJobStates();
    for (TaskState state : jobStates.values()) {
        Assert.assertEquals(state, TaskState.NOT_STARTED);
    }
}
Also used : ResourceAssignment(org.apache.helix.model.ResourceAssignment) Test(org.testng.annotations.Test)

Example 5 with ResourceAssignment

use of org.apache.helix.model.ResourceAssignment in project helix by apache.

the class DeprecatedTaskRebalancer method computeResourceMapping.

/**
 * Computes the task partition assignment of a job and updates the workflow and job contexts
 * along the way.
 * <p>
 * The method proceeds in these steps:
 * <ol>
 * <li>Set the job/workflow state in the workflow context from the workflow's target state.</li>
 * <li>Walk the task partitions currently assigned to each (non-excluded) instance and decide, per
 * partition, whether to carry the previous assignment forward, honor a requested state, keep it
 * running/stopped, or mark it completed, failed or eligible for reassignment.</li>
 * <li>Schedule a rebalance for delayed tasks and mark the job (and possibly the workflow)
 * completed if applicable.</li>
 * <li>If the target state is START, hand out additional partitions to instances that have spare
 * capacity.</li>
 * <li>Convert the collected partition assignments into a {@link ResourceAssignment}.</li>
 * </ol>
 *
 * @param jobResource name of the job resource; used to build partition names and as the job key
 *          in the workflow context
 * @param workflowConfig workflow configuration; supplies the target state and whether the
 *          workflow is terminable
 * @param jobCfg job configuration; supplies max attempts per task, failure threshold, task
 *          configs and the number of concurrent tasks per instance
 * @param prevAssignment previous assignment; its replica maps are copied forward for partitions
 *          with a pending state transition
 * @param liveInstances instances handed to the helpers that compute per-instance partition sets
 * @param currStateOutput source of pending, current and requested states per (partition, instance)
 * @param workflowCtx workflow context; its job state, workflow state and finish time are updated
 * @param jobCtx job context; partition states, assigned participants and finish time are updated
 * @param partitionsToDropFromIs output set; receives the ids of completed and skipped partitions,
 *          or (via {@code addAllPartitions}) all partitions when the job is marked failed
 * @param cache cluster data cache passed through to helper methods
 * @return the new assignment with one (instance, state) pair per assigned partition, or the
 *         result of {@code emptyAssignment} when the job has just been marked as failed
 */
private ResourceAssignment computeResourceMapping(String jobResource, WorkflowConfig workflowConfig, JobConfig jobCfg, ResourceAssignment prevAssignment, Collection<String> liveInstances, CurrentStateOutput currStateOutput, WorkflowContext workflowCtx, JobContext jobCtx, Set<Integer> partitionsToDropFromIs, ClusterDataCache cache) {
    TargetState jobTgtState = workflowConfig.getTargetState();
    // Update running status in workflow context
    if (jobTgtState == TargetState.STOP) {
        workflowCtx.setJobState(jobResource, TaskState.STOPPED);
        // Workflow has been stopped if all jobs are stopped
        if (isWorkflowStopped(workflowCtx, workflowConfig)) {
            workflowCtx.setWorkflowState(TaskState.STOPPED);
        }
    } else {
        workflowCtx.setJobState(jobResource, TaskState.IN_PROGRESS);
        // Workflow is in progress if any task is in progress
        workflowCtx.setWorkflowState(TaskState.IN_PROGRESS);
    }
    // Used to keep track of tasks that have already been assigned to instances.
    Set<Integer> assignedPartitions = new HashSet<Integer>();
    // Used to keep track of tasks that have failed, but whose failure is acceptable
    Set<Integer> skippedPartitions = new HashSet<Integer>();
    // Keeps a mapping of (partition) -> (instance, state)
    Map<Integer, PartitionAssignment> paMap = new TreeMap<Integer, PartitionAssignment>();
    // Instances in this set are skipped both when processing current assignments and when
    // handing out new ones.
    Set<String> excludedInstances = getInstancesAssignedToOtherJobs(jobResource, workflowConfig, cache);
    // Process all the current assignments of tasks.
    Set<Integer> allPartitions = getAllTaskPartitions(jobCfg, jobCtx, workflowConfig, workflowCtx, cache);
    Map<String, SortedSet<Integer>> taskAssignments = getTaskPartitionAssignments(liveInstances, prevAssignment, allPartitions);
    // A single timestamp is used for all finish times and delay checks within this call.
    long currentTime = System.currentTimeMillis();
    for (String instance : taskAssignments.keySet()) {
        if (excludedInstances.contains(instance)) {
            continue;
        }
        Set<Integer> pSet = taskAssignments.get(instance);
        // Used to keep track of partitions that are to be removed from this instance's set: those
        // in one of the final states (COMPLETED, TIMED_OUT, TASK_ERROR, ERROR) as well as those
        // in INIT or DROPPED.
        Set<Integer> donePartitions = new TreeSet<Integer>();
        for (int pId : pSet) {
            final String pName = pName(jobResource, pId);
            // Check for pending state transitions on this (partition, instance).
            Message pendingMessage = currStateOutput.getPendingState(jobResource, new Partition(pName), instance);
            if (pendingMessage != null) {
                // There is a pending state transition for this (partition, instance). Just copy forward
                // the state assignment from the previous ideal state.
                Map<String, String> stateMap = prevAssignment.getReplicaMap(new Partition(pName));
                if (stateMap != null) {
                    // NOTE(review): prevState is null if the previous replica map has no entry for this
                    // instance; it is stored as-is — confirm downstream code tolerates that.
                    String prevState = stateMap.get(instance);
                    paMap.put(pId, new PartitionAssignment(instance, prevState));
                    assignedPartitions.add(pId);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(String.format("Task partition %s has a pending state transition on instance %s. Using the previous ideal state which was %s.", pName, instance, prevState));
                    }
                }
                continue;
            }
            // NOTE(review): valueOf throws if getCurrentState returns null or a name that is not a
            // TaskPartitionState constant — confirm that cannot happen here.
            TaskPartitionState currState = TaskPartitionState.valueOf(currStateOutput.getCurrentState(jobResource, new Partition(pName), instance));
            jobCtx.setPartitionState(pId, currState);
            // Process any requested state transitions.
            String requestedStateStr = currStateOutput.getRequestedState(jobResource, new Partition(pName), instance);
            if (requestedStateStr != null && !requestedStateStr.isEmpty()) {
                TaskPartitionState requestedState = TaskPartitionState.valueOf(requestedStateStr);
                if (requestedState.equals(currState)) {
                    LOG.warn(String.format("Requested state %s is the same as the current state for instance %s.", requestedState, instance));
                }
                // The requested state is assigned even when it equals the current state.
                paMap.put(pId, new PartitionAssignment(instance, requestedState.name()));
                assignedPartitions.add(pId);
                LOG.debug(String.format("Instance %s requested a state transition to %s for partition %s.", instance, requestedState, pName));
                continue;
            }
            switch(currState) {
                case RUNNING:
                case STOPPED:
                    {
                        // Keep the partition on this instance; its next state follows the job target state.
                        TaskPartitionState nextState;
                        if (jobTgtState == TargetState.START) {
                            nextState = TaskPartitionState.RUNNING;
                        } else {
                            nextState = TaskPartitionState.STOPPED;
                        }
                        paMap.put(pId, new PartitionAssignment(instance, nextState.name()));
                        assignedPartitions.add(pId);
                        LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, nextState, instance));
                    }
                    break;
                case COMPLETED:
                    {
                        // The task has completed on this partition. Mark as such in the context object.
                        donePartitions.add(pId);
                        LOG.debug(String.format("Task partition %s has completed with state %s. Marking as such in rebalancer context.", pName, currState));
                        partitionsToDropFromIs.add(pId);
                        markPartitionCompleted(jobCtx, pId);
                    }
                    break;
                case TIMED_OUT:
                case TASK_ERROR:
                case ERROR:
                    {
                        // The task may be rescheduled on a different instance.
                        donePartitions.add(pId);
                        LOG.debug(String.format("Task partition %s has error state %s. Marking as such in rebalancer context.", pName, currState));
                        markPartitionError(jobCtx, pId, currState, true);
                        // Check whether this partition has reached the configured
                        // maximum number of attempts.
                        if (jobCtx.getPartitionNumAttempts(pId) >= jobCfg.getMaxAttemptsPerTask()) {
                            // If the user does not require this task to succeed in order for the job to succeed,
                            // then we don't have to fail the job right now
                            boolean successOptional = false;
                            String taskId = jobCtx.getTaskIdForPartition(pId);
                            if (taskId != null) {
                                TaskConfig taskConfig = jobCfg.getTaskConfig(taskId);
                                if (taskConfig != null) {
                                    successOptional = taskConfig.isSuccessOptional();
                                }
                            }
                            // As long as fewer partitions have been skipped than the job's failure
                            // threshold, the failure is tolerated and there is no need
                            // to fail the job immediately
                            if (skippedPartitions.size() < jobCfg.getFailureThreshold()) {
                                successOptional = true;
                            }
                            if (!successOptional) {
                                // The failure is not tolerable: mark the job (and a terminable workflow) as
                                // failed, flag all partitions as errored and return early.
                                long finishTime = currentTime;
                                workflowCtx.setJobState(jobResource, TaskState.FAILED);
                                if (workflowConfig.isTerminable()) {
                                    workflowCtx.setWorkflowState(TaskState.FAILED);
                                    workflowCtx.setFinishTime(finishTime);
                                }
                                jobCtx.setFinishTime(finishTime);
                                markAllPartitionsError(jobCtx, currState, false);
                                addAllPartitions(allPartitions, partitionsToDropFromIs);
                                return emptyAssignment(jobResource, currStateOutput);
                            } else {
                                // Tolerated failure: remember the partition as skipped and drop it.
                                skippedPartitions.add(pId);
                                partitionsToDropFromIs.add(pId);
                            }
                        } else {
                            // Mark the task to be started at some later time (if enabled)
                            markPartitionDelayed(jobCfg, jobCtx, pId);
                        }
                    }
                    break;
                case INIT:
                case DROPPED:
                    {
                        // currState in [INIT, DROPPED]. Do nothing, the partition is eligible to be reassigned.
                        donePartitions.add(pId);
                        LOG.debug(String.format("Task partition %s has state %s. It will be dropped from the current ideal state.", pName, currState));
                    }
                    break;
                default:
                    throw new AssertionError("Unknown enum symbol: " + currState);
            }
        }
        // Remove the task partitions collected above (completed, errored, INIT or DROPPED) from
        // this instance's set; the set's size is used below to compute the instance's free capacity.
        pSet.removeAll(donePartitions);
    }
    // For delayed tasks, trigger a rebalance event for the closest upcoming ready time
    scheduleForNextTask(jobResource, jobCtx, currentTime);
    if (isJobComplete(jobCtx, allPartitions, skippedPartitions, jobCfg)) {
        workflowCtx.setJobState(jobResource, TaskState.COMPLETED);
        jobCtx.setFinishTime(currentTime);
        if (isWorkflowComplete(workflowCtx, workflowConfig)) {
            workflowCtx.setWorkflowState(TaskState.COMPLETED);
            workflowCtx.setFinishTime(currentTime);
        }
    }
    // Make additional task assignments if needed.
    if (jobTgtState == TargetState.START) {
        // Contains the set of task partitions that must be excluded from consideration when making
        // any new assignments.
        // This includes all completed, failed, delayed, and already assigned partitions.
        Set<Integer> excludeSet = Sets.newTreeSet(assignedPartitions);
        addCompletedPartitions(excludeSet, jobCtx, allPartitions);
        addGiveupPartitions(excludeSet, jobCtx, allPartitions, jobCfg);
        excludeSet.addAll(skippedPartitions);
        excludeSet.addAll(getNonReadyPartitions(jobCtx, currentTime));
        // Get instance->[partition, ...] mappings for the target resource.
        Map<String, SortedSet<Integer>> tgtPartitionAssignments = getTaskAssignment(currStateOutput, prevAssignment, liveInstances, jobCfg, jobCtx, workflowConfig, workflowCtx, allPartitions, cache);
        for (Map.Entry<String, SortedSet<Integer>> entry : taskAssignments.entrySet()) {
            String instance = entry.getKey();
            // Only instances that have target partitions and are not excluded receive new tasks.
            if (!tgtPartitionAssignments.containsKey(instance) || excludedInstances.contains(instance)) {
                continue;
            }
            // Contains the set of task partitions currently assigned to the instance.
            Set<Integer> pSet = entry.getValue();
            // Free capacity: configured concurrency minus what the instance still holds.
            int numToAssign = jobCfg.getNumConcurrentTasksPerInstance() - pSet.size();
            if (numToAssign > 0) {
                List<Integer> nextPartitions = getNextPartitions(tgtPartitionAssignments.get(instance), excludeSet, numToAssign);
                for (Integer pId : nextPartitions) {
                    String pName = pName(jobResource, pId);
                    // The assignment targets RUNNING, while the job context records INIT for the partition.
                    paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.RUNNING.name()));
                    // Prevent the same partition from being handed to another instance in this pass.
                    excludeSet.add(pId);
                    jobCtx.setAssignedParticipant(pId, instance);
                    jobCtx.setPartitionState(pId, TaskPartitionState.INIT);
                    LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, TaskPartitionState.RUNNING, instance));
                }
            }
        }
    }
    // Construct a ResourceAssignment object from the map of partition assignments.
    ResourceAssignment ra = new ResourceAssignment(jobResource);
    for (Map.Entry<Integer, PartitionAssignment> e : paMap.entrySet()) {
        PartitionAssignment pa = e.getValue();
        // Each partition maps to exactly one (instance -> state) pair.
        ra.addReplicaMap(new Partition(pName(jobResource, e.getKey())), ImmutableMap.of(pa._instance, pa._state));
    }
    return ra;
}
Also used : Message(org.apache.helix.model.Message) SortedSet(java.util.SortedSet) ResourceAssignment(org.apache.helix.model.ResourceAssignment) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) Partition(org.apache.helix.model.Partition) TreeMap(java.util.TreeMap) HashMap(java.util.HashMap) Map(java.util.Map) BiMap(com.google.common.collect.BiMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashBiMap(com.google.common.collect.HashBiMap) TreeMap(java.util.TreeMap)

Aggregations

ResourceAssignment (org.apache.helix.model.ResourceAssignment)11 Partition (org.apache.helix.model.Partition)8 TreeSet (java.util.TreeSet)4 ImmutableMap (com.google.common.collect.ImmutableMap)3 HashMap (java.util.HashMap)3 Map (java.util.Map)3 TreeMap (java.util.TreeMap)3 StateModelDefinition (org.apache.helix.model.StateModelDefinition)3 HashSet (java.util.HashSet)2 SortedSet (java.util.SortedSet)2 HelixDataAccessor (org.apache.helix.HelixDataAccessor)2 PropertyKey (org.apache.helix.PropertyKey)2 ZNRecord (org.apache.helix.ZNRecord)2 Message (org.apache.helix.model.Message)2 BiMap (com.google.common.collect.BiMap)1 HashBiMap (com.google.common.collect.HashBiMap)1 ClusterConfig (org.apache.helix.model.ClusterConfig)1 Test (org.testng.annotations.Test)1