use of org.apache.helix.model.Partition in project helix by apache.
the class DelayedAutoRebalancer method computeBestPossiblePartitionState.
/**
 * Builds the best-possible replica assignment for every partition of {@code resource}.
 *
 * <p>The set of "active" nodes is obtained from {@code getActiveInstances}, which is given the
 * enabled instances, the live instances, the instance offline-time map, the instance configs
 * and the rebalance delay resolved from the ideal state and cluster config. For each partition
 * the preference list is computed against those active nodes, and the final replica states are
 * produced by {@code computeBestPossibleStateForPartition} using the live nodes, the state model
 * definition, the partition's current states and its disabled instances.
 *
 * @param cache cluster data snapshot used to look up instances, configs and state models
 * @param idealState ideal state of the resource; supplies the state model reference
 * @param resource the resource whose partitions are being assigned
 * @param currentStateOutput provides the current state map of each partition
 * @return a {@link ResourceAssignment} holding one replica map per partition of the resource
 */
@Override
public ResourceAssignment computeBestPossiblePartitionState(ClusterDataCache cache, IdealState idealState, Resource resource, CurrentStateOutput currentStateOutput) {
  final String resourceName = resource.getResourceName();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Processing resource:" + resourceName);
  }

  // Resolve the node sets and the delay that together decide which instances count as active.
  final Set<String> enabledInstances = cache.getEnabledInstances();
  final Set<String> liveInstances = cache.getLiveInstances().keySet();
  final ClusterConfig clusterCfg = cache.getClusterConfig();
  final long rebalanceDelay = getRebalanceDelay(idealState, clusterCfg);
  final Set<String> activeInstances = getActiveInstances(
      enabledInstances,
      idealState,
      liveInstances,
      cache.getInstanceOfflineTimeMap(),
      cache.getLiveInstances().keySet(),
      cache.getInstanceConfigMap(),
      rebalanceDelay,
      clusterCfg);

  final StateModelDefinition stateModel = cache.getStateModelDef(idealState.getStateModelDefRef());
  final ResourceAssignment assignment = new ResourceAssignment(resourceName);

  for (Partition partition : resource.getPartitions()) {
    Map<String, String> currentStates = currentStateOutput.getCurrentStateMap(resourceName, partition);
    Set<String> disabledForPartition = cache.getDisabledInstancesForPartition(resourceName, partition.toString());
    List<String> preferences = getPreferenceList(partition, idealState, activeInstances);
    Map<String, String> bestStates = computeBestPossibleStateForPartition(
        liveInstances, stateModel, preferences, currentStates, disabledForPartition, idealState);
    assignment.addReplicaMap(partition, bestStates);
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Best possible mapping for resource " + resourceName + ": " + assignment);
  }
  return assignment;
}
use of org.apache.helix.model.Partition in project helix by apache.
the class DeprecatedTaskRebalancer method computeResourceMapping.
/**
 * Computes the task-partition assignment for one job resource and updates the job and workflow
 * contexts along the way.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>Set the job/workflow state in {@code workflowCtx} from the workflow's target state.</li>
 * <li>Walk the previous per-instance assignments (skipping excluded instances) and, for every
 * task partition, either carry forward a pending transition, honor a requested state, or decide
 * the next state from the current state.</li>
 * <li>Schedule a rebalance for delayed tasks and mark the job/workflow COMPLETED when
 * {@code isJobComplete} says so.</li>
 * <li>When the target state is START, hand out additional partitions to instances that have
 * spare concurrency.</li>
 * </ol>
 *
 * @param jobResource name of the job resource being rebalanced
 * @param workflowConfig config of the owning workflow; supplies the target state
 * @param jobCfg job config; supplies max attempts, failure threshold and per-instance concurrency
 * @param prevAssignment the previous assignment, used to find what each instance already holds
 * @param liveInstances instances considered for assignment
 * @param currStateOutput source of current, pending and requested states
 * @param workflowCtx workflow context; job and workflow states are written to it
 * @param jobCtx job context; partition states, participants and finish time are written to it
 * @param partitionsToDropFromIs output set: partition ids are added to it by this method
 * @param cache cluster data cache passed through to the helper lookups
 * @return the new assignment built from the decided (instance, state) pairs, or the result of
 *         {@code emptyAssignment} when the job is failed inside the loop
 */
private ResourceAssignment computeResourceMapping(String jobResource, WorkflowConfig workflowConfig, JobConfig jobCfg, ResourceAssignment prevAssignment, Collection<String> liveInstances, CurrentStateOutput currStateOutput, WorkflowContext workflowCtx, JobContext jobCtx, Set<Integer> partitionsToDropFromIs, ClusterDataCache cache) {
TargetState jobTgtState = workflowConfig.getTargetState();
// Update running status in workflow context
if (jobTgtState == TargetState.STOP) {
workflowCtx.setJobState(jobResource, TaskState.STOPPED);
// Workflow has been stopped if all jobs are stopped
if (isWorkflowStopped(workflowCtx, workflowConfig)) {
workflowCtx.setWorkflowState(TaskState.STOPPED);
}
} else {
workflowCtx.setJobState(jobResource, TaskState.IN_PROGRESS);
// Workflow is in progress if any task is in progress
workflowCtx.setWorkflowState(TaskState.IN_PROGRESS);
}
// Used to keep track of tasks that have already been assigned to instances.
Set<Integer> assignedPartitions = new HashSet<Integer>();
// Used to keep track of tasks that have failed, but whose failure is acceptable
Set<Integer> skippedPartitions = new HashSet<Integer>();
// Keeps a mapping of (partition) -> (instance, state)
Map<Integer, PartitionAssignment> paMap = new TreeMap<Integer, PartitionAssignment>();
// Instances in this set are skipped both when processing existing assignments and when
// handing out new ones.
Set<String> excludedInstances = getInstancesAssignedToOtherJobs(jobResource, workflowConfig, cache);
// Process all the current assignments of tasks.
Set<Integer> allPartitions = getAllTaskPartitions(jobCfg, jobCtx, workflowConfig, workflowCtx, cache);
Map<String, SortedSet<Integer>> taskAssignments = getTaskPartitionAssignments(liveInstances, prevAssignment, allPartitions);
// A single timestamp is used for every finish time and readiness check in this pass.
long currentTime = System.currentTimeMillis();
for (String instance : taskAssignments.keySet()) {
if (excludedInstances.contains(instance)) {
continue;
}
Set<Integer> pSet = taskAssignments.get(instance);
// Used to keep track of partitions that are in one of the final states: COMPLETED, TIMED_OUT,
// TASK_ERROR, ERROR.
Set<Integer> donePartitions = new TreeSet<Integer>();
for (int pId : pSet) {
final String pName = pName(jobResource, pId);
// Check for pending state transitions on this (partition, instance).
Message pendingMessage = currStateOutput.getPendingState(jobResource, new Partition(pName), instance);
if (pendingMessage != null) {
// There is a pending state transition for this (partition, instance). Just copy forward
// the state assignment from the previous ideal state.
Map<String, String> stateMap = prevAssignment.getReplicaMap(new Partition(pName));
if (stateMap != null) {
String prevState = stateMap.get(instance);
paMap.put(pId, new PartitionAssignment(instance, prevState));
assignedPartitions.add(pId);
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("Task partition %s has a pending state transition on instance %s. Using the previous ideal state which was %s.", pName, instance, prevState));
}
}
continue;
}
// NOTE(review): Enum valueOf throws NullPointerException for a null name; if getCurrentState
// can return null for this (partition, instance) this line will fail - confirm against callers.
TaskPartitionState currState = TaskPartitionState.valueOf(currStateOutput.getCurrentState(jobResource, new Partition(pName), instance));
jobCtx.setPartitionState(pId, currState);
// Process any requested state transitions.
String requestedStateStr = currStateOutput.getRequestedState(jobResource, new Partition(pName), instance);
if (requestedStateStr != null && !requestedStateStr.isEmpty()) {
TaskPartitionState requestedState = TaskPartitionState.valueOf(requestedStateStr);
if (requestedState.equals(currState)) {
LOG.warn(String.format("Requested state %s is the same as the current state for instance %s.", requestedState, instance));
}
// The requested state is assigned even when it equals the current state (only a warning is logged).
paMap.put(pId, new PartitionAssignment(instance, requestedState.name()));
assignedPartitions.add(pId);
LOG.debug(String.format("Instance %s requested a state transition to %s for partition %s.", instance, requestedState, pName));
continue;
}
// No pending or requested transition: decide the next state from the current state.
switch(currState) {
case RUNNING:
case STOPPED:
{
// Keep the task on this instance; it runs when the target state is START and is stopped otherwise.
TaskPartitionState nextState;
if (jobTgtState == TargetState.START) {
nextState = TaskPartitionState.RUNNING;
} else {
nextState = TaskPartitionState.STOPPED;
}
paMap.put(pId, new PartitionAssignment(instance, nextState.name()));
assignedPartitions.add(pId);
LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, nextState, instance));
}
break;
case COMPLETED:
{
// The task has completed on this partition. Mark as such in the context object.
donePartitions.add(pId);
LOG.debug(String.format("Task partition %s has completed with state %s. Marking as such in rebalancer context.", pName, currState));
partitionsToDropFromIs.add(pId);
markPartitionCompleted(jobCtx, pId);
}
break;
case TIMED_OUT:
case TASK_ERROR:
case ERROR:
{
// The task may be rescheduled on a different instance.
donePartitions.add(pId);
LOG.debug(String.format("Task partition %s has error state %s. Marking as such in rebalancer context.", pName, currState));
markPartitionError(jobCtx, pId, currState, true);
// Once this partition has reached the maximum number of attempts, decide whether the
// whole job has to be failed.
if (jobCtx.getPartitionNumAttempts(pId) >= jobCfg.getMaxAttemptsPerTask()) {
// If the user does not require this task to succeed in order for the job to succeed,
// then we don't have to fail the job right now
boolean successOptional = false;
String taskId = jobCtx.getTaskIdForPartition(pId);
if (taskId != null) {
TaskConfig taskConfig = jobCfg.getTaskConfig(taskId);
if (taskConfig != null) {
successOptional = taskConfig.isSuccessOptional();
}
}
// While fewer partitions have been skipped than the failure threshold, there is no need
// to fail the job immediately
if (skippedPartitions.size() < jobCfg.getFailureThreshold()) {
successOptional = true;
}
if (!successOptional) {
// Fail the job (and the workflow, when it is terminable), mark every partition as errored,
// queue all partitions for removal and stop processing with an empty assignment.
long finishTime = currentTime;
workflowCtx.setJobState(jobResource, TaskState.FAILED);
if (workflowConfig.isTerminable()) {
workflowCtx.setWorkflowState(TaskState.FAILED);
workflowCtx.setFinishTime(finishTime);
}
jobCtx.setFinishTime(finishTime);
markAllPartitionsError(jobCtx, currState, false);
addAllPartitions(allPartitions, partitionsToDropFromIs);
return emptyAssignment(jobResource, currStateOutput);
} else {
// The failure is tolerated: remember the partition as skipped and drop it.
skippedPartitions.add(pId);
partitionsToDropFromIs.add(pId);
}
} else {
// Mark the task to be started at some later time (if enabled)
markPartitionDelayed(jobCfg, jobCtx, pId);
}
}
break;
case INIT:
case DROPPED:
{
// currState in [INIT, DROPPED]. Do nothing, the partition is eligible to be reassigned.
donePartitions.add(pId);
LOG.debug(String.format("Task partition %s has state %s. It will be dropped from the current ideal state.", pName, currState));
}
break;
default:
throw new AssertionError("Unknown enum symbol: " + currState);
}
}
// Remove the set of task partitions that are completed or in one of the error states.
// pSet is the set stored in taskAssignments, so the reduced size is what the assignment
// loop below sees when it computes how many more tasks this instance can take.
pSet.removeAll(donePartitions);
}
// For delayed tasks, trigger a rebalance event for the closest upcoming ready time
scheduleForNextTask(jobResource, jobCtx, currentTime);
if (isJobComplete(jobCtx, allPartitions, skippedPartitions, jobCfg)) {
workflowCtx.setJobState(jobResource, TaskState.COMPLETED);
jobCtx.setFinishTime(currentTime);
if (isWorkflowComplete(workflowCtx, workflowConfig)) {
workflowCtx.setWorkflowState(TaskState.COMPLETED);
workflowCtx.setFinishTime(currentTime);
}
}
// Make additional task assignments if needed.
if (jobTgtState == TargetState.START) {
// Contains the set of task partitions that must be excluded from consideration when making
// any new assignments.
// This includes all completed, failed, delayed, and already assigned partitions.
Set<Integer> excludeSet = Sets.newTreeSet(assignedPartitions);
addCompletedPartitions(excludeSet, jobCtx, allPartitions);
addGiveupPartitions(excludeSet, jobCtx, allPartitions, jobCfg);
excludeSet.addAll(skippedPartitions);
excludeSet.addAll(getNonReadyPartitions(jobCtx, currentTime));
// Get instance->[partition, ...] mappings for the target resource.
Map<String, SortedSet<Integer>> tgtPartitionAssignments = getTaskAssignment(currStateOutput, prevAssignment, liveInstances, jobCfg, jobCtx, workflowConfig, workflowCtx, allPartitions, cache);
for (Map.Entry<String, SortedSet<Integer>> entry : taskAssignments.entrySet()) {
String instance = entry.getKey();
if (!tgtPartitionAssignments.containsKey(instance) || excludedInstances.contains(instance)) {
continue;
}
// Contains the set of task partitions currently assigned to the instance.
Set<Integer> pSet = entry.getValue();
// Remaining concurrency budget for this instance.
int numToAssign = jobCfg.getNumConcurrentTasksPerInstance() - pSet.size();
if (numToAssign > 0) {
List<Integer> nextPartitions = getNextPartitions(tgtPartitionAssignments.get(instance), excludeSet, numToAssign);
for (Integer pId : nextPartitions) {
String pName = pName(jobResource, pId);
// The assignment targets RUNNING while the job context records the partition as INIT.
paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.RUNNING.name()));
// Adding to excludeSet keeps the same partition from being given to a later instance.
excludeSet.add(pId);
jobCtx.setAssignedParticipant(pId, instance);
jobCtx.setPartitionState(pId, TaskPartitionState.INIT);
LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, TaskPartitionState.RUNNING, instance));
}
}
}
}
// Construct a ResourceAssignment object from the map of partition assignments.
ResourceAssignment ra = new ResourceAssignment(jobResource);
for (Map.Entry<Integer, PartitionAssignment> e : paMap.entrySet()) {
PartitionAssignment pa = e.getValue();
ra.addReplicaMap(new Partition(pName(jobResource, e.getKey())), ImmutableMap.of(pa._instance, pa._state));
}
return ra;
}
use of org.apache.helix.model.Partition in project helix by apache.
the class FixedTargetTaskAssignmentCalculator method getTgtPartitionAssignment.
/**
 * Builds, for every given instance, the sorted set of task partition ids whose target partition
 * currently has a matching state on that instance.
 *
 * <p>For each partition of the target ideal state, only the first task partition id registered
 * for that target in the job context is considered, and only if it is in {@code includeSet}.
 * An (instance, target partition) pair is skipped when it has a pending state transition or no
 * current state.
 *
 * @param currStateOutput source of current and pending states of the target resource
 * @param instances the instances to build the mapping for; every one gets an entry, possibly
 *        empty
 * @param tgtIs the ideal state of the target resource
 * @param tgtStates accepted current states of the target partition; {@code null} accepts any
 *        non-null state
 * @param includeSet the task partition ids that may appear in the result
 * @param jobCtx job context supplying the target-partition to task-partition mapping
 * @return a map of instance name to the task partition ids assigned to that instance
 */
private static Map<String, SortedSet<Integer>> getTgtPartitionAssignment(CurrentStateOutput currStateOutput, Iterable<String> instances, IdealState tgtIs, Set<String> tgtStates, Set<Integer> includeSet, JobContext jobCtx) {
  Map<String, SortedSet<Integer>> assignment = new HashMap<String, SortedSet<Integer>>();
  for (String host : instances) {
    assignment.put(host, new TreeSet<Integer>());
  }

  final Map<String, List<Integer>> tasksByTarget = jobCtx.getPartitionsByTarget();
  final String targetResource = tgtIs.getResourceName();

  for (String targetPartitionName : tgtIs.getPartitionSet()) {
    List<Integer> taskIds = tasksByTarget.get(targetPartitionName);
    if (taskIds == null || taskIds.isEmpty()) {
      continue;
    }
    int taskId = taskIds.get(0);
    if (!includeSet.contains(taskId)) {
      continue;
    }

    Partition targetPartition = new Partition(targetPartitionName);
    for (String host : instances) {
      // A target partition that is mid-transition on this host is not eligible.
      if (currStateOutput.getPendingState(targetResource, targetPartition, host) != null) {
        continue;
      }
      String state = currStateOutput.getCurrentState(targetResource, targetPartition, host);
      if (state == null) {
        continue;
      }
      if (tgtStates != null && !tgtStates.contains(state)) {
        continue;
      }
      assignment.get(host).add(taskId);
    }
  }
  return assignment;
}
use of org.apache.helix.model.Partition in project helix by apache.
the class TestRebalancerMetrics method copyCurrentStateFromBestPossible.
/**
 * Creates a fresh {@link CurrentStateOutput} whose current states for {@code resource} mirror
 * the best-possible states of that resource.
 *
 * @param bestPossibleStateOutput the best-possible output to copy from
 * @param resource name of the resource whose partition states are copied
 * @return a new current-state output containing one entry per (partition, instance) pair
 */
private CurrentStateOutput copyCurrentStateFromBestPossible(BestPossibleStateOutput bestPossibleStateOutput, String resource) {
  CurrentStateOutput copied = new CurrentStateOutput();
  PartitionStateMap bestPossibleStates = bestPossibleStateOutput.getPartitionStateMap(resource);
  for (Partition partition : bestPossibleStates.partitionSet()) {
    Map<String, String> replicaStates = bestPossibleStates.getPartitionMap(partition);
    for (Map.Entry<String, String> replica : replicaStates.entrySet()) {
      copied.setCurrentState(resource, partition, replica.getKey(), replica.getValue());
    }
  }
  return copied;
}
use of org.apache.helix.model.Partition in project helix by apache.
the class JobRebalancer method computeResourceMapping.
/**
 * Computes the task-partition assignment for one job resource and advances the job and workflow
 * state machines stored in the contexts.
 *
 * <p>Outline:
 * <ol>
 * <li>Resolve the job state: an IN_PROGRESS job becomes TIMING_OUT when {@code isTimeout}
 * reports a timeout or the workflow is TIMED_OUT; otherwise (unless already TIMING_OUT or
 * FAILING) the job/workflow states are set from the workflow target state.</li>
 * <li>Fail the job immediately when there are no task partitions.</li>
 * <li>Walk the previous per-instance assignments and decide each task's next state from its
 * pending message, requested state or current state.</li>
 * <li>Handle the terminal transitions: too many skipped partitions (FAILING / failed), job
 * complete, and timing out / timed out.</li>
 * <li>Otherwise, when the target state is START and the job is not timing out, assign new tasks
 * to instances, throttled by both the job config and the participant capacity.</li>
 * </ol>
 *
 * <p>Note that the local {@code jobState} is read once at entry and only updated locally for
 * the TIMING_OUT case; later checks therefore see the state the job had when this method
 * started, not the states written to {@code workflowCtx} during this pass.
 *
 * @param jobResource name of the job resource being rebalanced
 * @param workflowConfig config of the owning workflow; supplies the target state
 * @param jobCfg job config; supplies timeout, max attempts, failure threshold and concurrency
 * @param prevTaskToInstanceStateAssignment the previous assignment of tasks to instances
 * @param liveInstances instances considered for assignment
 * @param currStateOutput source of current, pending and requested states
 * @param workflowCtx workflow context; job and workflow states are written to it
 * @param jobCtx job context; partition states, participants and times are written to it
 * @param partitionsToDropFromIs output set: partition ids are added to it by this method
 * @param cache cluster data cache; also used to read and update per-participant task counts
 * @return the new assignment, or an empty assignment when the job has reached a terminal state
 */
private ResourceAssignment computeResourceMapping(String jobResource, WorkflowConfig workflowConfig, JobConfig jobCfg, ResourceAssignment prevTaskToInstanceStateAssignment, Collection<String> liveInstances, CurrentStateOutput currStateOutput, WorkflowContext workflowCtx, JobContext jobCtx, Set<Integer> partitionsToDropFromIs, ClusterDataCache cache) {
  TargetState jobTgtState = workflowConfig.getTargetState();
  TaskState jobState = workflowCtx.getJobState(jobResource);
  TaskState workflowState = workflowCtx.getWorkflowState();
  if (jobState == TaskState.IN_PROGRESS && (isTimeout(jobCtx.getStartTime(), jobCfg.getTimeout()) || TaskState.TIMED_OUT.equals(workflowState))) {
    jobState = TaskState.TIMING_OUT;
    workflowCtx.setJobState(jobResource, TaskState.TIMING_OUT);
  } else if (jobState != TaskState.TIMING_OUT && jobState != TaskState.FAILING) {
    // Update running status in workflow context
    if (jobTgtState == TargetState.STOP) {
      if (checkJobStopped(jobCtx)) {
        workflowCtx.setJobState(jobResource, TaskState.STOPPED);
      } else {
        workflowCtx.setJobState(jobResource, TaskState.STOPPING);
      }
      // Workflow has been stopped if all in progress jobs are stopped
      if (isWorkflowStopped(workflowCtx, workflowConfig)) {
        workflowCtx.setWorkflowState(TaskState.STOPPED);
      } else {
        workflowCtx.setWorkflowState(TaskState.STOPPING);
      }
    } else {
      workflowCtx.setJobState(jobResource, TaskState.IN_PROGRESS);
      // Workflow is in progress if any task is in progress
      workflowCtx.setWorkflowState(TaskState.IN_PROGRESS);
    }
  }
  // Used to keep track of tasks that have already been assigned to instances.
  Set<Integer> assignedPartitions = new HashSet<Integer>();
  // Used to keep track of tasks that have failed, but whose failure is acceptable
  Set<Integer> skippedPartitions = new HashSet<Integer>();
  // Keeps a mapping of (partition) -> (instance, state)
  Map<Integer, PartitionAssignment> paMap = new TreeMap<Integer, PartitionAssignment>();
  Set<String> excludedInstances = getExcludedInstances(jobResource, workflowConfig, cache);
  // Process all the current assignments of tasks.
  TaskAssignmentCalculator taskAssignmentCal = getAssignmentCalulator(jobCfg);
  Set<Integer> allPartitions = taskAssignmentCal.getAllTaskPartitions(jobCfg, jobCtx, workflowConfig, workflowCtx, cache.getIdealStates());
  if (allPartitions == null || allPartitions.isEmpty()) {
    // Empty target partitions, mark the job as FAILED.
    String failureMsg = "Empty task partition mapping for job " + jobResource + ", marked the job as FAILED!";
    LOG.info(failureMsg);
    jobCtx.setInfo(failureMsg);
    failJob(jobResource, workflowCtx, jobCtx, workflowConfig, cache.getJobConfigMap());
    markAllPartitionsError(jobCtx, TaskPartitionState.ERROR, false);
    return new ResourceAssignment(jobResource);
  }
  Map<String, SortedSet<Integer>> prevInstanceToTaskAssignments = getPrevInstanceToTaskAssignments(liveInstances, prevTaskToInstanceStateAssignment, allPartitions);
  long currentTime = System.currentTimeMillis();
  LOG.debug("All partitions: " + allPartitions + " taskAssignment: " + prevInstanceToTaskAssignments + " excludedInstances: " + excludedInstances);
  // Iterate through all instances
  for (String instance : prevInstanceToTaskAssignments.keySet()) {
    if (excludedInstances.contains(instance)) {
      continue;
    }
    Set<Integer> pSet = prevInstanceToTaskAssignments.get(instance);
    // Used to keep track of partitions that are in one of the final states: COMPLETED, TIMED_OUT,
    // TASK_ERROR, ERROR.
    Set<Integer> donePartitions = new TreeSet<Integer>();
    for (int pId : pSet) {
      final String pName = pName(jobResource, pId);
      TaskPartitionState currState = updateJobContextAndGetTaskCurrentState(currStateOutput, jobResource, pId, pName, instance, jobCtx);
      // Check for pending state transitions on this (partition, instance).
      // A pending message whose target equals the current state is ignored here.
      Message pendingMessage = currStateOutput.getPendingState(jobResource, new Partition(pName), instance);
      if (pendingMessage != null && !pendingMessage.getToState().equals(currState.name())) {
        processTaskWithPendingMessage(prevTaskToInstanceStateAssignment, pId, pName, instance, pendingMessage, jobState, currState, paMap, assignedPartitions);
        continue;
      }
      // Process any requested state transitions.
      String requestedStateStr = currStateOutput.getRequestedState(jobResource, new Partition(pName), instance);
      if (requestedStateStr != null && !requestedStateStr.isEmpty()) {
        TaskPartitionState requestedState = TaskPartitionState.valueOf(requestedStateStr);
        if (requestedState.equals(currState)) {
          LOG.warn(String.format("Requested state %s is the same as the current state for instance %s.", requestedState, instance));
        }
        paMap.put(pId, new PartitionAssignment(instance, requestedState.name()));
        assignedPartitions.add(pId);
        LOG.debug(String.format("Instance %s requested a state transition to %s for partition %s.", instance, requestedState, pName));
        continue;
      }
      switch (currState) {
        case RUNNING: {
          // A running task is aborted when the job is timing out, stopped when the target
          // state is STOP, and left running otherwise.
          TaskPartitionState nextState = TaskPartitionState.RUNNING;
          if (jobState == TaskState.TIMING_OUT) {
            nextState = TaskPartitionState.TASK_ABORTED;
          } else if (jobTgtState == TargetState.STOP) {
            nextState = TaskPartitionState.STOPPED;
          }
          paMap.put(pId, new PartitionAssignment(instance, nextState.name()));
          assignedPartitions.add(pId);
          LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, nextState, instance));
          break;
        }
        case STOPPED: {
          TaskPartitionState nextState;
          if (jobTgtState == TargetState.START) {
            nextState = TaskPartitionState.RUNNING;
          } else {
            nextState = TaskPartitionState.STOPPED;
          }
          paMap.put(pId, new PartitionAssignment(instance, nextState.name()));
          assignedPartitions.add(pId);
          LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, nextState, instance));
          break;
        }
        case COMPLETED: {
          // The task has completed on this partition. Mark as such in the context object.
          donePartitions.add(pId);
          LOG.debug(String.format("Task partition %s has completed with state %s. Marking as such in rebalancer context.", pName, currState));
          partitionsToDropFromIs.add(pId);
          markPartitionCompleted(jobCtx, pId);
          break;
        }
        case TIMED_OUT:
        case TASK_ERROR:
        case TASK_ABORTED:
        case ERROR: {
          // The task may be rescheduled on a different instance.
          donePartitions.add(pId);
          LOG.debug(String.format("Task partition %s has error state %s with msg %s. Marking as such in rebalancer context.", pName, currState, jobCtx.getPartitionInfo(pId)));
          markPartitionError(jobCtx, pId, currState, true);
          // After all tasks are aborted, they will be dropped, because of job timeout.
          if (jobState != TaskState.TIMED_OUT && jobState != TaskState.TIMING_OUT) {
            // Out of attempts, aborted, or in ERROR: the task is skipped and dropped, not retried.
            if (jobCtx.getPartitionNumAttempts(pId) >= jobCfg.getMaxAttemptsPerTask() || currState.equals(TaskPartitionState.TASK_ABORTED) || currState.equals(TaskPartitionState.ERROR)) {
              skippedPartitions.add(pId);
              partitionsToDropFromIs.add(pId);
              LOG.debug("skippedPartitions:" + skippedPartitions);
            } else {
              // Mark the task to be started at some later time (if enabled)
              markPartitionDelayed(jobCfg, jobCtx, pId);
            }
          }
          break;
        }
        case INIT:
        case DROPPED: {
          // currState in [INIT, DROPPED]. Do nothing, the partition is eligible to be reassigned.
          donePartitions.add(pId);
          LOG.debug(String.format("Task partition %s has state %s. It will be dropped from the current ideal state.", pName, currState));
          break;
        }
        default:
          throw new AssertionError("Unknown enum symbol: " + currState);
      }
    }
    // Remove the set of task partitions that are completed or in one of the error states.
    // pSet is the set stored in prevInstanceToTaskAssignments, so the throttling below sees
    // the reduced size.
    pSet.removeAll(donePartitions);
  }
  addGiveupPartitions(skippedPartitions, jobCtx, allPartitions, jobCfg);
  if (jobState == TaskState.IN_PROGRESS && skippedPartitions.size() > jobCfg.getFailureThreshold()) {
    if (isJobFinished(jobCtx, jobResource, currStateOutput)) {
      failJob(jobResource, workflowCtx, jobCtx, workflowConfig, cache.getJobConfigMap());
      return buildEmptyAssignment(jobResource, currStateOutput);
    }
    workflowCtx.setJobState(jobResource, TaskState.FAILING);
    // Drop all assigned but not given-up tasks
    for (int pId : jobCtx.getPartitionSet()) {
      String instance = jobCtx.getAssignedParticipant(pId);
      if (jobCtx.getPartitionState(pId) != null && !isTaskGivenup(jobCtx, jobCfg, pId)) {
        paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.TASK_ABORTED.name()));
      }
      Partition partition = new Partition(pName(jobResource, pId));
      Message pendingMessage = currStateOutput.getPendingState(jobResource, partition, instance);
      // A task that is still in INIT but has a pending message is mapped back to INIT,
      // so that Helix will cancel the transition.
      if (jobCtx.getPartitionState(pId) == TaskPartitionState.INIT && pendingMessage != null) {
        paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.INIT.name()));
      }
    }
    return toResourceAssignment(jobResource, paMap);
  }
  if (jobState == TaskState.FAILING && isJobFinished(jobCtx, jobResource, currStateOutput)) {
    failJob(jobResource, workflowCtx, jobCtx, workflowConfig, cache.getJobConfigMap());
    return buildEmptyAssignment(jobResource, currStateOutput);
  }
  if (isJobComplete(jobCtx, allPartitions, jobCfg)) {
    markJobComplete(jobResource, jobCtx, workflowConfig, workflowCtx, cache.getJobConfigMap());
    _clusterStatusMonitor.updateJobCounters(jobCfg, TaskState.COMPLETED, jobCtx.getFinishTime() - jobCtx.getStartTime());
    _rebalanceScheduler.removeScheduledRebalance(jobResource);
    TaskUtil.cleanupJobIdealStateExtView(_manager.getHelixDataAccessor(), jobResource);
    return buildEmptyAssignment(jobResource, currStateOutput);
  }
  // When the job is timing out and isJobFinished reports it finished, the job is marked
  // TIMED_OUT and its ideal state / external view are cleaned up, i.e. the resource
  // can be dropped(note that Helix doesn't track whether the drop is success or not).
  if (jobState == TaskState.TIMING_OUT && isJobFinished(jobCtx, jobResource, currStateOutput)) {
    jobCtx.setFinishTime(System.currentTimeMillis());
    workflowCtx.setJobState(jobResource, TaskState.TIMED_OUT);
    // Mark all INIT task to TASK_ABORTED
    for (int pId : jobCtx.getPartitionSet()) {
      if (jobCtx.getPartitionState(pId) == TaskPartitionState.INIT) {
        jobCtx.setPartitionState(pId, TaskPartitionState.TASK_ABORTED);
      }
    }
    _clusterStatusMonitor.updateJobCounters(jobCfg, TaskState.TIMED_OUT);
    _rebalanceScheduler.removeScheduledRebalance(jobResource);
    TaskUtil.cleanupJobIdealStateExtView(_manager.getHelixDataAccessor(), jobResource);
    return buildEmptyAssignment(jobResource, currStateOutput);
  }
  // For delayed tasks, trigger a rebalance event for the closest upcoming ready time
  scheduleForNextTask(jobResource, jobCtx, currentTime);
  // Make additional task assignments if needed.
  if (jobState != TaskState.TIMING_OUT && jobState != TaskState.TIMED_OUT && jobTgtState == TargetState.START) {
    // Contains the set of task partitions that must be excluded from consideration when making
    // any new assignments.
    // This includes all completed, failed, delayed, and already assigned partitions.
    Set<Integer> excludeSet = Sets.newTreeSet(assignedPartitions);
    addCompletedTasks(excludeSet, jobCtx, allPartitions);
    addGiveupPartitions(excludeSet, jobCtx, allPartitions, jobCfg);
    excludeSet.addAll(skippedPartitions);
    excludeSet.addAll(getNonReadyPartitions(jobCtx, currentTime));
    // Get instance->[partition, ...] mappings for the target resource.
    Map<String, SortedSet<Integer>> tgtPartitionAssignments = taskAssignmentCal.getTaskAssignment(currStateOutput, prevTaskToInstanceStateAssignment, liveInstances, jobCfg, jobCtx, workflowConfig, workflowCtx, allPartitions, cache.getIdealStates());
    if (!isGenericTaskJob(jobCfg) || jobCfg.isRebalanceRunningTask()) {
      dropRebalancedRunningTasks(tgtPartitionAssignments, prevInstanceToTaskAssignments, paMap, jobCtx);
    }
    for (Map.Entry<String, SortedSet<Integer>> entry : prevInstanceToTaskAssignments.entrySet()) {
      String instance = entry.getKey();
      if (!tgtPartitionAssignments.containsKey(instance) || excludedInstances.contains(instance)) {
        continue;
      }
      // 1. throttled by job configuration
      // Contains the set of task partitions currently assigned to the instance.
      Set<Integer> pSet = entry.getValue();
      int jobCfgLimitation = jobCfg.getNumConcurrentTasksPerInstance() - pSet.size();
      // 2. throttled by participant capacity
      // The per-instance setting wins; the cluster-wide value is used only when it is not set.
      int participantCapacity = cache.getInstanceConfigMap().get(instance).getMaxConcurrentTask();
      if (participantCapacity == InstanceConfig.MAX_CONCURRENT_TASK_NOT_SET) {
        participantCapacity = cache.getClusterConfig().getMaxConcurrentTaskPerInstance();
      }
      int participantLimitation = participantCapacity - cache.getParticipantActiveTaskCount(instance);
      // New tasks to be assigned
      int numToAssign = Math.min(jobCfgLimitation, participantLimitation);
      LOG.debug(String.format("Throttle tasks to be assigned to instance %s using limitation: Job Concurrent Task(%d), " + "Participant Max Task(%d). Remaining capacity %d.", instance, jobCfgLimitation, participantCapacity, numToAssign));
      if (numToAssign > 0) {
        Set<Integer> throttledSet = new HashSet<Integer>();
        List<Integer> nextPartitions = getNextPartitions(tgtPartitionAssignments.get(instance), excludeSet, throttledSet, numToAssign);
        for (Integer pId : nextPartitions) {
          String pName = pName(jobResource, pId);
          // The assignment targets RUNNING while the job context records the partition as INIT.
          paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.RUNNING.name()));
          excludeSet.add(pId);
          jobCtx.setAssignedParticipant(pId, instance);
          jobCtx.setPartitionState(pId, TaskPartitionState.INIT);
          jobCtx.setPartitionStartTime(pId, System.currentTimeMillis());
          LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, TaskPartitionState.RUNNING, instance));
        }
        // Record the newly assigned tasks in the cache's active-task count for this instance.
        cache.setParticipantActiveTaskCount(instance, cache.getParticipantActiveTaskCount(instance) + nextPartitions.size());
        if (!throttledSet.isEmpty()) {
          // Fixed: the count and the text were previously concatenated without a space.
          LOG.debug(throttledSet.size() + " tasks are ready but throttled when assigned to participant.");
        }
      }
    }
  }
  return toResourceAssignment(jobResource, paMap);
}
Aggregations