Use of org.apache.helix.model.ResourceAssignment in project helix by apache.
The class JobRebalancer, method computeResourceMapping.
private ResourceAssignment computeResourceMapping(String jobResource, WorkflowConfig workflowConfig, JobConfig jobCfg, ResourceAssignment prevTaskToInstanceStateAssignment, Collection<String> liveInstances, CurrentStateOutput currStateOutput, WorkflowContext workflowCtx, JobContext jobCtx, Set<Integer> partitionsToDropFromIs, ClusterDataCache cache) {
TargetState jobTgtState = workflowConfig.getTargetState();
TaskState jobState = workflowCtx.getJobState(jobResource);
TaskState workflowState = workflowCtx.getWorkflowState();
if (jobState == TaskState.IN_PROGRESS && (isTimeout(jobCtx.getStartTime(), jobCfg.getTimeout()) || TaskState.TIMED_OUT.equals(workflowState))) {
jobState = TaskState.TIMING_OUT;
workflowCtx.setJobState(jobResource, TaskState.TIMING_OUT);
} else if (jobState != TaskState.TIMING_OUT && jobState != TaskState.FAILING) {
// Update running status in workflow context
if (jobTgtState == TargetState.STOP) {
if (checkJobStopped(jobCtx)) {
workflowCtx.setJobState(jobResource, TaskState.STOPPED);
} else {
workflowCtx.setJobState(jobResource, TaskState.STOPPING);
}
// Workflow has been stopped if all in progress jobs are stopped
if (isWorkflowStopped(workflowCtx, workflowConfig)) {
workflowCtx.setWorkflowState(TaskState.STOPPED);
} else {
workflowCtx.setWorkflowState(TaskState.STOPPING);
}
} else {
workflowCtx.setJobState(jobResource, TaskState.IN_PROGRESS);
// Workflow is in progress if any task is in progress
workflowCtx.setWorkflowState(TaskState.IN_PROGRESS);
}
}
// Used to keep track of tasks that have already been assigned to instances.
Set<Integer> assignedPartitions = new HashSet<Integer>();
// Used to keep track of tasks that have failed, but whose failure is acceptable
Set<Integer> skippedPartitions = new HashSet<Integer>();
// Keeps a mapping of (partition) -> (instance, state)
Map<Integer, PartitionAssignment> paMap = new TreeMap<Integer, PartitionAssignment>();
Set<String> excludedInstances = getExcludedInstances(jobResource, workflowConfig, cache);
// Process all the current assignments of tasks.
TaskAssignmentCalculator taskAssignmentCal = getAssignmentCalulator(jobCfg);
Set<Integer> allPartitions = taskAssignmentCal.getAllTaskPartitions(jobCfg, jobCtx, workflowConfig, workflowCtx, cache.getIdealStates());
if (allPartitions == null || allPartitions.isEmpty()) {
// Empty target partitions, mark the job as FAILED.
String failureMsg = "Empty task partition mapping for job " + jobResource + ", marked the job as FAILED!";
LOG.info(failureMsg);
jobCtx.setInfo(failureMsg);
failJob(jobResource, workflowCtx, jobCtx, workflowConfig, cache.getJobConfigMap());
markAllPartitionsError(jobCtx, TaskPartitionState.ERROR, false);
return new ResourceAssignment(jobResource);
}
Map<String, SortedSet<Integer>> prevInstanceToTaskAssignments = getPrevInstanceToTaskAssignments(liveInstances, prevTaskToInstanceStateAssignment, allPartitions);
long currentTime = System.currentTimeMillis();
LOG.debug("All partitions: " + allPartitions + " taskAssignment: " + prevInstanceToTaskAssignments + " excludedInstances: " + excludedInstances);
// Iterate through all instances
for (String instance : prevInstanceToTaskAssignments.keySet()) {
if (excludedInstances.contains(instance)) {
continue;
}
Set<Integer> pSet = prevInstanceToTaskAssignments.get(instance);
// Used to keep track of partitions that are in one of the final states: COMPLETED, TIMED_OUT,
// TASK_ERROR, ERROR.
Set<Integer> donePartitions = new TreeSet<Integer>();
for (int pId : pSet) {
final String pName = pName(jobResource, pId);
TaskPartitionState currState = updateJobContextAndGetTaskCurrentState(currStateOutput, jobResource, pId, pName, instance, jobCtx);
// Check for pending state transitions on this (partition, instance).
Message pendingMessage = currStateOutput.getPendingState(jobResource, new Partition(pName), instance);
if (pendingMessage != null && !pendingMessage.getToState().equals(currState.name())) {
processTaskWithPendingMessage(prevTaskToInstanceStateAssignment, pId, pName, instance, pendingMessage, jobState, currState, paMap, assignedPartitions);
continue;
}
// Process any requested state transitions.
String requestedStateStr = currStateOutput.getRequestedState(jobResource, new Partition(pName), instance);
if (requestedStateStr != null && !requestedStateStr.isEmpty()) {
TaskPartitionState requestedState = TaskPartitionState.valueOf(requestedStateStr);
if (requestedState.equals(currState)) {
LOG.warn(String.format("Requested state %s is the same as the current state for instance %s.", requestedState, instance));
}
paMap.put(pId, new PartitionAssignment(instance, requestedState.name()));
assignedPartitions.add(pId);
LOG.debug(String.format("Instance %s requested a state transition to %s for partition %s.", instance, requestedState, pName));
continue;
}
switch(currState) {
case RUNNING:
{
TaskPartitionState nextState = TaskPartitionState.RUNNING;
if (jobState == TaskState.TIMING_OUT) {
nextState = TaskPartitionState.TASK_ABORTED;
} else if (jobTgtState == TargetState.STOP) {
nextState = TaskPartitionState.STOPPED;
}
paMap.put(pId, new PartitionAssignment(instance, nextState.name()));
assignedPartitions.add(pId);
LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, nextState, instance));
}
break;
case STOPPED:
{
TaskPartitionState nextState;
if (jobTgtState == TargetState.START) {
nextState = TaskPartitionState.RUNNING;
} else {
nextState = TaskPartitionState.STOPPED;
}
paMap.put(pId, new PartitionAssignment(instance, nextState.name()));
assignedPartitions.add(pId);
LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, nextState, instance));
}
break;
case COMPLETED:
{
// The task has completed on this partition. Mark as such in the context object.
donePartitions.add(pId);
LOG.debug(String.format("Task partition %s has completed with state %s. Marking as such in rebalancer context.", pName, currState));
partitionsToDropFromIs.add(pId);
markPartitionCompleted(jobCtx, pId);
}
break;
case TIMED_OUT:
case TASK_ERROR:
case TASK_ABORTED:
case ERROR:
{
// The task may be rescheduled on a different instance.
donePartitions.add(pId);
LOG.debug(String.format("Task partition %s has error state %s with msg %s. Marking as such in rebalancer context.", pName, currState, jobCtx.getPartitionInfo(pId)));
markPartitionError(jobCtx, pId, currState, true);
// After all tasks are aborted, they will be dropped, because of job timeout.
if (jobState != TaskState.TIMED_OUT && jobState != TaskState.TIMING_OUT) {
if (jobCtx.getPartitionNumAttempts(pId) >= jobCfg.getMaxAttemptsPerTask() || currState.equals(TaskPartitionState.TASK_ABORTED) || currState.equals(TaskPartitionState.ERROR)) {
skippedPartitions.add(pId);
partitionsToDropFromIs.add(pId);
LOG.debug("skippedPartitions:" + skippedPartitions);
} else {
// Mark the task to be started at some later time (if enabled)
markPartitionDelayed(jobCfg, jobCtx, pId);
}
}
}
break;
case INIT:
case DROPPED:
{
// currState in [INIT, DROPPED]. Do nothing, the partition is eligible to be reassigned.
donePartitions.add(pId);
LOG.debug(String.format("Task partition %s has state %s. It will be dropped from the current ideal state.", pName, currState));
}
break;
default:
throw new AssertionError("Unknown enum symbol: " + currState);
}
}
// Remove the set of task partitions that are completed or in one of the error states.
pSet.removeAll(donePartitions);
}
addGiveupPartitions(skippedPartitions, jobCtx, allPartitions, jobCfg);
if (jobState == TaskState.IN_PROGRESS && skippedPartitions.size() > jobCfg.getFailureThreshold()) {
if (isJobFinished(jobCtx, jobResource, currStateOutput)) {
failJob(jobResource, workflowCtx, jobCtx, workflowConfig, cache.getJobConfigMap());
return buildEmptyAssignment(jobResource, currStateOutput);
}
workflowCtx.setJobState(jobResource, TaskState.FAILING);
// Drop all assigned but not given-up tasks
for (int pId : jobCtx.getPartitionSet()) {
String instance = jobCtx.getAssignedParticipant(pId);
if (jobCtx.getPartitionState(pId) != null && !isTaskGivenup(jobCtx, jobCfg, pId)) {
paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.TASK_ABORTED.name()));
}
Partition partition = new Partition(pName(jobResource, pId));
Message pendingMessage = currStateOutput.getPendingState(jobResource, partition, instance);
// Keep a task that is still pending its INIT state transition in INIT, so that Helix will cancel the transition.
if (jobCtx.getPartitionState(pId) == TaskPartitionState.INIT && pendingMessage != null) {
paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.INIT.name()));
}
}
return toResourceAssignment(jobResource, paMap);
}
if (jobState == TaskState.FAILING && isJobFinished(jobCtx, jobResource, currStateOutput)) {
failJob(jobResource, workflowCtx, jobCtx, workflowConfig, cache.getJobConfigMap());
return buildEmptyAssignment(jobResource, currStateOutput);
}
if (isJobComplete(jobCtx, allPartitions, jobCfg)) {
markJobComplete(jobResource, jobCtx, workflowConfig, workflowCtx, cache.getJobConfigMap());
_clusterStatusMonitor.updateJobCounters(jobCfg, TaskState.COMPLETED, jobCtx.getFinishTime() - jobCtx.getStartTime());
_rebalanceScheduler.removeScheduledRebalance(jobResource);
TaskUtil.cleanupJobIdealStateExtView(_manager.getHelixDataAccessor(), jobResource);
return buildEmptyAssignment(jobResource, currStateOutput);
}
// If the job is timing out and all tasks have reached a final state, mark the job TIMED_OUT so its remaining partitions can be dropped (note that Helix doesn't track whether the drop succeeds or not).
if (jobState == TaskState.TIMING_OUT && isJobFinished(jobCtx, jobResource, currStateOutput)) {
jobCtx.setFinishTime(System.currentTimeMillis());
workflowCtx.setJobState(jobResource, TaskState.TIMED_OUT);
// Mark all INIT task to TASK_ABORTED
for (int pId : jobCtx.getPartitionSet()) {
if (jobCtx.getPartitionState(pId) == TaskPartitionState.INIT) {
jobCtx.setPartitionState(pId, TaskPartitionState.TASK_ABORTED);
}
}
_clusterStatusMonitor.updateJobCounters(jobCfg, TaskState.TIMED_OUT);
_rebalanceScheduler.removeScheduledRebalance(jobResource);
TaskUtil.cleanupJobIdealStateExtView(_manager.getHelixDataAccessor(), jobResource);
return buildEmptyAssignment(jobResource, currStateOutput);
}
// For delayed tasks, trigger a rebalance event for the closest upcoming ready time
scheduleForNextTask(jobResource, jobCtx, currentTime);
// Make additional task assignments if needed.
if (jobState != TaskState.TIMING_OUT && jobState != TaskState.TIMED_OUT && jobTgtState == TargetState.START) {
// Contains the set of task partitions that must be excluded from consideration when making
// any new assignments.
// This includes all completed, failed, delayed, and already assigned partitions.
Set<Integer> excludeSet = Sets.newTreeSet(assignedPartitions);
addCompletedTasks(excludeSet, jobCtx, allPartitions);
addGiveupPartitions(excludeSet, jobCtx, allPartitions, jobCfg);
excludeSet.addAll(skippedPartitions);
excludeSet.addAll(getNonReadyPartitions(jobCtx, currentTime));
// Get instance->[partition, ...] mappings for the target resource.
Map<String, SortedSet<Integer>> tgtPartitionAssignments = taskAssignmentCal.getTaskAssignment(currStateOutput, prevTaskToInstanceStateAssignment, liveInstances, jobCfg, jobCtx, workflowConfig, workflowCtx, allPartitions, cache.getIdealStates());
if (!isGenericTaskJob(jobCfg) || jobCfg.isRebalanceRunningTask()) {
dropRebalancedRunningTasks(tgtPartitionAssignments, prevInstanceToTaskAssignments, paMap, jobCtx);
}
for (Map.Entry<String, SortedSet<Integer>> entry : prevInstanceToTaskAssignments.entrySet()) {
String instance = entry.getKey();
if (!tgtPartitionAssignments.containsKey(instance) || excludedInstances.contains(instance)) {
continue;
}
// 1. throttled by job configuration
// Contains the set of task partitions currently assigned to the instance.
Set<Integer> pSet = entry.getValue();
int jobCfgLimitation = jobCfg.getNumConcurrentTasksPerInstance() - pSet.size();
// 2. throttled by participant capacity
int participantCapacity = cache.getInstanceConfigMap().get(instance).getMaxConcurrentTask();
if (participantCapacity == InstanceConfig.MAX_CONCURRENT_TASK_NOT_SET) {
participantCapacity = cache.getClusterConfig().getMaxConcurrentTaskPerInstance();
}
int participantLimitation = participantCapacity - cache.getParticipantActiveTaskCount(instance);
// New tasks to be assigned
int numToAssign = Math.min(jobCfgLimitation, participantLimitation);
LOG.debug(String.format("Throttle tasks to be assigned to instance %s using limitation: Job Concurrent Task(%d), " + "Participant Max Task(%d). Remaining capacity %d.", instance, jobCfgLimitation, participantCapacity, numToAssign));
if (numToAssign > 0) {
Set<Integer> throttledSet = new HashSet<Integer>();
List<Integer> nextPartitions = getNextPartitions(tgtPartitionAssignments.get(instance), excludeSet, throttledSet, numToAssign);
for (Integer pId : nextPartitions) {
String pName = pName(jobResource, pId);
paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.RUNNING.name()));
excludeSet.add(pId);
jobCtx.setAssignedParticipant(pId, instance);
jobCtx.setPartitionState(pId, TaskPartitionState.INIT);
jobCtx.setPartitionStartTime(pId, System.currentTimeMillis());
LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, TaskPartitionState.RUNNING, instance));
}
cache.setParticipantActiveTaskCount(instance, cache.getParticipantActiveTaskCount(instance) + nextPartitions.size());
if (!throttledSet.isEmpty()) {
LOG.debug(throttledSet.size() + " tasks are ready but throttled when assigned to participant.");
}
}
}
}
return toResourceAssignment(jobResource, paMap);
}
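The paMap built above pairs each task partition id with the instance and task state it should move to; toResourceAssignment (shown further below) then converts that map into the returned ResourceAssignment. The PartitionAssignment holder itself is not reproduced on this page, so the following is only a minimal sketch of what the code above assumes, limited to the _instance and _state fields that toResourceAssignment reads:
// Hypothetical sketch of the per-partition holder used in paMap. The real class is an
// inner helper of the task rebalancer and may carry a different shape or visibility.
private static class PartitionAssignment {
    final String _instance;
    final String _state;

    PartitionAssignment(String instance, String state) {
        _instance = instance;
        _state = state;
    }
}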
Use of org.apache.helix.model.ResourceAssignment in project helix by apache.
The class DeprecatedTaskRebalancer, method emptyAssignment.
private static ResourceAssignment emptyAssignment(String name, CurrentStateOutput currStateOutput) {
ResourceAssignment assignment = new ResourceAssignment(name);
Set<Partition> partitions = currStateOutput.getCurrentStateMappedPartitions(name);
for (Partition partition : partitions) {
Map<String, String> currentStateMap = currStateOutput.getCurrentStateMap(name, partition);
Map<String, String> replicaMap = Maps.newHashMap();
for (String instanceName : currentStateMap.keySet()) {
replicaMap.put(instanceName, HelixDefinedState.DROPPED.toString());
}
assignment.addReplicaMap(partition, replicaMap);
}
return assignment;
}
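emptyAssignment does not return a truly empty mapping: every partition that still has a current state is explicitly mapped to HelixDefinedState.DROPPED so that the controller tears those replicas down. Below is a minimal, self-contained sketch of the same idea; the resource and instance names are invented for illustration, and getReplicaMap is assumed to be the read counterpart of the addReplicaMap call used above.
import java.util.Map;
import com.google.common.collect.ImmutableMap;
import org.apache.helix.HelixDefinedState;
import org.apache.helix.model.Partition;
import org.apache.helix.model.ResourceAssignment;

public class DroppedAssignmentSketch {
    public static void main(String[] args) {
        // Mirror emptyAssignment: every instance currently hosting the partition is mapped to DROPPED.
        ResourceAssignment assignment = new ResourceAssignment("myJob");
        Partition partition = new Partition("myJob_0");
        Map<String, String> replicaMap = ImmutableMap.of(
                "localhost_12913", HelixDefinedState.DROPPED.toString(),
                "localhost_12914", HelixDefinedState.DROPPED.toString());
        assignment.addReplicaMap(partition, replicaMap);
        System.out.println(assignment.getReplicaMap(partition));
    }
}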
Use of org.apache.helix.model.ResourceAssignment in project helix by apache.
The class DeprecatedTaskRebalancer, method computeBestPossiblePartitionState.
@Override
public ResourceAssignment computeBestPossiblePartitionState(ClusterDataCache clusterData, IdealState taskIs, Resource resource, CurrentStateOutput currStateOutput) {
final String resourceName = resource.getResourceName();
LOG.debug("Computer Best Partition for resource: " + resourceName);
// Fetch job configuration
JobConfig jobCfg = (JobConfig) clusterData.getResourceConfig(resourceName);
if (jobCfg == null) {
LOG.debug("Job configuration is NULL for " + resourceName);
return emptyAssignment(resourceName, currStateOutput);
}
String workflowResource = jobCfg.getWorkflow();
// Fetch workflow configuration and context
WorkflowConfig workflowCfg = clusterData.getWorkflowConfig(workflowResource);
if (workflowCfg == null) {
LOG.debug("Workflow configuration is NULL for " + resourceName);
return emptyAssignment(resourceName, currStateOutput);
}
WorkflowContext workflowCtx = clusterData.getWorkflowContext(workflowResource);
// Initialize workflow context if needed
if (workflowCtx == null) {
workflowCtx = new WorkflowContext(new ZNRecord(TaskUtil.WORKFLOW_CONTEXT_KW));
workflowCtx.setStartTime(System.currentTimeMillis());
workflowCtx.setName(workflowResource);
LOG.info("Workflow context for " + resourceName + " created!");
}
// check ancestor job status
int notStartedCount = 0;
int inCompleteCount = 0;
for (String ancestor : workflowCfg.getJobDag().getAncestors(resourceName)) {
TaskState jobState = workflowCtx.getJobState(ancestor);
if (jobState == null || jobState == TaskState.NOT_STARTED) {
++notStartedCount;
} else if (jobState == TaskState.IN_PROGRESS || jobState == TaskState.STOPPED) {
++inCompleteCount;
}
}
if (notStartedCount > 0 || (workflowCfg.isJobQueue() && inCompleteCount >= workflowCfg.getParallelJobs())) {
LOG.debug("Job is not ready to be scheduled due to pending dependent jobs " + resourceName);
return emptyAssignment(resourceName, currStateOutput);
}
// Clean up if workflow marked for deletion
TargetState targetState = workflowCfg.getTargetState();
if (targetState == TargetState.DELETE) {
LOG.info("Workflow is marked as deleted " + workflowResource + " cleaning up the workflow context.");
cleanup(_manager, resourceName, workflowCfg, workflowResource);
return emptyAssignment(resourceName, currStateOutput);
}
// Check if this workflow has been finished past its expiry.
if (workflowCtx.getFinishTime() != WorkflowContext.UNFINISHED && workflowCtx.getFinishTime() + workflowCfg.getExpiry() <= System.currentTimeMillis()) {
LOG.info("Workflow " + workflowResource + " is completed and passed expiry time, cleaning up the workflow context.");
markForDeletion(_manager, workflowResource);
cleanup(_manager, resourceName, workflowCfg, workflowResource);
return emptyAssignment(resourceName, currStateOutput);
}
// Fetch any existing context information from the property store.
JobContext jobCtx = clusterData.getJobContext(resourceName);
if (jobCtx == null) {
jobCtx = new JobContext(new ZNRecord(TaskUtil.TASK_CONTEXT_KW));
jobCtx.setStartTime(System.currentTimeMillis());
jobCtx.setName(resourceName);
}
// Check for expired jobs for non-terminable workflows
long jobFinishTime = jobCtx.getFinishTime();
if (!workflowCfg.isTerminable() && jobFinishTime != WorkflowContext.UNFINISHED && jobFinishTime + workflowCfg.getExpiry() <= System.currentTimeMillis()) {
LOG.info("Job " + resourceName + " is completed and passed expiry time, cleaning up the job context.");
cleanup(_manager, resourceName, workflowCfg, workflowResource);
return emptyAssignment(resourceName, currStateOutput);
}
// The job is already in a final state (completed/failed).
if (workflowCtx.getJobState(resourceName) == TaskState.FAILED || workflowCtx.getJobState(resourceName) == TaskState.COMPLETED) {
LOG.debug("Job " + resourceName + " is failed or already completed.");
return emptyAssignment(resourceName, currStateOutput);
}
// Check for readiness, and stop processing if it's not ready
boolean isReady = scheduleIfNotReady(workflowCfg, workflowCtx, workflowResource, resourceName, clusterData);
if (!isReady) {
LOG.debug("Job " + resourceName + " is not ready to be scheduled.");
return emptyAssignment(resourceName, currStateOutput);
}
// Grab the old assignment, or an empty one if it doesn't exist
ResourceAssignment prevAssignment = getPrevResourceAssignment(_manager, resourceName);
if (prevAssignment == null) {
prevAssignment = new ResourceAssignment(resourceName);
}
// Will contain the list of partitions that must be explicitly dropped from the ideal state that
// is stored in zk.
// Fetch the previous resource assignment from the property store. This is required because of
// HELIX-230.
Set<Integer> partitionsToDrop = new TreeSet<Integer>();
ResourceAssignment newAssignment = computeResourceMapping(resourceName, workflowCfg, jobCfg, prevAssignment, clusterData.getLiveInstances().keySet(), currStateOutput, workflowCtx, jobCtx, partitionsToDrop, clusterData);
if (!partitionsToDrop.isEmpty()) {
for (Integer pId : partitionsToDrop) {
taskIs.getRecord().getMapFields().remove(pName(resourceName, pId));
}
HelixDataAccessor accessor = _manager.getHelixDataAccessor();
PropertyKey propertyKey = accessor.keyBuilder().idealStates(resourceName);
accessor.setProperty(propertyKey, taskIs);
}
// Update Workflow and Job context in data cache and ZK.
clusterData.updateJobContext(resourceName, jobCtx, _manager.getHelixDataAccessor());
clusterData.updateWorkflowContext(workflowResource, workflowCtx, _manager.getHelixDataAccessor());
setPrevResourceAssignment(_manager, resourceName, newAssignment);
LOG.debug("Job " + resourceName + " new assignment " + Arrays.toString(newAssignment.getMappedPartitions().toArray()));
return newAssignment;
}
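The closing debug statement above only logs the mapped partition names. As a rough sketch, a caller could walk the returned ResourceAssignment and print the full instance-to-state map per partition; the class and method names below are illustrative, and getReplicaMap is again assumed to be the read counterpart of addReplicaMap.
import java.util.Map;
import org.apache.helix.model.Partition;
import org.apache.helix.model.ResourceAssignment;

public class AssignmentInspector {
    // Walk every mapped partition and print its replica map (instance -> state).
    public static void dump(ResourceAssignment assignment) {
        for (Partition partition : assignment.getMappedPartitions()) {
            Map<String, String> replicaMap = assignment.getReplicaMap(partition);
            System.out.println(partition.getPartitionName() + " -> " + replicaMap);
        }
    }
}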
Use of org.apache.helix.model.ResourceAssignment in project helix by apache.
The class JobRebalancer, method computeBestPossiblePartitionState.
@Override
public ResourceAssignment computeBestPossiblePartitionState(ClusterDataCache clusterData, IdealState taskIs, Resource resource, CurrentStateOutput currStateOutput) {
final String jobName = resource.getResourceName();
LOG.debug("Computer Best Partition for job: " + jobName);
// Fetch job configuration
JobConfig jobCfg = clusterData.getJobConfig(jobName);
if (jobCfg == null) {
LOG.error("Job configuration is NULL for " + jobName);
return buildEmptyAssignment(jobName, currStateOutput);
}
String workflowResource = jobCfg.getWorkflow();
// Fetch workflow configuration and context
WorkflowConfig workflowCfg = clusterData.getWorkflowConfig(workflowResource);
if (workflowCfg == null) {
LOG.error("Workflow configuration is NULL for " + jobName);
return buildEmptyAssignment(jobName, currStateOutput);
}
WorkflowContext workflowCtx = clusterData.getWorkflowContext(workflowResource);
if (workflowCtx == null) {
LOG.error("Workflow context is NULL for " + jobName);
return buildEmptyAssignment(jobName, currStateOutput);
}
TargetState targetState = workflowCfg.getTargetState();
if (targetState != TargetState.START && targetState != TargetState.STOP) {
LOG.info("Target state is " + targetState.name() + " for workflow " + workflowResource + ".Stop scheduling job " + jobName);
return buildEmptyAssignment(jobName, currStateOutput);
}
// Stop current run of the job if workflow or job is already in final state (failed or completed)
TaskState workflowState = workflowCtx.getWorkflowState();
TaskState jobState = workflowCtx.getJobState(jobName);
// The job is already in a final state (completed/failed).
if (workflowState == TaskState.FAILED || workflowState == TaskState.COMPLETED || jobState == TaskState.FAILED || jobState == TaskState.COMPLETED) {
LOG.info(String.format("Workflow %s or job %s is already failed or completed, workflow state (%s), job state (%s), clean up job IS.", workflowResource, jobName, workflowState, jobState));
TaskUtil.cleanupJobIdealStateExtView(_manager.getHelixDataAccessor(), jobName);
_rebalanceScheduler.removeScheduledRebalance(jobName);
return buildEmptyAssignment(jobName, currStateOutput);
}
if (!isWorkflowReadyForSchedule(workflowCfg)) {
LOG.info("Job is not ready to be run since workflow is not ready " + jobName);
return buildEmptyAssignment(jobName, currStateOutput);
}
if (!isJobStarted(jobName, workflowCtx) && !isJobReadyToSchedule(jobName, workflowCfg, workflowCtx, getInCompleteJobCount(workflowCfg, workflowCtx), clusterData.getJobConfigMap())) {
LOG.info("Job is not ready to run " + jobName);
return buildEmptyAssignment(jobName, currStateOutput);
}
// Fetch any existing context information from the property store.
JobContext jobCtx = clusterData.getJobContext(jobName);
if (jobCtx == null) {
jobCtx = new JobContext(new ZNRecord(TaskUtil.TASK_CONTEXT_KW));
jobCtx.setStartTime(System.currentTimeMillis());
jobCtx.setName(jobName);
workflowCtx.setJobState(jobName, TaskState.IN_PROGRESS);
}
if (!TaskState.TIMED_OUT.equals(workflowCtx.getJobState(jobName))) {
scheduleRebalanceForTimeout(jobCfg.getJobId(), jobCtx.getStartTime(), jobCfg.getTimeout());
}
// Grab the old assignment, or an empty one if it doesn't exist
ResourceAssignment prevAssignment = getPrevResourceAssignment(jobName);
if (prevAssignment == null) {
prevAssignment = new ResourceAssignment(jobName);
}
// Will contain the list of partitions that must be explicitly dropped from the ideal state that
// is stored in zk.
// Fetch the previous resource assignment from the property store. This is required because of
// HELIX-230.
Set<String> liveInstances = jobCfg.getInstanceGroupTag() == null ? clusterData.getEnabledLiveInstances() : clusterData.getEnabledLiveInstancesWithTag(jobCfg.getInstanceGroupTag());
if (liveInstances.isEmpty()) {
LOG.error("No available instance found for job!");
}
Set<Integer> partitionsToDrop = new TreeSet<Integer>();
ResourceAssignment newAssignment = computeResourceMapping(jobName, workflowCfg, jobCfg, prevAssignment, liveInstances, currStateOutput, workflowCtx, jobCtx, partitionsToDrop, clusterData);
HelixDataAccessor accessor = _manager.getHelixDataAccessor();
PropertyKey propertyKey = accessor.keyBuilder().idealStates(jobName);
taskIs = clusterData.getIdealState(jobName);
if (!partitionsToDrop.isEmpty() && taskIs != null) {
for (Integer pId : partitionsToDrop) {
taskIs.getRecord().getMapFields().remove(pName(jobName, pId));
}
accessor.setProperty(propertyKey, taskIs);
}
// Update Workflow and Job context in data cache and ZK.
clusterData.updateJobContext(jobName, jobCtx, _manager.getHelixDataAccessor());
clusterData.updateWorkflowContext(workflowResource, workflowCtx, _manager.getHelixDataAccessor());
setPrevResourceAssignment(jobName, newAssignment);
LOG.debug("Job " + jobName + " new assignment " + Arrays.toString(newAssignment.getMappedPartitions().toArray()));
return newAssignment;
}
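pName, used above when removing dropped task partitions from the ideal state, is a small helper of the task framework that is not reproduced on this page. A plausible sketch, assuming the <resource>_<partitionId> naming convention implied by calls such as pName(jobName, pId):
// Plausible sketch of the pName helper; the real implementation lives in the task framework
// and is assumed here to simply join the resource name and the partition id.
private static String pName(String resource, int pId) {
    return resource + "_" + pId;
}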
Use of org.apache.helix.model.ResourceAssignment in project helix by apache.
The class JobRebalancer, method toResourceAssignment.
private ResourceAssignment toResourceAssignment(String jobResource, Map<Integer, PartitionAssignment> paMap) {
// Construct a ResourceAssignment object from the map of partition assignments.
ResourceAssignment ra = new ResourceAssignment(jobResource);
for (Map.Entry<Integer, PartitionAssignment> e : paMap.entrySet()) {
PartitionAssignment pa = e.getValue();
ra.addReplicaMap(new Partition(pName(jobResource, e.getKey())), ImmutableMap.of(pa._instance, pa._state));
}
return ra;
}