use of org.apache.helix.task.JobContext in project helix by apache.
the class TestJobTimeout method testTaskRunningIndefinitely.
@Test
public void testTaskRunningIndefinitely() throws InterruptedException {
  // The first job runs indefinitely and times out; the second job runs successfully; the workflow succeeds.
  final String FIRST_JOB = "first_job";
  final String SECOND_JOB = "second_job";
  final String WORKFLOW_NAME = TestHelper.getTestMethodName();
  final String DB_NAME = WorkflowGenerator.DEFAULT_TGT_DB;
  JobConfig.Builder firstJobBuilder = new JobConfig.Builder()
      .setWorkflow(WORKFLOW_NAME)
      .setTargetResource(DB_NAME)
      .setTargetPartitionStates(Sets.newHashSet(MasterSlaveSMD.States.MASTER.name()))
      .setCommand(MockTask.TASK_COMMAND)
      // the task runs far longer than the job timeout, so it gets stuck
      .setJobCommandConfigMap(ImmutableMap.of(MockTask.TIMEOUT_CONFIG, "99999999"))
      .setTimeout(1000);
  JobConfig.Builder secondJobBuilder = new JobConfig.Builder()
      .setWorkflow(WORKFLOW_NAME)
      .setTargetResource(DB_NAME)
      .setTargetPartitionStates(Sets.newHashSet(MasterSlaveSMD.States.MASTER.name()))
      .setCommand(MockTask.TASK_COMMAND)
      // ignore the first job's timeout
      .setIgnoreDependentJobFailure(true);
  WorkflowConfig.Builder workflowConfigBuilder = new WorkflowConfig.Builder(WORKFLOW_NAME)
      // the workflow ignores the first job's timeout, schedules the second job, and succeeds
      .setFailureThreshold(1);
  Workflow.Builder workflowBuilder = new Workflow.Builder(WORKFLOW_NAME)
      .setWorkflowConfig(workflowConfigBuilder.build())
      .addJob(FIRST_JOB, firstJobBuilder)
      .addJob(SECOND_JOB, secondJobBuilder)
      .addParentChildDependency(FIRST_JOB, SECOND_JOB);
  _driver.start(workflowBuilder.build());
  _driver.pollForJobState(WORKFLOW_NAME, TaskUtil.getNamespacedJobName(WORKFLOW_NAME, FIRST_JOB), TaskState.TIMED_OUT);
  _driver.pollForJobState(WORKFLOW_NAME, TaskUtil.getNamespacedJobName(WORKFLOW_NAME, SECOND_JOB), TaskState.COMPLETED);
  _driver.pollForWorkflowState(WORKFLOW_NAME, TaskState.COMPLETED);
  JobContext jobContext = _driver.getJobContext(TaskUtil.getNamespacedJobName(WORKFLOW_NAME, FIRST_JOB));
  for (int pId : jobContext.getPartitionSet()) {
    // All tasks were aborted because of the job timeout
    Assert.assertEquals(jobContext.getPartitionState(pId), TaskPartitionState.TASK_ABORTED);
  }
}
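The per-partition assertion loop at the end of this test recurs verbatim in the next example. As a minimal sketch, it could be factored into a helper; the helper name is hypothetical, and it assumes the same _driver field these tests already use:

private void assertAllPartitionsInState(String workflowName, String jobName, TaskPartitionState expected) {
  JobContext jobContext = _driver.getJobContext(TaskUtil.getNamespacedJobName(workflowName, jobName));
  for (int pId : jobContext.getPartitionSet()) {
    Assert.assertEquals(jobContext.getPartitionState(pId), expected);
  }
}

With that helper, the closing loop above reduces to assertAllPartitionsInState(WORKFLOW_NAME, FIRST_JOB, TaskPartitionState.TASK_ABORTED).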
use of org.apache.helix.task.JobContext in project helix by apache.
the class TestJobTimeoutTaskNotStarted method testTaskNotStarted.
@Test
public void testTaskNotStarted() throws InterruptedException {
  final String BLOCK_WORKFLOW_NAME = "blockWorkflow";
  final String TIMEOUT_WORKFLOW_NAME = "timeoutWorkflow";
  final String DB_NAME = WorkflowGenerator.DEFAULT_TGT_DB;
  final String TIMEOUT_JOB_1 = "timeoutJob1";
  final String TIMEOUT_JOB_2 = "timeoutJob2";
  // 50 blocking tasks
  JobConfig.Builder blockJobBuilder = new JobConfig.Builder()
      .setWorkflow(BLOCK_WORKFLOW_NAME)
      .setTargetResource(DB_NAME)
      .setTargetPartitionStates(Sets.newHashSet(MasterSlaveSMD.States.MASTER.name()))
      .setCommand(MockTask.TASK_COMMAND)
      .setNumConcurrentTasksPerInstance(_numParitions);
  Workflow.Builder blockWorkflowBuilder = new Workflow.Builder(BLOCK_WORKFLOW_NAME)
      .addJob("blockJob", blockJobBuilder);
  _driver.start(blockWorkflowBuilder.build());
  Assert.assertTrue(TaskTestUtil.pollForAllTasksBlock(_manager.getHelixDataAccessor(), _participants[0].getInstanceName(), _numParitions, 10000));
  // Now the HelixTask thread pool is full, blocked by blockJob.
  // New tasks assigned to the instance won't start at all.
  // Two timeout jobs: the first times out but doesn't block the second from running; the second also times out.
  JobConfig.Builder timeoutJobBuilder = new JobConfig.Builder()
      .setWorkflow(TIMEOUT_WORKFLOW_NAME)
      .setTargetResource(DB_NAME)
      .setTargetPartitionStates(Sets.newHashSet(MasterSlaveSMD.States.MASTER.name()))
      .setCommand(MockTask.TASK_COMMAND)
      .setNumConcurrentTasksPerInstance(_numParitions)
      // wait a bit so that tasks are already assigned to the job (and will be cancelled)
      .setTimeout(3000);
  WorkflowConfig.Builder timeoutWorkflowConfigBuilder = new WorkflowConfig.Builder(TIMEOUT_WORKFLOW_NAME)
      // a failure threshold of 1 lets the workflow schedule the second job despite the first job's timeout
      .setFailureThreshold(1);
  Workflow.Builder timeoutWorkflowBuilder = new Workflow.Builder(TIMEOUT_WORKFLOW_NAME)
      .setWorkflowConfig(timeoutWorkflowConfigBuilder.build())
      .addJob(TIMEOUT_JOB_1, timeoutJobBuilder); // job 1 times out, but won't block job 2
  // ignore the first job's timeout
  timeoutJobBuilder.setIgnoreDependentJobFailure(true);
  // job 2 also times out
  timeoutWorkflowBuilder.addJob(TIMEOUT_JOB_2, timeoutJobBuilder).addParentChildDependency(TIMEOUT_JOB_1, TIMEOUT_JOB_2);
  _driver.start(timeoutWorkflowBuilder.build());
  _driver.pollForJobState(TIMEOUT_WORKFLOW_NAME, TaskUtil.getNamespacedJobName(TIMEOUT_WORKFLOW_NAME, TIMEOUT_JOB_1), TaskState.TIMED_OUT);
  _driver.pollForJobState(TIMEOUT_WORKFLOW_NAME, TaskUtil.getNamespacedJobName(TIMEOUT_WORKFLOW_NAME, TIMEOUT_JOB_2), TaskState.TIMED_OUT);
  _driver.pollForWorkflowState(TIMEOUT_WORKFLOW_NAME, TaskState.FAILED);
  JobContext jobContext = _driver.getJobContext(TaskUtil.getNamespacedJobName(TIMEOUT_WORKFLOW_NAME, TIMEOUT_JOB_1));
  for (int pId : jobContext.getPartitionSet()) {
    // All tasks are stuck in the INIT->RUNNING transition; the cancelled transition leaves them marked TASK_ABORTED
    Assert.assertEquals(jobContext.getPartitionState(pId), TaskPartitionState.TASK_ABORTED);
  }
  jobContext = _driver.getJobContext(TaskUtil.getNamespacedJobName(TIMEOUT_WORKFLOW_NAME, TIMEOUT_JOB_2));
  for (int pId : jobContext.getPartitionSet()) {
    // All tasks are stuck in the INIT->RUNNING transition; the cancelled transition leaves them marked TASK_ABORTED
    Assert.assertEquals(jobContext.getPartitionState(pId), TaskPartitionState.TASK_ABORTED);
  }
}
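This test waits on asynchronous state changes through the pollFor* helpers. A generic poll loop in the same spirit, shown only as a sketch (the helper is hypothetical; the real TaskDriver and TaskTestUtil methods take richer arguments):

private static boolean pollUntil(java.util.function.BooleanSupplier condition, long timeoutMs)
    throws InterruptedException {
  long deadline = System.currentTimeMillis() + timeoutMs;
  while (System.currentTimeMillis() < deadline) {
    if (condition.getAsBoolean()) {
      return true;
    }
    Thread.sleep(100L); // back off between checks instead of spinning
  }
  return condition.getAsBoolean(); // one final check at the deadline
}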
use of org.apache.helix.task.JobContext in project helix by apache.
the class TestRebalanceRunningTask method getNumOfInstances.
private int getNumOfInstances() {
  JobContext jobContext = _driver.getJobContext(TaskUtil.getNamespacedJobName(WORKFLOW, JOB));
  // Collect the distinct participants that the job's tasks were assigned to.
  Set<String> instances = new HashSet<String>();
  for (int pId : jobContext.getPartitionSet()) {
    instances.add(jobContext.getAssignedParticipant(pId));
  }
  return instances.size();
}
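Since task rebalancing is asynchronous, a counter like this is typically polled rather than read once. A hedged usage sketch using Helix's TestHelper.verify (the threshold and timeout here are assumptions for illustration, not values from the test):

// Wait up to 30s for the job's tasks to be spread across more than one instance.
Assert.assertTrue(TestHelper.verify(new TestHelper.Verifier() {
  @Override
  public boolean verify() {
    return getNumOfInstances() > 1;
  }
}, 30 * 1000));

Note that getAssignedParticipant(pId) can return null for a partition that was never assigned, and a null entry would be counted by the set like any other value.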
use of org.apache.helix.task.JobContext in project helix by apache.
the class TestRecurringJobQueue method testGetNoExistWorkflowConfig.
@Test
public void testGetNoExistWorkflowConfig() {
  String randomName = "randomJob";
  WorkflowConfig workflowConfig = _driver.getWorkflowConfig(randomName);
  Assert.assertNull(workflowConfig);
  JobConfig jobConfig = _driver.getJobConfig(randomName);
  Assert.assertNull(jobConfig);
  WorkflowContext workflowContext = _driver.getWorkflowContext(randomName);
  Assert.assertNull(workflowContext);
  JobContext jobContext = _driver.getJobContext(randomName);
  Assert.assertNull(jobContext);
}
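All four getters return null, rather than throwing, when nothing exists under the given name, so callers should null-check before dereferencing. A minimal defensive sketch:

JobContext maybeContext = _driver.getJobContext("randomJob");
// Treat a missing context as zero scheduled partitions instead of risking a NullPointerException.
int partitionCount = (maybeContext == null) ? 0 : maybeContext.getPartitionSet().size();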
use of org.apache.helix.task.JobContext in project helix by apache.
the class TestRecurringJobQueue method stopDeleteJobAndResumeRecurrentQueue.
@Test
public void stopDeleteJobAndResumeRecurrentQueue() throws Exception {
  String queueName = TestHelper.getTestMethodName();
  // Create a queue
  LOG.info("Starting job-queue: " + queueName);
  JobQueue.Builder queueBuilder = TaskTestUtil.buildRecurrentJobQueue(queueName, 5);
  // Create and enqueue jobs
  Map<String, String> commandConfig = ImmutableMap.of(MockTask.TIMEOUT_CONFIG, String.valueOf(500));
  Thread.sleep(100);
  List<String> currentJobNames = createAndEnqueueJob(queueBuilder, 5);
  _driver.createQueue(queueBuilder.build());
  WorkflowContext wCtx = TaskTestUtil.pollForWorkflowContext(_driver, queueName);
  String scheduledQueue = wCtx.getLastScheduledSingleWorkflow();
  // ensure job 1 is started before deleting it
  String deletedJob1 = currentJobNames.get(0);
  String namedSpaceDeletedJob1 = String.format("%s_%s", scheduledQueue, deletedJob1);
  _driver.pollForJobState(scheduledQueue, namedSpaceDeletedJob1, TaskState.IN_PROGRESS, TaskState.COMPLETED);
  // stop the queue
  LOG.info("Pausing job-queue: " + scheduledQueue);
  _driver.stop(queueName);
  _driver.pollForJobState(scheduledQueue, namedSpaceDeletedJob1, TaskState.STOPPED);
  _driver.pollForWorkflowState(scheduledQueue, TaskState.STOPPED);
  // delete the in-progress job (job 1) and verify it is deleted
  _driver.deleteJob(queueName, deletedJob1);
  verifyJobDeleted(queueName, namedSpaceDeletedJob1);
  verifyJobDeleted(scheduledQueue, namedSpaceDeletedJob1);
  LOG.info("Resuming job-queue: " + queueName);
  _driver.resume(queueName);
  // ensure job 2 is started
  _driver.pollForJobState(scheduledQueue, String.format("%s_%s", scheduledQueue, currentJobNames.get(1)), TaskState.IN_PROGRESS, TaskState.COMPLETED);
  // stop the queue
  LOG.info("Pausing job-queue: " + queueName);
  _driver.stop(queueName);
  _driver.pollForJobState(scheduledQueue, String.format("%s_%s", scheduledQueue, currentJobNames.get(1)), TaskState.STOPPED);
  _driver.pollForWorkflowState(scheduledQueue, TaskState.STOPPED);
  // ensure job 3 is not started before deleting it
  String deletedJob2 = currentJobNames.get(2);
  String namedSpaceDeletedJob2 = String.format("%s_%s", scheduledQueue, deletedJob2);
  TaskTestUtil.pollForEmptyJobState(_driver, scheduledQueue, namedSpaceDeletedJob2);
  // delete the not-yet-started job (job 3) and verify it is deleted
  _driver.deleteJob(queueName, deletedJob2);
  verifyJobDeleted(queueName, namedSpaceDeletedJob2);
  verifyJobDeleted(scheduledQueue, namedSpaceDeletedJob2);
  LOG.info("Resuming job-queue: " + queueName);
  _driver.resume(queueName);
  // Ensure the remaining jobs complete successfully, in the correct order
  currentJobNames.remove(deletedJob1);
  currentJobNames.remove(deletedJob2);
  long preJobFinish = 0;
  for (int i = 0; i < currentJobNames.size(); i++) {
    String namedSpaceJobName = String.format("%s_%s", scheduledQueue, currentJobNames.get(i));
    _driver.pollForJobState(scheduledQueue, namedSpaceJobName, TaskState.COMPLETED);
    JobContext jobContext = _driver.getJobContext(namedSpaceJobName);
    // each job must start no earlier than the previous job finished
    long jobStart = jobContext.getStartTime();
    Assert.assertTrue(jobStart >= preJobFinish);
    preJobFinish = jobContext.getFinishTime();
  }
  // verify the job is not there for the next recurrence of queue schedule
}
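The verifyJobDeleted helper referenced above is not shown in this snippet. A minimal sketch, assuming that deleting a job removes both its config and its context (hypothetical implementation; the real helper may verify the underlying ZooKeeper paths directly):

private void verifyJobDeleted(String queueName, String namespacedJobName) {
  // queueName is kept to match the call sites above; this sketch only needs the namespaced job name.
  // After deletion, neither a JobConfig nor a JobContext should remain for the job.
  Assert.assertNull(_driver.getJobConfig(namespacedJobName));
  Assert.assertNull(_driver.getJobContext(namespacedJobName));
}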