Search in sources :

Example 11 with JobContext

use of org.apache.helix.task.JobContext in project helix by apache.

the class TestJobTimeout method testTaskRunningIndefinitely.

/**
 * Scenario: the first job's task is configured to stay stuck, so the job hits its timeout; the
 * second job depends on the first but ignores the dependent failure, runs, and the workflow is
 * expected to reach COMPLETED.
 *
 * @throws InterruptedException declared for the blocking driver polling calls used below
 */
@Test
public void testTaskRunningIndefinitely() throws InterruptedException {
    final String workflowName = TestHelper.getTestMethodName();
    final String targetDb = WorkflowGenerator.DEFAULT_TGT_DB;
    final String firstJob = "first_job";
    final String secondJob = "second_job";
    final String masterState = MasterSlaveSMD.States.MASTER.name();

    // First job: the mock task is given a huge TIMEOUT_CONFIG so it stays stuck, while the job
    // itself carries a much smaller timeout.
    JobConfig.Builder stuckJobBuilder = new JobConfig.Builder()
            .setWorkflow(workflowName)
            .setTargetResource(targetDb)
            .setTargetPartitionStates(Sets.newHashSet(masterState))
            .setCommand(MockTask.TASK_COMMAND)
            .setJobCommandConfigMap(ImmutableMap.of(MockTask.TIMEOUT_CONFIG, "99999999"))
            .setTimeout(1000);

    // Second job: ignores the first job's timeout.
    JobConfig.Builder followUpJobBuilder = new JobConfig.Builder()
            .setWorkflow(workflowName)
            .setTargetResource(targetDb)
            .setTargetPartitionStates(Sets.newHashSet(masterState))
            .setCommand(MockTask.TASK_COMMAND)
            .setIgnoreDependentJobFailure(true);

    // Failure threshold of 1: the workflow tolerates the first job's timeout, schedules the
    // second job and is expected to succeed.
    WorkflowConfig.Builder workflowConfigBuilder =
            new WorkflowConfig.Builder(workflowName).setFailureThreshold(1);

    Workflow.Builder workflowBuilder = new Workflow.Builder(workflowName)
            .setWorkflowConfig(workflowConfigBuilder.build())
            .addJob(firstJob, stuckJobBuilder)
            .addJob(secondJob, followUpJobBuilder)
            .addParentChildDependency(firstJob, secondJob);
    Workflow workflow = workflowBuilder.build();
    _driver.start(workflow);

    // Expected end states: first job timed out, second job and the workflow completed.
    _driver.pollForJobState(workflowName, TaskUtil.getNamespacedJobName(workflowName, firstJob),
            TaskState.TIMED_OUT);
    _driver.pollForJobState(workflowName, TaskUtil.getNamespacedJobName(workflowName, secondJob),
            TaskState.COMPLETED);
    _driver.pollForWorkflowState(workflowName, TaskState.COMPLETED);

    // Every partition of the timed-out job must have been aborted by the job timeout.
    JobContext timedOutContext =
            _driver.getJobContext(TaskUtil.getNamespacedJobName(workflowName, firstJob));
    for (int partition : timedOutContext.getPartitionSet()) {
        Assert.assertEquals(timedOutContext.getPartitionState(partition),
                TaskPartitionState.TASK_ABORTED);
    }
}
Also used : WorkflowConfig(org.apache.helix.task.WorkflowConfig) Workflow(org.apache.helix.task.Workflow) JobContext(org.apache.helix.task.JobContext) JobConfig(org.apache.helix.task.JobConfig) Test(org.testng.annotations.Test)

Example 12 with JobContext

use of org.apache.helix.task.JobContext in project helix by apache.

the class TestJobTimeoutTaskNotStarted method testTaskNotStarted.

/**
 * Scenario: a blocking workflow first occupies the participant's task capacity, then a second
 * workflow with two chained jobs is started. Both of its jobs are expected to reach TIMED_OUT
 * with every partition marked TASK_ABORTED, and the workflow is expected to reach FAILED.
 *
 * @throws InterruptedException declared for the blocking driver polling calls used below
 */
@Test
public void testTaskNotStarted() throws InterruptedException {
    final String blockWorkflow = "blockWorkflow";
    final String timeoutWorkflow = "timeoutWorkflow";
    final String targetDb = WorkflowGenerator.DEFAULT_TGT_DB;
    final String timeoutJob1 = "timeoutJob1";
    final String timeoutJob2 = "timeoutJob2";
    final String masterState = MasterSlaveSMD.States.MASTER.name();

    // Blocking job, allowed up to _numParitions concurrent tasks per instance
    // (original note: 50 blocking tasks).
    JobConfig.Builder blockJobBuilder = new JobConfig.Builder()
            .setWorkflow(blockWorkflow)
            .setTargetResource(targetDb)
            .setTargetPartitionStates(Sets.newHashSet(masterState))
            .setCommand(MockTask.TASK_COMMAND)
            .setNumConcurrentTasksPerInstance(_numParitions);
    Workflow.Builder blockWorkflowBuilder =
            new Workflow.Builder(blockWorkflow).addJob("blockJob", blockJobBuilder);
    _driver.start(blockWorkflowBuilder.build());

    boolean allTasksBlocked = TaskTestUtil.pollForAllTasksBlock(
            _manager.getHelixDataAccessor(),
            _participants[0].getInstanceName(),
            _numParitions,
            10000);
    Assert.assertTrue(allTasksBlocked);

    // From here on the HelixTask thread pool is full and blocked by blockJob, so new tasks
    // assigned to the instance won't start at all.
    // Two timeout jobs follow: the first times out without preventing the second from being
    // scheduled, and the second times out as well.

    // The timeout leaves a little room so that tasks are already assigned to the job
    // (and will be cancelled).
    JobConfig.Builder timeoutJobBuilder = new JobConfig.Builder()
            .setWorkflow(timeoutWorkflow)
            .setTargetResource(targetDb)
            .setTargetPartitionStates(Sets.newHashSet(masterState))
            .setCommand(MockTask.TASK_COMMAND)
            .setNumConcurrentTasksPerInstance(_numParitions)
            .setTimeout(3000);

    // Failure threshold of 1; the workflow is nevertheless expected to end FAILED (see the
    // polling below) because both jobs time out.
    WorkflowConfig.Builder timeoutWorkflowConfigBuilder =
            new WorkflowConfig.Builder(timeoutWorkflow).setFailureThreshold(1);

    // Job 1 is added BEFORE the shared builder is switched to ignore dependent failures;
    // keep this statement order.
    Workflow.Builder timeoutWorkflowBuilder = new Workflow.Builder(timeoutWorkflow)
            .setWorkflowConfig(timeoutWorkflowConfigBuilder.build())
            .addJob(timeoutJob1, timeoutJobBuilder);

    // Job 2 reuses the builder, now ignoring job 1's timeout, and depends on job 1.
    timeoutJobBuilder.setIgnoreDependentJobFailure(true);
    timeoutWorkflowBuilder
            .addJob(timeoutJob2, timeoutJobBuilder)
            .addParentChildDependency(timeoutJob1, timeoutJob2);

    _driver.start(timeoutWorkflowBuilder.build());

    final String namespacedJob1 = TaskUtil.getNamespacedJobName(timeoutWorkflow, timeoutJob1);
    final String namespacedJob2 = TaskUtil.getNamespacedJobName(timeoutWorkflow, timeoutJob2);
    _driver.pollForJobState(timeoutWorkflow, namespacedJob1, TaskState.TIMED_OUT);
    _driver.pollForJobState(timeoutWorkflow, namespacedJob2, TaskState.TIMED_OUT);
    _driver.pollForWorkflowState(timeoutWorkflow, TaskState.FAILED);

    // For both jobs (job 1 first, then job 2): all tasks were stuck at INIT->RUNNING, the state
    // transition was cancelled and the partitions marked TASK_ABORTED.
    for (String namespacedJob : new String[] {namespacedJob1, namespacedJob2}) {
        JobContext context = _driver.getJobContext(namespacedJob);
        for (int partition : context.getPartitionSet()) {
            Assert.assertEquals(context.getPartitionState(partition),
                    TaskPartitionState.TASK_ABORTED);
        }
    }
}
Also used : WorkflowConfig(org.apache.helix.task.WorkflowConfig) Workflow(org.apache.helix.task.Workflow) JobContext(org.apache.helix.task.JobContext) JobConfig(org.apache.helix.task.JobConfig) Test(org.testng.annotations.Test)

Example 13 with JobContext

use of org.apache.helix.task.JobContext in project helix by apache.

the class TestRebalanceRunningTask method getNumOfInstances.

/**
 * Counts the distinct participants assigned to the partitions of job {@code JOB} in workflow
 * {@code WORKFLOW}, as reported by the job context fetched from the task driver.
 *
 * @return the number of distinct values returned by {@code getAssignedParticipant} over all
 *         partitions in the job context
 */
private int getNumOfInstances() {
    final String namespacedJob = TaskUtil.getNamespacedJobName(WORKFLOW, JOB);
    final JobContext context = _driver.getJobContext(namespacedJob);

    // A set collapses partitions that share the same participant.
    final Set<String> participants = new HashSet<String>();
    for (int partition : context.getPartitionSet()) {
        String participant = context.getAssignedParticipant(partition);
        participants.add(participant);
    }
    return participants.size();
}
Also used : JobContext(org.apache.helix.task.JobContext) HashSet(java.util.HashSet)

Example 14 with JobContext

use of org.apache.helix.task.JobContext in project helix by apache.

the class TestRecurringJobQueue method testGetNoExistWorkflowConfig.

/**
 * Asserts that the task driver returns {@code null} for the workflow config, job config,
 * workflow context and job context when asked about the name "randomJob".
 */
@Test
public void testGetNoExistWorkflowConfig() {
    final String unknownName = "randomJob";

    // Configs
    Assert.assertNull(_driver.getWorkflowConfig(unknownName));
    Assert.assertNull(_driver.getJobConfig(unknownName));

    // Contexts
    Assert.assertNull(_driver.getWorkflowContext(unknownName));
    Assert.assertNull(_driver.getJobContext(unknownName));
}
Also used : WorkflowConfig(org.apache.helix.task.WorkflowConfig) WorkflowContext(org.apache.helix.task.WorkflowContext) JobContext(org.apache.helix.task.JobContext) JobConfig(org.apache.helix.task.JobConfig) Test(org.testng.annotations.Test)

Example 15 with JobContext

use of org.apache.helix.task.JobContext in project helix by apache.

the class TestRecurringJobQueue method stopDeleteJobAndResumeRecurrentQueue.

/**
 * Exercises stop / delete-job / resume on a recurrent job queue:
 * <ol>
 *   <li>creates a recurrent queue with five jobs and waits for the first job to start;</li>
 *   <li>stops the queue, deletes the first (already started) job and verifies the deletion;</li>
 *   <li>resumes, waits for the second job to start, stops again, then deletes the third
 *       (not yet started) job and verifies the deletion;</li>
 *   <li>resumes and checks that the remaining jobs complete one after another, i.e. each job's
 *       start time is not earlier than the previous job's finish time.</li>
 * </ol>
 *
 * @throws Exception propagated from the driver and helper calls used below
 */
@Test
public void stopDeleteJobAndResumeRecurrentQueue() throws Exception {
    String queueName = TestHelper.getTestMethodName();

    // Create a queue
    LOG.info("Starting job-queue: " + queueName);
    JobQueue.Builder queueBuilder = TaskTestUtil.buildRecurrentJobQueue(queueName, 5);

    // Create and enqueue jobs
    // NOTE(review): the reason for this short pause is not evident from this method; confirm
    // before removing it.
    Thread.sleep(100);
    List<String> currentJobNames = createAndEnqueueJob(queueBuilder, 5);
    _driver.createQueue(queueBuilder.build());

    WorkflowContext wCtx = TaskTestUtil.pollForWorkflowContext(_driver, queueName);
    String scheduledQueue = wCtx.getLastScheduledSingleWorkflow();

    // ensure job 1 is started before deleting it
    String deletedJob1 = currentJobNames.get(0);
    String namedSpaceDeletedJob1 = String.format("%s_%s", scheduledQueue, deletedJob1);
    _driver.pollForJobState(scheduledQueue, namedSpaceDeletedJob1, TaskState.IN_PROGRESS,
            TaskState.COMPLETED);

    // stop the queue (the log names the queue that is actually passed to stop())
    LOG.info("Pausing job-queue: " + queueName);
    _driver.stop(queueName);
    _driver.pollForJobState(scheduledQueue, namedSpaceDeletedJob1, TaskState.STOPPED);
    _driver.pollForWorkflowState(scheduledQueue, TaskState.STOPPED);

    // delete the in-progress job (job 1) and verify it being deleted
    _driver.deleteJob(queueName, deletedJob1);
    verifyJobDeleted(queueName, namedSpaceDeletedJob1);
    verifyJobDeleted(scheduledQueue, namedSpaceDeletedJob1);

    LOG.info("Resuming job-queue: " + queueName);
    _driver.resume(queueName);

    // ensure job 2 is started
    String namedSpaceJob2 = String.format("%s_%s", scheduledQueue, currentJobNames.get(1));
    _driver.pollForJobState(scheduledQueue, namedSpaceJob2, TaskState.IN_PROGRESS,
            TaskState.COMPLETED);

    // stop the queue
    LOG.info("Pausing job-queue: " + queueName);
    _driver.stop(queueName);
    _driver.pollForJobState(scheduledQueue, namedSpaceJob2, TaskState.STOPPED);
    _driver.pollForWorkflowState(scheduledQueue, TaskState.STOPPED);

    // Ensure job 3 is not started before deleting it
    String deletedJob2 = currentJobNames.get(2);
    String namedSpaceDeletedJob2 = String.format("%s_%s", scheduledQueue, deletedJob2);
    TaskTestUtil.pollForEmptyJobState(_driver, scheduledQueue, namedSpaceDeletedJob2);

    // delete not-started job (job 3) and verify it being deleted
    _driver.deleteJob(queueName, deletedJob2);
    verifyJobDeleted(queueName, namedSpaceDeletedJob2);
    verifyJobDeleted(scheduledQueue, namedSpaceDeletedJob2);

    LOG.info("Resuming job-queue: " + queueName);
    _driver.resume(queueName);

    // Ensure the jobs left are successfully completed in the correct order: each job must not
    // start before the previously checked job finished.
    currentJobNames.remove(deletedJob1);
    currentJobNames.remove(deletedJob2);
    long preJobFinish = 0;
    for (String remainingJob : currentJobNames) {
        String namedSpaceJobName = String.format("%s_%s", scheduledQueue, remainingJob);
        _driver.pollForJobState(scheduledQueue, namedSpaceJobName, TaskState.COMPLETED);
        JobContext jobContext = _driver.getJobContext(namedSpaceJobName);
        long jobStart = jobContext.getStartTime();
        Assert.assertTrue(jobStart >= preJobFinish);
        preJobFinish = jobContext.getFinishTime();
    }
    // TODO: verify the deleted jobs are not there for the next recurrence of the queue schedule
    // (this check is not implemented here).
}
Also used : JobQueue(org.apache.helix.task.JobQueue) WorkflowContext(org.apache.helix.task.WorkflowContext) JobContext(org.apache.helix.task.JobContext) Test(org.testng.annotations.Test)

Aggregations

JobContext (org.apache.helix.task.JobContext): 35, JobConfig (org.apache.helix.task.JobConfig): 28, Test (org.testng.annotations.Test): 25, Workflow (org.apache.helix.task.Workflow): 18, WorkflowConfig (org.apache.helix.task.WorkflowConfig): 11, WorkflowContext (org.apache.helix.task.WorkflowContext): 9, TaskPartitionState (org.apache.helix.task.TaskPartitionState): 8, JobQueue (org.apache.helix.task.JobQueue): 7, ArrayList (java.util.ArrayList): 6, HashSet (java.util.HashSet): 4, TaskConfig (org.apache.helix.task.TaskConfig): 4, HelixDataAccessor (org.apache.helix.HelixDataAccessor): 3, PropertyKey (org.apache.helix.PropertyKey): 3, ZNRecord (org.apache.helix.ZNRecord): 3, TaskDriver (org.apache.helix.task.TaskDriver): 3, HashMap (java.util.HashMap): 2, Map (java.util.Map): 2, GET (javax.ws.rs.GET): 2, Path (javax.ws.rs.Path): 2, HelixException (org.apache.helix.HelixException): 2