Example 21 with JobContext

use of org.apache.helix.task.JobContext in project helix by apache.

the class TestJobFailureTaskNotStarted method testTaskNotStarted.

@Test
public void testTaskNotStarted() throws InterruptedException {
    setupUnbalancedDB();
    final String BLOCK_WORKFLOW_NAME = "blockWorkflow";
    final String FAIL_WORKFLOW_NAME = "failWorkflow";
    final String FAIL_JOB_NAME = "failJob";
    ConfigAccessor configAccessor = new ConfigAccessor(_gZkClient);
    final int numTask = configAccessor.getClusterConfig(CLUSTER_NAME).getMaxConcurrentTaskPerInstance();
    // Tasks target the unbalanced DB. The blocked instance is set up to get stuck on the
    // INIT->RUNNING transition, so these tasks occupy all task threads on that instance.
    JobConfig.Builder blockJobBuilder = new JobConfig.Builder()
        .setWorkflow(BLOCK_WORKFLOW_NAME)
        .setTargetResource(UNBALANCED_DB_NAME)
        .setTargetPartitionStates(Sets.newHashSet(MasterSlaveSMD.States.MASTER.name()))
        .setCommand(MockTask.TASK_COMMAND)
        .setNumConcurrentTasksPerInstance(numTask);
    Workflow.Builder blockWorkflowBuilder = new Workflow.Builder(BLOCK_WORKFLOW_NAME).addJob("blockJob", blockJobBuilder);
    _driver.start(blockWorkflowBuilder.build());
    Assert.assertTrue(TaskTestUtil.pollForAllTasksBlock(_manager.getHelixDataAccessor(), _blockedParticipant.getInstanceName(), numTask, 10000));
    // Now all HelixTask threads on the blocked instance are stuck in the INIT->RUNNING state
    // transition (user tasks can't be submitted), so new tasks assigned to that instance won't
    // start the INIT->RUNNING transition at all.
    // A job expected to fail: 2 tasks, 1 stuck and 1 failing, which makes the whole job fail.
    JobConfig.Builder failJobBuilder = new JobConfig.Builder()
        .setWorkflow(FAIL_WORKFLOW_NAME)
        .setTargetResource(DB_NAME)
        .setTargetPartitionStates(Sets.newHashSet(MasterSlaveSMD.States.MASTER.name()))
        .setCommand(MockTask.TASK_COMMAND)
        .setJobCommandConfigMap(ImmutableMap.of(MockTask.TASK_RESULT_STATUS, TaskResult.Status.FAILED.name()));
    Workflow.Builder failWorkflowBuilder = new Workflow.Builder(FAIL_WORKFLOW_NAME).addJob(FAIL_JOB_NAME, failJobBuilder);
    _driver.start(failWorkflowBuilder.build());
    _driver.pollForJobState(FAIL_WORKFLOW_NAME, TaskUtil.getNamespacedJobName(FAIL_WORKFLOW_NAME, FAIL_JOB_NAME), TaskState.FAILED);
    _driver.pollForWorkflowState(FAIL_WORKFLOW_NAME, TaskState.FAILED);
    JobContext jobContext = _driver.getJobContext(TaskUtil.getNamespacedJobName(FAIL_WORKFLOW_NAME, FAIL_JOB_NAME));
    for (int pId : jobContext.getPartitionSet()) {
        if (jobContext.getAssignedParticipant(pId).equals(_blockedParticipant.getInstanceName())) {
            Assert.assertEquals(jobContext.getPartitionState(pId), TaskPartitionState.TASK_ABORTED);
        } else if (jobContext.getAssignedParticipant(pId).equals(_normalParticipant.getInstanceName())) {
            Assert.assertEquals(jobContext.getPartitionState(pId), TaskPartitionState.TASK_ERROR);
        } else {
            throw new HelixException("There should be only 2 instances, 1 blocked, 1 normal.");
        }
    }
}
Also used : HelixException(org.apache.helix.HelixException) Workflow(org.apache.helix.task.Workflow) ConfigAccessor(org.apache.helix.ConfigAccessor) JobContext(org.apache.helix.task.JobContext) JobConfig(org.apache.helix.task.JobConfig) Test(org.testng.annotations.Test)
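
The pattern above (resolve the namespaced job name, fetch its JobContext, then walk the partition set) recurs throughout these examples. A minimal standalone sketch, assuming a connected TaskDriver named driver; the workflow and job names are illustrative, not from the test above:

    // Sketch only: "myWorkflow" and "myJob" are hypothetical names.
    String namespacedJob = TaskUtil.getNamespacedJobName("myWorkflow", "myJob");
    JobContext ctx = driver.getJobContext(namespacedJob);
    if (ctx != null) { // the context may not exist before the job is scheduled
        for (int pId : ctx.getPartitionSet()) {
            TaskPartitionState state = ctx.getPartitionState(pId); // may be null before the task starts
            String instance = ctx.getAssignedParticipant(pId);
            System.out.println("partition " + pId + " -> " + instance + " : " + state);
        }
    }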

Example 22 with JobContext

use of org.apache.helix.task.JobContext in project helix by apache.

the class TestJobQueueCleanUp method testJobQueueAutoCleanUp.

@Test
public void testJobQueueAutoCleanUp() throws InterruptedException {
    int capacity = 10;
    String queueName = TestHelper.getTestMethodName();
    JobQueue.Builder builder = TaskTestUtil.buildJobQueue(queueName, capacity);
    WorkflowConfig.Builder cfgBuilder = new WorkflowConfig.Builder(builder.getWorkflowConfig());
    cfgBuilder.setJobPurgeInterval(1000);
    builder.setWorkflowConfig(cfgBuilder.build());
    JobConfig.Builder jobBuilder = new JobConfig.Builder()
        .setTargetResource(WorkflowGenerator.DEFAULT_TGT_DB)
        .setCommand(MockTask.TASK_COMMAND)
        .setMaxAttemptsPerTask(2)
        .setJobCommandConfigMap(ImmutableMap.of(MockTask.SUCCESS_COUNT_BEFORE_FAIL, String.valueOf(capacity / 2)))
        .setExpiry(200L);
    Set<String> deletedJobs = new HashSet<String>();
    Set<String> remainJobs = new HashSet<String>();
    for (int i = 0; i < capacity; i++) {
        builder.enqueueJob("JOB" + i, jobBuilder);
        if (i < capacity / 2) {
            deletedJobs.add("JOB" + i);
        } else {
            remainJobs.add(TaskUtil.getNamespacedJobName(queueName, "JOB" + i));
        }
    }
    _driver.start(builder.build());
    _driver.pollForJobState(queueName, TaskUtil.getNamespacedJobName(queueName, "JOB" + (capacity - 1)), TaskState.FAILED);
    Thread.sleep(2000);
    WorkflowConfig config = _driver.getWorkflowConfig(queueName);
    Assert.assertEquals(config.getJobDag().getAllNodes(), remainJobs);
    WorkflowContext context = _driver.getWorkflowContext(queueName);
    Assert.assertEquals(context.getJobStates().keySet(), remainJobs);
    Assert.assertTrue(remainJobs.containsAll(context.getJobStartTimes().keySet()));
    for (String job : deletedJobs) {
        JobConfig cfg = _driver.getJobConfig(job);
        JobContext ctx = _driver.getJobContext(job);
        Assert.assertNull(cfg);
        Assert.assertNull(ctx);
    }
}
Also used : JobQueue(org.apache.helix.task.JobQueue) WorkflowContext(org.apache.helix.task.WorkflowContext) JobConfig(org.apache.helix.task.JobConfig) WorkflowConfig(org.apache.helix.task.WorkflowConfig) JobContext(org.apache.helix.task.JobContext) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)
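
Two knobs drive the cleanup above: the queue-level purge interval (on WorkflowConfig) and the per-job expiry (on JobConfig). A minimal sketch wiring them together with the same builders the test uses; the queue name, capacity, and timing values are illustrative:

    // Sketch: a job queue whose terminal jobs become purgeable 200 ms after finishing,
    // with a purge pass running roughly every second.
    JobQueue.Builder queue = TaskTestUtil.buildJobQueue("demoQueue", 10);
    WorkflowConfig.Builder wfCfg = new WorkflowConfig.Builder(queue.getWorkflowConfig());
    wfCfg.setJobPurgeInterval(1000);
    queue.setWorkflowConfig(wfCfg.build());
    JobConfig.Builder job = new JobConfig.Builder()
        .setCommand(MockTask.TASK_COMMAND)
        .setExpiry(200L);
    queue.enqueueJob("demoJob", job);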

Example 23 with JobContext

use of org.apache.helix.task.JobContext in project helix by apache.

the class TaskTestUtil method pollForWorkflowParallelState.

// Polls the workflow until it finishes and verifies that:
// 1. Different jobs in the same workflow are in RUNNING state at the same time.
// 2. When overlapping job assignment is disallowed, no two jobs in the same workflow are RUNNING on the same instance.
// Use this method with caution: it assumes the workflow doesn't finish too quickly and that the number of
// parallel running tasks can be counted.
public static boolean pollForWorkflowParallelState(TaskDriver driver, String workflowName) throws InterruptedException {
    WorkflowConfig workflowConfig = driver.getWorkflowConfig(workflowName);
    Assert.assertNotNull(workflowConfig);
    WorkflowContext workflowContext = null;
    while (workflowContext == null) {
        workflowContext = driver.getWorkflowContext(workflowName);
        Thread.sleep(100);
    }
    int maxRunningCount = 0;
    boolean finished = false;
    while (!finished) {
        finished = true;
        int runningCount = 0;
        workflowContext = driver.getWorkflowContext(workflowName);
        for (String jobName : workflowConfig.getJobDag().getAllNodes()) {
            TaskState jobState = workflowContext.getJobState(jobName);
            if (jobState == TaskState.IN_PROGRESS) {
                ++runningCount;
                finished = false;
            }
        }
        if (runningCount > maxRunningCount) {
            maxRunningCount = runningCount;
        }
        List<JobContext> jobContextList = new ArrayList<JobContext>();
        for (String jobName : workflowConfig.getJobDag().getAllNodes()) {
            JobContext jobContext = driver.getJobContext(jobName);
            if (jobContext != null) {
                jobContextList.add(jobContext);
            }
        }
        if (!workflowConfig.isAllowOverlapJobAssignment()) {
            Set<String> instances = new HashSet<String>();
            for (JobContext jobContext : jobContextList) {
                for (int partition : jobContext.getPartitionSet()) {
                    String instance = jobContext.getAssignedParticipant(partition);
                    TaskPartitionState taskPartitionState = jobContext.getPartitionState(partition);
                    if (instance == null) {
                        continue;
                    }
                    if (taskPartitionState != TaskPartitionState.INIT && taskPartitionState != TaskPartitionState.RUNNING) {
                        continue;
                    }
                    if (instances.contains(instance)) {
                        return false;
                    }
                    // The task is in INIT or RUNNING on this instance; record the instance so a
                    // second INIT/RUNNING task landing on it is detected as an overlap above.
                    instances.add(instance);
                }
            }
        }
        Thread.sleep(100);
    }
    return maxRunningCount > 1 && (!workflowConfig.isJobQueue() || maxRunningCount <= workflowConfig.getParallelJobs());
}
Also used : WorkflowConfig(org.apache.helix.task.WorkflowConfig) WorkflowContext(org.apache.helix.task.WorkflowContext) ArrayList(java.util.ArrayList) TaskPartitionState(org.apache.helix.task.TaskPartitionState) JobContext(org.apache.helix.task.JobContext) TaskState(org.apache.helix.task.TaskState) HashSet(java.util.HashSet)
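
A hedged usage sketch for the helper above, assuming a connected TaskDriver named driver and a started workflow named workflowName:

    // Asserts that at least two jobs ran in parallel and, for job queues, that the
    // configured parallelism cap was never exceeded.
    Assert.assertTrue(TaskTestUtil.pollForWorkflowParallelState(driver, workflowName));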

Example 24 with JobContext

use of org.apache.helix.task.JobContext in project helix by apache.

the class TestTaskRebalancer method partitionSet.

@Test
public void partitionSet() throws Exception {
    final String jobResource = "partitionSet";
    ImmutableList<String> targetPartitions = ImmutableList.of("TestDB_1", "TestDB_2", "TestDB_3", "TestDB_5", "TestDB_8", "TestDB_13");
    // construct and submit our basic workflow
    Map<String, String> commandConfig = ImmutableMap.of(TIMEOUT_CONFIG, String.valueOf(100));
    JobConfig.Builder jobBuilder = JobConfig.Builder.fromMap(WorkflowGenerator.DEFAULT_JOB_CONFIG);
    jobBuilder.setJobCommandConfigMap(commandConfig).setMaxAttemptsPerTask(1).setTargetPartitions(targetPartitions);
    Workflow flow = WorkflowGenerator.generateSingleJobWorkflowBuilder(jobResource, jobBuilder).build();
    _driver.start(flow);
    // wait for job completeness/timeout
    _driver.pollForWorkflowState(jobResource, TaskState.COMPLETED);
    // see if resulting context completed successfully for our partition set
    String namespacedName = TaskUtil.getNamespacedJobName(jobResource);
    JobContext ctx = _driver.getJobContext(namespacedName);
    WorkflowContext workflowContext = _driver.getWorkflowContext(jobResource);
    Assert.assertNotNull(ctx);
    Assert.assertNotNull(workflowContext);
    Assert.assertEquals(workflowContext.getJobState(namespacedName), TaskState.COMPLETED);
    for (String pName : targetPartitions) {
        int i = ctx.getPartitionsByTarget().get(pName).get(0);
        Assert.assertEquals(ctx.getPartitionState(i), TaskPartitionState.COMPLETED);
        Assert.assertEquals(ctx.getPartitionNumAttempts(i), 1);
    }
}
Also used : WorkflowContext(org.apache.helix.task.WorkflowContext) Workflow(org.apache.helix.task.Workflow) JobContext(org.apache.helix.task.JobContext) JobConfig(org.apache.helix.task.JobConfig) Test(org.testng.annotations.Test)
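
The ctx.getPartitionsByTarget() lookup above maps each target partition name to the task partition ids created for it. A minimal sketch of that lookup in isolation, reusing the JobContext ctx from the example:

    // Map from target partition name (e.g. "TestDB_1") to the task partition ids created for it.
    Map<String, List<Integer>> byTarget = ctx.getPartitionsByTarget();
    int taskPartitionId = byTarget.get("TestDB_1").get(0); // first (typically only) task for this target
    TaskPartitionState state = ctx.getPartitionState(taskPartitionId);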

Example 25 with JobContext

use of org.apache.helix.task.JobContext in project helix by apache.

the class TestTaskRebalancer method timeouts.

@Test
public void timeouts() throws Exception {
    final String jobResource = "timeouts";
    JobConfig.Builder jobBuilder = JobConfig.Builder.fromMap(WorkflowGenerator.DEFAULT_JOB_CONFIG);
    jobBuilder.setJobCommandConfigMap(WorkflowGenerator.DEFAULT_COMMAND_CONFIG).setMaxAttemptsPerTask(2).setTimeoutPerTask(100);
    Workflow flow = WorkflowGenerator.generateSingleJobWorkflowBuilder(jobResource, jobBuilder).build();
    _driver.start(flow);
    // Wait until the job reports failure.
    _driver.pollForWorkflowState(jobResource, TaskState.FAILED);
    // Check that all partitions timed out up to maxAttempts
    JobContext ctx = _driver.getJobContext(TaskUtil.getNamespacedJobName(jobResource));
    int maxAttempts = 0;
    boolean sawTimedoutTask = false;
    for (int i = 0; i < _numParitions; i++) {
        TaskPartitionState state = ctx.getPartitionState(i);
        if (state != null) {
            if (state == TaskPartitionState.TIMED_OUT) {
                sawTimedoutTask = true;
            }
            // At least one task timed out; others might be aborted due to the job failure.
            Assert.assertTrue(state == TaskPartitionState.TIMED_OUT || state == TaskPartitionState.TASK_ABORTED);
            maxAttempts = Math.max(maxAttempts, ctx.getPartitionNumAttempts(i));
        }
    }
    Assert.assertTrue(sawTimedoutTask);
    Assert.assertEquals(maxAttempts, 2);
}
Also used : Workflow(org.apache.helix.task.Workflow) TaskPartitionState(org.apache.helix.task.TaskPartitionState) JobContext(org.apache.helix.task.JobContext) JobConfig(org.apache.helix.task.JobConfig) Test(org.testng.annotations.Test)
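
The failure semantics above come from two JobConfig knobs set in the builder: a per-task timeout and a retry cap. A minimal sketch isolating just those settings; the values mirror the test and are otherwise illustrative:

    JobConfig.Builder jobBuilder = new JobConfig.Builder()
        .setCommand(MockTask.TASK_COMMAND)
        .setTimeoutPerTask(100)    // each task attempt is timed out after 100 ms
        .setMaxAttemptsPerTask(2); // a task may be retried once before the job is marked FAILED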

Aggregations

JobContext (org.apache.helix.task.JobContext): 35
JobConfig (org.apache.helix.task.JobConfig): 28
Test (org.testng.annotations.Test): 25
Workflow (org.apache.helix.task.Workflow): 18
WorkflowConfig (org.apache.helix.task.WorkflowConfig): 11
WorkflowContext (org.apache.helix.task.WorkflowContext): 9
TaskPartitionState (org.apache.helix.task.TaskPartitionState): 8
JobQueue (org.apache.helix.task.JobQueue): 7
ArrayList (java.util.ArrayList): 6
HashSet (java.util.HashSet): 4
TaskConfig (org.apache.helix.task.TaskConfig): 4
HelixDataAccessor (org.apache.helix.HelixDataAccessor): 3
PropertyKey (org.apache.helix.PropertyKey): 3
ZNRecord (org.apache.helix.ZNRecord): 3
TaskDriver (org.apache.helix.task.TaskDriver): 3
HashMap (java.util.HashMap): 2
Map (java.util.Map): 2
GET (javax.ws.rs.GET): 2
Path (javax.ws.rs.Path): 2
HelixException (org.apache.helix.HelixException): 2