Search in sources :

Example 66 with TaskStatus

use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

the class RemoteTaskRunnerTest method testBlacklistZKWorkers25Percent.

/**
 * Runs twelve failing tasks against two workers with maxPercentageBlacklistWorkers(25) and checks that the
 * blacklist stays empty the whole time, even after exceeding maxRetriesBeforeBlacklist.
 */
@Test
public void testBlacklistZKWorkers25Percent() throws Exception {
    rtrTestUtils.makeWorker("worker", 10);
    rtrTestUtils.makeWorker("worker2", 10);
    RemoteTaskRunnerConfig config = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD);
    config.setMaxPercentageBlacklistWorkers(25);
    makeRemoteTaskRunner(config);

    // Both are resolved on the first iteration: whichever worker receives task 1 is expected to receive
    // every odd-numbered task, and the other worker every even-numbered one.
    String oddTaskWorker = null;
    String evenTaskWorker = null;
    for (int attempt = 1; attempt <= 12; attempt++) {
        String id = StringUtils.format("rt-%d", attempt);
        TestRealtimeTask task = new TestRealtimeTask(id, new TaskResource(id, 1), "foo", TaskStatus.success(id), jsonMapper);
        Future<TaskStatus> outcome = remoteTaskRunner.run(task);

        if (attempt == 1) {
            boolean startedOnWorker2 = rtrTestUtils.taskAnnounced("worker2", task.getId());
            oddTaskWorker = startedOnWorker2 ? "worker2" : "worker";
            evenTaskWorker = startedOnWorker2 ? "worker" : "worker2";
        }

        final String expectedWorker;
        if (attempt % 2 == 0) {
            expectedWorker = evenTaskWorker;
        } else {
            expectedWorker = oddTaskWorker;
        }

        Assert.assertTrue(rtrTestUtils.taskAnnounced(expectedWorker, task.getId()));
        rtrTestUtils.mockWorkerRunningTask(expectedWorker, task);
        rtrTestUtils.mockWorkerCompleteFailedTask(expectedWorker, task);
        Assert.assertTrue(outcome.get().isFailure());

        // No worker may be blacklisted at any point.
        Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());

        // Each worker handles every other task, so its failure streak grows by one every two attempts.
        int expectedStreak = (attempt + 1) / 2;
        Assert.assertEquals(expectedStreak, remoteTaskRunner.findWorkerRunningTask(task.getId()).getContinuouslyFailedTasksCount());
    }
}
Also used : TestRealtimeTask(org.apache.druid.indexing.common.TestRealtimeTask) TaskResource(org.apache.druid.indexing.common.task.TaskResource) TaskStatus(org.apache.druid.indexer.TaskStatus) RemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig) Test(org.junit.Test)

Example 67 with TaskStatus

use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

the class RemoteTaskRunnerTest method testWorkerRemoved.

/**
 * Deletes the announcements path while a task is running and checks the aftermath: the task future completes as
 * FAILED with a worker-cleanup message, the removed-worker cleanups drain, the status path no longer exists, and
 * the default category is absent from both slot-count maps.
 */
@Test
public void testWorkerRemoved() throws Exception {
    doSetup();
    Assert.assertEquals(3, remoteTaskRunner.getTotalTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue());
    Assert.assertEquals(3, remoteTaskRunner.getIdleTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue());

    Future<TaskStatus> pending = remoteTaskRunner.run(task);
    Assert.assertTrue(taskAnnounced(task.getId()));
    mockWorkerRunningTask(task);
    Assert.assertTrue(workerRunningTask(task.getId()));

    cf.delete().forPath(ANNOUCEMENTS_PATH);

    TaskStatus outcome = pending.get();
    Assert.assertEquals(TaskState.FAILED, outcome.getStatusCode());
    Assert.assertNotNull(outcome.getErrorMsg());
    Assert.assertTrue(outcome.getErrorMsg().contains("Canceled for worker cleanup"));

    RemoteTaskRunnerConfig runnerConfig = remoteTaskRunner.getRemoteTaskRunnerConfig();
    final IndexingServiceCondition cleanupsDrained = new IndexingServiceCondition() {

        @Override
        public boolean isValid() {
            return remoteTaskRunner.getRemovedWorkerCleanups().isEmpty();
        }
    };
    // The cleanup task is scheduled independently by the event listener, so wait for twice the cleanup timeout.
    Assert.assertTrue(TestUtils.conditionValid(cleanupsDrained, runnerConfig.getTaskCleanupTimeout().toStandardDuration().getMillis() * 2));

    Assert.assertNull(cf.checkExists().forPath(STATUS_PATH));
    Assert.assertFalse(remoteTaskRunner.getTotalTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY));
    Assert.assertFalse(remoteTaskRunner.getIdleTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY));
}
Also used : IndexingServiceCondition(org.apache.druid.indexing.common.IndexingServiceCondition) TaskStatus(org.apache.druid.indexer.TaskStatus) RemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig) Test(org.junit.Test)

Example 68 with TaskStatus

use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

the class RemoteTaskRunnerTest method testRunPendingTaskTimeoutToAssign.

/**
 * Adds a pending task, runs it through {@code runPendingTask}, and expects its result to be available
 * immediately as FAILED with the "did not start it in timeout" error message.
 */
@Test
public void testRunPendingTaskTimeoutToAssign() throws Exception {
    makeWorker();
    makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD));

    RemoteTaskRunnerWorkItem pendingItem = remoteTaskRunner.addPendingTask(task);
    remoteTaskRunner.runPendingTask(pendingItem);

    // A zero timeout means the result must already be set when runPendingTask returns.
    TaskStatus outcome = pendingItem.getResult().get(0, TimeUnit.MILLISECONDS);
    Assert.assertEquals(TaskState.FAILED, outcome.getStatusCode());

    final String expectedPrefix = "The worker that this task is assigned did not start it in timeout";
    Assert.assertNotNull(outcome.getErrorMsg());
    Assert.assertTrue(outcome.getErrorMsg().startsWith(expectedPrefix));
}
Also used : TaskStatus(org.apache.druid.indexer.TaskStatus) Test(org.junit.Test)

Example 69 with TaskStatus

use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

the class RemoteTaskRunnerTest method testStatusRemoved.

/**
 * Deletes the status node of a running task and verifies that the task future completes as FAILED with the
 * "worker ... disappeared" error message.
 */
@Test
public void testStatusRemoved() throws Exception {
    doSetup();
    ListenableFuture<TaskStatus> future = remoteTaskRunner.run(task);
    Assert.assertTrue(taskAnnounced(task.getId()));
    mockWorkerRunningTask(task);
    Assert.assertTrue(workerRunningTask(task.getId()));
    // assertEquals rather than assertTrue(a.equals(b)) so that a failure reports both task ids.
    Assert.assertEquals(task.getId(), remoteTaskRunner.getRunningTasks().iterator().next().getTaskId());
    cf.delete().forPath(JOINER.join(STATUS_PATH, task.getId()));
    TaskStatus status = future.get();
    // JUnit's argument order is (expected, actual); keeping it makes the failure message read correctly.
    Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
    Assert.assertNotNull(status.getErrorMsg());
    Assert.assertTrue(status.getErrorMsg().contains("The worker that this task was assigned disappeared"));
}
Also used : TaskStatus(org.apache.druid.indexer.TaskStatus) Test(org.junit.Test)

Example 70 with TaskStatus

use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

the class RemoteTaskRunnerTest method testBlacklistZKWorkers.

/**
 * Walks a single worker through the blacklist lifecycle with maxPercentageBlacklistWorkers(100): one failed
 * task leaves the blacklist empty, a second consecutive failure blacklists the worker, advancing the runner's
 * clock past the back-off clears the blacklist and the failure count, and a later successful task keeps both at zero.
 */
@Test
public void testBlacklistZKWorkers() throws Exception {
    makeWorker();
    RemoteTaskRunnerConfig config = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD);
    config.setMaxPercentageBlacklistWorkers(100);
    makeRemoteTaskRunner(config);

    // First failure: the streak becomes 1 and nothing is blacklisted yet.
    TestRealtimeTask firstFailure = new TestRealtimeTask("realtime1", new TaskResource("realtime1", 1), "foo", TaskStatus.success("realtime1"), jsonMapper);
    Future<TaskStatus> firstResult = remoteTaskRunner.run(firstFailure);
    Assert.assertTrue(taskAnnounced(firstFailure.getId()));
    mockWorkerRunningTask(firstFailure);
    mockWorkerCompleteFailedTask(firstFailure);
    Assert.assertTrue(firstResult.get().isFailure());
    Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
    Assert.assertEquals(1, remoteTaskRunner.findWorkerRunningTask(firstFailure.getId()).getContinuouslyFailedTasksCount());

    // Second consecutive failure: the streak becomes 2 and the worker is blacklisted.
    TestRealtimeTask secondFailure = new TestRealtimeTask("realtime2", new TaskResource("realtime2", 1), "foo", TaskStatus.running("realtime2"), jsonMapper);
    Future<TaskStatus> secondResult = remoteTaskRunner.run(secondFailure);
    Assert.assertTrue(taskAnnounced(secondFailure.getId()));
    mockWorkerRunningTask(secondFailure);
    mockWorkerCompleteFailedTask(secondFailure);
    Assert.assertTrue(secondResult.get().isFailure());
    Assert.assertEquals(1, remoteTaskRunner.getBlackListedWorkers().size());
    Assert.assertEquals(2, remoteTaskRunner.findWorkerRunningTask(secondFailure.getId()).getContinuouslyFailedTasksCount());

    // Checking at the current time must leave the worker on the blacklist.
    RemoteTaskRunnerTestUtils.TestableRemoteTaskRunner clockControlledRunner = (RemoteTaskRunnerTestUtils.TestableRemoteTaskRunner) remoteTaskRunner;
    clockControlledRunner.setCurrentTimeMillis(System.currentTimeMillis());
    remoteTaskRunner.checkBlackListedNodes();
    Assert.assertEquals(1, remoteTaskRunner.getBlackListedWorkers().size());

    // After backOffTime the nodes are removed from blacklist
    clockControlledRunner.setCurrentTimeMillis(System.currentTimeMillis() + 2 * TIMEOUT_PERIOD.toStandardDuration().getMillis());
    remoteTaskRunner.checkBlackListedNodes();
    Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
    Assert.assertEquals(0, remoteTaskRunner.findWorkerRunningTask(secondFailure.getId()).getContinuouslyFailedTasksCount());

    // A successful task afterwards keeps the blacklist empty and the streak at zero.
    TestRealtimeTask recovery = new TestRealtimeTask("realtime3", new TaskResource("realtime3", 1), "foo", TaskStatus.running("realtime3"), jsonMapper);
    Future<TaskStatus> recoveryResult = remoteTaskRunner.run(recovery);
    Assert.assertTrue(taskAnnounced(recovery.getId()));
    mockWorkerRunningTask(recovery);
    mockWorkerCompleteSuccessfulTask(recovery);
    Assert.assertTrue(recoveryResult.get().isSuccess());
    Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
    Assert.assertEquals(0, remoteTaskRunner.findWorkerRunningTask(recovery.getId()).getContinuouslyFailedTasksCount());
}
Also used : TestRealtimeTask(org.apache.druid.indexing.common.TestRealtimeTask) TaskResource(org.apache.druid.indexing.common.task.TaskResource) TaskStatus(org.apache.druid.indexer.TaskStatus) RemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig) Test(org.junit.Test)

Aggregations

TaskStatus (org.apache.druid.indexer.TaskStatus) 135, Test (org.junit.Test) 103, DataSegment (org.apache.druid.timeline.DataSegment) 55, List (java.util.List) 50, ImmutableList (com.google.common.collect.ImmutableList) 44, ArrayList (java.util.ArrayList) 41, TaskToolbox (org.apache.druid.indexing.common.TaskToolbox) 40, Task (org.apache.druid.indexing.common.task.Task) 39, InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) 37, Map (java.util.Map) 34, File (java.io.File) 32, IOException (java.io.IOException) 26, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 25, ImmutableMap (com.google.common.collect.ImmutableMap) 25, SegmentDescriptor (org.apache.druid.query.SegmentDescriptor) 25, DataSchema (org.apache.druid.segment.indexing.DataSchema) 25, ISE (org.apache.druid.java.util.common.ISE) 24, HashMap (java.util.HashMap) 23, Executor (java.util.concurrent.Executor) 23, Pair (org.apache.druid.java.util.common.Pair) 23