use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.
the class RemoteTaskRunnerTest method testBlacklistZKWorkers25Percent.
/**
 * Verifies that with 2 workers and maxPercentageBlacklistWorkers(25) no worker is ever blacklisted, even after
 * the number of failures exceeds maxRetriesBeforeBlacklist.
 *
 * <p>Twelve tasks are submitted one at a time. Each task is expected to be announced on alternating workers and
 * is then mocked as failed there. After every failure the blacklist must still be empty, while the
 * continuously-failed count of the worker that ran the task keeps growing.
 */
@Test
public void testBlacklistZKWorkers25Percent() throws Exception {
  rtrTestUtils.makeWorker("worker", 10);
  rtrTestUtils.makeWorker("worker2", 10);

  RemoteTaskRunnerConfig blacklistConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD);
  blacklistConfig.setMaxPercentageBlacklistWorkers(25);
  makeRemoteTaskRunner(blacklistConfig);

  // Worker expected to receive the odd-numbered / even-numbered tasks; decided by where task 1 lands.
  String oddTaskWorker = null;
  String evenTaskWorker = null;

  for (int attempt = 1; attempt <= 12; attempt++) {
    final String id = StringUtils.format("rt-%d", attempt);
    final TestRealtimeTask failingTask = new TestRealtimeTask(id, new TaskResource(id, 1), "foo", TaskStatus.success(id), jsonMapper);
    final Future<TaskStatus> outcome = remoteTaskRunner.run(failingTask);

    if (attempt == 1) {
      // The first assignment is not fixed, so probe worker2 and derive the alternation order from it.
      final boolean landedOnWorker2 = rtrTestUtils.taskAnnounced("worker2", failingTask.getId());
      oddTaskWorker = landedOnWorker2 ? "worker2" : "worker";
      evenTaskWorker = landedOnWorker2 ? "worker" : "worker2";
    }

    final String assignedWorker;
    if (attempt % 2 == 0) {
      assignedWorker = evenTaskWorker;
    } else {
      assignedWorker = oddTaskWorker;
    }

    Assert.assertTrue(rtrTestUtils.taskAnnounced(assignedWorker, failingTask.getId()));
    rtrTestUtils.mockWorkerRunningTask(assignedWorker, failingTask);
    rtrTestUtils.mockWorkerCompleteFailedTask(assignedWorker, failingTask);

    Assert.assertTrue(outcome.get().isFailure());
    // Nothing may be blacklisted at any point in this configuration.
    Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());

    // Tasks alternate between the two workers, so each worker has seen ceil(attempt / 2) failures so far.
    final int expectedFailures = (attempt + 1) / 2;
    Assert.assertEquals(expectedFailures, remoteTaskRunner.findWorkerRunningTask(failingTask.getId()).getContinuouslyFailedTasksCount());
  }
}
use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.
the class RemoteTaskRunnerTest method testWorkerRemoved.
/**
 * Verifies that deleting ANNOUCEMENTS_PATH while a task is running fails that task with a
 * "Canceled for worker cleanup" error, and that once the removed-worker cleanups have drained, STATUS_PATH no
 * longer exists and the default worker category has disappeared from both slot-count maps.
 */
@Test
public void testWorkerRemoved() throws Exception {
  doSetup();

  // Before the removal, the default category reports 3 total and 3 idle slots.
  Assert.assertEquals(3, remoteTaskRunner.getTotalTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue());
  Assert.assertEquals(3, remoteTaskRunner.getIdleTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue());

  Future<TaskStatus> pendingStatus = remoteTaskRunner.run(task);
  Assert.assertTrue(taskAnnounced(task.getId()));
  mockWorkerRunningTask(task);
  Assert.assertTrue(workerRunningTask(task.getId()));

  // Remove the announcement out from under the running task.
  cf.delete().forPath(ANNOUCEMENTS_PATH);

  TaskStatus finalStatus = pendingStatus.get();
  Assert.assertEquals(TaskState.FAILED, finalStatus.getStatusCode());
  Assert.assertNotNull(finalStatus.getErrorMsg());
  Assert.assertTrue(finalStatus.getErrorMsg().contains("Canceled for worker cleanup"));

  RemoteTaskRunnerConfig runnerConfig = remoteTaskRunner.getRemoteTaskRunnerConfig();
  final IndexingServiceCondition cleanupsDrained = new IndexingServiceCondition() {
    @Override
    public boolean isValid() {
      return remoteTaskRunner.getRemovedWorkerCleanups().isEmpty();
    }
  };
  // The cleanup task is scheduled independently by an event listener, so wait for twice the cleanup timeout.
  Assert.assertTrue(TestUtils.conditionValid(cleanupsDrained, runnerConfig.getTaskCleanupTimeout().toStandardDuration().getMillis() * 2));

  Assert.assertNull(cf.checkExists().forPath(STATUS_PATH));
  Assert.assertFalse(remoteTaskRunner.getTotalTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY));
  Assert.assertFalse(remoteTaskRunner.getIdleTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY));
}
use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.
the class RemoteTaskRunnerTest method testRunPendingTaskTimeoutToAssign.
/**
 * Verifies that running a pending task which the worker never starts leaves the work item with a FAILED
 * status whose error message reports the assignment timeout.
 *
 * <p>The worker is created but never mocked as running the task, and the result is read with a zero timeout,
 * so it must already be available when {@code runPendingTask} returns.
 */
@Test
public void testRunPendingTaskTimeoutToAssign() throws Exception {
  makeWorker();
  makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD));

  RemoteTaskRunnerWorkItem pendingItem = remoteTaskRunner.addPendingTask(task);
  remoteTaskRunner.runPendingTask(pendingItem);

  // Zero wait: the status must have been set by the time runPendingTask returned.
  Future<TaskStatus> itemResult = pendingItem.getResult();
  TaskStatus timedOutStatus = itemResult.get(0, TimeUnit.MILLISECONDS);

  Assert.assertEquals(TaskState.FAILED, timedOutStatus.getStatusCode());
  Assert.assertNotNull(timedOutStatus.getErrorMsg());
  Assert.assertTrue(timedOutStatus.getErrorMsg().startsWith("The worker that this task is assigned did not start it in timeout"));
}
use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.
the class RemoteTaskRunnerTest method testStatusRemoved.
/**
 * Verifies that deleting the status path of a running task (STATUS_PATH joined with the task id) fails the
 * task with an error message saying the assigned worker disappeared.
 */
@Test
public void testStatusRemoved() throws Exception {
  doSetup();
  ListenableFuture<TaskStatus> future = remoteTaskRunner.run(task);
  Assert.assertTrue(taskAnnounced(task.getId()));
  mockWorkerRunningTask(task);
  Assert.assertTrue(workerRunningTask(task.getId()));

  // assertEquals (rather than assertTrue(a.equals(b))) reports both ids if the running task is not the one we submitted.
  Assert.assertEquals(task.getId(), remoteTaskRunner.getRunningTasks().iterator().next().getTaskId());

  // Remove the task's status entry while the task is still running.
  cf.delete().forPath(JOINER.join(STATUS_PATH, task.getId()));

  TaskStatus status = future.get();
  // Expected value goes first so a failure message reads "expected FAILED but was ...".
  Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
  Assert.assertNotNull(status.getErrorMsg());
  Assert.assertTrue(status.getErrorMsg().contains("The worker that this task was assigned disappeared"));
}
use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.
the class RemoteTaskRunnerTest method testBlacklistZKWorkers.
/**
 * Exercises the blacklist life cycle of a single worker with maxPercentageBlacklistWorkers(100).
 *
 * <ol>
 * <li>A first failed task leaves the blacklist empty and the worker's continuously-failed count at 1.</li>
 * <li>A second failed task puts the worker on the blacklist with a count of 2.</li>
 * <li>{@code checkBlackListedNodes()} at the current time keeps the worker blacklisted; with the clock advanced
 * by twice TIMEOUT_PERIOD it clears the blacklist and the count drops to 0.</li>
 * <li>A subsequent successful task leaves the blacklist empty and the count at 0.</li>
 * </ol>
 */
@Test
public void testBlacklistZKWorkers() throws Exception {
  makeWorker();
  RemoteTaskRunnerConfig blacklistConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD);
  blacklistConfig.setMaxPercentageBlacklistWorkers(100);
  makeRemoteTaskRunner(blacklistConfig);

  // First failure: not enough to blacklist the worker yet.
  TestRealtimeTask firstFailure = new TestRealtimeTask("realtime1", new TaskResource("realtime1", 1), "foo", TaskStatus.success("realtime1"), jsonMapper);
  Future<TaskStatus> firstOutcome = remoteTaskRunner.run(firstFailure);
  Assert.assertTrue(taskAnnounced(firstFailure.getId()));
  mockWorkerRunningTask(firstFailure);
  mockWorkerCompleteFailedTask(firstFailure);
  Assert.assertTrue(firstOutcome.get().isFailure());
  Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
  Assert.assertEquals(1, remoteTaskRunner.findWorkerRunningTask(firstFailure.getId()).getContinuouslyFailedTasksCount());

  // Second consecutive failure: the worker is now expected on the blacklist.
  TestRealtimeTask secondFailure = new TestRealtimeTask("realtime2", new TaskResource("realtime2", 1), "foo", TaskStatus.running("realtime2"), jsonMapper);
  Future<TaskStatus> secondOutcome = remoteTaskRunner.run(secondFailure);
  Assert.assertTrue(taskAnnounced(secondFailure.getId()));
  mockWorkerRunningTask(secondFailure);
  mockWorkerCompleteFailedTask(secondFailure);
  Assert.assertTrue(secondOutcome.get().isFailure());
  Assert.assertEquals(1, remoteTaskRunner.getBlackListedWorkers().size());
  Assert.assertEquals(2, remoteTaskRunner.findWorkerRunningTask(secondFailure.getId()).getContinuouslyFailedTasksCount());

  // The testable runner lets the test control the clock used by checkBlackListedNodes().
  final RemoteTaskRunnerTestUtils.TestableRemoteTaskRunner clockedRunner = (RemoteTaskRunnerTestUtils.TestableRemoteTaskRunner) remoteTaskRunner;

  // At "now" the worker stays blacklisted.
  clockedRunner.setCurrentTimeMillis(System.currentTimeMillis());
  remoteTaskRunner.checkBlackListedNodes();
  Assert.assertEquals(1, remoteTaskRunner.getBlackListedWorkers().size());

  // Move the clock forward by twice the timeout period and check again.
  final long afterBackOff = System.currentTimeMillis() + 2 * TIMEOUT_PERIOD.toStandardDuration().getMillis();
  clockedRunner.setCurrentTimeMillis(afterBackOff);
  remoteTaskRunner.checkBlackListedNodes();
  // After backOffTime the nodes are removed from blacklist
  Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
  Assert.assertEquals(0, remoteTaskRunner.findWorkerRunningTask(secondFailure.getId()).getContinuouslyFailedTasksCount());

  // A successful task afterwards keeps the worker off the blacklist with a zero failure count.
  TestRealtimeTask recoveryTask = new TestRealtimeTask("realtime3", new TaskResource("realtime3", 1), "foo", TaskStatus.running("realtime3"), jsonMapper);
  Future<TaskStatus> recoveryOutcome = remoteTaskRunner.run(recoveryTask);
  Assert.assertTrue(taskAnnounced(recoveryTask.getId()));
  mockWorkerRunningTask(recoveryTask);
  mockWorkerCompleteSuccessfulTask(recoveryTask);
  Assert.assertTrue(recoveryOutcome.get().isSuccess());
  Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
  Assert.assertEquals(0, remoteTaskRunner.findWorkerRunningTask(recoveryTask.getId()).getContinuouslyFailedTasksCount());
}
Aggregations