use of org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig in project druid by druid-io.
the class RemoteTaskRunnerTest method testBlacklistZKWorkers25Percent.
/**
* With 2 workers and maxPercentageBlacklistWorkers(25), neither worker should ever be blacklisted even after
* exceeding maxRetriesBeforeBlacklist.
*/
@Test
public void testBlacklistZKWorkers25Percent() throws Exception {
rtrTestUtils.makeWorker("worker", 10);
rtrTestUtils.makeWorker("worker2", 10);
RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD);
rtrConfig.setMaxPercentageBlacklistWorkers(25);
makeRemoteTaskRunner(rtrConfig);
String firstWorker = null;
String secondWorker = null;
for (int i = 1; i < 13; i++) {
String taskId = StringUtils.format("rt-%d", i);
TestRealtimeTask task = new TestRealtimeTask(taskId, new TaskResource(taskId, 1), "foo", TaskStatus.success(taskId), jsonMapper);
Future<TaskStatus> taskFuture = remoteTaskRunner.run(task);
if (i == 1) {
if (rtrTestUtils.taskAnnounced("worker2", task.getId())) {
firstWorker = "worker2";
secondWorker = "worker";
} else {
firstWorker = "worker";
secondWorker = "worker2";
}
}
final String expectedWorker = i % 2 == 0 ? secondWorker : firstWorker;
Assert.assertTrue(rtrTestUtils.taskAnnounced(expectedWorker, task.getId()));
rtrTestUtils.mockWorkerRunningTask(expectedWorker, task);
rtrTestUtils.mockWorkerCompleteFailedTask(expectedWorker, task);
Assert.assertTrue(taskFuture.get().isFailure());
Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
Assert.assertEquals(((i + 1) / 2), remoteTaskRunner.findWorkerRunningTask(task.getId()).getContinuouslyFailedTasksCount());
}
}
use of org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig in project druid by druid-io.
the class RemoteTaskRunnerTest method testWorkerRemoved.
@Test
public void testWorkerRemoved() throws Exception {
doSetup();
Assert.assertEquals(3, remoteTaskRunner.getTotalTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue());
Assert.assertEquals(3, remoteTaskRunner.getIdleTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue());
Future<TaskStatus> future = remoteTaskRunner.run(task);
Assert.assertTrue(taskAnnounced(task.getId()));
mockWorkerRunningTask(task);
Assert.assertTrue(workerRunningTask(task.getId()));
cf.delete().forPath(ANNOUCEMENTS_PATH);
TaskStatus status = future.get();
Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
Assert.assertNotNull(status.getErrorMsg());
Assert.assertTrue(status.getErrorMsg().contains("Canceled for worker cleanup"));
RemoteTaskRunnerConfig config = remoteTaskRunner.getRemoteTaskRunnerConfig();
Assert.assertTrue(TestUtils.conditionValid(new IndexingServiceCondition() {
@Override
public boolean isValid() {
return remoteTaskRunner.getRemovedWorkerCleanups().isEmpty();
}
}, // cleanup task is independently scheduled by event listener. we need to wait some more time.
config.getTaskCleanupTimeout().toStandardDuration().getMillis() * 2));
Assert.assertNull(cf.checkExists().forPath(STATUS_PATH));
Assert.assertFalse(remoteTaskRunner.getTotalTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY));
Assert.assertFalse(remoteTaskRunner.getIdleTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY));
}
use of org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig in project druid by druid-io.
the class RemoteTaskRunnerTest method testBlacklistZKWorkers.
@Test
public void testBlacklistZKWorkers() throws Exception {
makeWorker();
RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD);
rtrConfig.setMaxPercentageBlacklistWorkers(100);
makeRemoteTaskRunner(rtrConfig);
TestRealtimeTask task1 = new TestRealtimeTask("realtime1", new TaskResource("realtime1", 1), "foo", TaskStatus.success("realtime1"), jsonMapper);
Future<TaskStatus> taskFuture1 = remoteTaskRunner.run(task1);
Assert.assertTrue(taskAnnounced(task1.getId()));
mockWorkerRunningTask(task1);
mockWorkerCompleteFailedTask(task1);
Assert.assertTrue(taskFuture1.get().isFailure());
Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
Assert.assertEquals(1, remoteTaskRunner.findWorkerRunningTask(task1.getId()).getContinuouslyFailedTasksCount());
TestRealtimeTask task2 = new TestRealtimeTask("realtime2", new TaskResource("realtime2", 1), "foo", TaskStatus.running("realtime2"), jsonMapper);
Future<TaskStatus> taskFuture2 = remoteTaskRunner.run(task2);
Assert.assertTrue(taskAnnounced(task2.getId()));
mockWorkerRunningTask(task2);
mockWorkerCompleteFailedTask(task2);
Assert.assertTrue(taskFuture2.get().isFailure());
Assert.assertEquals(1, remoteTaskRunner.getBlackListedWorkers().size());
Assert.assertEquals(2, remoteTaskRunner.findWorkerRunningTask(task2.getId()).getContinuouslyFailedTasksCount());
((RemoteTaskRunnerTestUtils.TestableRemoteTaskRunner) remoteTaskRunner).setCurrentTimeMillis(System.currentTimeMillis());
remoteTaskRunner.checkBlackListedNodes();
Assert.assertEquals(1, remoteTaskRunner.getBlackListedWorkers().size());
((RemoteTaskRunnerTestUtils.TestableRemoteTaskRunner) remoteTaskRunner).setCurrentTimeMillis(System.currentTimeMillis() + 2 * TIMEOUT_PERIOD.toStandardDuration().getMillis());
remoteTaskRunner.checkBlackListedNodes();
// After backOffTime the nodes are removed from blacklist
Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
Assert.assertEquals(0, remoteTaskRunner.findWorkerRunningTask(task2.getId()).getContinuouslyFailedTasksCount());
TestRealtimeTask task3 = new TestRealtimeTask("realtime3", new TaskResource("realtime3", 1), "foo", TaskStatus.running("realtime3"), jsonMapper);
Future<TaskStatus> taskFuture3 = remoteTaskRunner.run(task3);
Assert.assertTrue(taskAnnounced(task3.getId()));
mockWorkerRunningTask(task3);
mockWorkerCompleteSuccessfulTask(task3);
Assert.assertTrue(taskFuture3.get().isSuccess());
Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
Assert.assertEquals(0, remoteTaskRunner.findWorkerRunningTask(task3.getId()).getContinuouslyFailedTasksCount());
}
use of org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig in project druid by druid-io.
the class RemoteTaskRunnerTest method testBlacklistZKWorkers50Percent.
/**
* With 2 workers and maxPercentageBlacklistWorkers(50), one worker should get blacklisted after the second failure
* and the second worker should never be blacklisted even after exceeding maxRetriesBeforeBlacklist.
*/
@Test
public void testBlacklistZKWorkers50Percent() throws Exception {
rtrTestUtils.makeWorker("worker", 10);
rtrTestUtils.makeWorker("worker2", 10);
RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD);
rtrConfig.setMaxPercentageBlacklistWorkers(50);
makeRemoteTaskRunner(rtrConfig);
String firstWorker = null;
String secondWorker = null;
for (int i = 1; i < 13; i++) {
String taskId = StringUtils.format("rt-%d", i);
TestRealtimeTask task = new TestRealtimeTask(taskId, new TaskResource(taskId, 1), "foo", TaskStatus.success(taskId), jsonMapper);
Future<TaskStatus> taskFuture = remoteTaskRunner.run(task);
if (i == 1) {
if (rtrTestUtils.taskAnnounced("worker2", task.getId())) {
firstWorker = "worker2";
secondWorker = "worker";
} else {
firstWorker = "worker";
secondWorker = "worker2";
}
}
final String expectedWorker = i % 2 == 0 || i > 4 ? secondWorker : firstWorker;
Assert.assertTrue(rtrTestUtils.taskAnnounced(expectedWorker, task.getId()));
rtrTestUtils.mockWorkerRunningTask(expectedWorker, task);
rtrTestUtils.mockWorkerCompleteFailedTask(expectedWorker, task);
Assert.assertTrue(taskFuture.get().isFailure());
Assert.assertEquals(i > 2 ? 1 : 0, remoteTaskRunner.getBlackListedWorkers().size());
Assert.assertEquals(i > 4 ? i - 2 : ((i + 1) / 2), remoteTaskRunner.findWorkerRunningTask(task.getId()).getContinuouslyFailedTasksCount());
}
}
use of org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig in project druid by druid-io.
the class PendingTaskBasedProvisioningStrategyTest method testSuccessfulMinWorkersProvisionWithOldVersionNodeRunning.
@Test
public void testSuccessfulMinWorkersProvisionWithOldVersionNodeRunning() {
EasyMock.expect(autoScaler.getMinNumWorkers()).andReturn(3).times(2);
EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(5);
EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())).andReturn(new ArrayList<String>());
RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class);
// No pending tasks
EasyMock.expect(runner.getPendingTaskPayloads()).andReturn(new ArrayList<>());
// 1 node already running, only provision 2 more.
EasyMock.expect(runner.getWorkers()).andReturn(Arrays.asList(new TestZkWorker(testTask).toImmutable(), // Invalid version node
new TestZkWorker(testTask, "http", "h1", "n1", INVALID_VERSION).toImmutable()));
EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig());
EasyMock.expect(autoScaler.provision()).andReturn(new AutoScalingData(Collections.singletonList("aNode"))).times(2);
EasyMock.replay(runner, autoScaler);
Provisioner provisioner = strategy.makeProvisioner(runner);
boolean provisionedSomething = provisioner.doProvision();
Assert.assertTrue(provisionedSomething);
Assert.assertTrue(provisioner.getStats().toList().size() == 2);
for (ScalingStats.ScalingEvent event : provisioner.getStats().toList()) {
Assert.assertTrue(event.getEvent() == ScalingStats.EVENT.PROVISION);
}
}
Aggregations