Search in sources :

Example 16 with RemoteTaskRunnerConfig

use of org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig in project druid by druid-io.

the class RemoteTaskRunnerTest method testBlacklistZKWorkers25Percent.

/**
 * Blacklisting must stay disabled in practice when the cap is 25% and only two workers exist.
 *
 * <p>With 2 workers and maxPercentageBlacklistWorkers(25), neither worker should ever be blacklisted, even after
 * exceeding maxRetriesBeforeBlacklist. Twelve tasks are failed one after another; the test expects them to
 * alternate between the two workers and checks after every failure that the blacklist is still empty while the
 * continuously-failed counter of the worker that ran the task keeps increasing.
 */
@Test
public void testBlacklistZKWorkers25Percent() throws Exception {
    rtrTestUtils.makeWorker("worker", 10);
    rtrTestUtils.makeWorker("worker2", 10);

    RemoteTaskRunnerConfig config = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD);
    config.setMaxPercentageBlacklistWorkers(25);
    makeRemoteTaskRunner(config);

    String oddRoundWorker = null;
    String evenRoundWorker = null;
    for (int round = 1; round <= 12; round++) {
        final String id = StringUtils.format("rt-%d", round);
        final TestRealtimeTask failingTask = new TestRealtimeTask(
            id,
            new TaskResource(id, 1),
            "foo",
            TaskStatus.success(id),
            jsonMapper
        );
        final Future<TaskStatus> outcome = remoteTaskRunner.run(failingTask);

        if (round == 1) {
            // Whichever worker is assigned the very first task defines the alternation order for the rest.
            final boolean worker2WentFirst = rtrTestUtils.taskAnnounced("worker2", failingTask.getId());
            oddRoundWorker = worker2WentFirst ? "worker2" : "worker";
            evenRoundWorker = worker2WentFirst ? "worker" : "worker2";
        }

        // Odd rounds are expected on the worker that took round 1, even rounds on the other one.
        final String expectedWorker;
        if (round % 2 == 0) {
            expectedWorker = evenRoundWorker;
        } else {
            expectedWorker = oddRoundWorker;
        }
        Assert.assertTrue(rtrTestUtils.taskAnnounced(expectedWorker, failingTask.getId()));

        rtrTestUtils.mockWorkerRunningTask(expectedWorker, failingTask);
        rtrTestUtils.mockWorkerCompleteFailedTask(expectedWorker, failingTask);
        Assert.assertTrue(outcome.get().isFailure());

        // Nobody is ever blacklisted; each worker has failed half of the rounds so far (rounded up).
        final int expectedFailures = (round + 1) / 2;
        Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
        Assert.assertEquals(
            expectedFailures,
            remoteTaskRunner.findWorkerRunningTask(failingTask.getId()).getContinuouslyFailedTasksCount()
        );
    }
}
Also used : TestRealtimeTask(org.apache.druid.indexing.common.TestRealtimeTask) TaskResource(org.apache.druid.indexing.common.task.TaskResource) TaskStatus(org.apache.druid.indexer.TaskStatus) RemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig) Test(org.junit.Test)

Example 17 with RemoteTaskRunnerConfig

use of org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig in project druid by druid-io.

the class RemoteTaskRunnerTest method testWorkerRemoved.

/**
 * Removing a worker's announcement while it runs a task must fail the task and clean up after the worker.
 *
 * <p>The test starts a task, deletes the node at {@code ANNOUCEMENTS_PATH}, and then checks that the task
 * future completes as FAILED with an error message mentioning "Canceled for worker cleanup", that the scheduled
 * worker cleanups drain, that nothing is left at {@code STATUS_PATH}, and that the default category no longer
 * appears in the total/idle slot counts.
 */
@Test
public void testWorkerRemoved() throws Exception {
    doSetup();

    // Before anything runs, the default category reports 3 total and 3 idle slots.
    Assert.assertEquals(3, remoteTaskRunner.getTotalTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue());
    Assert.assertEquals(3, remoteTaskRunner.getIdleTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue());

    Future<TaskStatus> taskResult = remoteTaskRunner.run(task);
    Assert.assertTrue(taskAnnounced(task.getId()));
    mockWorkerRunningTask(task);
    Assert.assertTrue(workerRunningTask(task.getId()));

    // Drop the announcement node while the task is still running.
    cf.delete().forPath(ANNOUCEMENTS_PATH);

    TaskStatus finalStatus = taskResult.get();
    Assert.assertEquals(TaskState.FAILED, finalStatus.getStatusCode());
    Assert.assertNotNull(finalStatus.getErrorMsg());
    Assert.assertTrue(finalStatus.getErrorMsg().contains("Canceled for worker cleanup"));

    RemoteTaskRunnerConfig runnerConfig = remoteTaskRunner.getRemoteTaskRunnerConfig();
    IndexingServiceCondition cleanupsDrained = new IndexingServiceCondition() {

        @Override
        public boolean isValid() {
            return remoteTaskRunner.getRemovedWorkerCleanups().isEmpty();
        }
    };
    // cleanup task is independently scheduled by event listener. we need to wait some more time.
    long cleanupWaitMillis = runnerConfig.getTaskCleanupTimeout().toStandardDuration().getMillis() * 2;
    Assert.assertTrue(TestUtils.conditionValid(cleanupsDrained, cleanupWaitMillis));

    Assert.assertNull(cf.checkExists().forPath(STATUS_PATH));
    Assert.assertFalse(remoteTaskRunner.getTotalTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY));
    Assert.assertFalse(remoteTaskRunner.getIdleTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY));
}
Also used : IndexingServiceCondition(org.apache.druid.indexing.common.IndexingServiceCondition) TaskStatus(org.apache.druid.indexer.TaskStatus) RemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig) Test(org.junit.Test)

Example 18 with RemoteTaskRunnerConfig

use of org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig in project druid by druid-io.

the class RemoteTaskRunnerTest method testBlacklistZKWorkers.

/**
 * Walks a worker through the blacklist life cycle with maxPercentageBlacklistWorkers set to 100.
 *
 * <p>Sequence asserted below: the first failed task leaves the blacklist empty; the second failed task puts one
 * worker on it; a blacklist check at the current time keeps it there; a check with the runner's clock moved
 * forward by twice {@code TIMEOUT_PERIOD} empties the blacklist and resets the failure counter; and a later
 * successful task leaves both the blacklist and the counter at zero.
 */
@Test
public void testBlacklistZKWorkers() throws Exception {
    makeWorker();
    RemoteTaskRunnerConfig config = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD);
    config.setMaxPercentageBlacklistWorkers(100);
    makeRemoteTaskRunner(config);

    // First failure: counted, but not enough to blacklist.
    TestRealtimeTask firstFailure = new TestRealtimeTask(
        "realtime1",
        new TaskResource("realtime1", 1),
        "foo",
        TaskStatus.success("realtime1"),
        jsonMapper
    );
    Future<TaskStatus> firstResult = remoteTaskRunner.run(firstFailure);
    Assert.assertTrue(taskAnnounced(firstFailure.getId()));
    mockWorkerRunningTask(firstFailure);
    mockWorkerCompleteFailedTask(firstFailure);
    Assert.assertTrue(firstResult.get().isFailure());
    Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
    Assert.assertEquals(
        1,
        remoteTaskRunner.findWorkerRunningTask(firstFailure.getId()).getContinuouslyFailedTasksCount()
    );

    // Second failure: the worker lands on the blacklist.
    TestRealtimeTask secondFailure = new TestRealtimeTask(
        "realtime2",
        new TaskResource("realtime2", 1),
        "foo",
        TaskStatus.running("realtime2"),
        jsonMapper
    );
    Future<TaskStatus> secondResult = remoteTaskRunner.run(secondFailure);
    Assert.assertTrue(taskAnnounced(secondFailure.getId()));
    mockWorkerRunningTask(secondFailure);
    mockWorkerCompleteFailedTask(secondFailure);
    Assert.assertTrue(secondResult.get().isFailure());
    Assert.assertEquals(1, remoteTaskRunner.getBlackListedWorkers().size());
    Assert.assertEquals(
        2,
        remoteTaskRunner.findWorkerRunningTask(secondFailure.getId()).getContinuouslyFailedTasksCount()
    );

    // Checking at "now" must not release the worker yet.
    final RemoteTaskRunnerTestUtils.TestableRemoteTaskRunner clockControlledRunner =
        (RemoteTaskRunnerTestUtils.TestableRemoteTaskRunner) remoteTaskRunner;
    clockControlledRunner.setCurrentTimeMillis(System.currentTimeMillis());
    remoteTaskRunner.checkBlackListedNodes();
    Assert.assertEquals(1, remoteTaskRunner.getBlackListedWorkers().size());

    // Move the runner's clock forward by twice the timeout period and check again.
    final long pastBackOff = System.currentTimeMillis() + 2 * TIMEOUT_PERIOD.toStandardDuration().getMillis();
    clockControlledRunner.setCurrentTimeMillis(pastBackOff);
    remoteTaskRunner.checkBlackListedNodes();
    // After backOffTime the nodes are removed from blacklist
    Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
    Assert.assertEquals(
        0,
        remoteTaskRunner.findWorkerRunningTask(secondFailure.getId()).getContinuouslyFailedTasksCount()
    );

    // A successful task afterwards keeps the blacklist empty and the counter at zero.
    TestRealtimeTask recovery = new TestRealtimeTask(
        "realtime3",
        new TaskResource("realtime3", 1),
        "foo",
        TaskStatus.running("realtime3"),
        jsonMapper
    );
    Future<TaskStatus> recoveryResult = remoteTaskRunner.run(recovery);
    Assert.assertTrue(taskAnnounced(recovery.getId()));
    mockWorkerRunningTask(recovery);
    mockWorkerCompleteSuccessfulTask(recovery);
    Assert.assertTrue(recoveryResult.get().isSuccess());
    Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
    Assert.assertEquals(
        0,
        remoteTaskRunner.findWorkerRunningTask(recovery.getId()).getContinuouslyFailedTasksCount()
    );
}
Also used : TestRealtimeTask(org.apache.druid.indexing.common.TestRealtimeTask) TaskResource(org.apache.druid.indexing.common.task.TaskResource) TaskStatus(org.apache.druid.indexer.TaskStatus) RemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig) Test(org.junit.Test)

Example 19 with RemoteTaskRunnerConfig

use of org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig in project druid by druid-io.

the class RemoteTaskRunnerTest method testBlacklistZKWorkers50Percent.

/**
 * At most one of two workers may be blacklisted when the cap is 50%.
 *
 * <p>With 2 workers and maxPercentageBlacklistWorkers(50), one worker should get blacklisted after its second
 * failure and the other worker should never be blacklisted, even after exceeding maxRetriesBeforeBlacklist.
 * Twelve tasks are failed in sequence: the first four are expected to alternate between the workers, and every
 * task after the fourth is expected on the worker that did not take the first task.
 */
@Test
public void testBlacklistZKWorkers50Percent() throws Exception {
    rtrTestUtils.makeWorker("worker", 10);
    rtrTestUtils.makeWorker("worker2", 10);

    RemoteTaskRunnerConfig config = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD);
    config.setMaxPercentageBlacklistWorkers(50);
    makeRemoteTaskRunner(config);

    String blacklistedWorker = null;
    String survivingWorker = null;
    for (int round = 1; round <= 12; round++) {
        final String id = StringUtils.format("rt-%d", round);
        final TestRealtimeTask failingTask = new TestRealtimeTask(
            id,
            new TaskResource(id, 1),
            "foo",
            TaskStatus.success(id),
            jsonMapper
        );
        final Future<TaskStatus> outcome = remoteTaskRunner.run(failingTask);

        if (round == 1) {
            // The worker that receives the first task is the one expected to end up blacklisted.
            final boolean worker2WentFirst = rtrTestUtils.taskAnnounced("worker2", failingTask.getId());
            blacklistedWorker = worker2WentFirst ? "worker2" : "worker";
            survivingWorker = worker2WentFirst ? "worker" : "worker2";
        }

        // Rounds 1 and 3 go to the first worker; even rounds and everything after round 4 go to the other one.
        final String expectedWorker;
        if (round > 4 || round % 2 == 0) {
            expectedWorker = survivingWorker;
        } else {
            expectedWorker = blacklistedWorker;
        }
        Assert.assertTrue(rtrTestUtils.taskAnnounced(expectedWorker, failingTask.getId()));

        rtrTestUtils.mockWorkerRunningTask(expectedWorker, failingTask);
        rtrTestUtils.mockWorkerCompleteFailedTask(expectedWorker, failingTask);
        Assert.assertTrue(outcome.get().isFailure());

        // From round 3 onwards exactly one worker is blacklisted.
        final int expectedBlacklistSize = round > 2 ? 1 : 0;
        Assert.assertEquals(expectedBlacklistSize, remoteTaskRunner.getBlackListedWorkers().size());

        // Up to round 4 the failures are split evenly; afterwards the surviving worker takes every failure.
        final int expectedFailures;
        if (round > 4) {
            expectedFailures = round - 2;
        } else {
            expectedFailures = (round + 1) / 2;
        }
        Assert.assertEquals(
            expectedFailures,
            remoteTaskRunner.findWorkerRunningTask(failingTask.getId()).getContinuouslyFailedTasksCount()
        );
    }
}
Also used : TestRealtimeTask(org.apache.druid.indexing.common.TestRealtimeTask) TaskResource(org.apache.druid.indexing.common.task.TaskResource) TaskStatus(org.apache.druid.indexer.TaskStatus) RemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig) Test(org.junit.Test)

Example 20 with RemoteTaskRunnerConfig

use of org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig in project druid by druid-io.

the class PendingTaskBasedProvisioningStrategyTest method testSuccessfulMinWorkersProvisionWithOldVersionNodeRunning.

/**
 * Provisioning up to the minimum worker count when one of the two reported workers has an invalid version.
 *
 * <p>The auto scaler is mocked with a minimum of 3 and a maximum of 5 workers, and the runner reports no pending
 * tasks and two workers, one of them created with {@code INVALID_VERSION}. The test expects
 * {@code doProvision()} to report that it provisioned something and to record exactly two scaling events, both
 * of type {@code PROVISION}.
 */
@Test
public void testSuccessfulMinWorkersProvisionWithOldVersionNodeRunning() {
    EasyMock.expect(autoScaler.getMinNumWorkers()).andReturn(3).times(2);
    EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(5);
    EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())).andReturn(new ArrayList<String>());
    RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class);
    // No pending tasks
    EasyMock.expect(runner.getPendingTaskPayloads()).andReturn(new ArrayList<>());
    // 1 node already running, only provision 2 more.
    EasyMock.expect(runner.getWorkers()).andReturn(Arrays.asList(new TestZkWorker(testTask).toImmutable(), // Invalid version node
    new TestZkWorker(testTask, "http", "h1", "n1", INVALID_VERSION).toImmutable()));
    EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig());
    EasyMock.expect(autoScaler.provision()).andReturn(new AutoScalingData(Collections.singletonList("aNode"))).times(2);
    EasyMock.replay(runner, autoScaler);
    Provisioner provisioner = strategy.makeProvisioner(runner);
    boolean provisionedSomething = provisioner.doProvision();
    Assert.assertTrue(provisionedSomething);
    // assertEquals (rather than assertTrue on ==) so a failure reports the expected and actual values.
    Assert.assertEquals(2, provisioner.getStats().toList().size());
    for (ScalingStats.ScalingEvent event : provisioner.getStats().toList()) {
        Assert.assertEquals(ScalingStats.EVENT.PROVISION, event.getEvent());
    }
}
Also used : RemoteTaskRunner(org.apache.druid.indexing.overlord.RemoteTaskRunner) RemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig) Test(org.junit.Test)

Aggregations

RemoteTaskRunnerConfig (org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig)32 Test (org.junit.Test)29 ImmutableWorkerInfo (org.apache.druid.indexing.overlord.ImmutableWorkerInfo)14 NoopTask (org.apache.druid.indexing.common.task.NoopTask)12 RemoteTaskRunner (org.apache.druid.indexing.overlord.RemoteTaskRunner)11 Worker (org.apache.druid.indexing.worker.Worker)11 TaskStatus (org.apache.druid.indexer.TaskStatus)5 TestRealtimeTask (org.apache.druid.indexing.common.TestRealtimeTask)5 TaskResource (org.apache.druid.indexing.common.task.TaskResource)5 Supplier (com.google.common.base.Supplier)4 ArrayList (java.util.ArrayList)4 DateTime (org.joda.time.DateTime)4 Period (org.joda.time.Period)4 Function (com.google.common.base.Function)1 ExponentialBackoffRetry (org.apache.curator.retry.ExponentialBackoffRetry)1 TestingCluster (org.apache.curator.test.TestingCluster)1 PotentiallyGzippedCompressionProvider (org.apache.druid.curator.PotentiallyGzippedCompressionProvider)1 IndexingServiceCondition (org.apache.druid.indexing.common.IndexingServiceCondition)1 WorkerCuratorCoordinator (org.apache.druid.indexing.worker.WorkerCuratorCoordinator)1 ServiceEmitter (org.apache.druid.java.util.emitter.service.ServiceEmitter)1