Example use of org.apache.druid.indexing.worker.Worker in the druid project by druid-io.
From class HttpRemoteTaskRunner, method startWorkersHandling.
/**
 * Registers a node-discovery listener for the worker service, blocks until the initial worker view
 * has been reported, and then blocks until every known worker has finished its initialization.
 *
 * <p>The listener turns discovered nodes into workers and adds or removes them as discovery reports
 * changes. While waiting for the initial view, the latch is polled in 30 second intervals.
 *
 * @throws InterruptedException if the thread is interrupted while waiting
 * @throws ISE if an await times out and more than 5 minutes have elapsed since the listener was registered
 */
private void startWorkersHandling() throws InterruptedException {
  final CountDownLatch viewReady = new CountDownLatch(1);
  final DruidNodeDiscovery discovery =
      druidNodeDiscoveryProvider.getForService(WorkerNodeService.DISCOVERY_SERVICE_KEY);

  this.nodeDiscoveryListener = new DruidNodeDiscovery.Listener() {
    @Override
    public void nodesAdded(Collection<DiscoveryDruidNode> nodes) {
      for (DiscoveryDruidNode added : nodes) {
        addWorker(toWorker(added));
      }
    }

    @Override
    public void nodesRemoved(Collection<DiscoveryDruidNode> nodes) {
      for (DiscoveryDruidNode removed : nodes) {
        removeWorker(toWorker(removed));
      }
    }

    @Override
    public void nodeViewInitialized() {
      // Safe to call repeatedly: countDown() is a no-op once the count is already 0.
      viewReady.countDown();
    }
  };
  discovery.registerListener(nodeDiscoveryListener);

  final long startMillis = System.currentTimeMillis();
  boolean initialized = viewReady.await(30, TimeUnit.SECONDS);
  while (!initialized) {
    final long elapsedMillis = System.currentTimeMillis() - startMillis;
    if (elapsedMillis > TimeUnit.MINUTES.toMillis(5)) {
      throw new ISE("Couldn't discover workers.");
    }
    log.info("Waiting for worker discovery...");
    initialized = viewReady.await(30, TimeUnit.SECONDS);
  }
  log.info("[%s] Workers are discovered.", workers.size());

  // Wait for each worker to sync its state first; otherwise we would start assigning tasks
  // which are pretty soon going to be reported by the discovered workers.
  for (WorkerHolder holder : workers.values()) {
    log.info("Waiting for worker[%s] to sync state...", holder.getWorker().getHost());
    holder.waitForInitialization();
  }
  log.info("Workers have sync'd state successfully.");
}
Example use of org.apache.druid.indexing.worker.Worker in the druid project by druid-io.
From class HttpRemoteTaskRunner, method getLazyTaskSlotCount.
/**
 * Sums the capacity of all lazy workers, grouped by worker category.
 *
 * @return a new mutable map from worker category to the summed capacity of the lazy workers in
 *         that category; empty when there are no lazy workers
 */
@Override
public Map<String, Long> getLazyTaskSlotCount() {
  final Map<String, Long> slotsByCategory = new HashMap<>();
  for (Worker lazyWorker : getLazyWorkers()) {
    // merge() stores the capacity for a new category and adds it to the running total otherwise.
    slotsByCategory.merge(lazyWorker.getCategory(), (long) lazyWorker.getCapacity(), Long::sum);
  }
  return slotsByCategory;
}
Example use of org.apache.druid.indexing.worker.Worker in the druid project by druid-io.
From class HttpRemoteTaskRunner, method addWorker.
/**
 * Handles a worker being reported: cancels any cleanup scheduled for its host and, if no holder
 * exists for that host yet, creates and starts one and records it in {@code workers}.
 *
 * <p>A new holder is seeded with announcements for every task that is currently in the RUNNING
 * state and whose recorded worker has the same host as the given worker. Threads waiting on
 * {@code statusLock} are notified at the end in all cases.
 *
 * @param worker the worker that was reported
 */
private void addWorker(final Worker worker) {
  synchronized (workers) {
    log.info("Worker[%s] reportin' for duty!", worker.getHost());
    cancelWorkerCleanup(worker.getHost());

    final WorkerHolder existing = workers.get(worker.getHost());
    if (existing != null) {
      log.info("Worker[%s] already exists.", worker.getHost());
    } else {
      // Collect the tasks we believe are running on this host so the new holder can use them
      // when syncing task state with the worker.
      final List<TaskAnnouncement> believedRunning = new ArrayList<>();
      synchronized (statusLock) {
        for (Map.Entry<String, HttpRemoteTaskRunnerWorkItem> entry : tasks.entrySet()) {
          final HttpRemoteTaskRunnerWorkItem item = entry.getValue();
          if (item.getState() != HttpRemoteTaskRunnerWorkItem.State.RUNNING) {
            continue;
          }
          final Worker assigned = item.getWorker();
          if (assigned == null || !assigned.getHost().equals(worker.getHost())) {
            continue;
          }
          believedRunning.add(
              TaskAnnouncement.create(item.getTask(), TaskStatus.running(entry.getKey()), item.getLocation())
          );
        }
      }

      final WorkerHolder created = createWorkerHolder(
          smileMapper,
          httpClient,
          config,
          workersSyncExec,
          this::taskAddedOrUpdated,
          worker,
          believedRunning
      );
      created.start();
      workers.put(worker.getHost(), created);
    }
  }

  // Wake up anything waiting on statusLock.
  synchronized (statusLock) {
    statusLock.notifyAll();
  }
}
Example use of org.apache.druid.indexing.worker.Worker in the druid project by druid-io.
From class WorkerHolderTest, method testSyncListener.
/**
 * Drives the sync listener of a {@link WorkerHolder} through a full sync, a delta sync and a second
 * full sync, checking after each step the holder's {@code disabled} flag and the announcements
 * delivered to the listener callback.
 */
@Test
public void testSyncListener() {
  final List<TaskAnnouncement> received = new ArrayList<>();

  final Task task0 = NoopTask.create("task0", 0);
  final Task task1 = NoopTask.create("task1", 0);
  final Task task2 = NoopTask.create("task2", 0);
  final Task task3 = NoopTask.create("task3", 0);

  // The holder is created knowing task0 and task1 as running.
  final Worker worker = new Worker("http", "localhost", "127.0.0.1", 5, "v0", WorkerConfig.DEFAULT_CATEGORY);
  final List<TaskAnnouncement> knownAnnouncements = ImmutableList.of(
      TaskAnnouncement.create(task0, TaskStatus.running(task0.getId()), TaskLocation.unknown()),
      TaskAnnouncement.create(task1, TaskStatus.running(task1.getId()), TaskLocation.unknown())
  );
  final WorkerHolder workerHolder = new WorkerHolder(
      TestHelper.makeJsonMapper(),
      EasyMock.createNiceMock(HttpClient.class),
      new HttpRemoteTaskRunnerConfig(),
      EasyMock.createNiceMock(ScheduledExecutorService.class),
      (announcement, holder) -> received.add(announcement),
      worker,
      knownAnnouncements
  );
  final ChangeRequestHttpSyncer.Listener<WorkerHistoryItem> syncListener = workerHolder.createSyncListener();
  Assert.assertTrue(workerHolder.disabled.get());

  // Phase 1: full sync reporting task1..task3 but not task0.
  final List<WorkerHistoryItem> firstFullSync = ImmutableList.of(
      new WorkerHistoryItem.Metadata(false),
      new WorkerHistoryItem.TaskUpdate(
          TaskAnnouncement.create(task1, TaskStatus.success(task1.getId()), TaskLocation.create("w1", 1, -1))
      ),
      new WorkerHistoryItem.TaskUpdate(
          TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("w1", 2, -1))
      ),
      new WorkerHistoryItem.TaskUpdate(
          TaskAnnouncement.create(task3, TaskStatus.running(task3.getId()), TaskLocation.create("w1", 2, -1))
      )
  );
  syncListener.fullSync(firstFullSync);

  Assert.assertFalse(workerHolder.disabled.get());
  Assert.assertEquals(4, received.size());
  Assert.assertEquals(task1.getId(), received.get(0).getTaskId());
  Assert.assertTrue(received.get(0).getTaskStatus().isSuccess());
  Assert.assertEquals(task2.getId(), received.get(1).getTaskId());
  Assert.assertTrue(received.get(1).getTaskStatus().isRunnable());
  Assert.assertEquals(task3.getId(), received.get(2).getTaskId());
  Assert.assertTrue(received.get(2).getTaskStatus().isRunnable());
  // task0 was known to the holder but absent from the sync, so a failure is expected for it.
  Assert.assertEquals(task0.getId(), received.get(3).getTaskId());
  Assert.assertTrue(received.get(3).getTaskStatus().isFailure());
  Assert.assertNotNull(received.get(3).getTaskStatus().getErrorMsg());
  Assert.assertTrue(
      received.get(3).getTaskStatus().getErrorMsg().startsWith("This task disappeared on the worker where it was assigned")
  );
  received.clear();

  // Phase 2: delta sync mixing metadata flips, removals of task1 and task2, and an update of task3.
  final List<WorkerHistoryItem> delta = ImmutableList.of(
      new WorkerHistoryItem.Metadata(false),
      new WorkerHistoryItem.TaskRemoval(task1.getId()),
      new WorkerHistoryItem.Metadata(true),
      new WorkerHistoryItem.TaskRemoval(task2.getId()),
      new WorkerHistoryItem.Metadata(false),
      new WorkerHistoryItem.TaskUpdate(
          TaskAnnouncement.create(task3, TaskStatus.running(task3.getId()), TaskLocation.create("w1", 3, -1))
      )
  );
  syncListener.deltaSync(delta);

  Assert.assertFalse(workerHolder.disabled.get());
  // Only two announcements are expected: a failure for task2 and the task3 update.
  Assert.assertEquals(2, received.size());
  Assert.assertEquals(task2.getId(), received.get(0).getTaskId());
  Assert.assertTrue(received.get(0).getTaskStatus().isFailure());
  Assert.assertNotNull(received.get(0).getTaskStatus().getErrorMsg());
  Assert.assertTrue(
      received.get(0).getTaskStatus().getErrorMsg().startsWith("This task disappeared on the worker where it was assigned")
  );
  Assert.assertEquals(task3.getId(), received.get(1).getTaskId());
  Assert.assertTrue(received.get(1).getTaskStatus().isRunnable());
  received.clear();

  // Phase 3: full sync with the same task updates as phase 1, but with Metadata(true).
  final List<WorkerHistoryItem> secondFullSync = ImmutableList.of(
      new WorkerHistoryItem.Metadata(true),
      new WorkerHistoryItem.TaskUpdate(
          TaskAnnouncement.create(task1, TaskStatus.success(task1.getId()), TaskLocation.create("w1", 1, -1))
      ),
      new WorkerHistoryItem.TaskUpdate(
          TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("w1", 2, -1))
      ),
      new WorkerHistoryItem.TaskUpdate(
          TaskAnnouncement.create(task3, TaskStatus.running(task3.getId()), TaskLocation.create("w1", 2, -1))
      )
  );
  syncListener.fullSync(secondFullSync);

  Assert.assertTrue(workerHolder.disabled.get());
  Assert.assertEquals(3, received.size());
  Assert.assertEquals(task1.getId(), received.get(0).getTaskId());
  Assert.assertTrue(received.get(0).getTaskStatus().isSuccess());
  Assert.assertEquals(task2.getId(), received.get(1).getTaskId());
  Assert.assertTrue(received.get(1).getTaskStatus().isRunnable());
  Assert.assertEquals(task3.getId(), received.get(2).getTaskId());
  Assert.assertTrue(received.get(2).getTaskStatus().isRunnable());
  received.clear();
}
Example use of org.apache.druid.indexing.worker.Worker in the druid project by druid-io.
From class OverlordResourceTest, method testGetTotalWorkerCapacityWithAutoScaleConfiguredAndProvisioningStrategyNotSupportExpectedWorkerCapacity.
/**
 * With an auto-scaler configured and a provisioning strategy that returns -1 as the expected
 * worker capacity, the total-worker-capacity response is expected to be OK, to report the single
 * worker's capacity as the current cluster capacity, and to report -1 as the maximum capacity
 * with auto-scale.
 */
@Test
public void testGetTotalWorkerCapacityWithAutoScaleConfiguredAndProvisioningStrategyNotSupportExpectedWorkerCapacity() {
  final int unsupportedCapacity = -1;
  final int workerLimit = 2;

  // A task runner that reports exactly one worker.
  final WorkerTaskRunner workerTaskRunner = EasyMock.createMock(WorkerTaskRunner.class);
  final ImmutableWorkerInfo onlyWorker = new ImmutableWorkerInfo(
      new Worker("http", "testWorker", "192.0.0.1", 3, "v1", WorkerConfig.DEFAULT_CATEGORY),
      2,
      ImmutableSet.of("grp1", "grp2"),
      ImmutableSet.of("task1", "task2"),
      DateTimes.of("2015-01-01T01:01:01Z")
  );
  final Collection<ImmutableWorkerInfo> workerInfos = ImmutableList.of(onlyWorker);
  EasyMock.expect(workerTaskRunner.getWorkers()).andReturn(workerInfos);

  EasyMock.reset(taskMaster);
  EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(workerTaskRunner)).anyTimes();
  EasyMock.expect(provisioningStrategy.getExpectedWorkerCapacity(workerInfos))
          .andReturn(unsupportedCapacity)
          .anyTimes();

  // Auto-scaler configuration served through the config manager.
  final AutoScaler autoScaler = EasyMock.createMock(AutoScaler.class);
  EasyMock.expect(autoScaler.getMinNumWorkers()).andReturn(0);
  EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(workerLimit);
  final DefaultWorkerBehaviorConfig behaviorConfig = new DefaultWorkerBehaviorConfig(null, autoScaler);
  final AtomicReference<WorkerBehaviorConfig> behaviorConfigRef = new AtomicReference<>(behaviorConfig);
  EasyMock.expect(configManager.watch(WorkerBehaviorConfig.CONFIG_KEY, WorkerBehaviorConfig.class))
          .andReturn(behaviorConfigRef);

  EasyMock.replay(
      workerTaskRunner,
      autoScaler,
      taskRunner,
      taskMaster,
      taskStorageQueryAdapter,
      indexerMetadataStorageAdapter,
      req,
      workerTaskRunnerQueryAdapter,
      configManager,
      provisioningStrategy
  );

  final Response response = overlordResource.getTotalWorkerCapacity();
  Assert.assertEquals(HttpResponseStatus.OK.getCode(), response.getStatus());

  final TotalWorkerCapacityResponse capacityResponse = (TotalWorkerCapacityResponse) response.getEntity();
  Assert.assertEquals(onlyWorker.getWorker().getCapacity(), capacityResponse.getCurrentClusterCapacity());
  Assert.assertEquals(unsupportedCapacity, capacityResponse.getMaximumCapacityWithAutoScale());
}
Aggregations