Example 26 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class WorkerResourceTest method setUp.

@Before
public void setUp() throws Exception {
    testingCluster = new TestingCluster(1);
    testingCluster.start();
    cf = CuratorFrameworkFactory.builder()
                                .connectString(testingCluster.getConnectString())
                                .retryPolicy(new ExponentialBackoffRetry(1, 10))
                                .compressionProvider(new PotentiallyGzippedCompressionProvider(false))
                                .build();
    cf.start();
    cf.blockUntilConnected();
    cf.create().creatingParentsIfNeeded().forPath(BASE_PATH);
    worker = new Worker("http", "host", "ip", 3, "v1", WorkerConfig.DEFAULT_CATEGORY);
    curatorCoordinator = new WorkerCuratorCoordinator(JSON_MAPPER, new IndexerZkConfig(new ZkPathsConfig() {

        @Override
        public String getBase() {
            return BASE_PATH;
        }
    }, null, null, null, null), new RemoteTaskRunnerConfig(), cf, worker);
    curatorCoordinator.start();
    workerResource = new WorkerResource(worker, () -> curatorCoordinator, null, EasyMock.createNiceMock(WorkerTaskMonitor.class), ZkEnablementConfig.ENABLED);
}
Also used : IndexerZkConfig(org.apache.druid.server.initialization.IndexerZkConfig) TestingCluster(org.apache.curator.test.TestingCluster) ExponentialBackoffRetry(org.apache.curator.retry.ExponentialBackoffRetry) WorkerCuratorCoordinator(org.apache.druid.indexing.worker.WorkerCuratorCoordinator) ZkPathsConfig(org.apache.druid.server.initialization.ZkPathsConfig) Worker(org.apache.druid.indexing.worker.Worker) PotentiallyGzippedCompressionProvider(org.apache.druid.curator.PotentiallyGzippedCompressionProvider) RemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig) Before(org.junit.Before)
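
The snippet above ends at setUp; the matching tearDown is not shown. A minimal sketch of one, assuming JUnit's @After and the stop/close methods of the objects created above (WorkerCuratorCoordinator.stop(), CuratorFramework.close(), TestingCluster.stop()):

@After
public void tearDown() throws Exception {
    // Release the resources opened in setUp, roughly in reverse order of creation.
    curatorCoordinator.stop();
    cf.close();
    testingCluster.stop();
}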

Example 27 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class WorkerResourceTest method testDoEnable.

@Test
public void testDoEnable() throws Exception {
    // Disable the worker
    Response res = workerResource.doDisable();
    Assert.assertEquals(Response.Status.OK.getStatusCode(), res.getStatus());
    Worker theWorker = JSON_MAPPER.readValue(cf.getData().forPath(ANNOUNCEMENT_PATH), Worker.class);
    Assert.assertTrue(theWorker.getVersion().isEmpty());
    // Enable the worker
    res = workerResource.doEnable();
    Assert.assertEquals(Response.Status.OK.getStatusCode(), res.getStatus());
    theWorker = JSON_MAPPER.readValue(cf.getData().forPath(ANNOUNCEMENT_PATH), Worker.class);
    Assert.assertEquals("v1", theWorker.getVersion());
}
Also used : Response(javax.ws.rs.core.Response) Worker(org.apache.druid.indexing.worker.Worker) Test(org.junit.Test)
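
The assertion on getVersion().isEmpty() reflects the convention this test exercises: doDisable re-announces the worker with an empty version string, and doEnable restores the configured version ("v1"). A small helper expressing that convention (hypothetical, not part of the Druid API):

// Hypothetical helper: mirrors the convention asserted above, where a worker
// announced with an empty version is considered disabled.
private static boolean isDisabled(Worker worker) {
    return worker.getVersion() == null || worker.getVersion().isEmpty();
}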

Example 28 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class WorkerResourceTest method testDoDisable.

@Test
public void testDoDisable() throws Exception {
    Worker theWorker = JSON_MAPPER.readValue(cf.getData().forPath(ANNOUNCEMENT_PATH), Worker.class);
    Assert.assertEquals("v1", theWorker.getVersion());
    Response res = workerResource.doDisable();
    Assert.assertEquals(Response.Status.OK.getStatusCode(), res.getStatus());
    theWorker = JSON_MAPPER.readValue(cf.getData().forPath(ANNOUNCEMENT_PATH), Worker.class);
    Assert.assertTrue(theWorker.getVersion().isEmpty());
}
Also used : Response(javax.ws.rs.core.Response) Worker(org.apache.druid.indexing.worker.Worker) Test(org.junit.Test)
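
The same announcement znode can also be checked without binding to Worker, using Jackson's tree model (JsonNode is com.fasterxml.jackson.databind.JsonNode). A sketch, assuming the version is serialized under a "version" field, which is not verified against Worker's @JsonProperty names:

// Sketch: inspect the announced JSON directly instead of deserializing to Worker.
JsonNode announced = JSON_MAPPER.readTree(cf.getData().forPath(ANNOUNCEMENT_PATH));
Assert.assertEquals("", announced.get("version").asText());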

Example 29 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class ImmutableWorkerInfoTest method test_canRunTask.

@Test
public void test_canRunTask() {
    ImmutableWorkerInfo workerInfo = new ImmutableWorkerInfo(
        new Worker("http", "testWorker2", "192.0.0.1", 10, "v1", WorkerConfig.DEFAULT_CATEGORY),
        6,
        0,
        ImmutableSet.of("grp1", "grp2"),
        ImmutableSet.of("task1", "task2"),
        DateTimes.of("2015-01-01T01:01:02Z")
    );
    // Parallel index task
    TaskResource taskResource0 = mock(TaskResource.class);
    when(taskResource0.getRequiredCapacity()).thenReturn(3);
    Task parallelIndexTask = mock(ParallelIndexSupervisorTask.class);
    when(parallelIndexTask.getType()).thenReturn(ParallelIndexSupervisorTask.TYPE);
    when(parallelIndexTask.getTaskResource()).thenReturn(taskResource0);
    // Since the task satisfies both the parallel and total slot constraints, it can run
    Assert.assertTrue(workerInfo.canRunTask(parallelIndexTask, 0.5));
    // The task fails the parallel slot constraint, so it cannot run (3 > 1)
    Assert.assertFalse(workerInfo.canRunTask(parallelIndexTask, 0.1));
    // Some other indexing task
    TaskResource taskResource1 = mock(TaskResource.class);
    when(taskResource1.getRequiredCapacity()).thenReturn(5);
    Task anyOtherTask = mock(IndexTask.class);
    when(anyOtherTask.getType()).thenReturn("index");
    when(anyOtherTask.getTaskResource()).thenReturn(taskResource1);
    // Not a parallel index task -> satisfies the parallel index constraint
    // But does not satisfy the total slot constraint and cannot run (11 > 10)
    Assert.assertFalse(workerInfo.canRunTask(anyOtherTask, 0.5));
    // Task has an availability conflict ("grp1")
    TaskResource taskResource2 = mock(TaskResource.class);
    when(taskResource2.getRequiredCapacity()).thenReturn(1);
    when(taskResource2.getAvailabilityGroup()).thenReturn("grp1");
    Task grp1Task = mock(IndexTask.class);
    when(grp1Task.getType()).thenReturn("blah");
    when(grp1Task.getTaskResource()).thenReturn(taskResource2);
    // Satisfies the parallel index and total slot constraints but cannot run due to the availability group conflict
    Assert.assertFalse(workerInfo.canRunTask(grp1Task, 0.3));
}
Also used : ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) IndexTask(org.apache.druid.indexing.common.task.IndexTask) Task(org.apache.druid.indexing.common.task.Task) TaskResource(org.apache.druid.indexing.common.task.TaskResource) Worker(org.apache.druid.indexing.worker.Worker) Test(org.junit.Test)
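
The assertions above boil down to three checks: an availability-group conflict, a parallel-index slot cap derived from the ratio argument, and total capacity. A minimal sketch of that arithmetic using the numbers from the test (the method name, signature, and rounding rule are assumptions for illustration, not the actual ImmutableWorkerInfo.canRunTask code):

import java.util.Set;

final class CapacityCheckSketch {
    // Sketch only: capacity = 10 and capacityUsed = 6 match the worker above.
    static boolean canRunSketch(
        int capacity,
        int capacityUsed,
        boolean isParallelIndexTask,
        int requiredCapacity,
        double parallelIndexRatio,
        Set<String> runningAvailabilityGroups,
        String availabilityGroup
    ) {
        // Availability-group conflict: "grp1" is already running above, so grp1Task is rejected.
        if (availabilityGroup != null && runningAvailabilityGroups.contains(availabilityGroup)) {
            return false;
        }
        // Parallel index tasks are additionally capped at a fraction of total capacity:
        // 10 * 0.5 gives 5 slots (3 fits); 10 * 0.1 gives 1 slot (3 does not), as asserted above.
        if (isParallelIndexTask) {
            final int parallelSlots = Math.max(1, (int) (capacity * parallelIndexRatio));
            if (requiredCapacity > parallelSlots) {
                return false;
            }
        }
        // Total capacity check: 6 used + 5 required = 11 > 10, so anyOtherTask is rejected.
        return capacityUsed + requiredCapacity <= capacity;
    }
}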

Example 30 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class RemoteTaskRunner method start.

@Override
@LifecycleStart
public void start() {
    if (!lifecycleLock.canStart()) {
        return;
    }
    try {
        log.info("Starting RemoteTaskRunner...");
        final MutableInt waitingFor = new MutableInt(1);
        final Object waitingForMonitor = new Object();
        // Add listener for creation/deletion of workers
        workerPathCache.getListenable().addListener((client, event) -> {
            final Worker worker;
            switch(event.getType()) {
                case CHILD_ADDED:
                    worker = jsonMapper.readValue(event.getData().getData(), Worker.class);
                    synchronized (waitingForMonitor) {
                        waitingFor.increment();
                    }
                    Futures.addCallback(addWorker(worker), new FutureCallback<ZkWorker>() {

                        @Override
                        public void onSuccess(ZkWorker zkWorker) {
                            synchronized (waitingForMonitor) {
                                waitingFor.decrement();
                                waitingForMonitor.notifyAll();
                            }
                        }

                        @Override
                        public void onFailure(Throwable throwable) {
                            synchronized (waitingForMonitor) {
                                waitingFor.decrement();
                                waitingForMonitor.notifyAll();
                            }
                        }
                    });
                    break;
                case CHILD_UPDATED:
                    worker = jsonMapper.readValue(event.getData().getData(), Worker.class);
                    updateWorker(worker);
                    break;
                case CHILD_REMOVED:
                    worker = jsonMapper.readValue(event.getData().getData(), Worker.class);
                    removeWorker(worker);
                    break;
                case INITIALIZED:
                    // Schedule cleanup of task statuses for workers that may have disconnected while the overlord was not running
                    List<String> workers;
                    try {
                        workers = cf.getChildren().forPath(indexerZkConfig.getStatusPath());
                    } catch (KeeperException.NoNodeException e) {
                        // statusPath doesn't exist yet; can occur if no middleManagers have started.
                        workers = ImmutableList.of();
                    }
                    for (String workerId : workers) {
                        final String workerAnnouncePath = JOINER.join(indexerZkConfig.getAnnouncementsPath(), workerId);
                        final String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId);
                        if (!zkWorkers.containsKey(workerId) && cf.checkExists().forPath(workerAnnouncePath) == null) {
                            try {
                                scheduleTasksCleanupForWorker(workerId, cf.getChildren().forPath(workerStatusPath));
                            } catch (Exception e) {
                                log.warn(e, "Could not schedule cleanup for worker[%s] during startup (maybe someone removed the status znode[%s]?). Skipping.", workerId, workerStatusPath);
                            }
                        }
                    }
                    synchronized (waitingForMonitor) {
                        waitingFor.decrement();
                        waitingForMonitor.notifyAll();
                    }
                    break;
                case CONNECTION_SUSPENDED:
                case CONNECTION_RECONNECTED:
                case CONNECTION_LOST:
            }
        });
        workerPathCache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT);
        synchronized (waitingForMonitor) {
            while (waitingFor.intValue() > 0) {
                waitingForMonitor.wait();
            }
        }
        ScheduledExecutors.scheduleAtFixedRate(cleanupExec, Period.ZERO.toStandardDuration(), config.getWorkerBlackListCleanupPeriod().toStandardDuration(), this::checkBlackListedNodes);
        provisioningService = provisioningStrategy.makeProvisioningService(this);
        lifecycleLock.started();
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        lifecycleLock.exitStart();
    }
}
Also used : TimeoutException(java.util.concurrent.TimeoutException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) MutableInt(org.apache.commons.lang.mutable.MutableInt) Worker(org.apache.druid.indexing.worker.Worker) KeeperException(org.apache.zookeeper.KeeperException) LifecycleStart(org.apache.druid.java.util.common.lifecycle.LifecycleStart)
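
The listener above is driven by Curator's PathChildrenCache started with POST_INITIALIZED_EVENT, so the INITIALIZED case fires once after the pre-existing worker znodes have been replayed; that is the event start() waits on before scheduling blacklist cleanup. A standalone sketch of that wiring (the connection string and path are placeholders, not Druid's actual configuration):

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.recipes.cache.PathChildrenCache;
import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent;
import org.apache.curator.retry.ExponentialBackoffRetry;

public class PathCacheSketch {
    public static void main(String[] args) throws Exception {
        CuratorFramework client = CuratorFrameworkFactory.newClient("localhost:2181", new ExponentialBackoffRetry(1000, 3));
        client.start();
        // cacheData = true so event.getData().getData() carries the znode payload, as above.
        PathChildrenCache cache = new PathChildrenCache(client, "/druid/indexer/announcements", true);
        cache.getListenable().addListener((c, event) -> {
            // CHILD_ADDED / CHILD_UPDATED / CHILD_REMOVED fire per worker znode;
            // INITIALIZED fires once after the existing children have been delivered.
            if (event.getType() == PathChildrenCacheEvent.Type.INITIALIZED) {
                System.out.println("initial worker set loaded");
            }
        });
        cache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT);
    }
}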

Aggregations

Worker (org.apache.druid.indexing.worker.Worker) 46
Test (org.junit.Test) 32
NoopTask (org.apache.druid.indexing.common.task.NoopTask) 21
ImmutableWorkerInfo (org.apache.druid.indexing.overlord.ImmutableWorkerInfo) 15
ArrayList (java.util.ArrayList) 14
Task (org.apache.druid.indexing.common.task.Task) 13
TaskStorage (org.apache.druid.indexing.overlord.TaskStorage) 12
RemoteTaskRunnerConfig (org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig) 11
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 10
ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService) 10
AtomicReference (java.util.concurrent.atomic.AtomicReference) 10
HttpRemoteTaskRunnerConfig (org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig) 10
HttpClient (org.apache.druid.java.util.http.client.HttpClient) 10
IndexerZkConfig (org.apache.druid.server.initialization.IndexerZkConfig) 10
ImmutableList (com.google.common.collect.ImmutableList) 9
List (java.util.List) 9
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap) 9
CuratorFramework (org.apache.curator.framework.CuratorFramework) 9
DruidNodeDiscoveryProvider (org.apache.druid.discovery.DruidNodeDiscoveryProvider) 9
TaskStatus (org.apache.druid.indexer.TaskStatus) 9