Search in sources :

Example 41 with PathChildrenCache

Use of org.apache.curator.framework.recipes.cache.PathChildrenCache in project hive by apache.

The class LlapZookeeperRegistryImpl, method checkPathChildrenCache.

/**
 * Lazily creates and starts the {@code PathChildrenCache} watching {@code workersPath}.
 *
 * <p>Returns immediately if the cache already exists. Otherwise the cache is started with
 * {@code BUILD_INITIAL_CACHE}; when the start fails with {@code InvalidACLException} the attempt
 * is retried with a doubling sleep (starting at {@code min(16, clusterReadyTimeoutMs)} ms and
 * never longer than the time remaining) until {@code clusterReadyTimeoutMs} has elapsed.
 *
 * <p>If startup ultimately fails, the notification executor created for the listener is shut
 * down so that it does not outlive the failed attempt.
 *
 * @param clusterReadyTimeoutMs how long to keep retrying on {@code InvalidACLException};
 *                              {@code 0} disables retrying
 * @throws IOException if the cache cannot be started, the timeout expires, or the thread is
 *                     interrupted while waiting to retry (the interrupt flag is restored)
 */
private synchronized void checkPathChildrenCache(long clusterReadyTimeoutMs) throws IOException {
    Preconditions.checkArgument(zooKeeperClient != null && zooKeeperClient.getState() == CuratorFrameworkState.STARTED, "client is not started");
    // lazily create PathChildrenCache
    if (instancesCache != null) {
        return;
    }
    ExecutorService tp = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("StateChangeNotificationHandler").build());
    boolean cacheStarted = false;
    try {
        long startTimeNs = System.nanoTime();
        long deltaNs = clusterReadyTimeoutMs * 1000000L;
        long sleepTimeMs = Math.min(16, clusterReadyTimeoutMs);
        while (!cacheStarted) {
            // Named differently from the field so that the two are never confused.
            PathChildrenCache candidateCache = new PathChildrenCache(zooKeeperClient, workersPath, true);
            candidateCache.getListenable().addListener(new InstanceStateChangeListener(), tp);
            try {
                candidateCache.start(PathChildrenCache.StartMode.BUILD_INITIAL_CACHE);
                this.instancesCache = candidateCache;
                cacheStarted = true;
            } catch (InvalidACLException e) {
                // PathChildrenCache tried to mkdir when the znode wasn't there, and failed.
                CloseableUtils.closeQuietly(candidateCache);
                long elapsedNs = System.nanoTime() - startTimeNs;
                if (deltaNs == 0 || deltaNs <= elapsedNs) {
                    LOG.error("Unable to start curator PathChildrenCache", e);
                    throw new IOException(e);
                }
                LOG.warn("The cluster is not started yet (InvalidACL); will retry");
                try {
                    // Never sleep past the overall deadline.
                    Thread.sleep(Math.min(sleepTimeMs, (deltaNs - elapsedNs) / 1000000L));
                } catch (InterruptedException e1) {
                    LOG.error("Interrupted while retrying the PathChildrenCache startup");
                    // Restore the interrupt flag so callers up the stack can still observe it.
                    Thread.currentThread().interrupt();
                    throw new IOException(e1);
                }
                sleepTimeMs = sleepTimeMs << 1;
            } catch (Exception e) {
                CloseableUtils.closeQuietly(candidateCache);
                LOG.error("Unable to start curator PathChildrenCache", e);
                throw new IOException(e);
            }
        }
    } finally {
        if (!cacheStarted) {
            // No cache is using the executor; release it instead of leaking it.
            tp.shutdownNow();
        }
    }
}
Also used : ServiceInstanceStateChangeListener(org.apache.hadoop.hive.llap.registry.ServiceInstanceStateChangeListener) PathChildrenCache(org.apache.curator.framework.recipes.cache.PathChildrenCache) ExecutorService(java.util.concurrent.ExecutorService) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) IOException(java.io.IOException) InvalidACLException(org.apache.zookeeper.KeeperException.InvalidACLException) URISyntaxException(java.net.URISyntaxException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) InvalidACLException(org.apache.zookeeper.KeeperException.InvalidACLException)

Example 42 with PathChildrenCache

Use of org.apache.curator.framework.recipes.cache.PathChildrenCache in project druid by druid-io.

The class ZkCoordinator, method start.

/**
 * Starts this coordinator once: ensures its ZooKeeper paths exist, loads the local cache, and
 * begins listening on the server's load-queue path for segment change requests.
 *
 * <p>Each {@code CHILD_ADDED} request is deserialized and executed; its zNode is deleted when
 * the request's callback runs, or immediately if executing the request throws. Calling this
 * method again after a successful start is a no-op.
 *
 * @throws IOException if startup fails with an {@code IOException}; other failures are
 *                     propagated unchecked
 */
@LifecycleStart
public void start() throws IOException {
    synchronized (lock) {
        if (started) {
            return;
        }

        final String serverName = me.getName();
        log.info("Starting zkCoordinator for server[%s]", serverName);

        final String loadQueueLocation = ZKPaths.makePath(zkPaths.getLoadQueuePath(), serverName);
        final String servedSegmentsLocation = ZKPaths.makePath(zkPaths.getServedSegmentsPath(), serverName);
        final String liveSegmentsLocation = ZKPaths.makePath(zkPaths.getLiveSegmentsPath(), serverName);

        loadQueueCache = new PathChildrenCache(
            curator,
            loadQueueLocation,
            true,
            true,
            Execs.multiThreaded(config.getNumLoadingThreads(), "ZkCoordinator-%s")
        );

        try {
            curator.newNamespaceAwareEnsurePath(loadQueueLocation).ensure(curator.getZookeeperClient());
            curator.newNamespaceAwareEnsurePath(servedSegmentsLocation).ensure(curator.getZookeeperClient());
            curator.newNamespaceAwareEnsurePath(liveSegmentsLocation).ensure(curator.getZookeeperClient());

            loadLocalCache();

            final PathChildrenCacheListener loadQueueListener = new PathChildrenCacheListener() {

                @Override
                public void childEvent(CuratorFramework client, PathChildrenCacheEvent event) throws Exception {
                    final ChildData node = event.getData();
                    switch (event.getType()) {
                        case CHILD_ADDED:
                            final String nodePath = node.getPath();
                            final DataSegmentChangeRequest changeRequest =
                                jsonMapper.readValue(node.getData(), DataSegmentChangeRequest.class);
                            log.info("New request[%s] with zNode[%s].", changeRequest.asString(), nodePath);
                            try {
                                changeRequest.go(getDataSegmentChangeHandler(), new DataSegmentChangeCallback() {

                                    boolean hasRun = false;

                                    @Override
                                    public void execute() {
                                        try {
                                            if (hasRun) {
                                                return;
                                            }
                                            curator.delete().guaranteed().forPath(nodePath);
                                            log.info("Completed request [%s]", changeRequest.asString());
                                            hasRun = true;
                                        } catch (Exception e) {
                                            // One more best-effort delete before surfacing the failure.
                                            deleteNodeIgnoringFailure(nodePath);
                                            log.error(e, "Exception while removing zNode[%s]", nodePath);
                                            throw Throwables.propagate(e);
                                        }
                                    }
                                });
                            } catch (Exception e) {
                                // The request failed; drop its zNode and raise an alert.
                                deleteNodeIgnoringFailure(nodePath);
                                log.makeAlert(e, "Segment load/unload: uncaught exception.")
                                   .addData("node", nodePath)
                                   .addData("nodeProperties", changeRequest)
                                   .emit();
                            }
                            break;
                        case CHILD_REMOVED:
                            log.info("zNode[%s] was removed", event.getData().getPath());
                            break;
                        default:
                            log.info("Ignoring event[%s]", event);
                    }
                }

                /** Attempts a guaranteed delete of the zNode; a failure is logged and swallowed. */
                private void deleteNodeIgnoringFailure(String nodePath) {
                    try {
                        curator.delete().guaranteed().forPath(nodePath);
                    } catch (Exception deleteFailure) {
                        log.error(deleteFailure, "Failed to delete zNode[%s], but ignoring exception.", nodePath);
                    }
                }
            };
            loadQueueCache.getListenable().addListener(loadQueueListener);
            loadQueueCache.start();
        } catch (Exception e) {
            Throwables.propagateIfPossible(e, IOException.class);
            throw Throwables.propagate(e);
        }

        started = true;
    }
}
Also used : CuratorFramework(org.apache.curator.framework.CuratorFramework) PathChildrenCacheListener(org.apache.curator.framework.recipes.cache.PathChildrenCacheListener) PathChildrenCacheEvent(org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent) PathChildrenCache(org.apache.curator.framework.recipes.cache.PathChildrenCache) ChildData(org.apache.curator.framework.recipes.cache.ChildData) IOException(java.io.IOException) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) LifecycleStart(io.druid.java.util.common.lifecycle.LifecycleStart)

Example 43 with PathChildrenCache

Use of org.apache.curator.framework.recipes.cache.PathChildrenCache in project druid by druid-io.

The class RemoteTaskRunner, method addWorker.

/**
 * When a new worker appears, listeners are registered for status changes associated with tasks assigned to
 * the worker. Status changes indicate the creation or completion of a task.
 * The RemoteTaskRunner updates state according to these changes.
 *
 * <p>The returned future is completed when the worker's status cache delivers its
 * {@code INITIALIZED} event: with the worker if it was newly registered in {@code zkWorkers},
 * or with an {@link IllegalStateException} if a worker for the same host was already present.
 *
 * @param worker contains metadata for a worker that has appeared in ZK
 *
 * @return future that will contain a fully initialized worker
 */
private ListenableFuture<ZkWorker> addWorker(final Worker worker) {
    log.info("Worker[%s] reportin' for duty!", worker.getHost());
    try {
        cancelWorkerCleanup(worker.getHost());
        final String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), worker.getHost());
        final PathChildrenCache statusCache = workerStatusPathChildrenCacheFactory.make(cf, workerStatusPath);
        final SettableFuture<ZkWorker> retVal = SettableFuture.create();
        final ZkWorker zkWorker = new ZkWorker(worker, statusCache, jsonMapper);
        // Add status listener to the watcher for status changes
        zkWorker.addListener(new PathChildrenCacheListener() {

            @Override
            public void childEvent(CuratorFramework client, PathChildrenCacheEvent event) throws Exception {
                final String taskId;
                final RemoteTaskRunnerWorkItem taskRunnerWorkItem;
                synchronized (statusLock) {
                    try {
                        switch(event.getType()) {
                            case CHILD_ADDED:
                            case CHILD_UPDATED:
                                taskId = ZKPaths.getNodeFromPath(event.getData().getPath());
                                final TaskAnnouncement announcement = jsonMapper.readValue(event.getData().getData(), TaskAnnouncement.class);
                                log.info("Worker[%s] wrote %s status for task [%s] on [%s]", zkWorker.getWorker().getHost(), announcement.getTaskStatus().getStatusCode(), taskId, announcement.getTaskLocation());
                                // Synchronizing state with ZK
                                statusLock.notifyAll();
                                final RemoteTaskRunnerWorkItem tmp;
                                if ((tmp = runningTasks.get(taskId)) != null) {
                                    taskRunnerWorkItem = tmp;
                                } else {
                                    final RemoteTaskRunnerWorkItem newTaskRunnerWorkItem = new RemoteTaskRunnerWorkItem(taskId, zkWorker.getWorker(), TaskLocation.unknown());
                                    final RemoteTaskRunnerWorkItem existingItem = runningTasks.putIfAbsent(taskId, newTaskRunnerWorkItem);
                                    if (existingItem == null) {
                                        log.warn("Worker[%s] announced a status for a task I didn't know about, adding to runningTasks: %s", zkWorker.getWorker().getHost(), taskId);
                                        taskRunnerWorkItem = newTaskRunnerWorkItem;
                                    } else {
                                        taskRunnerWorkItem = existingItem;
                                    }
                                }
                                if (!announcement.getTaskLocation().equals(taskRunnerWorkItem.getLocation())) {
                                    taskRunnerWorkItem.setLocation(announcement.getTaskLocation());
                                    TaskRunnerUtils.notifyLocationChanged(listeners, taskId, announcement.getTaskLocation());
                                }
                                if (announcement.getTaskStatus().isComplete()) {
                                    taskComplete(taskRunnerWorkItem, zkWorker, announcement.getTaskStatus());
                                    runPendingTasks();
                                }
                                break;
                            case CHILD_REMOVED:
                                taskId = ZKPaths.getNodeFromPath(event.getData().getPath());
                                taskRunnerWorkItem = runningTasks.remove(taskId);
                                if (taskRunnerWorkItem != null) {
                                    log.info("Task[%s] just disappeared!", taskId);
                                    taskRunnerWorkItem.setResult(TaskStatus.failure(taskId));
                                    TaskRunnerUtils.notifyStatusChanged(listeners, taskId, TaskStatus.failure(taskId));
                                } else {
                                    log.info("Task[%s] went bye bye.", taskId);
                                }
                                break;
                            case INITIALIZED:
                                if (zkWorkers.putIfAbsent(worker.getHost(), zkWorker) == null) {
                                    retVal.set(zkWorker);
                                } else {
                                    final String message = String.format("WTF?! Tried to add already-existing worker[%s]", worker.getHost());
                                    log.makeAlert(message).addData("workerHost", worker.getHost()).addData("workerIp", worker.getIp()).emit();
                                    retVal.setException(new IllegalStateException(message));
                                }
                                runPendingTasks();
                                break;
                            default:
                                // Other event types (e.g. connection state changes) need no handling here.
                                break;
                        }
                    } catch (Exception e) {
                        // Events that are not about a child node (such as INITIALIZED) carry no data,
                        // so the znode path must be looked up null-safely; otherwise a secondary
                        // NullPointerException would escape this handler and the alert would be lost.
                        String znode = null;
                        if (event.getData() != null) {
                            znode = event.getData().getPath();
                        }
                        log.makeAlert(e, "Failed to handle new worker status").addData("worker", zkWorker.getWorker().getHost()).addData("znode", znode).emit();
                    }
                }
            }
        });
        zkWorker.start();
        return retVal;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
Also used : CuratorFramework(org.apache.curator.framework.CuratorFramework) PathChildrenCacheListener(org.apache.curator.framework.recipes.cache.PathChildrenCacheListener) PathChildrenCacheEvent(org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent) TaskAnnouncement(io.druid.indexing.worker.TaskAnnouncement) PathChildrenCache(org.apache.curator.framework.recipes.cache.PathChildrenCache) KeeperException(org.apache.zookeeper.KeeperException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 44 with PathChildrenCache

Use of org.apache.curator.framework.recipes.cache.PathChildrenCache in project druid by druid-io.

The class DruidCoordinatorTest, method setUp.

/**
 * Builds the test fixture: mocked collaborators, a started Curator client with the load path
 * created, a path-children cache and load-queue peon on that path, and the
 * {@code DruidCoordinator} under test wired to latches that trip on announce/unannounce.
 */
@Before
public void setUp() throws Exception {
    // Mocked collaborators.
    taskMaster = EasyMock.createMock(LoadQueueTaskMaster.class);
    druidServer = EasyMock.createMock(DruidServer.class);
    serverInventoryView = EasyMock.createMock(SingleServerInventoryView.class);
    databaseSegmentManager = EasyMock.createNiceMock(MetadataSegmentManager.class);
    metadataRuleManager = EasyMock.createNiceMock(MetadataRuleManager.class);
    configManager = EasyMock.createNiceMock(JacksonConfigManager.class);

    // Any watch() call on the config manager yields a default dynamic config.
    final AtomicReference defaultDynamicConfig =
        new AtomicReference(new CoordinatorDynamicConfig.Builder().build());
    EasyMock.expect(configManager.watch(EasyMock.anyString(), EasyMock.anyObject(Class.class), EasyMock.anyObject()))
            .andReturn(defaultDynamicConfig)
            .anyTimes();
    EasyMock.replay(configManager);

    // Curator client and the load path.
    setupServerAndCurator();
    curator.start();
    curator.blockUntilConnected();
    curator.create().creatingParentsIfNeeded().forPath(LOADPATH);

    objectMapper = new DefaultObjectMapper();
    druidCoordinatorConfig = new TestDruidCoordinatorConfig(
        new Duration(COORDINATOR_START_DELAY),
        new Duration(COORDINATOR_PERIOD),
        null,
        null,
        new Duration(COORDINATOR_PERIOD),
        null,
        10,
        null,
        false,
        false,
        new Duration("PT0s")
    );

    pathChildrenCache = new PathChildrenCache(
        curator,
        LOADPATH,
        true,
        true,
        Execs.singleThreaded("coordinator_test_path_children_cache-%d")
    );
    loadQueuePeon = new LoadQueuePeon(
        curator,
        LOADPATH,
        objectMapper,
        Execs.scheduledSingleThreaded("coordinator_test_load_queue_peon_scheduled-%d"),
        Execs.singleThreaded("coordinator_test_load_queue_peon-%d"),
        druidCoordinatorConfig
    );
    loadQueuePeon.start();

    druidNode = new DruidNode("hey", "what", 1234);
    loadManagementPeons = new MapMaker().makeMap();
    scheduledExecutorFactory = new ScheduledExecutorFactory() {

        @Override
        public ScheduledExecutorService create(int corePoolSize, final String nameFormat) {
            // Both arguments are ignored; every request gets a fresh single-thread scheduler.
            return Executors.newSingleThreadScheduledExecutor();
        }
    };
    leaderAnnouncerLatch = new CountDownLatch(1);
    leaderUnannouncerLatch = new CountDownLatch(1);

    final ZkPathsConfig zkPathsUnderDruid = new ZkPathsConfig() {

        @Override
        public String getBase() {
            return "druid";
        }
    };
    final NoopServiceAnnouncer latchTrippingAnnouncer = new NoopServiceAnnouncer() {

        @Override
        public void announce(DruidNode node) {
            // count down when this coordinator becomes the leader
            leaderAnnouncerLatch.countDown();
        }

        @Override
        public void unannounce(DruidNode node) {
            leaderUnannouncerLatch.countDown();
        }
    };

    coordinator = new DruidCoordinator(
        druidCoordinatorConfig,
        zkPathsUnderDruid,
        configManager,
        databaseSegmentManager,
        serverInventoryView,
        metadataRuleManager,
        curator,
        new NoopServiceEmitter(),
        scheduledExecutorFactory,
        null,
        taskMaster,
        latchTrippingAnnouncer,
        druidNode,
        loadManagementPeons,
        null,
        new CostBalancerStrategyFactory()
    );
}
Also used : MetadataSegmentManager(io.druid.metadata.MetadataSegmentManager) MetadataRuleManager(io.druid.metadata.MetadataRuleManager) ZkPathsConfig(io.druid.server.initialization.ZkPathsConfig) SingleServerInventoryView(io.druid.client.SingleServerInventoryView) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) JacksonConfigManager(io.druid.common.config.JacksonConfigManager) MapMaker(com.google.common.collect.MapMaker) ImmutableDruidServer(io.druid.client.ImmutableDruidServer) DruidServer(io.druid.client.DruidServer) AtomicReference(java.util.concurrent.atomic.AtomicReference) Duration(org.joda.time.Duration) NoopServiceEmitter(io.druid.server.metrics.NoopServiceEmitter) CountDownLatch(java.util.concurrent.CountDownLatch) ScheduledExecutorFactory(io.druid.java.util.common.concurrent.ScheduledExecutorFactory) PathChildrenCache(org.apache.curator.framework.recipes.cache.PathChildrenCache) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) DruidNode(io.druid.server.DruidNode) NoopServiceAnnouncer(io.druid.curator.discovery.NoopServiceAnnouncer) Before(org.junit.Before)

Example 45 with PathChildrenCache

Use of org.apache.curator.framework.recipes.cache.PathChildrenCache in project druid by druid-io.

The class LoadQueuePeonTest, method setUp.

/**
 * Prepares the fixture before each test: starts the Curator client, creates
 * {@code LOAD_QUEUE_PATH}, and constructs (but does not start) the load-queue cache on it.
 */
@Before
public void setUp() throws Exception {
    // Helper defined outside this method; presumably initializes the `curator` field used below.
    setupServerAndCurator();
    curator.start();
    // Block until the client reports a connection before touching any path.
    curator.blockUntilConnected();
    curator.create().creatingParentsIfNeeded().forPath(LOAD_QUEUE_PATH);
    // The cache is only constructed here with its own single-threaded executor; start() is not called in this method.
    loadQueueCache = new PathChildrenCache(curator, LOAD_QUEUE_PATH, true, true, Execs.singleThreaded("load_queue_cache-%d"));
}
Also used : PathChildrenCache(org.apache.curator.framework.recipes.cache.PathChildrenCache) Before(org.junit.Before)

Aggregations

PathChildrenCache (org.apache.curator.framework.recipes.cache.PathChildrenCache)73 IOException (java.io.IOException)25 CuratorFramework (org.apache.curator.framework.CuratorFramework)21 PathChildrenCacheListener (org.apache.curator.framework.recipes.cache.PathChildrenCacheListener)20 PathChildrenCacheEvent (org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent)14 KeeperException (org.apache.zookeeper.KeeperException)11 AtomicReference (java.util.concurrent.atomic.AtomicReference)8 ChildData (org.apache.curator.framework.recipes.cache.ChildData)8 CountDownLatch (java.util.concurrent.CountDownLatch)6 ExponentialBackoffRetry (org.apache.curator.retry.ExponentialBackoffRetry)6 Before (org.junit.Before)5 Test (org.junit.Test)5 ConcurrentMap (java.util.concurrent.ConcurrentMap)4 ExecutorService (java.util.concurrent.ExecutorService)4 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)4 Slf4j (lombok.extern.slf4j.Slf4j)4 ZKPaths (org.apache.curator.utils.ZKPaths)4 Preconditions (com.google.common.base.Preconditions)3 StreamImpl (io.pravega.client.stream.impl.StreamImpl)3 CompletableFuture (java.util.concurrent.CompletableFuture)3