Search in sources :

Example 1 with WorkerBehaviorConfig

use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.

the class RemoteTaskRunner method tryAssignTask.

/**
 * Attempts to hand the given task to a worker, unless something is already running it.
 * A worker may be running a task that the RTR has no knowledge of; this occurs when the RTR
 * needs to bootstrap after a restart. Such a task is reported as assigned without any further work.
 *
 * @param task               the task to assign
 * @param taskRunnerWorkItem the work item for the task; its task id must equal {@code task.getId()}
 *
 * @return true iff the task is now assigned
 *
 * @throws Exception declared by this method; presumably propagated from the helpers it calls
 *                   (e.g. announceTask) - TODO confirm
 */
private boolean tryAssignTask(final Task task, final RemoteTaskRunnerWorkItem taskRunnerWorkItem) throws Exception {
    Preconditions.checkNotNull(task, "task");
    Preconditions.checkNotNull(taskRunnerWorkItem, "taskRunnerWorkItem");
    Preconditions.checkArgument(task.getId().equals(taskRunnerWorkItem.getTaskId()), "task id != workItem id");

    if (runningTasks.containsKey(task.getId()) || findWorkerRunningTask(task.getId()) != null) {
        log.info("Task[%s] already running.", task.getId());
        return true;
    }

    // Nothing running this task, announce it in ZK for a worker to run it
    final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
    final boolean strategyConfigured = workerConfig != null && workerConfig.getSelectStrategy() != null;
    final WorkerSelectStrategy strategy;
    if (strategyConfigured) {
        strategy = workerConfig.getSelectStrategy();
    } else {
        log.warn("No worker selections strategy set. Using default.");
        strategy = WorkerBehaviorConfig.DEFAULT_STRATEGY;
    }

    ZkWorker assignedWorker = null;
    try {
        // Candidate selection and the host reservation happen under the same lock.
        synchronized (workersWithUnacknowledgedTask) {
            // A worker is a candidate only if it is not lazy, not awaiting a task ack, and not blacklisted.
            final Predicate<Map.Entry<String, ZkWorker>> isCandidate = new Predicate<Map.Entry<String, ZkWorker>>() {

                @Override
                public boolean apply(Map.Entry<String, ZkWorker> input) {
                    return !(lazyWorkers.containsKey(input.getKey())
                             || workersWithUnacknowledgedTask.containsKey(input.getKey())
                             || blackListedWorkers.contains(input.getValue()));
                }
            };
            final Maps.EntryTransformer<String, ZkWorker, ImmutableWorkerInfo> toImmutable = new Maps.EntryTransformer<String, ZkWorker, ImmutableWorkerInfo>() {

                @Override
                public ImmutableWorkerInfo transformEntry(String key, ZkWorker value) {
                    return value.toImmutable();
                }
            };
            final Map<String, ZkWorker> candidates = Maps.filterEntries(zkWorkers, isCandidate);
            final Optional<ImmutableWorkerInfo> selection = strategy.findWorkerForTask(
                config,
                ImmutableMap.copyOf(Maps.transformEntries(candidates, toImmutable)),
                task
            );

            if (selection.isPresent()) {
                final ImmutableWorkerInfo chosen = selection.get();
                // Reserve the host for this task; only the attempt that wins the putIfAbsent proceeds.
                if (workersWithUnacknowledgedTask.putIfAbsent(chosen.getWorker().getHost(), task.getId()) == null) {
                    // NOTE(review): if zkWorkers.get(...) yields null here, the reservation made above is
                    // not released by the finally block below - confirm whether that can happen.
                    assignedWorker = zkWorkers.get(chosen.getWorker().getHost());
                }
            }
        }

        if (assignedWorker == null) {
            log.debug("Unsuccessful task-assign attempt for task [%s] on workers [%s]. Workers to ack tasks are [%s].", task.getId(), zkWorkers.values(), workersWithUnacknowledgedTask);
            return false;
        }
        return announceTask(task, assignedWorker, taskRunnerWorkItem);
    } finally {
        if (assignedWorker != null) {
            workersWithUnacknowledgedTask.remove(assignedWorker.getWorker().getHost());
            //if this attempt won the race to run the task then other task might be able to use this worker now after task ack.
            runPendingTasks();
        }
    }
}
Also used : WorkerBehaviorConfig(io.druid.indexing.overlord.setup.WorkerBehaviorConfig) WorkerSelectStrategy(io.druid.indexing.overlord.setup.WorkerSelectStrategy) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)

Example 2 with WorkerBehaviorConfig

use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.

the class PendingTaskBasedWorkerResourceManagementStrategy method doTerminate.

/**
 * Asks the configured auto scaler to terminate lazy workers, or keeps waiting on a termination
 * that is still in flight.
 *
 * Nothing is terminated when no worker config or auto scaler is available, or while nodes are
 * still being provisioned. Node ids in {@code currentlyTerminating} that are no longer among the
 * lazy workers' node ids are dropped. If that leaves the collection empty, a new batch of lazy
 * workers is marked and handed to the auto scaler. Otherwise the method only reports the wait time
 * and, once it exceeds the configured max scaling duration, emits an alert and clears the
 * collection.
 *
 * @param runner task runner supplying the current workers and the lazy workers
 *
 * @return true iff the auto scaler returned termination data during this call
 */
@Override
public boolean doTerminate(WorkerTaskRunner runner) {
    Collection<ImmutableWorkerInfo> zkWorkers = runner.getWorkers();
    synchronized (lock) {
        final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
        // The auto scaler is dereferenced further down; bail out here instead of failing with a
        // NullPointerException when the worker config carries no auto scaler.
        if (workerConfig == null || workerConfig.getAutoScaler() == null) {
            log.warn("No workerConfig available, cannot terminate workers.");
            return false;
        }
        if (!currentlyProvisioning.isEmpty()) {
            log.debug("Already provisioning nodes, Not Terminating any nodes.");
            return false;
        }
        boolean didTerminate = false;
        final Collection<String> workerNodeIds = getWorkerNodeIDs(runner.getLazyWorkers(), workerConfig);
        // Keep only those terminating node ids that still show up among the lazy workers' node ids.
        final Set<String> stillExisting = Sets.newHashSet();
        for (String s : currentlyTerminating) {
            if (workerNodeIds.contains(s)) {
                stillExisting.add(s);
            }
        }
        currentlyTerminating.clear();
        currentlyTerminating.addAll(stillExisting);
        if (currentlyTerminating.isEmpty()) {
            // No termination in flight: pick a new batch of lazy workers to terminate.
            final int maxWorkersToTerminate = maxWorkersToTerminate(zkWorkers, workerConfig);
            final Predicate<ImmutableWorkerInfo> isLazyWorker = ResourceManagementUtil.createLazyWorkerPredicate(config);
            final List<String> laziestWorkerIps = Lists.newArrayList(Collections2.transform(runner.markWorkersLazy(isLazyWorker, maxWorkersToTerminate), new Function<Worker, String>() {

                @Override
                public String apply(Worker zkWorker) {
                    return zkWorker.getIp();
                }
            }));
            if (laziestWorkerIps.isEmpty()) {
                log.debug("Found no lazy workers");
            } else {
                log.info("Terminating %,d lazy workers: %s", laziestWorkerIps.size(), Joiner.on(", ").join(laziestWorkerIps));
                final AutoScalingData terminated = workerConfig.getAutoScaler().terminate(laziestWorkerIps);
                if (terminated != null) {
                    currentlyTerminating.addAll(terminated.getNodeIds());
                    lastTerminateTime = new DateTime();
                    scalingStats.addTerminateEvent(terminated);
                    didTerminate = true;
                }
            }
        } else {
            // A previous termination is still pending: report the wait and give up on it after the timeout.
            Duration durSinceLastTerminate = new Duration(lastTerminateTime, new DateTime());
            log.info("%s terminating. Current wait time: %s", currentlyTerminating, durSinceLastTerminate);
            if (durSinceLastTerminate.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
                log.makeAlert("Worker node termination taking too long!").addData("millisSinceLastTerminate", durSinceLastTerminate.getMillis()).addData("terminatingCount", currentlyTerminating.size()).emit();
                currentlyTerminating.clear();
            }
        }
        return didTerminate;
    }
}
Also used : Duration(org.joda.time.Duration) ImmutableWorkerInfo(io.druid.indexing.overlord.ImmutableWorkerInfo) DateTime(org.joda.time.DateTime) WorkerBehaviorConfig(io.druid.indexing.overlord.setup.WorkerBehaviorConfig) Function(com.google.common.base.Function) Worker(io.druid.indexing.worker.Worker)

Example 3 with WorkerBehaviorConfig

use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.

the class SimpleWorkerResourceManagementStrategy method doProvision.

/**
 * Provisions new workers through the configured auto scaler until the target worker count is met.
 *
 * Node ids that the auto scaler resolves from the current workers' IPs are removed from
 * {@code currentlyProvisioning} first. If nodes are still provisioning at the end, the wait time is
 * logged. Once it exceeds the configured max scaling duration, an alert is emitted, those nodes are
 * terminated by id and the collection is cleared.
 *
 * @param runner task runner supplying the pending tasks and the current workers
 *
 * @return true iff at least one provision call produced new node ids during this call
 */
protected boolean doProvision(WorkerTaskRunner runner) {
    final Collection<? extends TaskRunnerWorkItem> pendingTasks = runner.getPendingTasks();
    final Collection<ImmutableWorkerInfo> workers = getWorkers(runner);
    synchronized (lock) {
        final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
        if (workerConfig == null || workerConfig.getAutoScaler() == null) {
            log.warn("No workerConfig available, cannot provision new workers.");
            return false;
        }

        final Predicate<ImmutableWorkerInfo> isValidWorker = ResourceManagementUtil.createValidWorkerPredicate(config);
        final int currValidWorkers = Collections2.filter(workers, isValidWorker).size();

        // Maps each known worker to its IP so the auto scaler can translate IPs to node ids.
        final Function<ImmutableWorkerInfo, String> toIp = new Function<ImmutableWorkerInfo, String>() {

            @Override
            public String apply(ImmutableWorkerInfo input) {
                return input.getWorker().getIp();
            }
        };
        final List<String> workerNodeIds = workerConfig.getAutoScaler().ipToIdLookup(
            Lists.newArrayList(Iterables.transform(workers, toIp))
        );
        // Nodes whose ids now resolve from a known worker are no longer "provisioning".
        currentlyProvisioning.removeAll(workerNodeIds);

        updateTargetWorkerCount(workerConfig, pendingTasks, workers);

        boolean didProvision = false;
        int stillWanted = targetWorkerCount - (currValidWorkers + currentlyProvisioning.size());
        while (stillWanted > 0) {
            final AutoScalingData provisioned = workerConfig.getAutoScaler().provision();
            if (provisioned == null) {
                break;
            }
            final List<String> newNodes = provisioned.getNodeIds();
            if (newNodes.isEmpty()) {
                break;
            }
            currentlyProvisioning.addAll(newNodes);
            lastProvisionTime = new DateTime();
            scalingStats.addProvisionEvent(provisioned);
            stillWanted -= provisioned.getNodeIds().size();
            didProvision = true;
        }

        if (currentlyProvisioning.isEmpty()) {
            return didProvision;
        }

        // Something is still provisioning: report the wait and give up on it after the timeout.
        final Duration durSinceLastProvision = new Duration(lastProvisionTime, new DateTime());
        log.info("%s provisioning. Current wait time: %s", currentlyProvisioning, durSinceLastProvision);
        if (durSinceLastProvision.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
            log.makeAlert("Worker node provisioning taking too long!")
               .addData("millisSinceLastProvision", durSinceLastProvision.getMillis())
               .addData("provisioningCount", currentlyProvisioning.size())
               .emit();
            workerConfig.getAutoScaler().terminateWithIds(Lists.newArrayList(currentlyProvisioning));
            currentlyProvisioning.clear();
        }
        return didProvision;
    }
}
Also used : Duration(org.joda.time.Duration) ImmutableWorkerInfo(io.druid.indexing.overlord.ImmutableWorkerInfo) DateTime(org.joda.time.DateTime) WorkerBehaviorConfig(io.druid.indexing.overlord.setup.WorkerBehaviorConfig)

Example 4 with WorkerBehaviorConfig

use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.

the class RemoteTaskRunnerFactoryTest method testExecNotSharedBetweenRunners.

/**
 * Counts how many executors are requested from the executor factory: one after the strategy and
 * the runner factory are constructed, and one more for each subsequent {@code build()} call.
 */
@Test
public void testExecNotSharedBetweenRunners() {
    // Incremented every time the factory below is asked for a new executor.
    final AtomicInteger createdExecutors = new AtomicInteger(0);
    final ScheduledExecutorFactory countingExecutorFactory = new ScheduledExecutorFactory() {

        @Override
        public ScheduledExecutorService create(int i, String s) {
            createdExecutors.incrementAndGet();
            return ScheduledExecutors.fixed(i, s);
        }
    };

    final RemoteTaskRunnerConfig config = new RemoteTaskRunnerConfig();
    final ZkPathsConfig zkPaths = new ZkPathsConfig() {

        @Override
        public String getBase() {
            return basePath;
        }
    };
    final IndexerZkConfig indexerZkConfig = new IndexerZkConfig(zkPaths, null, null, null, null, null);
    final HttpClient httpClient = EasyMock.createMock(HttpClient.class);
    final Supplier<WorkerBehaviorConfig> workerBehaviorConfig = EasyMock.createMock(Supplier.class);

    final SimpleWorkerResourceManagementConfig resourceManagementConfig = new SimpleWorkerResourceManagementConfig();
    final ResourceManagementSchedulerConfig autoscalingEnabled = new ResourceManagementSchedulerConfig() {

        @Override
        public boolean isDoAutoscale() {
            return true;
        }
    };

    final SimpleWorkerResourceManagementStrategy strategy = new SimpleWorkerResourceManagementStrategy(
        resourceManagementConfig,
        workerBehaviorConfig,
        autoscalingEnabled,
        countingExecutorFactory
    );
    final RemoteTaskRunnerFactory factory = new RemoteTaskRunnerFactory(
        cf,
        config,
        indexerZkConfig,
        jsonMapper,
        httpClient,
        workerBehaviorConfig,
        countingExecutorFactory,
        autoscalingEnabled,
        strategy
    );
    Assert.assertEquals(1, createdExecutors.get());

    // Each build() is expected to request exactly one additional executor.
    factory.build();
    Assert.assertEquals(2, createdExecutors.get());

    factory.build();
    Assert.assertEquals(3, createdExecutors.get());
}
Also used : IndexerZkConfig(io.druid.server.initialization.IndexerZkConfig) SimpleWorkerResourceManagementConfig(io.druid.indexing.overlord.autoscaling.SimpleWorkerResourceManagementConfig) WorkerBehaviorConfig(io.druid.indexing.overlord.setup.WorkerBehaviorConfig) ScheduledExecutorFactory(io.druid.java.util.common.concurrent.ScheduledExecutorFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ZkPathsConfig(io.druid.server.initialization.ZkPathsConfig) HttpClient(com.metamx.http.client.HttpClient) SimpleWorkerResourceManagementStrategy(io.druid.indexing.overlord.autoscaling.SimpleWorkerResourceManagementStrategy) RemoteTaskRunnerConfig(io.druid.indexing.overlord.config.RemoteTaskRunnerConfig) ResourceManagementSchedulerConfig(io.druid.indexing.overlord.autoscaling.ResourceManagementSchedulerConfig) Test(org.junit.Test)

Example 5 with WorkerBehaviorConfig

use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.

the class PendingTaskBasedResourceManagementStrategyTest method setUp.

/**
 * Creates a fresh mock auto scaler, test task, resource management config and strategy
 * before each test.
 */
@Before
public void setUp() throws Exception {
    autoScaler = EasyMock.createMock(AutoScaler.class);
    testTask = TestTasks.immediateSuccess("task1");

    config = new PendingTaskBasedWorkerResourceManagementConfig()
        .setMaxScalingDuration(new Period(1000))
        .setNumEventsToTrack(10)
        .setPendingTaskTimeout(new Period(0))
        .setWorkerVersion(MIN_VERSION)
        .setMaxScalingStep(2);

    // The strategy reads its worker behavior config through this reference.
    final WorkerBehaviorConfig behaviorConfig = new WorkerBehaviorConfig(
        new FillCapacityWorkerSelectStrategy(),
        autoScaler
    );
    workerConfig = new AtomicReference<>(behaviorConfig);

    strategy = new PendingTaskBasedWorkerResourceManagementStrategy(
        config,
        DSuppliers.of(workerConfig),
        new ResourceManagementSchedulerConfig(),
        executorService
    );
}
Also used : WorkerBehaviorConfig(io.druid.indexing.overlord.setup.WorkerBehaviorConfig) FillCapacityWorkerSelectStrategy(io.druid.indexing.overlord.setup.FillCapacityWorkerSelectStrategy) Period(org.joda.time.Period) Before(org.junit.Before)

Aggregations

WorkerBehaviorConfig (io.druid.indexing.overlord.setup.WorkerBehaviorConfig)8 ImmutableWorkerInfo (io.druid.indexing.overlord.ImmutableWorkerInfo)4 DateTime (org.joda.time.DateTime)4 Duration (org.joda.time.Duration)4 Function (com.google.common.base.Function)3 Worker (io.druid.indexing.worker.Worker)2 Period (org.joda.time.Period)2 Before (org.junit.Before)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 HttpClient (com.metamx.http.client.HttpClient)1 Task (io.druid.indexing.common.task.Task)1 ResourceManagementSchedulerConfig (io.druid.indexing.overlord.autoscaling.ResourceManagementSchedulerConfig)1 SimpleWorkerResourceManagementConfig (io.druid.indexing.overlord.autoscaling.SimpleWorkerResourceManagementConfig)1 SimpleWorkerResourceManagementStrategy (io.druid.indexing.overlord.autoscaling.SimpleWorkerResourceManagementStrategy)1 RemoteTaskRunnerConfig (io.druid.indexing.overlord.config.RemoteTaskRunnerConfig)1 FillCapacityWorkerSelectStrategy (io.druid.indexing.overlord.setup.FillCapacityWorkerSelectStrategy)1 WorkerSelectStrategy (io.druid.indexing.overlord.setup.WorkerSelectStrategy)1 ScheduledExecutorFactory (io.druid.java.util.common.concurrent.ScheduledExecutorFactory)1 IndexerZkConfig (io.druid.server.initialization.IndexerZkConfig)1 ZkPathsConfig (io.druid.server.initialization.ZkPathsConfig)1