Search in sources:

Example 6 with WorkerBehaviorConfig

use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.

In class PendingTaskBasedWorkerResourceManagementStrategy, method doProvision:

/**
 * Runs one provisioning pass against the configured autoscaler.
 *
 * <p>Node ids resolved from the runner's current workers are first removed from
 * {@code currentlyProvisioning}. If nothing is left outstanding, the autoscaler is asked
 * to provision repeatedly until the scale-up count returned by {@code getScaleUpNodeCount}
 * is covered or a provision call yields no node ids. If nodes are still outstanding, no
 * new provisioning is attempted; once the wait exceeds the configured max scaling
 * duration an alert is emitted, the outstanding nodes are terminated by id and the
 * tracking collection is cleared.
 *
 * @param runner task runner supplying the pending task payloads, workers and runner config
 * @return {@code true} if at least one provision call in this pass returned node ids;
 *         {@code false} otherwise, including when no worker config or autoscaler is set
 */
@Override
public boolean doProvision(WorkerTaskRunner runner) {
    // Runner state is read before the lock is taken, exactly once per pass.
    Collection<Task> pendingTasks = runner.getPendingTaskPayloads();
    Collection<ImmutableWorkerInfo> workers = runner.getWorkers();
    synchronized (lock) {
        final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
        if (workerConfig == null || workerConfig.getAutoScaler() == null) {
            log.error("No workerConfig available, cannot provision new workers.");
            return false;
        }

        final Function<ImmutableWorkerInfo, Worker> toWorker = new Function<ImmutableWorkerInfo, Worker>() {

            @Override
            public Worker apply(ImmutableWorkerInfo input) {
                return input.getWorker();
            }
        };
        final Collection<String> knownNodeIds = getWorkerNodeIDs(Collections2.transform(workers, toWorker), workerConfig);
        // Anything the runner already reports as a worker is no longer "being provisioned".
        currentlyProvisioning.removeAll(knownNodeIds);

        if (!currentlyProvisioning.isEmpty()) {
            // Earlier provisioning is still outstanding: wait, or give up after the timeout.
            final Duration sinceLastProvision = new Duration(lastProvisionTime, new DateTime());
            log.info("%s provisioning. Current wait time: %s", currentlyProvisioning, sinceLastProvision);
            final boolean timedOut = sinceLastProvision.isLongerThan(config.getMaxScalingDuration().toStandardDuration());
            if (timedOut) {
                log.makeAlert("Worker node provisioning taking too long!")
                    .addData("millisSinceLastProvision", sinceLastProvision.getMillis())
                    .addData("provisioningCount", currentlyProvisioning.size())
                    .emit();
                workerConfig.getAutoScaler().terminateWithIds(Lists.newArrayList(currentlyProvisioning));
                currentlyProvisioning.clear();
            }
            return false;
        }

        boolean provisionedAny = false;
        int remaining = getScaleUpNodeCount(runner.getConfig(), workerConfig, pendingTasks, workers);
        while (remaining > 0) {
            final AutoScalingData provisioned = workerConfig.getAutoScaler().provision();
            final List<String> newNodes;
            if (provisioned == null) {
                newNodes = ImmutableList.<String>of();
            } else {
                newNodes = provisioned.getNodeIds();
            }
            if (newNodes.isEmpty()) {
                log.warn("NewNodes is empty, returning from provision loop");
                break;
            }
            currentlyProvisioning.addAll(newNodes);
            lastProvisionTime = new DateTime();
            scalingStats.addProvisionEvent(provisioned);
            remaining -= provisioned.getNodeIds().size();
            provisionedAny = true;
        }
        return provisionedAny;
    }
}
Also used : Task(io.druid.indexing.common.task.Task) Duration(org.joda.time.Duration) ImmutableWorkerInfo(io.druid.indexing.overlord.ImmutableWorkerInfo) DateTime(org.joda.time.DateTime) WorkerBehaviorConfig(io.druid.indexing.overlord.setup.WorkerBehaviorConfig) Function(com.google.common.base.Function)

Example 7 with WorkerBehaviorConfig

use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.

In class SimpleWorkerResourceManagementStrategy, method doTerminate:

/**
 * Runs one termination pass against the configured autoscaler.
 *
 * <p>{@code currentlyTerminating} is first narrowed to the ids that the autoscaler still
 * resolves from the runner's lazy workers. If nothing is then outstanding and the number
 * of workers plus nodes being provisioned exceeds {@code targetWorkerCount}, up to that
 * many workers are marked lazy and handed to the autoscaler for termination. If
 * terminations are still outstanding, nothing new is terminated; once the wait exceeds
 * the configured max scaling duration an alert is emitted and the tracking collection
 * is cleared.
 *
 * @param runner task runner supplying pending tasks, lazy workers and current workers
 * @return {@code true} if the autoscaler returned a non-null result for a terminate call
 *         in this pass; {@code false} otherwise, including when no worker config or no
 *         autoscaler is available
 */
boolean doTerminate(WorkerTaskRunner runner) {
    Collection<? extends TaskRunnerWorkItem> pendingTasks = runner.getPendingTasks();
    synchronized (lock) {
        final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
        if (workerConfig == null) {
            log.warn("No workerConfig available, cannot terminate workers.");
            return false;
        }
        // Every step below goes through the autoscaler; without this guard a config
        // that carries no autoscaler fails with a NullPointerException.
        if (workerConfig.getAutoScaler() == null) {
            log.warn("No autoScaler available, cannot terminate workers.");
            return false;
        }
        boolean didTerminate = false;
        final Set<String> workerNodeIds = Sets.newHashSet(workerConfig.getAutoScaler().ipToIdLookup(Lists.newArrayList(Iterables.transform(runner.getLazyWorkers(), new Function<Worker, String>() {

            @Override
            public String apply(Worker input) {
                return input.getIp();
            }
        }))));
        // Keep only the terminating ids that the lookup above still returns.
        final Set<String> stillExisting = Sets.newHashSet();
        for (String s : currentlyTerminating) {
            if (workerNodeIds.contains(s)) {
                stillExisting.add(s);
            }
        }
        currentlyTerminating.clear();
        currentlyTerminating.addAll(stillExisting);
        Collection<ImmutableWorkerInfo> workers = getWorkers(runner);
        updateTargetWorkerCount(workerConfig, pendingTasks, workers);
        if (currentlyTerminating.isEmpty()) {
            // Nodes still being provisioned count toward capacity when computing the excess.
            final int excessWorkers = (workers.size() + currentlyProvisioning.size()) - targetWorkerCount;
            if (excessWorkers > 0) {
                final Predicate<ImmutableWorkerInfo> isLazyWorker = ResourceManagementUtil.createLazyWorkerPredicate(config);
                final Collection<String> laziestWorkerIps = Collections2.transform(runner.markWorkersLazy(isLazyWorker, excessWorkers), new Function<Worker, String>() {

                    @Override
                    public String apply(Worker worker) {
                        return worker.getIp();
                    }
                });
                if (laziestWorkerIps.isEmpty()) {
                    log.info("Wanted to terminate %,d workers, but couldn't find any lazy ones!", excessWorkers);
                } else {
                    log.info("Terminating %,d workers (wanted %,d): %s", laziestWorkerIps.size(), excessWorkers, Joiner.on(", ").join(laziestWorkerIps));
                    final AutoScalingData terminated = workerConfig.getAutoScaler().terminate(ImmutableList.copyOf(laziestWorkerIps));
                    if (terminated != null) {
                        currentlyTerminating.addAll(terminated.getNodeIds());
                        lastTerminateTime = new DateTime();
                        scalingStats.addTerminateEvent(terminated);
                        didTerminate = true;
                    }
                }
            }
        } else {
            // Earlier terminations are still outstanding: wait, or stop tracking after the timeout.
            Duration durSinceLastTerminate = new Duration(lastTerminateTime, new DateTime());
            log.info("%s terminating. Current wait time: %s", currentlyTerminating, durSinceLastTerminate);
            if (durSinceLastTerminate.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
                log.makeAlert("Worker node termination taking too long!").addData("millisSinceLastTerminate", durSinceLastTerminate.getMillis()).addData("terminatingCount", currentlyTerminating.size()).emit();
                currentlyTerminating.clear();
            }
        }
        return didTerminate;
    }
}
Also used : Duration(org.joda.time.Duration) ImmutableWorkerInfo(io.druid.indexing.overlord.ImmutableWorkerInfo) DateTime(org.joda.time.DateTime) WorkerBehaviorConfig(io.druid.indexing.overlord.setup.WorkerBehaviorConfig) Function(com.google.common.base.Function) Worker(io.druid.indexing.worker.Worker)

Example 8 with WorkerBehaviorConfig

use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.

In class SimpleResourceManagementStrategyTest, method setUp:

/**
 * Builds the fixture used by each test: a mocked {@code AutoScaler}, an
 * immediately-succeeding test task, and a {@code SimpleWorkerResourceManagementStrategy}
 * wired to a {@code WorkerBehaviorConfig} that holds the mocked autoscaler.
 */
@Before
public void setUp() throws Exception {
    autoScaler = EasyMock.createMock(AutoScaler.class);
    testTask = TestTasks.immediateSuccess("task1");

    // Zero idle/pending timeouts and a 1000 ms max scaling duration, as set below.
    final SimpleWorkerResourceManagementConfig strategyConfig = new SimpleWorkerResourceManagementConfig()
        .setWorkerIdleTimeout(new Period(0))
        .setMaxScalingDuration(new Period(1000))
        .setNumEventsToTrack(1)
        .setPendingTaskTimeout(new Period(0))
        .setWorkerVersion("");
    final ResourceManagementSchedulerConfig schedulingConfig = new ResourceManagementSchedulerConfig();

    workerConfig = new AtomicReference<>(new WorkerBehaviorConfig(null, autoScaler));
    simpleResourceManagementStrategy = new SimpleWorkerResourceManagementStrategy(
        strategyConfig,
        DSuppliers.of(workerConfig),
        schedulingConfig,
        executorService
    );
}
Also used : WorkerBehaviorConfig(io.druid.indexing.overlord.setup.WorkerBehaviorConfig) Period(org.joda.time.Period) Before(org.junit.Before)

Aggregations

WorkerBehaviorConfig (io.druid.indexing.overlord.setup.WorkerBehaviorConfig)8 ImmutableWorkerInfo (io.druid.indexing.overlord.ImmutableWorkerInfo)4 DateTime (org.joda.time.DateTime)4 Duration (org.joda.time.Duration)4 Function (com.google.common.base.Function)3 Worker (io.druid.indexing.worker.Worker)2 Period (org.joda.time.Period)2 Before (org.junit.Before)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 HttpClient (com.metamx.http.client.HttpClient)1 Task (io.druid.indexing.common.task.Task)1 ResourceManagementSchedulerConfig (io.druid.indexing.overlord.autoscaling.ResourceManagementSchedulerConfig)1 SimpleWorkerResourceManagementConfig (io.druid.indexing.overlord.autoscaling.SimpleWorkerResourceManagementConfig)1 SimpleWorkerResourceManagementStrategy (io.druid.indexing.overlord.autoscaling.SimpleWorkerResourceManagementStrategy)1 RemoteTaskRunnerConfig (io.druid.indexing.overlord.config.RemoteTaskRunnerConfig)1 FillCapacityWorkerSelectStrategy (io.druid.indexing.overlord.setup.FillCapacityWorkerSelectStrategy)1 WorkerSelectStrategy (io.druid.indexing.overlord.setup.WorkerSelectStrategy)1 ScheduledExecutorFactory (io.druid.java.util.common.concurrent.ScheduledExecutorFactory)1 IndexerZkConfig (io.druid.server.initialization.IndexerZkConfig)1 ZkPathsConfig (io.druid.server.initialization.ZkPathsConfig)1