Search in sources :

Example 96 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

the class TaskLockbox method revokeLock.

/**
 * Marks the given lock as revoked without removing it. A revoked lock stays in {@link #running} and
 * {@link #taskStorage} exactly like a regular lock, so that a later request to acquire it can detect the revocation
 * and report it to the caller. Revoked locks are only removed by calling {@link #unlock(Task, Interval)}.
 *
 * @param taskId an id of the task holding the lock
 * @param lock   lock to be revoked
 */
@VisibleForTesting
protected void revokeLock(String taskId, TaskLock lock) {
    giant.lock();
    try {
        if (!activeTasks.contains(taskId)) {
            throw new ISE("Cannot revoke lock for inactive task[%s]", taskId);
        }

        final Task owner = taskStorage.getTask(taskId).orNull();
        if (owner == null) {
            throw new ISE("Cannot revoke lock for unknown task[%s]", taskId);
        }

        log.info("Revoking task lock[%s] for task[%s]", lock, taskId);

        // Nothing to do for a lock that has already been revoked; the finally block still releases giant.
        if (lock.isRevoked()) {
            log.warn("TaskLock[%s] is already revoked", lock);
            return;
        }

        final TaskLock revoked = lock.revokedCopy();
        taskStorage.replaceLock(taskId, lock, revoked);

        final List<TaskLockPosse> posses = running
            .get(owner.getDataSource())
            .get(lock.getInterval().getStart())
            .get(lock.getInterval());

        // Locate the first posse that still wraps the un-revoked lock.
        TaskLockPosse match = null;
        for (TaskLockPosse candidate : posses) {
            if (candidate.getTaskLock().equals(lock)) {
                match = candidate;
                break;
            }
        }
        if (match == null) {
            throw new ISE("Failed to find lock posse for lock[%s]", lock);
        }

        // Swap the posse for a copy that carries the revoked lock.
        posses.remove(match);
        posses.add(match.withTaskLock(revoked));
        log.info("Revoked taskLock[%s]", lock);
    } finally {
        giant.unlock();
    }
}
Also used : Task(org.apache.druid.indexing.common.task.Task) TaskLock(org.apache.druid.indexing.common.TaskLock) ISE(org.apache.druid.java.util.common.ISE) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 97 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

the class SeekableStreamIndexTaskClient method pause.

/**
 * Sends a "pause" request to the given task and returns the offsets it reports.
 *
 * <ul>
 * <li>On an {@code OK} response, the response body is deserialized and returned.</li>
 * <li>On an {@code ACCEPTED} response, the task status is polled until it becomes {@code PAUSED}, sleeping between
 * attempts for the delay supplied by the retry policy; the current offsets are then fetched and returned.</li>
 * <li>If the task location is unknown ({@code NoTaskLocationException}), an empty map is returned.</li>
 * </ul>
 *
 * @param id id of the task to pause
 * @return the offsets reported by the task, or an empty map if the task location is unknown
 * @throws ISE if the response status is neither {@code OK} nor {@code ACCEPTED}, or if the retry policy is exhausted
 *             before the task reports {@code PAUSED}
 * @throws RE  if an {@link IOException} occurs or the calling thread is interrupted while waiting; in the latter
 *             case the thread's interrupt flag is restored before throwing
 */
public Map<PartitionIdType, SequenceOffsetType> pause(final String id) {
    log.debug("Pause task[%s]", id);
    try {
        final StringFullResponseHolder response = submitRequestWithEmptyContent(id, HttpMethod.POST, "pause", null, true);
        final HttpResponseStatus responseStatus = response.getStatus();
        final String responseContent = response.getContent();
        if (responseStatus.equals(HttpResponseStatus.OK)) {
            log.info("Task [%s] paused successfully", id);
            return deserializeMap(responseContent, Map.class, getPartitionType(), getSequenceType());
        } else if (responseStatus.equals(HttpResponseStatus.ACCEPTED)) {
            // The task received the pause request, but its status hasn't been changed yet.
            final RetryPolicy retryPolicy = newRetryPolicy();
            while (true) {
                final SeekableStreamIndexTaskRunner.Status status = getStatus(id);
                if (status == SeekableStreamIndexTaskRunner.Status.PAUSED) {
                    return getCurrentOffsets(id, true);
                }
                final Duration delay = retryPolicy.getAndIncrementRetryDelay();
                if (delay == null) {
                    // A null delay means the retry policy has no attempts left.
                    throw new ISE("Task [%s] failed to change its status from [%s] to [%s], aborting", id, status, SeekableStreamIndexTaskRunner.Status.PAUSED);
                } else {
                    log.info("Still waiting for task [%s] to change its status to [%s]; will try again in [%s]", id, SeekableStreamIndexTaskRunner.Status.PAUSED, delay);
                    Thread.sleep(delay.getMillis());
                }
            }
        } else {
            throw new ISE("Pause request for task [%s] failed with response [%s] : [%s]", id, responseStatus, responseContent);
        }
    } catch (NoTaskLocationException e) {
        log.error("Exception [%s] while pausing Task [%s]", e.getMessage(), id);
        return ImmutableMap.of();
    } catch (InterruptedException e) {
        // Catching InterruptedException clears the interrupt status; restore it so that callers further up the
        // stack can still observe that this thread was asked to stop.
        Thread.currentThread().interrupt();
        throw new RE(e, "Exception [%s] while pausing Task [%s]", e.getMessage(), id);
    } catch (IOException e) {
        throw new RE(e, "Exception [%s] while pausing Task [%s]", e.getMessage(), id);
    }
}
Also used : HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) RE(org.apache.druid.java.util.common.RE) HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) Duration(org.joda.time.Duration) ISE(org.apache.druid.java.util.common.ISE) IOException(java.io.IOException) RetryPolicy(org.apache.druid.indexing.common.RetryPolicy)

Example 98 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

the class TaskLifecycleTest method testLockRevoked.

/**
 * Runs a task that acquires an exclusive time-chunk lock, revokes it through the lockbox, and checks that the same
 * try-acquire action reports the lock as not revoked before the revocation and as revoked afterwards. The task
 * deliberately finishes with a failure status, and nothing is expected to be published or nuked.
 */
@Test
public void testLockRevoked() throws Exception {
    final Task revokingTask = new AbstractFixedIntervalTask(
        "id1",
        "id1",
        new TaskResource("id1", 1),
        "ds",
        Intervals.of("2012-01-01/P1D"),
        null
    ) {

        @Override
        public String getType() {
            return "test";
        }

        @Override
        public void stopGracefully(TaskConfig taskConfig) {
        }

        @Override
        public TaskStatus run(TaskToolbox toolbox) throws Exception {
            final TimeChunkLockTryAcquireAction tryAcquire = new TimeChunkLockTryAcquireAction(
                TaskLockType.EXCLUSIVE,
                Intervals.of("2012-01-01/P1D")
            );

            final TaskLock acquired = toolbox.getTaskActionClient().submit(tryAcquire);
            if (acquired == null) {
                throw new ISE("Failed to get a lock");
            }

            // Re-submitting the action before the revocation must report a lock that is not revoked.
            final TaskLock beforeRevoke = toolbox.getTaskActionClient().submit(tryAcquire);
            Assert.assertFalse(beforeRevoke.isRevoked());

            taskLockbox.revokeLock(getId(), acquired);

            // After the revocation the same action must report the lock as revoked.
            final TaskLock afterRevoke = toolbox.getTaskActionClient().submit(tryAcquire);
            Assert.assertTrue(afterRevoke.isRevoked());

            return TaskStatus.failure(getId(), "lock revoked test");
        }
    };

    final TaskStatus finalStatus = runTask(revokingTask);

    Assert.assertEquals(taskLocation, finalStatus.getLocation());
    Assert.assertEquals("statusCode", TaskState.FAILED, finalStatus.getStatusCode());
    Assert.assertEquals("segments published", 0, mdc.getPublished().size());
    Assert.assertEquals("segments nuked", 0, mdc.getNuked().size());
}
Also used : TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) IndexTask(org.apache.druid.indexing.common.task.IndexTask) KillUnusedSegmentsTask(org.apache.druid.indexing.common.task.KillUnusedSegmentsTask) Task(org.apache.druid.indexing.common.task.Task) AbstractFixedIntervalTask(org.apache.druid.indexing.common.task.AbstractFixedIntervalTask) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) TaskResource(org.apache.druid.indexing.common.task.TaskResource) TaskLock(org.apache.druid.indexing.common.TaskLock) TimeChunkLockTryAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockTryAcquireAction) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) DefaultTaskConfig(org.apache.druid.indexing.overlord.config.DefaultTaskConfig) ISE(org.apache.druid.java.util.common.ISE) TaskStatus(org.apache.druid.indexer.TaskStatus) AbstractFixedIntervalTask(org.apache.druid.indexing.common.task.AbstractFixedIntervalTask) Interval(org.joda.time.Interval) FireDepartmentTest(org.apache.druid.segment.realtime.FireDepartmentTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 99 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

the class HttpRemoteTaskRunnerTest method testTaskRunnerRestart.

/*
  Simulates a restart of the Overlord where the taskRunner, on start, discovers workers with pre-existing tasks.

  Five tasks are announced by a single worker while the (strict) TaskStorage mock reports a possibly different
  status for each of them; the test then checks which tasks were shut down, which remain running, and how many
  tasks the runner still knows about.
   */
@Test(timeout = 60_000L)
public void testTaskRunnerRestart() throws Exception {
    // Node discovery is driven manually through TestDruidNodeDiscovery.
    TestDruidNodeDiscovery druidNodeDiscovery = new TestDruidNodeDiscovery();
    DruidNodeDiscoveryProvider druidNodeDiscoveryProvider = EasyMock.createMock(DruidNodeDiscoveryProvider.class);
    EasyMock.expect(druidNodeDiscoveryProvider.getForService(WorkerNodeService.DISCOVERY_SERVICE_KEY)).andReturn(druidNodeDiscovery);
    EasyMock.replay(druidNodeDiscoveryProvider);
    // Maps a worker host to the factory used to build its WorkerHolder (see createWorkerHolder override below).
    ConcurrentMap<String, CustomFunction> workerHolders = new ConcurrentHashMap<>();
    Task task1 = NoopTask.create("task-id-1", 0);
    Task task2 = NoopTask.create("task-id-2", 0);
    Task task3 = NoopTask.create("task-id-3", 0);
    Task task4 = NoopTask.create("task-id-4", 0);
    Task task5 = NoopTask.create("task-id-5", 0);
    // Storage view of the tasks: task1 and task2 have no status (task2 is looked up twice), task3 and task4 are
    // running, task5 has already succeeded. The mock is strict and is verified after the worker has synced.
    TaskStorage taskStorageMock = EasyMock.createStrictMock(TaskStorage.class);
    EasyMock.expect(taskStorageMock.getStatus(task1.getId())).andReturn(Optional.absent());
    EasyMock.expect(taskStorageMock.getStatus(task2.getId())).andReturn(Optional.absent()).times(2);
    EasyMock.expect(taskStorageMock.getStatus(task3.getId())).andReturn(Optional.of(TaskStatus.running(task3.getId())));
    EasyMock.expect(taskStorageMock.getStatus(task4.getId())).andReturn(Optional.of(TaskStatus.running(task4.getId())));
    EasyMock.expect(taskStorageMock.getStatus(task5.getId())).andReturn(Optional.of(TaskStatus.success(task5.getId())));
    EasyMock.replay(taskStorageMock);
    HttpRemoteTaskRunner taskRunner = new HttpRemoteTaskRunner(TestHelper.makeJsonMapper(), new HttpRemoteTaskRunnerConfig() {

        @Override
        public int getPendingTasksRunnerNumThreads() {
            return 3;
        }
    }, EasyMock.createNiceMock(HttpClient.class), DSuppliers.of(new AtomicReference<>(DefaultWorkerBehaviorConfig.defaultConfig())), new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, taskStorageMock, EasyMock.createNiceMock(CuratorFramework.class), new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null)) {

        // Delegates to the factory registered for the worker's host; fails fast for an unregistered host.
        @Override
        protected WorkerHolder createWorkerHolder(ObjectMapper smileMapper, HttpClient httpClient, HttpRemoteTaskRunnerConfig config, ScheduledExecutorService workersSyncExec, WorkerHolder.Listener listener, Worker worker, List<TaskAnnouncement> knownAnnouncements) {
            if (workerHolders.containsKey(worker.getHost())) {
                return workerHolders.get(worker.getHost()).apply(smileMapper, httpClient, config, workersSyncExec, listener, worker, knownAnnouncements);
            } else {
                throw new ISE("No WorkerHolder for [%s].", worker.getHost());
            }
        }
    };
    taskRunner.start();
    DiscoveryDruidNode druidNode = new DiscoveryDruidNode(new DruidNode("service", "host", false, 1234, null, true, false), NodeRole.MIDDLE_MANAGER, ImmutableMap.of(WorkerNodeService.DISCOVERY_SERVICE_KEY, new WorkerNodeService("ip1", 2, "0", WorkerConfig.DEFAULT_CATEGORY)));
    // 'ticks' is polled below to wait for the worker holder; 'taskShutdowns' collects the task ids checked by the
    // shutdown assertion. Both are handed to the test helper that builds the worker holder.
    AtomicInteger ticks = new AtomicInteger();
    Set<String> taskShutdowns = new HashSet<>();
    // Worker view of the tasks, in announcement order: task1 success, task2 running then success, task3 success,
    // task4 running, task5 running.
    workerHolders.put("host:1234", (mapper, httpClient, config, exec, listener, worker, knownAnnouncements) -> createWorkerHolder(mapper, httpClient, config, exec, listener, worker, knownAnnouncements, ImmutableList.of(TaskAnnouncement.create(task1, TaskStatus.success(task1.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task2, TaskStatus.success(task2.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task3, TaskStatus.success(task3.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task4, TaskStatus.running(task4.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task5, TaskStatus.running(task5.getId()), TaskLocation.create("host", 1234, 1235))), ImmutableMap.of(), ticks, taskShutdowns));
    // Announce the worker node to the runner's discovery listener.
    druidNodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(druidNode));
    // Wait until the worker holder has ticked at least once (bounded by the @Test timeout).
    while (ticks.get() < 1) {
        Thread.sleep(100);
    }
    EasyMock.verify(taskStorageMock);
    // task2 and task5 are expected to have been shut down on the worker.
    Assert.assertEquals(ImmutableSet.of(task2.getId(), task5.getId()), taskShutdowns);
    Assert.assertTrue(taskRunner.getPendingTasks().isEmpty());
    // task4 is the only task still reported as running.
    TaskRunnerWorkItem item = Iterables.getOnlyElement(taskRunner.getRunningTasks());
    Assert.assertEquals(task4.getId(), item.getTaskId());
    // Running task3 yields a future that already resolves to success.
    Assert.assertTrue(taskRunner.run(task3).get().isSuccess());
    Assert.assertEquals(2, taskRunner.getKnownTasks().size());
}
Also used : IndexerZkConfig(org.apache.druid.server.initialization.IndexerZkConfig) Task(org.apache.druid.indexing.common.task.Task) NoopTask(org.apache.druid.indexing.common.task.NoopTask) TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) TaskRunnerWorkItem(org.apache.druid.indexing.overlord.TaskRunnerWorkItem) WorkerNodeService(org.apache.druid.discovery.WorkerNodeService) CuratorFramework(org.apache.curator.framework.CuratorFramework) ZkPathsConfig(org.apache.druid.server.initialization.ZkPathsConfig) Worker(org.apache.druid.indexing.worker.Worker) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ISE(org.apache.druid.java.util.common.ISE) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ConcurrentHashSet(org.eclipse.jetty.util.ConcurrentHashSet) HashSet(java.util.HashSet) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) AtomicReference(java.util.concurrent.atomic.AtomicReference) HttpRemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DruidNodeDiscoveryProvider(org.apache.druid.discovery.DruidNodeDiscoveryProvider) HttpClient(org.apache.druid.java.util.http.client.HttpClient) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) DruidNode(org.apache.druid.server.DruidNode) Test(org.junit.Test)

Example 100 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

the class RemoteTaskRunnerRunPendingTasksConcurrencyTest method testConcurrency.

// This test reproduces the races described in https://github.com/apache/druid/issues/2842
// It uses two workers and a runner configured with two pending-task runner threads, submits more tasks than can be
// assigned at once, and checks that the runner keeps making progress after one of the queued tasks is shut down.
@Test(timeout = 60_000L)
public void testConcurrency() throws Exception {
    // NOTE(review): the second argument (3) is presumably the worker capacity — confirm against rtrTestUtils.
    rtrTestUtils.makeWorker("worker0", 3);
    rtrTestUtils.makeWorker("worker1", 3);
    remoteTaskRunner = rtrTestUtils.makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(new Period("PT3600S")) {

        @Override
        public int getPendingTasksRunnerNumThreads() {
            return 2;
        }
    });
    int numTasks = 6;
    ListenableFuture<TaskStatus>[] results = new ListenableFuture[numTasks];
    Task[] tasks = new Task[numTasks];
    // submit the first 2 tasks (task0 and task1)
    for (int i = 0; i < 2; i++) {
        tasks[i] = TestTasks.unending("task" + i);
        results[i] = (remoteTaskRunner.run(tasks[i]));
    }
    waitForBothWorkersToHaveUnackedTasks();
    // 3 more tasks, all of which get queued up
    for (int i = 2; i < 5; i++) {
        tasks[i] = TestTasks.unending("task" + i);
        results[i] = (remoteTaskRunner.run(tasks[i]));
    }
    // simulate completion of task0 and task1
    mockWorkerRunningAndCompletionSuccessfulTasks(tasks[0], tasks[1]);
    Assert.assertEquals(TaskState.SUCCESS, results[0].get().getStatusCode());
    Assert.assertEquals(TaskState.SUCCESS, results[1].get().getStatusCode());
    // now both threads race to run the last 3 tasks. task2 and task3 are being assigned
    waitForBothWorkersToHaveUnackedTasks();
    // Exactly which two of task2/task3/task4 got assigned depends on the race, so each pairing is handled:
    // the task left in the queue is shut down and the two assigned tasks are driven to successful completion.
    if (remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[2].getId()) && remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[3].getId())) {
        remoteTaskRunner.shutdown("task4", "test");
        mockWorkerRunningAndCompletionSuccessfulTasks(tasks[3], tasks[2]);
        Assert.assertEquals(TaskState.SUCCESS, results[3].get().getStatusCode());
        Assert.assertEquals(TaskState.SUCCESS, results[2].get().getStatusCode());
    } else if (remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[3].getId()) && remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[4].getId())) {
        remoteTaskRunner.shutdown("task2", "test");
        mockWorkerRunningAndCompletionSuccessfulTasks(tasks[4], tasks[3]);
        Assert.assertEquals(TaskState.SUCCESS, results[4].get().getStatusCode());
        Assert.assertEquals(TaskState.SUCCESS, results[3].get().getStatusCode());
    } else if (remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[4].getId()) && remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[2].getId())) {
        remoteTaskRunner.shutdown("task3", "test");
        mockWorkerRunningAndCompletionSuccessfulTasks(tasks[4], tasks[2]);
        Assert.assertEquals(TaskState.SUCCESS, results[4].get().getStatusCode());
        Assert.assertEquals(TaskState.SUCCESS, results[2].get().getStatusCode());
    } else {
        // None of the three expected pairings was observed.
        throw new ISE("two out of three tasks 2,3 and 4 must be waiting for ack.");
    }
    // ensure that RTR is doing OK and still making progress
    tasks[5] = TestTasks.unending("task5");
    results[5] = remoteTaskRunner.run(tasks[5]);
    waitForOneWorkerToHaveUnackedTasks();
    // Complete task5 on whichever worker it was announced to.
    if (rtrTestUtils.taskAnnounced("worker0", tasks[5].getId())) {
        rtrTestUtils.mockWorkerRunningTask("worker0", tasks[5]);
        rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker0", tasks[5]);
    } else {
        rtrTestUtils.mockWorkerRunningTask("worker1", tasks[5]);
        rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker1", tasks[5]);
    }
    Assert.assertEquals(TaskState.SUCCESS, results[5].get().getStatusCode());
}
Also used : Task(org.apache.druid.indexing.common.task.Task) Period(org.joda.time.Period) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ISE(org.apache.druid.java.util.common.ISE) Test(org.junit.Test)

Aggregations

ISE (org.apache.druid.java.util.common.ISE)354 IOException (java.io.IOException)95 ArrayList (java.util.ArrayList)90 Map (java.util.Map)68 List (java.util.List)60 File (java.io.File)48 Interval (org.joda.time.Interval)48 DataSegment (org.apache.druid.timeline.DataSegment)44 HashMap (java.util.HashMap)43 Nullable (javax.annotation.Nullable)43 URL (java.net.URL)36 StatusResponseHolder (org.apache.druid.java.util.http.client.response.StatusResponseHolder)33 Request (org.apache.druid.java.util.http.client.Request)30 ExecutionException (java.util.concurrent.ExecutionException)29 ImmutableMap (com.google.common.collect.ImmutableMap)28 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)28 VisibleForTesting (com.google.common.annotations.VisibleForTesting)27 Collectors (java.util.stream.Collectors)27 IAE (org.apache.druid.java.util.common.IAE)27 ImmutableList (com.google.common.collect.ImmutableList)26