Search in sources :

Example 81 with Duration

use of org.joda.time.Duration in project druid by druid-io.

the class KillSupervisorsTest method testConstructorFailIfInvalidRetainDuration.

/**
 * Expects the {@code KillSupervisors} constructor to throw an {@link IllegalArgumentException} with the
 * message "Coordinator supervisor kill retainDuration must be >= 0" when it is handed a coordinator
 * config that contains a negative duration.
 */
@Test
public void testConstructorFailIfInvalidRetainDuration() {
    final Duration fiveSeconds = new Duration("PT5S");
    final Duration sixSeconds = new Duration("PT6S");
    // NOTE(review): presumably this lands in the retainDuration slot of the config constructor -- confirm
    // against TestDruidCoordinatorConfig, whose parameter list is not visible here.
    final Duration negativeOneSecond = new Duration("PT-1S");
    final TestDruidCoordinatorConfig invalidConfig = new TestDruidCoordinatorConfig(null, null, null, fiveSeconds, null, null, null, sixSeconds, negativeOneSecond, null, null, null, null, null, null, null, 10, null);

    // The expectations must be registered before the constructor call that is supposed to throw.
    exception.expect(IllegalArgumentException.class);
    exception.expectMessage("Coordinator supervisor kill retainDuration must be >= 0");
    killSupervisors = new KillSupervisors(invalidConfig, mockMetadataSupervisorManager);
}
Also used : TestDruidCoordinatorConfig(org.apache.druid.server.coordinator.TestDruidCoordinatorConfig) Duration(org.joda.time.Duration) Test(org.junit.Test)

Example 82 with Duration

use of org.joda.time.Duration in project druid by druid-io.

the class KinesisSupervisorTest method getTestableSupervisor.

/**
 * Builds a {@code TestableKinesisSupervisor} wired to the test fixture's shared collaborators.
 *
 * <p>The task-client factory handed to the supervisor asserts that it is invoked with
 * {@code TEST_CHAT_THREADS}, {@code TEST_HTTP_TIMEOUT} and {@code TEST_CHAT_RETRIES}, and always returns the
 * fixture's {@code taskClient}.
 *
 * @param duration ISO-8601 period string, parsed with {@code new Period(duration)} and passed to the IO config
 */
private TestableKinesisSupervisor getTestableSupervisor(int replicas, int taskCount, boolean useEarliestOffset, String duration, Period lateMessageRejectionPeriod, Period earlyMessageRejectionPeriod, boolean suspended, Integer recordsPerFetch, Integer fetchDelayMillis, AutoScalerConfig autoScalerConfig) {
    // Periods are created in the same order in which they appear in the IO config argument list.
    final Period parsedDuration = new Period(duration);
    final Period oneDay = new Period("P1D");
    final Period thirtySeconds = new Period("PT30S");
    final Period thirtyMinutes = new Period("PT30M");
    final KinesisSupervisorIOConfig ioConfig = new KinesisSupervisorIOConfig(STREAM, INPUT_FORMAT, "awsEndpoint", null, replicas, taskCount, parsedDuration, oneDay, thirtySeconds, useEarliestOffset, thirtyMinutes, lateMessageRejectionPeriod, earlyMessageRejectionPeriod, null, recordsPerFetch, fetchDelayMillis, null, null, autoScalerConfig, false);

    final KinesisIndexTaskClientFactory verifyingClientFactory = new KinesisIndexTaskClientFactory(null, null) {
        @Override
        public KinesisIndexTaskClient build(TaskInfoProvider taskInfoProvider, String dataSource, int numThreads, Duration httpTimeout, long numRetries) {
            // Verify the supervisor forwards the chat settings unchanged before handing back the shared client.
            Assert.assertEquals(TEST_CHAT_THREADS, numThreads);
            Assert.assertEquals(TEST_HTTP_TIMEOUT.toStandardDuration(), httpTimeout);
            Assert.assertEquals(TEST_CHAT_RETRIES, numRetries);
            return taskClient;
        }
    };

    final KinesisSupervisorSpec supervisorSpec = new KinesisSupervisorSpec(null, dataSchema, tuningConfig, ioConfig, null, suspended, taskStorage, taskMaster, indexerMetadataStorageCoordinator, verifyingClientFactory, OBJECT_MAPPER, new NoopServiceEmitter(), new DruidMonitorSchedulerConfig(), rowIngestionMetersFactory, null, supervisorConfig);
    return new TestableKinesisSupervisor(taskStorage, taskMaster, indexerMetadataStorageCoordinator, verifyingClientFactory, OBJECT_MAPPER, supervisorSpec, rowIngestionMetersFactory);
}
Also used : KinesisIndexTaskClientFactory(org.apache.druid.indexing.kinesis.KinesisIndexTaskClientFactory) DruidMonitorSchedulerConfig(org.apache.druid.server.metrics.DruidMonitorSchedulerConfig) Period(org.joda.time.Period) Duration(org.joda.time.Duration) NoopServiceEmitter(org.apache.druid.server.metrics.NoopServiceEmitter) TaskInfoProvider(org.apache.druid.indexing.common.TaskInfoProvider)

Example 83 with Duration

use of org.joda.time.Duration in project druid by druid-io.

the class KinesisSupervisorTest method getTestableSupervisorCustomIsTaskCurrent.

/**
 * Use when you want to mock the return value of SeekableStreamSupervisor#isTaskCurrent().
 *
 * <p>Builds a {@code TestableKinesisSupervisorWithCustomIsTaskCurrent} wired to the test fixture's shared
 * collaborators; {@code isTaskCurrentReturn} is forwarded to that supervisor's constructor. The task-client
 * factory asserts that it is invoked with {@code TEST_CHAT_THREADS}, {@code TEST_HTTP_TIMEOUT} and
 * {@code TEST_CHAT_RETRIES}, and always returns the fixture's {@code taskClient}.
 *
 * @param duration ISO-8601 period string, parsed with {@code new Period(duration)} and passed to the IO config
 */
private TestableKinesisSupervisor getTestableSupervisorCustomIsTaskCurrent(int replicas, int taskCount, boolean useEarliestOffset, String duration, Period lateMessageRejectionPeriod, Period earlyMessageRejectionPeriod, boolean suspended, Integer recordsPerFetch, Integer fetchDelayMillis, boolean isTaskCurrentReturn) {
    // Periods are created in the same order in which they appear in the IO config argument list.
    final Period parsedDuration = new Period(duration);
    final Period oneDay = new Period("P1D");
    final Period thirtySeconds = new Period("PT30S");
    final Period thirtyMinutes = new Period("PT30M");
    final KinesisSupervisorIOConfig ioConfig = new KinesisSupervisorIOConfig(STREAM, INPUT_FORMAT, "awsEndpoint", null, replicas, taskCount, parsedDuration, oneDay, thirtySeconds, useEarliestOffset, thirtyMinutes, lateMessageRejectionPeriod, earlyMessageRejectionPeriod, null, recordsPerFetch, fetchDelayMillis, null, null, null, false);

    final KinesisIndexTaskClientFactory verifyingClientFactory = new KinesisIndexTaskClientFactory(null, null) {
        @Override
        public KinesisIndexTaskClient build(TaskInfoProvider taskInfoProvider, String dataSource, int numThreads, Duration httpTimeout, long numRetries) {
            // Verify the supervisor forwards the chat settings unchanged before handing back the shared client.
            Assert.assertEquals(TEST_CHAT_THREADS, numThreads);
            Assert.assertEquals(TEST_HTTP_TIMEOUT.toStandardDuration(), httpTimeout);
            Assert.assertEquals(TEST_CHAT_RETRIES, numRetries);
            return taskClient;
        }
    };

    final KinesisSupervisorSpec supervisorSpec = new KinesisSupervisorSpec(null, dataSchema, tuningConfig, ioConfig, null, suspended, taskStorage, taskMaster, indexerMetadataStorageCoordinator, verifyingClientFactory, OBJECT_MAPPER, new NoopServiceEmitter(), new DruidMonitorSchedulerConfig(), rowIngestionMetersFactory, null, supervisorConfig);
    return new TestableKinesisSupervisorWithCustomIsTaskCurrent(taskStorage, taskMaster, indexerMetadataStorageCoordinator, verifyingClientFactory, OBJECT_MAPPER, supervisorSpec, rowIngestionMetersFactory, isTaskCurrentReturn);
}
Also used : KinesisIndexTaskClientFactory(org.apache.druid.indexing.kinesis.KinesisIndexTaskClientFactory) DruidMonitorSchedulerConfig(org.apache.druid.server.metrics.DruidMonitorSchedulerConfig) Period(org.joda.time.Period) Duration(org.joda.time.Duration) NoopServiceEmitter(org.apache.druid.server.metrics.NoopServiceEmitter) TaskInfoProvider(org.apache.druid.indexing.common.TaskInfoProvider)

Example 84 with Duration

use of org.joda.time.Duration in project druid by druid-io.

the class RemoteTaskActionClient method submit.

/**
 * Submits {@code taskAction} on behalf of this client's task by POSTing it to
 * {@code /druid/indexer/v1/action} through {@code druidLeaderClient}, and returns the {@code "result"} entry
 * of the JSON response converted to the action's declared return type.
 *
 * <p>Any non-2xx response is turned into an {@code IOE} so that it goes through the same retry path as
 * transport failures. Between attempts the method sleeps for the delay supplied by the retry policy (passed
 * through {@code jitter(...)}); once the policy returns {@code null} the last failure is rethrown.
 *
 * <p>If the calling thread is interrupted, either during the request or while sleeping between retries, the
 * thread's interrupt flag is restored before the {@link InterruptedException} is wrapped in a
 * {@link RuntimeException}, so callers further up the stack can still observe the interruption.
 *
 * @param taskAction the action to perform
 * @return the deserialized result of the action
 * @throws IOException if the request keeps failing until the retry policy is exhausted
 */
@Override
public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException {
    log.debug("Performing action for task[%s]: %s", task.getId(), taskAction);
    byte[] dataToSend = jsonMapper.writeValueAsBytes(new TaskActionHolder(task, taskAction));
    final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
    while (true) {
        try {
            final StringFullResponseHolder fullResponseHolder;
            log.debug("Submitting action for task[%s] to Overlord: %s", task.getId(), jsonMapper.writeValueAsString(taskAction));
            fullResponseHolder = druidLeaderClient.go(druidLeaderClient.makeRequest(HttpMethod.POST, "/druid/indexer/v1/action").setContent(MediaType.APPLICATION_JSON, dataToSend));
            // Integer division by 100 maps every 2xx status code to 2.
            if (fullResponseHolder.getStatus().getCode() / 100 == 2) {
                final Map<String, Object> responseDict = jsonMapper.readValue(fullResponseHolder.getContent(), JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT);
                return jsonMapper.convertValue(responseDict.get("result"), taskAction.getReturnTypeReference());
            } else {
                // Want to retry, so throw an IOException.
                throw new IOE("Error with status[%s] and message[%s]. Check overlord logs for details.", fullResponseHolder.getStatus(), fullResponseHolder.getContent());
            }
        } catch (IOException | ChannelException e) {
            log.noStackTrace().warn(e, "Exception submitting action for task[%s]: %s", task.getId(), jsonMapper.writeValueAsString(taskAction));
            final Duration delay = retryPolicy.getAndIncrementRetryDelay();
            if (delay == null) {
                // The retry policy has no further delay to offer: give up and surface the last failure.
                throw e;
            } else {
                try {
                    final long sleepTime = jitter(delay.getMillis());
                    log.warn("Will try again in [%s].", new Duration(sleepTime).toString());
                    Thread.sleep(sleepTime);
                } catch (InterruptedException e2) {
                    // Restore the interrupt flag; wrapping alone would hide the interruption from callers.
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e2);
                }
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag; wrapping alone would hide the interruption from callers.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }
}
Also used : Duration(org.joda.time.Duration) IOException(java.io.IOException) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) RetryPolicy(org.apache.druid.indexing.common.RetryPolicy) IOE(org.apache.druid.java.util.common.IOE) ChannelException(org.jboss.netty.channel.ChannelException)

Example 85 with Duration

use of org.joda.time.Duration in project druid by druid-io.

the class IndexTaskClient method submitRequest.

/**
 * Sends an HTTP request to the task of the specified {@code taskId} and returns a response if it succeeded.
 *
 * <p>Every attempt first re-checks that the task is runnable and has a known location, builds the request,
 * probes the connection with {@code checkConnection}, and then delegates to
 * {@code submitRequest(request, responseHandler)}. An error response with status 400 raises an {@code IAE};
 * any other error response raises an {@code IOE}, which is handled by the same catch block as transport
 * failures.
 *
 * <p>On an {@link IOException} or {@code ChannelException}:
 * <ul>
 *   <li>if {@code retry} is {@code false}, the failure is logged at INFO and rethrown immediately;</li>
 *   <li>if the computed delay is {@code null}, the failure is logged at WARN and rethrown;</li>
 *   <li>otherwise the thread sleeps for the delay and the loop starts another attempt.</li>
 * </ul>
 * The delay is {@code TASK_MISMATCH_RETRY_DELAY_SECONDS} when a 404 response carries a task ID header that
 * differs from {@code taskId}; in all other cases it comes from the retry policy.
 *
 * @param taskId              ID of the target task; used for the status/location lookup and in the request path
 * @param mediaType           media type handed to {@code createRequest}; annotated {@code @Nullable}
 * @param method              HTTP method handed to {@code createRequest}
 * @param encodedPathSuffix   suffix appended to {@code BASE_PATH/<url-encoded taskId>/} to form the path
 * @param encodedQueryString  query string handed to {@code createRequest}; annotated {@code @Nullable}
 * @param content             request body handed to {@code createRequest}
 * @param responseHandler     handler passed through to {@code submitRequest(request, responseHandler)}
 * @param retry               whether to sleep and try again after an I/O or channel failure
 * @return the value produced when the response is a value rather than an error
 * @throws IOException             if the request fails and is not (or no longer) retried
 * @throws ChannelException        if the request fails and is not (or no longer) retried
 * @throws NoTaskLocationException if the task's location equals {@code TaskLocation.unknown()}
 * @throws TaskNotRunnableException if the task status is absent or not runnable (not listed in the
 *                                 {@code throws} clause, so presumably unchecked -- confirm)
 */
protected <IntermediateType, FinalType> FinalType submitRequest(String taskId, // nullable if content is empty
@Nullable String mediaType, HttpMethod method, String encodedPathSuffix, @Nullable String encodedQueryString, byte[] content, HttpResponseHandler<IntermediateType, FinalType> responseHandler, boolean retry) throws IOException, ChannelException, NoTaskLocationException {
    final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
    // Status and location are looked up inside the loop, so every retry sees the latest values.
    while (true) {
        String path = StringUtils.format("%s/%s/%s", BASE_PATH, StringUtils.urlEncode(taskId), encodedPathSuffix);
        Optional<TaskStatus> status = taskInfoProvider.getTaskStatus(taskId);
        if (!status.isPresent() || !status.get().isRunnable()) {
            throw new TaskNotRunnableException(StringUtils.format("Aborting request because task [%s] is not runnable", taskId));
        }
        final TaskLocation location = taskInfoProvider.getTaskLocation(taskId);
        if (location.equals(TaskLocation.unknown())) {
            throw new NoTaskLocationException(StringUtils.format("No TaskLocation available for task [%s]", taskId));
        }
        final Request request = createRequest(taskId, location, path, encodedQueryString, method, mediaType, content);
        // Declared outside the try so the IOException/ChannelException handler can inspect the error response.
        Either<StringFullResponseHolder, FinalType> response = null;
        try {
            // Netty throws some annoying exceptions if a connection can't be opened, which happens relatively frequently
            // for tasks that happen to still be starting up, so test the connection first to keep the logs clean.
            checkConnection(request.getUrl().getHost(), request.getUrl().getPort());
            response = submitRequest(request, responseHandler);
            if (response.isValue()) {
                return response.valueOrThrow();
            } else {
                // Build a message from the status and, when present, the body chopped to 1000 characters.
                final StringBuilder exceptionMessage = new StringBuilder();
                final HttpResponseStatus httpResponseStatus = response.error().getStatus();
                final String httpResponseContent = response.error().getContent();
                exceptionMessage.append("Received server error with status [").append(httpResponseStatus).append("]");
                if (!Strings.isNullOrEmpty(httpResponseContent)) {
                    final String choppedMessage = StringUtils.chop(StringUtils.nullToEmptyNonDruidDataString(httpResponseContent), 1000);
                    exceptionMessage.append("; first 1KB of body: ").append(choppedMessage);
                }
                if (httpResponseStatus.getCode() == 400) {
                    // don't bother retrying if it's a bad request
                    throw new IAE(exceptionMessage.toString());
                } else {
                    // Handled by the IOException/ChannelException catch below, i.e. subject to the retry logic.
                    throw new IOE(exceptionMessage.toString());
                }
            }
        } catch (IOException | ChannelException e) {
            // Since workers are free to move tasks around to different ports, there is a chance that a task may have been
            // moved but our view of its location has not been updated yet from ZK. To detect this case, we send a header
            // identifying our expected recipient in the request; if this doesn't correspond to the worker we messaged, the
            // worker will return an HTTP 404 with its ID in the response header. If we get a mismatching task ID, then
            // we will wait for a short period then retry the request indefinitely, expecting the task's location to
            // eventually be updated.
            final Duration delay;
            if (response != null && !response.isValue() && response.error().getStatus().equals(HttpResponseStatus.NOT_FOUND)) {
                String headerId = StringUtils.urlDecode(response.error().getResponse().headers().get(ChatHandlerResource.TASK_ID_HEADER));
                if (headerId != null && !headerId.equals(taskId)) {
                    log.warn("Expected worker to have taskId [%s] but has taskId [%s], will retry in [%d]s", taskId, headerId, TASK_MISMATCH_RETRY_DELAY_SECONDS);
                    // Fixed delay that does not consume an attempt from the retry policy.
                    delay = Duration.standardSeconds(TASK_MISMATCH_RETRY_DELAY_SECONDS);
                } else {
                    delay = retryPolicy.getAndIncrementRetryDelay();
                }
            } else {
                delay = retryPolicy.getAndIncrementRetryDelay();
            }
            final String urlForLog = request.getUrl().toString();
            // Note: the delay above is computed even when retry is false; it is simply not used in that case.
            if (!retry) {
                // if retry=false, we probably aren't too concerned if the operation doesn't succeed (i.e. the request was
                // for informational purposes only); log at INFO instead of WARN.
                log.noStackTrace().info(e, "submitRequest failed for [%s]", urlForLog);
                throw e;
            } else if (delay == null) {
                // When retrying, log the final failure at WARN level, since it is likely to be bad news.
                log.warn(e, "submitRequest failed for [%s]", urlForLog);
                throw e;
            } else {
                try {
                    final long sleepTime = delay.getMillis();
                    // When retrying, log non-final failures at INFO level.
                    log.noStackTrace().info(e, "submitRequest failed for [%s]; will try again in [%s]", urlForLog, new Duration(sleepTime).toString());
                    Thread.sleep(sleepTime);
                } catch (InterruptedException e2) {
                    // Restore the interrupt flag, keep the interruption as a suppressed exception, and surface
                    // the original request failure wrapped in an unchecked exception.
                    Thread.currentThread().interrupt();
                    e.addSuppressed(e2);
                    throw new RuntimeException(e);
                }
            }
        } catch (NoTaskLocationException e) {
            // Only reached when something inside the try block throws this; the explicit throw above the try
            // is outside this handler's scope.
            log.info("No TaskLocation available for task [%s], this task may not have been assigned to a worker yet " + "or may have already completed", taskId);
            throw e;
        } catch (Exception e) {
            // Catch-all for everything not handled above: log and rethrow without retrying.
            // NOTE(review): presumably this is where the IAE raised for HTTP 400 ends up -- confirm that IAE
            // is not an IOException subtype.
            log.warn(e, "Exception while sending request");
            throw e;
        }
    }
}
Also used : HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) Request(org.apache.druid.java.util.http.client.Request) Duration(org.joda.time.Duration) IOException(java.io.IOException) TaskStatus(org.apache.druid.indexer.TaskStatus) IAE(org.apache.druid.java.util.common.IAE) TaskLocation(org.apache.druid.indexer.TaskLocation) MalformedURLException(java.net.MalformedURLException) ChannelException(org.jboss.netty.channel.ChannelException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) IOE(org.apache.druid.java.util.common.IOE) ChannelException(org.jboss.netty.channel.ChannelException)

Aggregations

Duration (org.joda.time.Duration)272 Test (org.junit.Test)148 Instant (org.joda.time.Instant)66 DateTime (org.joda.time.DateTime)32 Period (org.joda.time.Period)27 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)24 TestDruidCoordinatorConfig (org.apache.druid.server.coordinator.TestDruidCoordinatorConfig)22 HashMap (java.util.HashMap)18 IOException (java.io.IOException)17 Category (org.junit.experimental.categories.Category)16 ArrayList (java.util.ArrayList)15 Map (java.util.Map)15 KV (org.apache.beam.sdk.values.KV)15 AtomicReference (java.util.concurrent.atomic.AtomicReference)13 IndexSpec (org.apache.druid.segment.IndexSpec)12 Set (java.util.Set)10 GlobalWindows (org.apache.beam.sdk.transforms.windowing.GlobalWindows)10 DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec)10 Interval (org.joda.time.Interval)10 Request (com.metamx.http.client.Request)9