Use of org.joda.time.Duration in project druid by druid-io.
From the class KillSupervisorsTest, method testConstructorFailIfInvalidRetainDuration:
@Test
public void testConstructorFailIfInvalidRetainDuration() {
  TestDruidCoordinatorConfig druidCoordinatorConfig = new TestDruidCoordinatorConfig(
      null, null, null, new Duration("PT5S"), null, null, null,
      new Duration("PT6S"), new Duration("PT-1S"),
      null, null, null, null, null, null, null, 10, null
  );
  exception.expect(IllegalArgumentException.class);
  exception.expectMessage("Coordinator supervisor kill retainDuration must be >= 0");
  killSupervisors = new KillSupervisors(druidCoordinatorConfig, mockMetadataSupervisorManager);
}
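The negative value new Duration("PT-1S") is what the test expects the config to reject: Joda-Time happily parses negative ISO-8601 durations, so the guard has to live in the configuration's own validation. A minimal standalone sketch of such a check, using a hypothetical validateRetainDuration helper rather than Druid's actual DruidCoordinatorConfig code:

import org.joda.time.Duration;

public class RetainDurationCheck {
  // Hypothetical helper: it mirrors the error message the test expects,
  // but it is not the actual DruidCoordinatorConfig implementation.
  static Duration validateRetainDuration(Duration retainDuration) {
    if (retainDuration != null && retainDuration.getMillis() < 0) {
      throw new IllegalArgumentException("Coordinator supervisor kill retainDuration must be >= 0");
    }
    return retainDuration;
  }

  public static void main(String[] args) {
    // "PT-1S" parses to a negative duration (-1000 ms) and is rejected here.
    validateRetainDuration(new Duration("PT-1S"));
  }
}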
Use of org.joda.time.Duration in project druid by druid-io.
From the class KinesisSupervisorTest, method getTestableSupervisor:
private TestableKinesisSupervisor getTestableSupervisor(
    int replicas, int taskCount, boolean useEarliestOffset, String duration, Period lateMessageRejectionPeriod,
    Period earlyMessageRejectionPeriod, boolean suspended, Integer recordsPerFetch, Integer fetchDelayMillis,
    AutoScalerConfig autoScalerConfig) {
  KinesisSupervisorIOConfig kinesisSupervisorIOConfig = new KinesisSupervisorIOConfig(
      STREAM, INPUT_FORMAT, "awsEndpoint", null, replicas, taskCount, new Period(duration), new Period("P1D"),
      new Period("PT30S"), useEarliestOffset, new Period("PT30M"), lateMessageRejectionPeriod,
      earlyMessageRejectionPeriod, null, recordsPerFetch, fetchDelayMillis, null, null, autoScalerConfig, false);
  KinesisIndexTaskClientFactory taskClientFactory = new KinesisIndexTaskClientFactory(null, null) {
    @Override
    public KinesisIndexTaskClient build(TaskInfoProvider taskInfoProvider, String dataSource, int numThreads, Duration httpTimeout, long numRetries) {
      Assert.assertEquals(TEST_CHAT_THREADS, numThreads);
      Assert.assertEquals(TEST_HTTP_TIMEOUT.toStandardDuration(), httpTimeout);
      Assert.assertEquals(TEST_CHAT_RETRIES, numRetries);
      return taskClient;
    }
  };
  return new TestableKinesisSupervisor(
      taskStorage, taskMaster, indexerMetadataStorageCoordinator, taskClientFactory, OBJECT_MAPPER,
      new KinesisSupervisorSpec(null, dataSchema, tuningConfig, kinesisSupervisorIOConfig, null, suspended,
          taskStorage, taskMaster, indexerMetadataStorageCoordinator, taskClientFactory, OBJECT_MAPPER,
          new NoopServiceEmitter(), new DruidMonitorSchedulerConfig(), rowIngestionMetersFactory, null, supervisorConfig),
      rowIngestionMetersFactory);
}
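Note the mix of Joda-Time types: the IO config is built from Period values (new Period(duration), new Period("P1D"), and so on), while the task client factory receives its HTTP timeout as a Duration, which is why the assertion converts TEST_HTTP_TIMEOUT with toStandardDuration(). A small self-contained illustration of that distinction, independent of any Druid classes:

import org.joda.time.Duration;
import org.joda.time.Period;

public class PeriodVsDuration {
  public static void main(String[] args) {
    // A Period is a field-based description ("1 hour", "30 minutes", ...).
    Period taskDuration = new Period("PT1H");

    // A Duration is an exact number of milliseconds; toStandardDuration()
    // converts using standard lengths (60s minutes, 60m hours, 24h days).
    Duration httpTimeout = new Period("PT10S").toStandardDuration();

    System.out.println(taskDuration.toStandardDuration().getMillis()); // 3600000
    System.out.println(httpTimeout.getMillis()); // 10000
  }
}

Since toStandardDuration() assumes standard-length days and hours, it is only well-defined for periods that do not contain month or year fields.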
Use of org.joda.time.Duration in project druid by druid-io.
From the class KinesisSupervisorTest, method getTestableSupervisorCustomIsTaskCurrent:
/**
* Use when you want to mock the return value of SeekableStreamSupervisor#isTaskCurrent()
*/
private TestableKinesisSupervisor getTestableSupervisorCustomIsTaskCurrent(
    int replicas, int taskCount, boolean useEarliestOffset, String duration, Period lateMessageRejectionPeriod,
    Period earlyMessageRejectionPeriod, boolean suspended, Integer recordsPerFetch, Integer fetchDelayMillis,
    boolean isTaskCurrentReturn) {
  KinesisSupervisorIOConfig kinesisSupervisorIOConfig = new KinesisSupervisorIOConfig(
      STREAM, INPUT_FORMAT, "awsEndpoint", null, replicas, taskCount, new Period(duration), new Period("P1D"),
      new Period("PT30S"), useEarliestOffset, new Period("PT30M"), lateMessageRejectionPeriod,
      earlyMessageRejectionPeriod, null, recordsPerFetch, fetchDelayMillis, null, null, null, false);
  KinesisIndexTaskClientFactory taskClientFactory = new KinesisIndexTaskClientFactory(null, null) {
    @Override
    public KinesisIndexTaskClient build(TaskInfoProvider taskInfoProvider, String dataSource, int numThreads, Duration httpTimeout, long numRetries) {
      Assert.assertEquals(TEST_CHAT_THREADS, numThreads);
      Assert.assertEquals(TEST_HTTP_TIMEOUT.toStandardDuration(), httpTimeout);
      Assert.assertEquals(TEST_CHAT_RETRIES, numRetries);
      return taskClient;
    }
  };
  return new TestableKinesisSupervisorWithCustomIsTaskCurrent(
      taskStorage, taskMaster, indexerMetadataStorageCoordinator, taskClientFactory, OBJECT_MAPPER,
      new KinesisSupervisorSpec(null, dataSchema, tuningConfig, kinesisSupervisorIOConfig, null, suspended,
          taskStorage, taskMaster, indexerMetadataStorageCoordinator, taskClientFactory, OBJECT_MAPPER,
          new NoopServiceEmitter(), new DruidMonitorSchedulerConfig(), rowIngestionMetersFactory, null, supervisorConfig),
      rowIngestionMetersFactory, isTaskCurrentReturn);
}
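This variant only differs in the trailing isTaskCurrentReturn flag, which TestableKinesisSupervisorWithCustomIsTaskCurrent presumably uses to pin down the result of SeekableStreamSupervisor#isTaskCurrent() (the subclass body is not shown here). A generic sketch of that test pattern, with stand-in classes instead of the real Druid types:

public class IsTaskCurrentOverrideSketch {
  // Stand-in for the production supervisor; not the real Druid class.
  static class Supervisor {
    public boolean isTaskCurrent(int taskGroupId, String taskId) {
      // The real supervisor compares the running task's spec against the current spec.
      return false;
    }
  }

  // Test-only subclass that pins the result, mirroring what the test helper
  // appears to do with its isTaskCurrentReturn parameter.
  static class TestableSupervisor extends Supervisor {
    private final boolean isTaskCurrentReturn;

    TestableSupervisor(boolean isTaskCurrentReturn) {
      this.isTaskCurrentReturn = isTaskCurrentReturn;
    }

    @Override
    public boolean isTaskCurrent(int taskGroupId, String taskId) {
      return isTaskCurrentReturn;
    }
  }

  public static void main(String[] args) {
    System.out.println(new TestableSupervisor(true).isTaskCurrent(0, "anyTask")); // prints true
  }
}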
Use of org.joda.time.Duration in project druid by druid-io.
From the class RemoteTaskActionClient, method submit:
@Override
public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException {
  log.debug("Performing action for task[%s]: %s", task.getId(), taskAction);
  byte[] dataToSend = jsonMapper.writeValueAsBytes(new TaskActionHolder(task, taskAction));
  final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
  while (true) {
    try {
      final StringFullResponseHolder fullResponseHolder;
      log.debug("Submitting action for task[%s] to Overlord: %s", task.getId(), jsonMapper.writeValueAsString(taskAction));
      fullResponseHolder = druidLeaderClient.go(
          druidLeaderClient.makeRequest(HttpMethod.POST, "/druid/indexer/v1/action")
                           .setContent(MediaType.APPLICATION_JSON, dataToSend)
      );
      if (fullResponseHolder.getStatus().getCode() / 100 == 2) {
        final Map<String, Object> responseDict = jsonMapper.readValue(
            fullResponseHolder.getContent(),
            JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
        );
        return jsonMapper.convertValue(responseDict.get("result"), taskAction.getReturnTypeReference());
      } else {
        // Want to retry, so throw an IOException.
        throw new IOE(
            "Error with status[%s] and message[%s]. Check overlord logs for details.",
            fullResponseHolder.getStatus(),
            fullResponseHolder.getContent()
        );
      }
    } catch (IOException | ChannelException e) {
      log.noStackTrace().warn(e, "Exception submitting action for task[%s]: %s", task.getId(), jsonMapper.writeValueAsString(taskAction));
      final Duration delay = retryPolicy.getAndIncrementRetryDelay();
      if (delay == null) {
        throw e;
      } else {
        try {
          final long sleepTime = jitter(delay.getMillis());
          log.warn("Will try again in [%s].", new Duration(sleepTime).toString());
          Thread.sleep(sleepTime);
        } catch (InterruptedException e2) {
          throw new RuntimeException(e2);
        }
      }
    } catch (InterruptedException e) {
      throw new RuntimeException(e);
    }
  }
}
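The loop above keeps retrying until the RetryPolicy stops issuing delays: getAndIncrementRetryDelay() returns the next backoff as a Joda Duration, or null once the policy is exhausted, and the actual sleep time is jittered before the next attempt. A condensed, standalone version of that control flow; both SimpleRetryPolicy and jitter below are illustrative stand-ins, not Druid's real implementations:

import java.io.IOException;
import java.util.concurrent.ThreadLocalRandom;
import org.joda.time.Duration;

public class RetryLoopSketch {
  // Hypothetical stand-in for Druid's RetryPolicy: a bounded exponential backoff.
  static class SimpleRetryPolicy {
    private long nextDelayMillis = 1_000;
    private int remainingRetries = 5;

    Duration getAndIncrementRetryDelay() {
      if (remainingRetries-- <= 0) {
        return null; // exhausted: the caller should rethrow
      }
      Duration delay = new Duration(nextDelayMillis);
      nextDelayMillis *= 2;
      return delay;
    }
  }

  // Hypothetical jitter: +/- 25% around the nominal delay.
  static long jitter(long millis) {
    double factor = 0.75 + ThreadLocalRandom.current().nextDouble() * 0.5;
    return (long) (millis * factor);
  }

  static String submitWithRetries() throws IOException, InterruptedException {
    SimpleRetryPolicy retryPolicy = new SimpleRetryPolicy();
    while (true) {
      try {
        return doSubmit();
      } catch (IOException e) {
        Duration delay = retryPolicy.getAndIncrementRetryDelay();
        if (delay == null) {
          throw e; // no retries left
        }
        Thread.sleep(jitter(delay.getMillis()));
      }
    }
  }

  static String doSubmit() throws IOException {
    throw new IOException("simulated transient failure");
  }

  public static void main(String[] args) throws Exception {
    try {
      submitWithRetries();
    } catch (IOException e) {
      System.out.println("gave up: " + e.getMessage());
    }
  }
}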
Use of org.joda.time.Duration in project druid by druid-io.
From the class IndexTaskClient, method submitRequest:
/**
* Sends an HTTP request to the task of the specified {@code taskId} and returns a response if it succeeded.
*/
protected <IntermediateType, FinalType> FinalType submitRequest(
    String taskId,
    @Nullable String mediaType, // nullable if content is empty
    HttpMethod method,
    String encodedPathSuffix,
    @Nullable String encodedQueryString,
    byte[] content,
    HttpResponseHandler<IntermediateType, FinalType> responseHandler,
    boolean retry
) throws IOException, ChannelException, NoTaskLocationException {
  final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
  while (true) {
    String path = StringUtils.format("%s/%s/%s", BASE_PATH, StringUtils.urlEncode(taskId), encodedPathSuffix);
    Optional<TaskStatus> status = taskInfoProvider.getTaskStatus(taskId);
    if (!status.isPresent() || !status.get().isRunnable()) {
      throw new TaskNotRunnableException(StringUtils.format("Aborting request because task [%s] is not runnable", taskId));
    }
    final TaskLocation location = taskInfoProvider.getTaskLocation(taskId);
    if (location.equals(TaskLocation.unknown())) {
      throw new NoTaskLocationException(StringUtils.format("No TaskLocation available for task [%s]", taskId));
    }
    final Request request = createRequest(taskId, location, path, encodedQueryString, method, mediaType, content);
    Either<StringFullResponseHolder, FinalType> response = null;
    try {
      // Netty throws some annoying exceptions if a connection can't be opened, which happens relatively frequently
      // for tasks that happen to still be starting up, so test the connection first to keep the logs clean.
      checkConnection(request.getUrl().getHost(), request.getUrl().getPort());
      response = submitRequest(request, responseHandler);
      if (response.isValue()) {
        return response.valueOrThrow();
      } else {
        final StringBuilder exceptionMessage = new StringBuilder();
        final HttpResponseStatus httpResponseStatus = response.error().getStatus();
        final String httpResponseContent = response.error().getContent();
        exceptionMessage.append("Received server error with status [").append(httpResponseStatus).append("]");
        if (!Strings.isNullOrEmpty(httpResponseContent)) {
          final String choppedMessage = StringUtils.chop(StringUtils.nullToEmptyNonDruidDataString(httpResponseContent), 1000);
          exceptionMessage.append("; first 1KB of body: ").append(choppedMessage);
        }
        if (httpResponseStatus.getCode() == 400) {
          // don't bother retrying if it's a bad request
          throw new IAE(exceptionMessage.toString());
        } else {
          throw new IOE(exceptionMessage.toString());
        }
      }
    } catch (IOException | ChannelException e) {
      // Since workers are free to move tasks around to different ports, there is a chance that a task may have been
      // moved but our view of its location has not been updated yet from ZK. To detect this case, we send a header
      // identifying our expected recipient in the request; if this doesn't correspond to the worker we messaged, the
      // worker will return an HTTP 404 with its ID in the response header. If we get a mismatching task ID, then
      // we will wait for a short period then retry the request indefinitely, expecting the task's location to
      // eventually be updated.
      final Duration delay;
      if (response != null && !response.isValue() && response.error().getStatus().equals(HttpResponseStatus.NOT_FOUND)) {
        String headerId = StringUtils.urlDecode(response.error().getResponse().headers().get(ChatHandlerResource.TASK_ID_HEADER));
        if (headerId != null && !headerId.equals(taskId)) {
          log.warn("Expected worker to have taskId [%s] but has taskId [%s], will retry in [%d]s", taskId, headerId, TASK_MISMATCH_RETRY_DELAY_SECONDS);
          delay = Duration.standardSeconds(TASK_MISMATCH_RETRY_DELAY_SECONDS);
        } else {
          delay = retryPolicy.getAndIncrementRetryDelay();
        }
      } else {
        delay = retryPolicy.getAndIncrementRetryDelay();
      }
      final String urlForLog = request.getUrl().toString();
      if (!retry) {
        // if retry=false, we probably aren't too concerned if the operation doesn't succeed (i.e. the request was
        // for informational purposes only); log at INFO instead of WARN.
        log.noStackTrace().info(e, "submitRequest failed for [%s]", urlForLog);
        throw e;
      } else if (delay == null) {
        // When retrying, log the final failure at WARN level, since it is likely to be bad news.
        log.warn(e, "submitRequest failed for [%s]", urlForLog);
        throw e;
      } else {
        try {
          final long sleepTime = delay.getMillis();
          // When retrying, log non-final failures at INFO level.
          log.noStackTrace().info(e, "submitRequest failed for [%s]; will try again in [%s]", urlForLog, new Duration(sleepTime).toString());
          Thread.sleep(sleepTime);
        } catch (InterruptedException e2) {
          Thread.currentThread().interrupt();
          e.addSuppressed(e2);
          throw new RuntimeException(e);
        }
      }
    } catch (NoTaskLocationException e) {
      log.info("No TaskLocation available for task [%s], this task may not have been assigned to a worker yet "
               + "or may have already completed", taskId);
      throw e;
    } catch (Exception e) {
      log.warn(e, "Exception while sending request");
      throw e;
    }
  }
}
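Before each attempt the method probes the task's host and port via checkConnection, so a task that is still starting up fails fast with an ordinary IOException instead of a noisier Netty channel error, and the retry logic above then takes over. The probe itself is not shown in this snippet; a plausible standalone version, assuming a plain socket connect with a short timeout:

import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Socket;

public class ConnectionProbeSketch {
  // Hypothetical version of checkConnection(host, port): open and immediately
  // close a TCP socket so an unreachable task is detected before the HTTP call.
  static void checkConnection(String host, int port) throws IOException {
    try (Socket socket = new Socket()) {
      socket.connect(new InetSocketAddress(host, port), 1_000 /* ms timeout */);
    }
  }

  public static void main(String[] args) {
    try {
      checkConnection("localhost", 8100);
      System.out.println("port reachable");
    } catch (IOException e) {
      System.out.println("not reachable yet: " + e.getMessage());
    }
  }
}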