use of org.apache.druid.java.util.common.IOE in project druid by druid-io.
the class HdfsClasspathSetupTest method setupStatic.
/**
 * One-time fixture setup: reserves a unique temporary path, builds the shared Hadoop
 * {@code Configuration}, and points a {@code LocalFileSystem} at that path.
 *
 * @throws IOException if the temporary file cannot be created or removed again, or if the
 *                     file system fails to initialize
 */
@BeforeClass
public static void setupStatic() throws IOException {
  // createTempFile is used only to obtain a unique path; the placeholder file is removed right away.
  hdfsTmpDir = File.createTempFile("hdfsClasspathSetupTest", "dir");
  final boolean placeholderRemoved = hdfsTmpDir.delete();
  if (!placeholderRemoved) {
    final String location = hdfsTmpDir.getAbsolutePath();
    throw new IOE("Unable to delete hdfsTmpDir [%s]", location);
  }

  conf = new Configuration(true);

  // Root the local file system at the reserved path and make it the working directory.
  localFS = new LocalFileSystem();
  localFS.initialize(hdfsTmpDir.toURI(), conf);
  final Path workingDirectory = new Path(hdfsTmpDir.toURI());
  localFS.setWorkingDirectory(workingDirectory);
}
use of org.apache.druid.java.util.common.IOE in project druid by druid-io.
the class JobHelper method renameIndexFilesForSegments.
/**
 * Moves each segment's index zip from its temporary location to its final location.
 *
 * <p>This exists to work around limitations of both FileContext (no s3n support) and
 * NativeS3FileSystem.rename, which will not overwrite. Note: segments should be renamed in the
 * index task, not in a hadoop job, as race conditions between job retries can cause the final
 * segment index file path to get clobbered.
 *
 * @param indexerSchema               the hadoop ingestion spec
 * @param segmentAndIndexZipFilePaths the list of segments with their currently stored tmp path and
 *                                    the final path that they should be renamed to
 * @throws IOException if any index file cannot be renamed, or if the underlying file system
 *                     access fails
 */
public static void renameIndexFilesForSegments(HadoopIngestionSpec indexerSchema, List<DataSegmentAndIndexZipFilePath> segmentAndIndexZipFilePaths) throws IOException {
  // Build the Hadoop configuration from system properties, job properties and Druid properties.
  final HadoopDruidIndexerConfig indexerConfig = HadoopDruidIndexerConfig.fromSpec(indexerSchema);
  final Configuration hadoopConf = JobHelper.injectSystemProperties(new Configuration(), indexerConfig);
  indexerConfig.addJobProperties(hadoopConf);
  JobHelper.injectDruidProperties(hadoopConf, indexerConfig);

  for (DataSegmentAndIndexZipFilePath pathPair : segmentAndIndexZipFilePaths) {
    final Path source = new Path(pathPair.getTmpIndexZipFilePath());
    final Path destination = new Path(pathPair.getFinalIndexZipFilePath());

    // The file system is resolved from the destination, once per segment.
    final FileSystem destinationFs = FileSystem.get(destination.toUri(), hadoopConf);
    final boolean renamed = renameIndexFile(destinationFs, source, destination);
    if (renamed) {
      continue;
    }

    // Fail fast on the first segment that could not be moved.
    throw new IOE("Unable to rename [%s] to [%s]", source.toUri().toString(), destination.toUri().toString());
  }
}
use of org.apache.druid.java.util.common.IOE in project druid by druid-io.
the class JobHelper method addJarToClassPath.
/**
 * Makes {@code jarFile} available under {@code distributedClassPath} (uploading it first when
 * {@code shouldUploadOrReplace} says so) and adds the resulting path to the job's classpath.
 *
 * <p>When an upload is needed, the jar is first written under {@code intermediateClassPath} and then
 * renamed into {@code distributedClassPath}. A failed rename is tolerated as long as the destination
 * file exists afterwards. The intermediate file, if still present, is deleted in all cases.
 *
 * @param jarFile               local jar file to publish
 * @param distributedClassPath  directory that holds the jars added to the job classpath
 * @param intermediateClassPath staging directory the jar is uploaded to before being renamed
 * @param fs                    file system on which both directories live
 * @param job                   job whose classpath receives the jar
 * @throws IOException if the jar is missing from {@code distributedClassPath} after the rename attempt,
 *                     if checking for it fails, or if cleaning up the intermediate file fails
 */
static void addJarToClassPath(File jarFile, Path distributedClassPath, Path intermediateClassPath, FileSystem fs, Job job) throws IOException {
// Create distributed directory if it does not exist.
// rename will always fail if destination does not exist.
fs.mkdirs(distributedClassPath);
// Non-snapshot jar files are uploaded to the shared classpath.
final Path hdfsPath = new Path(distributedClassPath, jarFile.getName());
if (shouldUploadOrReplace(jarFile, hdfsPath, fs)) {
// Multiple jobs can try to upload the jar here,
// to avoid them from overwriting files, first upload to intermediateClassPath and then rename to the distributedClasspath.
final Path intermediateHdfsPath = new Path(intermediateClassPath, jarFile.getName());
uploadJar(jarFile, intermediateHdfsPath, fs);
// Collects the single failure (plus suppressed ones) that is thrown at the end of the finally block.
IOException exception = null;
try {
log.info("Renaming jar to path[%s]", hdfsPath);
// The boolean result of rename is not inspected; success is judged by the existence check below.
fs.rename(intermediateHdfsPath, hdfsPath);
if (!fs.exists(hdfsPath)) {
throw new IOE("File does not exist even after moving from[%s] to [%s]", intermediateHdfsPath, hdfsPath);
}
} catch (IOException e) {
// rename failed, possibly due to race condition. check if some other job has uploaded the jar file.
try {
// If the destination exists despite the failure, the error is dropped and the method proceeds normally.
if (!fs.exists(hdfsPath)) {
log.error(e, "IOException while Renaming jar file");
exception = e;
}
} catch (IOException e1) {
// The existence check itself failed: report the original error and attach this one to it.
e.addSuppressed(e1);
exception = e;
}
} finally {
// Always try to remove the staged copy, whether or not the rename worked.
try {
if (fs.exists(intermediateHdfsPath)) {
fs.delete(intermediateHdfsPath, false);
}
} catch (IOException e) {
// A cleanup failure becomes the primary error only when nothing failed earlier.
if (exception == null) {
exception = e;
} else {
exception.addSuppressed(e);
}
}
// Thrown from inside finally on purpose: this is the only place the collected failure is surfaced.
if (exception != null) {
throw exception;
}
}
}
// Reached both when the jar was just uploaded and when no upload was needed.
job.addFileToClassPath(hdfsPath);
}
use of org.apache.druid.java.util.common.IOE in project druid by druid-io.
the class IndexTaskClient method submitRequest.
/**
 * Sends an HTTP request to the task of the specified {@code taskId} and returns a response if it succeeded.
 *
 * <p>The method loops until it returns a value or throws. On every iteration it re-reads the task's
 * status and location from {@code taskInfoProvider}, builds a new request, and submits it. A failure
 * raised as {@code IOException} or {@code ChannelException} is either rethrown or followed by a sleep
 * and another iteration, depending on {@code retry} and on the delay handed out by the retry policy.
 *
 * @param taskId             id of the task to contact; also used to build the request path
 * @param mediaType          media type of {@code content}; nullable if content is empty
 * @param method             HTTP method of the request
 * @param encodedPathSuffix  path suffix appended after the encoded task id
 * @param encodedQueryString query string passed to {@code createRequest}; may be null
 * @param content            request body
 * @param responseHandler    handler used to produce the final response value
 * @param retry              when false, the first {@code IOException}/{@code ChannelException} is logged at
 *                           INFO and rethrown without sleeping
 * @return the value carried by the response when {@code response.isValue()} is true
 * @throws IOException             on a non-400 error response or another I/O failure that is not retried
 * @throws ChannelException        on a channel failure that is not retried
 * @throws NoTaskLocationException if the task's location equals {@code TaskLocation.unknown()}
 */
// NOTE(review): TaskNotRunnableException and IAE are also thrown below but are not declared in the
// throws clause, so they are presumably unchecked — confirm against their definitions.
protected <IntermediateType, FinalType> FinalType submitRequest(String taskId, // the next parameter, mediaType, is nullable if content is empty
@Nullable String mediaType, HttpMethod method, String encodedPathSuffix, @Nullable String encodedQueryString, byte[] content, HttpResponseHandler<IntermediateType, FinalType> responseHandler, boolean retry) throws IOException, ChannelException, NoTaskLocationException {
// A single retry policy instance is shared by all iterations of the loop.
final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
while (true) {
String path = StringUtils.format("%s/%s/%s", BASE_PATH, StringUtils.urlEncode(taskId), encodedPathSuffix);
// Status and location are looked up again on every attempt. These two checks sit outside the try
// block, so exceptions thrown here bypass all of the catch clauses below.
Optional<TaskStatus> status = taskInfoProvider.getTaskStatus(taskId);
if (!status.isPresent() || !status.get().isRunnable()) {
throw new TaskNotRunnableException(StringUtils.format("Aborting request because task [%s] is not runnable", taskId));
}
final TaskLocation location = taskInfoProvider.getTaskLocation(taskId);
if (location.equals(TaskLocation.unknown())) {
throw new NoTaskLocationException(StringUtils.format("No TaskLocation available for task [%s]", taskId));
}
final Request request = createRequest(taskId, location, path, encodedQueryString, method, mediaType, content);
// Stays null until the request has actually produced a response; the catch block below relies on that.
Either<StringFullResponseHolder, FinalType> response = null;
try {
// Netty throws some annoying exceptions if a connection can't be opened, which happens relatively frequently
// for tasks that happen to still be starting up, so test the connection first to keep the logs clean.
checkConnection(request.getUrl().getHost(), request.getUrl().getPort());
response = submitRequest(request, responseHandler);
if (response.isValue()) {
return response.valueOrThrow();
} else {
// Error response: build a message from the status and, when present, a truncated copy of the body.
final StringBuilder exceptionMessage = new StringBuilder();
final HttpResponseStatus httpResponseStatus = response.error().getStatus();
final String httpResponseContent = response.error().getContent();
exceptionMessage.append("Received server error with status [").append(httpResponseStatus).append("]");
if (!Strings.isNullOrEmpty(httpResponseContent)) {
final String choppedMessage = StringUtils.chop(StringUtils.nullToEmptyNonDruidDataString(httpResponseContent), 1000);
exceptionMessage.append("; first 1KB of body: ").append(choppedMessage);
}
if (httpResponseStatus.getCode() == 400) {
// don't bother retrying if it's a bad request
// NOTE(review): this relies on IAE not being an IOException, so that it skips the retry handler
// and lands in the generic catch at the bottom — confirm.
throw new IAE(exceptionMessage.toString());
} else {
// Thrown as an IOException subtype so that it is handled by the retry logic in the catch below.
throw new IOE(exceptionMessage.toString());
}
}
} catch (IOException | ChannelException e) {
// Since workers are free to move tasks around to different ports, there is a chance that a task may have been
// moved but our view of its location has not been updated yet from ZK. To detect this case, we send a header
// identifying our expected recipient in the request; if this doesn't correspond to the worker we messaged, the
// worker will return an HTTP 404 with its ID in the response header. If we get a mismatching task ID, then
// we will wait for a short period then retry the request indefinitely, expecting the task's location to
// eventually be updated.
final Duration delay;
// response is null when the failure happened before a response was obtained (e.g. in checkConnection);
// in that case the regular retry-policy delay is used.
if (response != null && !response.isValue() && response.error().getStatus().equals(HttpResponseStatus.NOT_FOUND)) {
String headerId = StringUtils.urlDecode(response.error().getResponse().headers().get(ChatHandlerResource.TASK_ID_HEADER));
if (headerId != null && !headerId.equals(taskId)) {
// Task id mismatch: use the fixed delay and leave the retry policy's counter untouched.
log.warn("Expected worker to have taskId [%s] but has taskId [%s], will retry in [%d]s", taskId, headerId, TASK_MISMATCH_RETRY_DELAY_SECONDS);
delay = Duration.standardSeconds(TASK_MISMATCH_RETRY_DELAY_SECONDS);
} else {
delay = retryPolicy.getAndIncrementRetryDelay();
}
} else {
delay = retryPolicy.getAndIncrementRetryDelay();
}
final String urlForLog = request.getUrl().toString();
if (!retry) {
// if retry=false, we probably aren't too concerned if the operation doesn't succeed (i.e. the request was
// for informational purposes only); log at INFO instead of WARN.
log.noStackTrace().info(e, "submitRequest failed for [%s]", urlForLog);
throw e;
} else if (delay == null) {
// A null delay ends the retrying (presumably the policy has run out of attempts — confirm in RetryPolicy).
// When retrying, log the final failure at WARN level, since it is likely to be bad news.
log.warn(e, "submitRequest failed for [%s]", urlForLog);
throw e;
} else {
try {
final long sleepTime = delay.getMillis();
// When retrying, log non-final failures at INFO level.
log.noStackTrace().info(e, "submitRequest failed for [%s]; will try again in [%s]", urlForLog, new Duration(sleepTime).toString());
Thread.sleep(sleepTime);
} catch (InterruptedException e2) {
// Restore the interrupt flag, then surface the original request failure with the interruption
// attached as a suppressed exception.
Thread.currentThread().interrupt();
e.addSuppressed(e2);
throw new RuntimeException(e);
}
}
} catch (NoTaskLocationException e) {
// Only reachable for a NoTaskLocationException raised inside the try block; the explicit throw
// near the top of the loop is outside it.
log.info("No TaskLocation available for task [%s], this task may not have been assigned to a worker yet " + "or may have already completed", taskId);
throw e;
} catch (Exception e) {
// Anything else is logged at WARN and rethrown without retrying.
log.warn(e, "Exception while sending request");
throw e;
}
}
}
use of org.apache.druid.java.util.common.IOE in project druid by druid-io.
the class RemoteTaskActionClient method submit.
/**
 * Submits {@code taskAction} on behalf of this client's task and returns the action's result.
 *
 * <p>The action is wrapped in a {@code TaskActionHolder}, serialized once, and POSTed to
 * {@code /druid/indexer/v1/action} through {@code druidLeaderClient}. A 2xx response is parsed as a
 * map and its {@code "result"} entry is converted to the action's return type. Any other status is
 * raised as an {@code IOE} so that it goes through the same retry path as a transport failure.
 *
 * <p>On {@code IOException} or {@code ChannelException} the call sleeps for a jittered delay taken
 * from the retry policy and tries again; once the policy returns a null delay the last failure is
 * rethrown.
 *
 * <p>If the thread is interrupted, either while the request is in flight or while sleeping between
 * attempts, the interrupt flag is restored before a {@code RuntimeException} wrapping the
 * {@code InterruptedException} is thrown, so callers can still observe the interruption.
 *
 * @param taskAction the action to perform
 * @param <RetType>  the action's result type
 * @return the deserialized {@code "result"} entry of the response
 * @throws IOException if the request still fails once the retry policy stops handing out delays,
 *                     or if the action cannot be serialized
 */
@Override
public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException {
  log.debug("Performing action for task[%s]: %s", task.getId(), taskAction);
  // Serialized once up front and reused for every attempt.
  byte[] dataToSend = jsonMapper.writeValueAsBytes(new TaskActionHolder(task, taskAction));
  final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
  while (true) {
    try {
      final StringFullResponseHolder fullResponseHolder;
      log.debug("Submitting action for task[%s] to Overlord: %s", task.getId(), jsonMapper.writeValueAsString(taskAction));
      fullResponseHolder = druidLeaderClient.go(druidLeaderClient.makeRequest(HttpMethod.POST, "/druid/indexer/v1/action").setContent(MediaType.APPLICATION_JSON, dataToSend));
      if (fullResponseHolder.getStatus().getCode() / 100 == 2) {
        final Map<String, Object> responseDict = jsonMapper.readValue(fullResponseHolder.getContent(), JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT);
        return jsonMapper.convertValue(responseDict.get("result"), taskAction.getReturnTypeReference());
      } else {
        // Want to retry, so throw an IOException.
        throw new IOE("Error with status[%s] and message[%s]. Check overlord logs for details.", fullResponseHolder.getStatus(), fullResponseHolder.getContent());
      }
    } catch (IOException | ChannelException e) {
      log.noStackTrace().warn(e, "Exception submitting action for task[%s]: %s", task.getId(), jsonMapper.writeValueAsString(taskAction));
      final Duration delay = retryPolicy.getAndIncrementRetryDelay();
      if (delay == null) {
        // No further delay available from the retry policy: give up and surface the last failure.
        throw e;
      } else {
        try {
          final long sleepTime = jitter(delay.getMillis());
          log.warn("Will try again in [%s].", new Duration(sleepTime).toString());
          Thread.sleep(sleepTime);
        } catch (InterruptedException e2) {
          // Catching InterruptedException clears the interrupt flag; restore it so that code further
          // up the stack can still see that this thread was asked to stop.
          Thread.currentThread().interrupt();
          throw new RuntimeException(e2);
        }
      }
    } catch (InterruptedException e) {
      // Same reasoning as above: keep the interruption visible after wrapping it.
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    }
  }
}
Aggregations