use of org.apache.druid.java.util.common.IAE in project druid by druid-io.
the class S3DataSegmentPuller method getSegmentFiles.
FileUtils.FileCopyResult getSegmentFiles(final CloudObjectLocation s3Coords, final File outDir) throws SegmentLoadingException {
log.info("Pulling index at path[%s] to outDir[%s]", s3Coords, outDir);
if (!isObjectInBucket(s3Coords)) {
throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords);
}
try {
FileUtils.mkdirp(outDir);
final URI uri = s3Coords.toUri(S3StorageDruidModule.SCHEME);
final ByteSource byteSource = new ByteSource() {
@Override
public InputStream openStream() throws IOException {
try {
return buildFileObject(uri).openInputStream();
} catch (AmazonServiceException e) {
if (e.getCause() != null) {
if (S3Utils.S3RETRY.apply(e)) {
throw new IOException("Recoverable exception", e);
}
}
throw new RuntimeException(e);
}
}
};
if (CompressionUtils.isZip(s3Coords.getPath())) {
final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, S3Utils.S3RETRY, false);
log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outDir.getAbsolutePath());
return result;
}
if (CompressionUtils.isGz(s3Coords.getPath())) {
final String fname = Files.getNameWithoutExtension(uri.getPath());
final File outFile = new File(outDir, fname);
final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile, S3Utils.S3RETRY);
log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outFile.getAbsolutePath());
return result;
}
throw new IAE("Do not know how to load file type at [%s]", uri.toString());
} catch (Exception e) {
try {
FileUtils.deleteDirectory(outDir);
} catch (IOException ioe) {
log.warn(ioe, "Failed to remove output directory [%s] for segment pulled from [%s]", outDir.getAbsolutePath(), s3Coords.toString());
}
throw new SegmentLoadingException(e, e.getMessage());
}
}
use of org.apache.druid.java.util.common.IAE in project druid by druid-io.
the class HyperLogLogCollector method add.
public void add(byte[] hashedValue) {
if (hashedValue.length < MIN_BYTES_REQUIRED) {
throw new IAE("Insufficient bytes, need[%d] got [%d]", MIN_BYTES_REQUIRED, hashedValue.length);
}
estimatedCardinality = null;
final ByteBuffer buffer = ByteBuffer.wrap(hashedValue);
short bucket = (short) (buffer.getShort(hashedValue.length - 2) & BUCKET_MASK);
byte positionOf1 = 0;
for (int i = 0; i < 8; ++i) {
byte lookupVal = ByteBitLookup.LOOKUP[UnsignedBytes.toInt(hashedValue[i])];
switch(lookupVal) {
case 0:
positionOf1 += (byte) 8;
continue;
default:
positionOf1 += lookupVal;
i = 8;
break;
}
}
add(bucket, positionOf1);
}
use of org.apache.druid.java.util.common.IAE in project druid by druid-io.
the class InputRowSerde method getTypeHelperMap.
public static Map<String, IndexSerdeTypeHelper> getTypeHelperMap(DimensionsSpec dimensionsSpec) {
Map<String, IndexSerdeTypeHelper> typeHelperMap = new HashMap<>();
for (DimensionSchema dimensionSchema : dimensionsSpec.getDimensions()) {
IndexSerdeTypeHelper typeHelper;
switch(dimensionSchema.getColumnType().getType()) {
case STRING:
typeHelper = STRING_HELPER;
break;
case LONG:
typeHelper = LONG_HELPER;
break;
case FLOAT:
typeHelper = FLOAT_HELPER;
break;
case DOUBLE:
typeHelper = DOUBLE_HELPER;
break;
default:
throw new IAE("Invalid type: [%s]", dimensionSchema.getColumnType());
}
typeHelperMap.put(dimensionSchema.getName(), typeHelper);
}
return typeHelperMap;
}
use of org.apache.druid.java.util.common.IAE in project druid by druid-io.
the class JobHelper method getURIFromSegment.
public static URI getURIFromSegment(DataSegment dataSegment) {
// There is no good way around this...
// TODO: add getURI() to URIDataPuller
final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
final String type = loadSpec.get("type").toString();
final URI segmentLocURI;
if ("s3_zip".equals(type)) {
if ("s3a".equals(loadSpec.get("S3Schema"))) {
segmentLocURI = URI.create(StringUtils.format("s3a://%s/%s", loadSpec.get("bucket"), loadSpec.get("key")));
} else {
segmentLocURI = URI.create(StringUtils.format("s3n://%s/%s", loadSpec.get("bucket"), loadSpec.get("key")));
}
} else if ("hdfs".equals(type)) {
segmentLocURI = URI.create(loadSpec.get("path").toString());
} else if ("google".equals(type)) {
// Segment names contain : in their path.
// Google Cloud Storage supports : but Hadoop does not.
// This becomes an issue when re-indexing using the current segments.
// The Hadoop getSplits code doesn't understand the : and returns "Relative path in absolute URI"
// This could be fixed using the same code that generates path names for hdfs segments using
// getHdfsStorageDir. But that wouldn't fix this issue for people who already have segments with ":".
// Because of this we just URL encode the : making everything work as it should.
segmentLocURI = URI.create(StringUtils.format("gs://%s/%s", loadSpec.get("bucket"), StringUtils.replaceChar(loadSpec.get("path").toString(), ':', "%3A")));
} else if ("local".equals(type)) {
try {
segmentLocURI = new URI("file", null, loadSpec.get("path").toString(), null, null);
} catch (URISyntaxException e) {
throw new ISE(e, "Unable to form simple file uri");
}
} else {
try {
throw new IAE("Cannot figure out loadSpec %s", HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(loadSpec));
} catch (JsonProcessingException e) {
throw new ISE("Cannot write Map with json mapper");
}
}
return segmentLocURI;
}
use of org.apache.druid.java.util.common.IAE in project druid by druid-io.
the class IndexTaskClient method submitRequest.
/**
* Sends an HTTP request to the task of the specified {@code taskId} and returns a response if it succeeded.
*/
protected <IntermediateType, FinalType> FinalType submitRequest(String taskId, // nullable if content is empty
@Nullable String mediaType, HttpMethod method, String encodedPathSuffix, @Nullable String encodedQueryString, byte[] content, HttpResponseHandler<IntermediateType, FinalType> responseHandler, boolean retry) throws IOException, ChannelException, NoTaskLocationException {
final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
while (true) {
String path = StringUtils.format("%s/%s/%s", BASE_PATH, StringUtils.urlEncode(taskId), encodedPathSuffix);
Optional<TaskStatus> status = taskInfoProvider.getTaskStatus(taskId);
if (!status.isPresent() || !status.get().isRunnable()) {
throw new TaskNotRunnableException(StringUtils.format("Aborting request because task [%s] is not runnable", taskId));
}
final TaskLocation location = taskInfoProvider.getTaskLocation(taskId);
if (location.equals(TaskLocation.unknown())) {
throw new NoTaskLocationException(StringUtils.format("No TaskLocation available for task [%s]", taskId));
}
final Request request = createRequest(taskId, location, path, encodedQueryString, method, mediaType, content);
Either<StringFullResponseHolder, FinalType> response = null;
try {
// Netty throws some annoying exceptions if a connection can't be opened, which happens relatively frequently
// for tasks that happen to still be starting up, so test the connection first to keep the logs clean.
checkConnection(request.getUrl().getHost(), request.getUrl().getPort());
response = submitRequest(request, responseHandler);
if (response.isValue()) {
return response.valueOrThrow();
} else {
final StringBuilder exceptionMessage = new StringBuilder();
final HttpResponseStatus httpResponseStatus = response.error().getStatus();
final String httpResponseContent = response.error().getContent();
exceptionMessage.append("Received server error with status [").append(httpResponseStatus).append("]");
if (!Strings.isNullOrEmpty(httpResponseContent)) {
final String choppedMessage = StringUtils.chop(StringUtils.nullToEmptyNonDruidDataString(httpResponseContent), 1000);
exceptionMessage.append("; first 1KB of body: ").append(choppedMessage);
}
if (httpResponseStatus.getCode() == 400) {
// don't bother retrying if it's a bad request
throw new IAE(exceptionMessage.toString());
} else {
throw new IOE(exceptionMessage.toString());
}
}
} catch (IOException | ChannelException e) {
// Since workers are free to move tasks around to different ports, there is a chance that a task may have been
// moved but our view of its location has not been updated yet from ZK. To detect this case, we send a header
// identifying our expected recipient in the request; if this doesn't correspond to the worker we messaged, the
// worker will return an HTTP 404 with its ID in the response header. If we get a mismatching task ID, then
// we will wait for a short period then retry the request indefinitely, expecting the task's location to
// eventually be updated.
final Duration delay;
if (response != null && !response.isValue() && response.error().getStatus().equals(HttpResponseStatus.NOT_FOUND)) {
String headerId = StringUtils.urlDecode(response.error().getResponse().headers().get(ChatHandlerResource.TASK_ID_HEADER));
if (headerId != null && !headerId.equals(taskId)) {
log.warn("Expected worker to have taskId [%s] but has taskId [%s], will retry in [%d]s", taskId, headerId, TASK_MISMATCH_RETRY_DELAY_SECONDS);
delay = Duration.standardSeconds(TASK_MISMATCH_RETRY_DELAY_SECONDS);
} else {
delay = retryPolicy.getAndIncrementRetryDelay();
}
} else {
delay = retryPolicy.getAndIncrementRetryDelay();
}
final String urlForLog = request.getUrl().toString();
if (!retry) {
// if retry=false, we probably aren't too concerned if the operation doesn't succeed (i.e. the request was
// for informational purposes only); log at INFO instead of WARN.
log.noStackTrace().info(e, "submitRequest failed for [%s]", urlForLog);
throw e;
} else if (delay == null) {
// When retrying, log the final failure at WARN level, since it is likely to be bad news.
log.warn(e, "submitRequest failed for [%s]", urlForLog);
throw e;
} else {
try {
final long sleepTime = delay.getMillis();
// When retrying, log non-final failures at INFO level.
log.noStackTrace().info(e, "submitRequest failed for [%s]; will try again in [%s]", urlForLog, new Duration(sleepTime).toString());
Thread.sleep(sleepTime);
} catch (InterruptedException e2) {
Thread.currentThread().interrupt();
e.addSuppressed(e2);
throw new RuntimeException(e);
}
}
} catch (NoTaskLocationException e) {
log.info("No TaskLocation available for task [%s], this task may not have been assigned to a worker yet " + "or may have already completed", taskId);
throw e;
} catch (Exception e) {
log.warn(e, "Exception while sending request");
throw e;
}
}
}
Aggregations