Search in sources :

Example 26 with IAE

use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

the class S3DataSegmentPuller method getSegmentFiles.

/**
 * Pulls the object at {@code s3Coords} into {@code outDir}.
 *
 * <p>The object path decides how it is unpacked: a zip path is unzipped into {@code outDir}; a gz path is
 * gunzipped into a file inside {@code outDir} named after the URI path without its extension. Any other path
 * raises an {@code IAE}, which (going by the indentation of this listing) is raised inside the {@code try} and
 * therefore reaches the caller wrapped in a {@code SegmentLoadingException}.
 *
 * <p>NOTE(review): this listing appears to have lost lines during extraction (closing braces, the receiver of
 * the logging calls, the body of the cleanup {@code try}). Confirm details against the upstream source.
 *
 * @param s3Coords location of the object to pull
 * @param outDir   directory that receives the unpacked content
 * @return the copy result returned by {@code CompressionUtils.unzip} or {@code CompressionUtils.gunzip}
 * @throws SegmentLoadingException if the object is not in the bucket, or wrapping any exception raised while
 *                                 pulling or unpacking it
 */
FileUtils.FileCopyResult getSegmentFiles(final CloudObjectLocation s3Coords, final File outDir) throws SegmentLoadingException {"Pulling index at path[%s] to outDir[%s]", s3Coords, outDir);
    // Fail fast when the object is missing from the bucket.
    if (!isObjectInBucket(s3Coords)) {
        throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords);
    try {
        final URI uri = s3Coords.toUri(S3StorageDruidModule.SCHEME);
        // Stream supplier handed to the decompression helpers below; every openStream() call opens the
        // object through buildFileObject(uri).
        final ByteSource byteSource = new ByteSource() {

            public InputStream openStream() throws IOException {
                try {
                    return buildFileObject(uri).openInputStream();
                } catch (AmazonServiceException e) {
                    // Only exceptions that have a cause and are accepted by S3Utils.S3RETRY are surfaced as
                    // IOException. NOTE(review): presumably so the S3RETRY predicate passed to unzip/gunzip
                    // can trigger a retry - confirm.
                    if (e.getCause() != null) {
                        if (S3Utils.S3RETRY.apply(e)) {
                            throw new IOException("Recoverable exception", e);
                    // Anything else is rethrown unchecked.
                    throw new RuntimeException(e);
        if (CompressionUtils.isZip(s3Coords.getPath())) {
            final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, S3Utils.S3RETRY, false);
            // NOTE(review): the next line looks like the argument list of a logging call whose receiver was lost.
  "Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outDir.getAbsolutePath());
            return result;
        if (CompressionUtils.isGz(s3Coords.getPath())) {
            // The gunzipped file keeps the object's name minus its final extension.
            final String fname = Files.getNameWithoutExtension(uri.getPath());
            final File outFile = new File(outDir, fname);
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile, S3Utils.S3RETRY);
            // NOTE(review): as above, the receiver of this logging call is missing from the listing.
  "Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outFile.getAbsolutePath());
            return result;
        // Neither zip nor gz: the file type is not supported.
        throw new IAE("Do not know how to load file type at [%s]", uri.toString());
    } catch (Exception e) {
        // NOTE(review): this try body is empty in the listing; the warning below suggests it originally
        // removed outDir - confirm against the upstream source.
        try {
        } catch (IOException ioe) {
            log.warn(ioe, "Failed to remove output directory [%s] for segment pulled from [%s]", outDir.getAbsolutePath(), s3Coords.toString());
        // Wrap whatever failed, passing along the original exception and its message.
        throw new SegmentLoadingException(e, e.getMessage());
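Also used : SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) FileUtils(org.apache.druid.java.util.common.FileUtils) AmazonServiceException(com.amazonaws.AmazonServiceException) ByteSource(com.google.common.io.ByteSource) IOException(java.io.IOException) IAE(org.apache.druid.java.util.common.IAE) URI(java.net.URI) File(java.io.File) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) AmazonS3Exception(com.amazonaws.services.s3.model.AmazonS3Exception) AmazonServiceException(com.amazonaws.AmazonServiceException) IOException(java.io.IOException) AmazonClientException(com.amazonaws.AmazonClientException)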

Example 27 with IAE

use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

the class HyperLogLogCollector method add.

/**
 * Adds an already-hashed value to this collector.
 *
 * <p>A bucket is derived from the last two bytes of {@code hashedValue} and a position value from its first
 * eight bytes; both are then passed to {@code add(bucket, positionOf1)}.
 *
 * <p>NOTE(review): this listing appears to have lost lines during extraction (closing braces and, most likely,
 * some labels/jump statements inside the switch). Confirm the switch logic against the upstream source.
 *
 * @param hashedValue hash bytes; must be at least {@code MIN_BYTES_REQUIRED} long
 * @throws IAE if {@code hashedValue} is shorter than {@code MIN_BYTES_REQUIRED}
 */
public void add(byte[] hashedValue) {
    if (hashedValue.length < MIN_BYTES_REQUIRED) {
        throw new IAE("Insufficient bytes, need[%d] got [%d]", MIN_BYTES_REQUIRED, hashedValue.length);
    // Reset before the new value is added; presumably this invalidates a cached estimate - confirm.
    estimatedCardinality = null;
    final ByteBuffer buffer = ByteBuffer.wrap(hashedValue);
    // Bucket: the short stored in the last two bytes, masked with BUCKET_MASK.
    short bucket = (short) (buffer.getShort(hashedValue.length - 2) & BUCKET_MASK);
    byte positionOf1 = 0;
    // Walk the first eight bytes, translating each (as an unsigned index) through ByteBitLookup.LOOKUP.
    for (int i = 0; i < 8; ++i) {
        byte lookupVal = ByteBitLookup.LOOKUP[UnsignedBytes.toInt(hashedValue[i])];
        switch(lookupVal) {
            case 0:
                // A lookup value of 0 contributes 8 to the position.
                positionOf1 += (byte) 8;
                // NOTE(review): the two statements below look like they belonged to a separate branch whose
                // label is missing from this listing; they add the lookup value and set i to 8, which ends
                // the loop.
                positionOf1 += lookupVal;
                i = 8;
    add(bucket, positionOf1);
Also used : IAE(org.apache.druid.java.util.common.IAE) ByteBuffer(java.nio.ByteBuffer)

Example 28 with IAE

use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

the class InputRowSerde method getTypeHelperMap.

/**
 * Builds a map from dimension name to the {@code IndexSerdeTypeHelper} matching that dimension's column type.
 *
 * <p>STRING, LONG, FLOAT and DOUBLE each have a dedicated helper constant; the {@code IAE} below reports any
 * other type as invalid.
 *
 * <p>NOTE(review): this listing appears to have lost lines during extraction (closing braces and, most likely,
 * the {@code break}/{@code default} lines of the switch). Confirm against the upstream source.
 *
 * @param dimensionsSpec spec whose dimensions are iterated
 * @return a new {@code HashMap} keyed by dimension name
 */
public static Map<String, IndexSerdeTypeHelper> getTypeHelperMap(DimensionsSpec dimensionsSpec) {
    Map<String, IndexSerdeTypeHelper> typeHelperMap = new HashMap<>();
    for (DimensionSchema dimensionSchema : dimensionsSpec.getDimensions()) {
        IndexSerdeTypeHelper typeHelper;
        // Pick the helper by the dimension's column type.
        switch(dimensionSchema.getColumnType().getType()) {
            case STRING:
                typeHelper = STRING_HELPER;
            case LONG:
                typeHelper = LONG_HELPER;
            case FLOAT:
                typeHelper = FLOAT_HELPER;
            case DOUBLE:
                typeHelper = DOUBLE_HELPER;
                // NOTE(review): presumably this throw sat under a default label that is missing here.
                throw new IAE("Invalid type: [%s]", dimensionSchema.getColumnType());
        typeHelperMap.put(dimensionSchema.getName(), typeHelper);
    return typeHelperMap;
Also used : HashMap(java.util.HashMap) IAE(org.apache.druid.java.util.common.IAE) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema)

Example 29 with IAE

use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

the class JobHelper method getURIFromSegment.

/**
 * Derives a location URI for a segment from the {@code "type"} entry of its load spec.
 *
 * <ul>
 *   <li>{@code s3_zip}: {@code s3a://bucket/key} when the {@code "S3Schema"} entry equals {@code "s3a"},
 *       otherwise {@code s3n://bucket/key}</li>
 *   <li>{@code hdfs}: the {@code "path"} entry parsed as a URI</li>
 *   <li>{@code google}: {@code gs://bucket/path} with every ':' in the path replaced by {@code %3A}</li>
 *   <li>{@code local}: a {@code file} URI built from the {@code "path"} entry</li>
 * </ul>
 *
 * <p>The {@code "type"} entry is dereferenced without a null check, so a load spec lacking it fails with a
 * {@code NullPointerException}; the same holds for {@code "path"} in the branches that read it.
 *
 * <p>NOTE(review): this listing appears to have lost its closing-brace lines during extraction.
 *
 * @param dataSegment segment whose load spec is inspected
 * @return the URI for the segment location
 * @throws IAE if the load spec type is none of the above
 * @throws ISE if the local path cannot form a URI, or the load spec cannot be serialized for the error message
 */
public static URI getURIFromSegment(DataSegment dataSegment) {
    // There is no good way around this...
    // TODO: add getURI() to URIDataPuller
    final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
    final String type = loadSpec.get("type").toString();
    final URI segmentLocURI;
    if ("s3_zip".equals(type)) {
        // The scheme follows the "S3Schema" entry: s3a when it says so, s3n in every other case.
        if ("s3a".equals(loadSpec.get("S3Schema"))) {
            segmentLocURI = URI.create(StringUtils.format("s3a://%s/%s", loadSpec.get("bucket"), loadSpec.get("key")));
        } else {
            segmentLocURI = URI.create(StringUtils.format("s3n://%s/%s", loadSpec.get("bucket"), loadSpec.get("key")));
    } else if ("hdfs".equals(type)) {
        segmentLocURI = URI.create(loadSpec.get("path").toString());
    } else if ("google".equals(type)) {
        // Segment names contain : in their path.
        // Google Cloud Storage supports : but Hadoop does not.
        // This becomes an issue when re-indexing using the current segments.
        // The Hadoop getSplits code doesn't understand the : and returns "Relative path in absolute URI"
        // This could be fixed using the same code that generates path names for hdfs segments using
        // getHdfsStorageDir. But that wouldn't fix this issue for people who already have segments with ":".
        // Because of this we just URL encode the : making everything work as it should.
        segmentLocURI = URI.create(StringUtils.format("gs://%s/%s", loadSpec.get("bucket"), StringUtils.replaceChar(loadSpec.get("path").toString(), ':', "%3A")));
    } else if ("local".equals(type)) {
        try {
            // Multi-argument constructor: scheme "file", the path as given, every other component null.
            segmentLocURI = new URI("file", null, loadSpec.get("path").toString(), null, null);
        } catch (URISyntaxException e) {
            throw new ISE(e, "Unable to form simple file uri");
    } else {
        try {
            // Unknown type: report the whole load spec as JSON in the error message.
            throw new IAE("Cannot figure out loadSpec %s", HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(loadSpec));
        } catch (JsonProcessingException e) {
            // The serialization failure itself is not passed to the ISE.
            throw new ISE("Cannot write Map with json mapper");
    return segmentLocURI;
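Also used : ISE(org.apache.druid.java.util.common.ISE) URISyntaxException(java.net.URISyntaxException) IAE(org.apache.druid.java.util.common.IAE) URI(java.net.URI) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException)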

Example 30 with IAE

use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

the class IndexTaskClient method submitRequest.

/**
 * Sends an HTTP request to the task of the specified {@code taskId} and returns a response if it succeeded.
 */
// Loops until the handler yields a value (returned) or an exception escapes. Visible exits by exception:
// task not runnable, task location unknown, HTTP 400 (IAE, never retried), and IOException/ChannelException
// once retry is false or the retry policy returns no further delay.
// NOTE(review): this listing appears to have lost lines during extraction (closing braces, at least one logging
// receiver, and probably the sleep between attempts). Confirm against the upstream source.
protected <IntermediateType, FinalType> FinalType submitRequest(String taskId, // nullable if content is empty
@Nullable String mediaType, HttpMethod method, String encodedPathSuffix, @Nullable String encodedQueryString, byte[] content, HttpResponseHandler<IntermediateType, FinalType> responseHandler, boolean retry) throws IOException, ChannelException, NoTaskLocationException {
    // One policy per call; it supplies the delay before each further attempt.
    final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
    while (true) {
        String path = StringUtils.format("%s/%s/%s", BASE_PATH, StringUtils.urlEncode(taskId), encodedPathSuffix);
        // Status and location are looked up again on every iteration of the loop.
        Optional<TaskStatus> status = taskInfoProvider.getTaskStatus(taskId);
        if (!status.isPresent() || !status.get().isRunnable()) {
            throw new TaskNotRunnableException(StringUtils.format("Aborting request because task [%s] is not runnable", taskId));
        final TaskLocation location = taskInfoProvider.getTaskLocation(taskId);
        if (location.equals(TaskLocation.unknown())) {
            throw new NoTaskLocationException(StringUtils.format("No TaskLocation available for task [%s]", taskId));
        final Request request = createRequest(taskId, location, path, encodedQueryString, method, mediaType, content);
        // Declared outside the try so the IOException/ChannelException handler can inspect an error response.
        Either<StringFullResponseHolder, FinalType> response = null;
        try {
            // Netty throws some annoying exceptions if a connection can't be opened, which happens relatively frequently
            // for tasks that happen to still be starting up, so test the connection first to keep the logs clean.
            checkConnection(request.getUrl().getHost(), request.getUrl().getPort());
            response = submitRequest(request, responseHandler);
            if (response.isValue()) {
                return response.valueOrThrow();
            } else {
                // Error side: build a message from the status plus a truncated copy of the body.
                final StringBuilder exceptionMessage = new StringBuilder();
                final HttpResponseStatus httpResponseStatus = response.error().getStatus();
                final String httpResponseContent = response.error().getContent();
                exceptionMessage.append("Received server error with status [").append(httpResponseStatus).append("]");
                if (!Strings.isNullOrEmpty(httpResponseContent)) {
                    final String choppedMessage = StringUtils.chop(StringUtils.nullToEmptyNonDruidDataString(httpResponseContent), 1000);
                    exceptionMessage.append("; first 1KB of body: ").append(choppedMessage);
                if (httpResponseStatus.getCode() == 400) {
                    // don't bother retrying if it's a bad request
                    throw new IAE(exceptionMessage.toString());
                } else {
                    // An IOE is used for every other status. NOTE(review): presumably IOE is an IOException
                    // subtype so the handler below can retry it - confirm.
                    throw new IOE(exceptionMessage.toString());
        } catch (IOException | ChannelException e) {
            // Since workers are free to move tasks around to different ports, there is a chance that a task may have been
            // moved but our view of its location has not been updated yet from ZK. To detect this case, we send a header
            // identifying our expected recipient in the request; if this doesn't correspond to the worker we messaged, the
            // worker will return an HTTP 404 with its ID in the response header. If we get a mismatching task ID, then
            // we will wait for a short period then retry the request indefinitely, expecting the task's location to
            // eventually be updated.
            final Duration delay;
            if (response != null && !response.isValue() && response.error().getStatus().equals(HttpResponseStatus.NOT_FOUND)) {
                String headerId = StringUtils.urlDecode(response.error().getResponse().headers().get(ChatHandlerResource.TASK_ID_HEADER));
                if (headerId != null && !headerId.equals(taskId)) {
                    // Mismatch: use the fixed delay and leave the retry policy's counter untouched.
                    log.warn("Expected worker to have taskId [%s] but has taskId [%s], will retry in [%d]s", taskId, headerId, TASK_MISMATCH_RETRY_DELAY_SECONDS);
                    delay = Duration.standardSeconds(TASK_MISMATCH_RETRY_DELAY_SECONDS);
                } else {
                    delay = retryPolicy.getAndIncrementRetryDelay();
            } else {
                delay = retryPolicy.getAndIncrementRetryDelay();
            final String urlForLog = request.getUrl().toString();
            if (!retry) {
                // if retry=false, we probably aren't too concerned if the operation doesn't succeed (i.e. the request was
                // for informational purposes only); log at INFO instead of WARN.
                log.noStackTrace().info(e, "submitRequest failed for [%s]", urlForLog);
                throw e;
            } else if (delay == null) {
                // A null delay is treated as "no retries left".
                // When retrying, log the final failure at WARN level, since it is likely to be bad news.
                log.warn(e, "submitRequest failed for [%s]", urlForLog);
                throw e;
            } else {
                try {
                    final long sleepTime = delay.getMillis();
                    // When retrying, log non-final failures at INFO level.
                    log.noStackTrace().info(e, "submitRequest failed for [%s]; will try again in [%s]", urlForLog, new Duration(sleepTime).toString());
                    // NOTE(review): no sleeping call is visible here although InterruptedException is caught
                    // below; the listing has presumably lost that line - confirm.
                } catch (InterruptedException e2) {
                    // NOTE(review): wraps the outer exception e rather than e2 and does not restore the
                    // thread's interrupt flag - confirm this is intended.
                    throw new RuntimeException(e);
        } catch (NoTaskLocationException e) {
            // NOTE(review): the next line looks like the argument list of a logging call whose receiver was lost.
  "No TaskLocation available for task [%s], this task may not have been assigned to a worker yet " + "or may have already completed", taskId);
            throw e;
        } catch (Exception e) {
            // Anything else is logged and rethrown unchanged.
            log.warn(e, "Exception while sending request");
            throw e;
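Also used : HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) Request(org.apache.druid.java.util.http.client.Request) Duration(org.joda.time.Duration) IOException(java.io.IOException) TaskStatus(org.apache.druid.indexer.TaskStatus) IAE(org.apache.druid.java.util.common.IAE) TaskLocation(org.apache.druid.indexer.TaskLocation) MalformedURLException(java.net.MalformedURLException) ChannelException(org.jboss.netty.channel.ChannelException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) IOE(org.apache.druid.java.util.common.IOE) ChannelException(org.jboss.netty.channel.ChannelException)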


IAE ( ISE ( IOException ( ByteBuffer (java.nio.ByteBuffer)19 ArrayList (java.util.ArrayList)16 List (java.util.List)14 Expr (org.apache.druid.math.expr.Expr)14 Nullable (javax.annotation.Nullable)12 ColumnType (org.apache.druid.segment.column.ColumnType)10 HashSet (java.util.HashSet)8 Map (java.util.Map)8 Interval (org.joda.time.Interval)8 VisibleForTesting ( HashMap (java.util.HashMap)7 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)7 File ( Iterables ( Arrays (java.util.Arrays)5 Test (org.junit.Test)5 ImmutableMap (