Example 26 with IAE

Use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

From class S3DataSegmentPuller, the method getSegmentFiles:

FileUtils.FileCopyResult getSegmentFiles(final CloudObjectLocation s3Coords, final File outDir) throws SegmentLoadingException {
    log.info("Pulling index at path[%s] to outDir[%s]", s3Coords, outDir);
    if (!isObjectInBucket(s3Coords)) {
        throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords);
    }
    try {
        FileUtils.mkdirp(outDir);
        final URI uri = s3Coords.toUri(S3StorageDruidModule.SCHEME);
        // Open the S3 object lazily so retries can re-open the stream.
        final ByteSource byteSource = new ByteSource() {

            @Override
            public InputStream openStream() throws IOException {
                try {
                    return buildFileObject(uri).openInputStream();
                } catch (AmazonServiceException e) {
                    if (e.getCause() != null) {
                        if (S3Utils.S3RETRY.apply(e)) {
                            throw new IOException("Recoverable exception", e);
                        }
                    }
                    throw new RuntimeException(e);
                }
            }
        };
        if (CompressionUtils.isZip(s3Coords.getPath())) {
            final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, S3Utils.S3RETRY, false);
            log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outDir.getAbsolutePath());
            return result;
        }
        if (CompressionUtils.isGz(s3Coords.getPath())) {
            final String fname = Files.getNameWithoutExtension(uri.getPath());
            final File outFile = new File(outDir, fname);
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile, S3Utils.S3RETRY);
            log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outFile.getAbsolutePath());
            return result;
        }
        throw new IAE("Do not know how to load file type at [%s]", uri.toString());
    } catch (Exception e) {
        try {
            FileUtils.deleteDirectory(outDir);
        } catch (IOException ioe) {
            log.warn(ioe, "Failed to remove output directory [%s] for segment pulled from [%s]", outDir.getAbsolutePath(), s3Coords.toString());
        }
        throw new SegmentLoadingException(e, e.getMessage());
    }
}
Also used : SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) FileUtils(org.apache.druid.java.util.common.FileUtils) AmazonServiceException(com.amazonaws.AmazonServiceException) ByteSource(com.google.common.io.ByteSource) IOException(java.io.IOException) IAE(org.apache.druid.java.util.common.IAE) URI(java.net.URI) File(java.io.File) AmazonS3Exception(com.amazonaws.services.s3.model.AmazonS3Exception) AmazonClientException(com.amazonaws.AmazonClientException)
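
A hedged usage sketch of the method above. The bucket, key, and output directory are invented, and puller stands in for an S3DataSegmentPuller instance wired up elsewhere (the method has package-private visibility, so the call site would sit in the same package, as Druid's S3 load-spec class does). A key ending in neither .zip nor .gz drives execution into the IAE branch:

CloudObjectLocation coords = new CloudObjectLocation("my-bucket", "segments/index.bin");
File outDir = new File("/tmp/druid-pull");
try {
    puller.getSegmentFiles(coords, outDir);
} catch (SegmentLoadingException e) {
    // The IAE thrown for the unrecognized ".bin" extension is caught by the
    // outer catch (Exception e) above and rethrown as a SegmentLoadingException,
    // after the partially written outDir has been deleted.
}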

Example 27 with IAE

Use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

From class HyperLogLogCollector, the method add:

public void add(byte[] hashedValue) {
    if (hashedValue.length < MIN_BYTES_REQUIRED) {
        throw new IAE("Insufficient bytes, need[%d] got [%d]", MIN_BYTES_REQUIRED, hashedValue.length);
    }
    estimatedCardinality = null;
    final ByteBuffer buffer = ByteBuffer.wrap(hashedValue);
    // The last two bytes of the hash select the register bucket.
    short bucket = (short) (buffer.getShort(hashedValue.length - 2) & BUCKET_MASK);
    byte positionOf1 = 0;
    // Scan the first 8 bytes of the hash for the leftmost 1 bit.
    for (int i = 0; i < 8; ++i) {
        byte lookupVal = ByteBitLookup.LOOKUP[UnsignedBytes.toInt(hashedValue[i])];
        switch (lookupVal) {
            case 0:
                // All-zero byte: advance past its 8 bits and keep scanning.
                positionOf1 += (byte) 8;
                continue;
            default:
                // Found the leading 1 bit; setting i = 8 exits the loop.
                positionOf1 += lookupVal;
                i = 8;
                break;
        }
    }
    add(bucket, positionOf1);
}
Also used : IAE(org.apache.druid.java.util.common.IAE) ByteBuffer(java.nio.ByteBuffer)
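
For a quick illustration of the guard clause: the loop reads hashedValue[0..7] and the bucket comes from the final two bytes, so at least 10 bytes are required. A minimal sketch, assuming MIN_BYTES_REQUIRED is 10, that the collector comes from Druid's makeLatestCollector() factory, and that Guava's Hashing supplies the digest:

HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
try {
    collector.add(new byte[4]);
} catch (IAE e) {
    // "Insufficient bytes, need[10] got [4]" (assuming MIN_BYTES_REQUIRED is 10)
}
// A 16-byte Murmur3_128 digest comfortably clears the minimum.
collector.add(Hashing.murmur3_128().hashString("some-value", StandardCharsets.UTF_8).asBytes());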

Example 28 with IAE

Use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

From class InputRowSerde, the method getTypeHelperMap:

public static Map<String, IndexSerdeTypeHelper> getTypeHelperMap(DimensionsSpec dimensionsSpec) {
    Map<String, IndexSerdeTypeHelper> typeHelperMap = new HashMap<>();
    for (DimensionSchema dimensionSchema : dimensionsSpec.getDimensions()) {
        IndexSerdeTypeHelper typeHelper;
        switch (dimensionSchema.getColumnType().getType()) {
            case STRING:
                typeHelper = STRING_HELPER;
                break;
            case LONG:
                typeHelper = LONG_HELPER;
                break;
            case FLOAT:
                typeHelper = FLOAT_HELPER;
                break;
            case DOUBLE:
                typeHelper = DOUBLE_HELPER;
                break;
            default:
                throw new IAE("Invalid type: [%s]", dimensionSchema.getColumnType());
        }
        typeHelperMap.put(dimensionSchema.getName(), typeHelper);
    }
    return typeHelperMap;
}
Also used : HashMap(java.util.HashMap) IAE(org.apache.druid.java.util.common.IAE) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema)
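
A hedged usage sketch, assuming the List<DimensionSchema> constructor of DimensionsSpec and the schema classes from org.apache.druid.data.input.impl; the dimension names are invented:

DimensionsSpec spec = new DimensionsSpec(
    Arrays.asList(
        new StringDimensionSchema("country"),
        new LongDimensionSchema("visits")
    )
);
Map<String, IndexSerdeTypeHelper> helpers = InputRowSerde.getTypeHelperMap(spec);
// helpers.get("country") is the STRING_HELPER and helpers.get("visits") the
// LONG_HELPER; a complex column type (e.g. a sketch column) would fall through
// to the default branch and raise IAE("Invalid type: ...").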

Example 29 with IAE

Use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

From class JobHelper, the method getURIFromSegment:

public static URI getURIFromSegment(DataSegment dataSegment) {
    // There is no good way around this...
    // TODO: add getURI() to URIDataPuller
    final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
    final String type = loadSpec.get("type").toString();
    final URI segmentLocURI;
    if ("s3_zip".equals(type)) {
        if ("s3a".equals(loadSpec.get("S3Schema"))) {
            segmentLocURI = URI.create(StringUtils.format("s3a://%s/%s", loadSpec.get("bucket"), loadSpec.get("key")));
        } else {
            segmentLocURI = URI.create(StringUtils.format("s3n://%s/%s", loadSpec.get("bucket"), loadSpec.get("key")));
        }
    } else if ("hdfs".equals(type)) {
        segmentLocURI = URI.create(loadSpec.get("path").toString());
    } else if ("google".equals(type)) {
        // Segment names contain : in their path.
        // Google Cloud Storage supports : but Hadoop does not.
        // This becomes an issue when re-indexing using the current segments.
        // The Hadoop getSplits code doesn't understand the : and returns "Relative path in absolute URI"
        // This could be fixed using the same code that generates path names for hdfs segments using
        // getHdfsStorageDir. But that wouldn't fix this issue for people who already have segments with ":".
        // Because of this we just URL encode the : making everything work as it should.
        segmentLocURI = URI.create(StringUtils.format("gs://%s/%s", loadSpec.get("bucket"), StringUtils.replaceChar(loadSpec.get("path").toString(), ':', "%3A")));
    } else if ("local".equals(type)) {
        try {
            segmentLocURI = new URI("file", null, loadSpec.get("path").toString(), null, null);
        } catch (URISyntaxException e) {
            throw new ISE(e, "Unable to form simple file uri");
        }
    } else {
        try {
            throw new IAE("Cannot figure out loadSpec %s", HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(loadSpec));
        } catch (JsonProcessingException e) {
            throw new ISE("Cannot write Map with json mapper");
        }
    }
    return segmentLocURI;
}
Also used : ISE(org.apache.druid.java.util.common.ISE) URISyntaxException(java.net.URISyntaxException) IAE(org.apache.druid.java.util.common.IAE) URI(java.net.URI) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException)
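
The colon workaround in the google branch is worth a concrete look, since Druid segment paths embed ISO-8601 timestamps. A minimal sketch using only the StringUtils helpers already called above; the bucket and path are invented:

String path = "wikipedia/2020-01-01T00:00:00.000Z_2020-01-02T00:00:00.000Z/0/index.zip";
String encoded = StringUtils.replaceChar(path, ':', "%3A");
URI gsUri = URI.create(StringUtils.format("gs://%s/%s", "my-bucket", encoded));
// gs://my-bucket/wikipedia/2020-01-01T00%3A00%3A00.000Z_2020-01-02T00%3A00%3A00.000Z/0/index.zip
// Hadoop's getSplits no longer rejects the path with "Relative path in absolute URI".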

Example 30 with IAE

Use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

From class IndexTaskClient, the method submitRequest:

/**
 * Sends an HTTP request to the task of the specified {@code taskId} and returns a response if it succeeded.
 */
protected <IntermediateType, FinalType> FinalType submitRequest(
    String taskId,
    // nullable if content is empty
    @Nullable String mediaType,
    HttpMethod method,
    String encodedPathSuffix,
    @Nullable String encodedQueryString,
    byte[] content,
    HttpResponseHandler<IntermediateType, FinalType> responseHandler,
    boolean retry
) throws IOException, ChannelException, NoTaskLocationException {
    final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
    while (true) {
        String path = StringUtils.format("%s/%s/%s", BASE_PATH, StringUtils.urlEncode(taskId), encodedPathSuffix);
        Optional<TaskStatus> status = taskInfoProvider.getTaskStatus(taskId);
        if (!status.isPresent() || !status.get().isRunnable()) {
            throw new TaskNotRunnableException(StringUtils.format("Aborting request because task [%s] is not runnable", taskId));
        }
        final TaskLocation location = taskInfoProvider.getTaskLocation(taskId);
        if (location.equals(TaskLocation.unknown())) {
            throw new NoTaskLocationException(StringUtils.format("No TaskLocation available for task [%s]", taskId));
        }
        final Request request = createRequest(taskId, location, path, encodedQueryString, method, mediaType, content);
        Either<StringFullResponseHolder, FinalType> response = null;
        try {
            // Netty throws some annoying exceptions if a connection can't be opened, which happens relatively frequently
            // for tasks that happen to still be starting up, so test the connection first to keep the logs clean.
            checkConnection(request.getUrl().getHost(), request.getUrl().getPort());
            response = submitRequest(request, responseHandler);
            if (response.isValue()) {
                return response.valueOrThrow();
            } else {
                final StringBuilder exceptionMessage = new StringBuilder();
                final HttpResponseStatus httpResponseStatus = response.error().getStatus();
                final String httpResponseContent = response.error().getContent();
                exceptionMessage.append("Received server error with status [").append(httpResponseStatus).append("]");
                if (!Strings.isNullOrEmpty(httpResponseContent)) {
                    final String choppedMessage = StringUtils.chop(StringUtils.nullToEmptyNonDruidDataString(httpResponseContent), 1000);
                    exceptionMessage.append("; first 1KB of body: ").append(choppedMessage);
                }
                if (httpResponseStatus.getCode() == 400) {
                    // don't bother retrying if it's a bad request
                    throw new IAE(exceptionMessage.toString());
                } else {
                    throw new IOE(exceptionMessage.toString());
                }
            }
        } catch (IOException | ChannelException e) {
            // Since workers are free to move tasks around to different ports, there is a chance that a task may have been
            // moved but our view of its location has not been updated yet from ZK. To detect this case, we send a header
            // identifying our expected recipient in the request; if this doesn't correspond to the worker we messaged, the
            // worker will return an HTTP 404 with its ID in the response header. If we get a mismatching task ID, then
            // we will wait for a short period then retry the request indefinitely, expecting the task's location to
            // eventually be updated.
            final Duration delay;
            if (response != null && !response.isValue() && response.error().getStatus().equals(HttpResponseStatus.NOT_FOUND)) {
                String headerId = StringUtils.urlDecode(response.error().getResponse().headers().get(ChatHandlerResource.TASK_ID_HEADER));
                if (headerId != null && !headerId.equals(taskId)) {
                    log.warn("Expected worker to have taskId [%s] but has taskId [%s], will retry in [%d]s", taskId, headerId, TASK_MISMATCH_RETRY_DELAY_SECONDS);
                    delay = Duration.standardSeconds(TASK_MISMATCH_RETRY_DELAY_SECONDS);
                } else {
                    delay = retryPolicy.getAndIncrementRetryDelay();
                }
            } else {
                delay = retryPolicy.getAndIncrementRetryDelay();
            }
            final String urlForLog = request.getUrl().toString();
            if (!retry) {
                // if retry=false, we probably aren't too concerned if the operation doesn't succeed (i.e. the request was
                // for informational purposes only); log at INFO instead of WARN.
                log.noStackTrace().info(e, "submitRequest failed for [%s]", urlForLog);
                throw e;
            } else if (delay == null) {
                // When retrying, log the final failure at WARN level, since it is likely to be bad news.
                log.warn(e, "submitRequest failed for [%s]", urlForLog);
                throw e;
            } else {
                try {
                    final long sleepTime = delay.getMillis();
                    // When retrying, log non-final failures at INFO level.
                    log.noStackTrace().info(e, "submitRequest failed for [%s]; will try again in [%s]", urlForLog, new Duration(sleepTime).toString());
                    Thread.sleep(sleepTime);
                } catch (InterruptedException e2) {
                    Thread.currentThread().interrupt();
                    e.addSuppressed(e2);
                    throw new RuntimeException(e);
                }
            }
        } catch (NoTaskLocationException e) {
            log.info("No TaskLocation available for task [%s], this task may not have been assigned to a worker yet " + "or may have already completed", taskId);
            throw e;
        } catch (Exception e) {
            log.warn(e, "Exception while sending request");
            throw e;
        }
    }
}
Also used : HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) Request(org.apache.druid.java.util.http.client.Request) Duration(org.joda.time.Duration) IOException(java.io.IOException) TaskStatus(org.apache.druid.indexer.TaskStatus) IAE(org.apache.druid.java.util.common.IAE) TaskLocation(org.apache.druid.indexer.TaskLocation) MalformedURLException(java.net.MalformedURLException) ChannelException(org.jboss.netty.channel.ChannelException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) ExecutionException(java.util.concurrent.ExecutionException) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) IOE(org.apache.druid.java.util.common.IOE)
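
To see the parameters in context, here is a hedged sketch of a status poll that a concrete subclass might issue. The task id and path suffix are invented, and the handler assumes org.apache.druid.java.util.http.client.response.StringFullResponseHandler, which pairs with the StringFullResponseHolder imported above:

StringFullResponseHolder holder = submitRequest(
    "index_kafka_wikipedia_0",                       // taskId (invented)
    null,                                            // mediaType: nullable, no body here
    HttpMethod.GET,
    "status",                                        // encodedPathSuffix under BASE_PATH
    null,                                            // no query string
    new byte[0],                                     // empty content
    new StringFullResponseHandler(StandardCharsets.UTF_8),
    true                                             // retry through transient failures
);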

Aggregations

IAE (org.apache.druid.java.util.common.IAE): 115
ISE (org.apache.druid.java.util.common.ISE): 23
IOException (java.io.IOException): 20
ByteBuffer (java.nio.ByteBuffer): 19
ArrayList (java.util.ArrayList): 16
List (java.util.List): 14
Expr (org.apache.druid.math.expr.Expr): 14
Nullable (javax.annotation.Nullable): 12
ColumnType (org.apache.druid.segment.column.ColumnType): 10
HashSet (java.util.HashSet): 8
Map (java.util.Map): 8
Interval (org.joda.time.Interval): 8
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 7
HashMap (java.util.HashMap): 7
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 7
File (java.io.File): 6
Iterables (com.google.common.collect.Iterables): 5
Arrays (java.util.Arrays): 5
Test (org.junit.Test): 5
ImmutableMap (com.google.common.collect.ImmutableMap): 4