
Example 6 with RE

Use of org.apache.druid.java.util.common.RE in project druid by druid-io.

Class DirectDruidClient, method run:

@Override
public Sequence<T> run(final QueryPlus<T> queryPlus, final ResponseContext context) {
    final Query<T> query = queryPlus.getQuery();
    QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
    boolean isBySegment = QueryContexts.isBySegment(query);
    final JavaType queryResultType = isBySegment ? toolChest.getBySegmentResultType() : toolChest.getBaseResultType();
    final ListenableFuture<InputStream> future;
    final String url = scheme + "://" + host + "/druid/v2/";
    final String cancelUrl = url + query.getId();
    try {
        log.debug("Querying queryId[%s] url[%s]", query.getId(), url);
        final long requestStartTimeNs = System.nanoTime();
        final long timeoutAt = query.getContextValue(QUERY_FAIL_TIME);
        final long maxScatterGatherBytes = QueryContexts.getMaxScatterGatherBytes(query);
        final AtomicLong totalBytesGathered = context.getTotalBytes();
        final long maxQueuedBytes = QueryContexts.getMaxQueuedBytes(query, 0);
        final boolean usingBackpressure = maxQueuedBytes > 0;
        final HttpResponseHandler<InputStream, InputStream> responseHandler = new HttpResponseHandler<InputStream, InputStream>() {

            private final AtomicLong totalByteCount = new AtomicLong(0);

            private final AtomicLong queuedByteCount = new AtomicLong(0);

            private final AtomicLong channelSuspendedTime = new AtomicLong(0);

            private final BlockingQueue<InputStreamHolder> queue = new LinkedBlockingQueue<>();

            private final AtomicBoolean done = new AtomicBoolean(false);

            private final AtomicReference<String> fail = new AtomicReference<>();

            private final AtomicReference<TrafficCop> trafficCopRef = new AtomicReference<>();

            private QueryMetrics<? super Query<T>> queryMetrics;

            private long responseStartTimeNs;

            private QueryMetrics<? super Query<T>> acquireResponseMetrics() {
                if (queryMetrics == null) {
                    queryMetrics = toolChest.makeMetrics(query);
                    queryMetrics.server(host);
                }
                return queryMetrics;
            }

            /**
             * Queue a buffer. Returns true if we should keep reading, false otherwise.
             */
            private boolean enqueue(ChannelBuffer buffer, long chunkNum) throws InterruptedException {
                // Increment queuedByteCount before queueing the object, so queuedByteCount is at least as high as
                // the actual number of queued bytes at any particular time.
                final InputStreamHolder holder = InputStreamHolder.fromChannelBuffer(buffer, chunkNum);
                final long currentQueuedByteCount = queuedByteCount.addAndGet(holder.getLength());
                queue.put(holder);
                // True if we should keep reading.
                return !usingBackpressure || currentQueuedByteCount < maxQueuedBytes;
            }

            private InputStream dequeue() throws InterruptedException {
                final InputStreamHolder holder = queue.poll(checkQueryTimeout(), TimeUnit.MILLISECONDS);
                if (holder == null) {
                    throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query[%s] url[%s] timed out.", query.getId(), url));
                }
                final long currentQueuedByteCount = queuedByteCount.addAndGet(-holder.getLength());
                if (usingBackpressure && currentQueuedByteCount < maxQueuedBytes) {
                    long backPressureTime = Preconditions.checkNotNull(trafficCopRef.get(), "No TrafficCop, how can this be?").resume(holder.getChunkNum());
                    channelSuspendedTime.addAndGet(backPressureTime);
                }
                return holder.getStream();
            }

            @Override
            public ClientResponse<InputStream> handleResponse(HttpResponse response, TrafficCop trafficCop) {
                trafficCopRef.set(trafficCop);
                checkQueryTimeout();
                checkTotalBytesLimit(response.getContent().readableBytes());
                log.debug("Initial response from url[%s] for queryId[%s]", url, query.getId());
                responseStartTimeNs = System.nanoTime();
                acquireResponseMetrics().reportNodeTimeToFirstByte(responseStartTimeNs - requestStartTimeNs).emit(emitter);
                final boolean continueReading;
                try {
                    log.trace("Got a response from [%s] for query ID[%s], subquery ID[%s]", url, query.getId(), query.getSubQueryId());
                    final String responseContext = response.headers().get(QueryResource.HEADER_RESPONSE_CONTEXT);
                    context.addRemainingResponse(query.getMostSpecificId(), VAL_TO_REDUCE_REMAINING_RESPONSES);
                    // responseContext may be null in case of error or query timeout
                    if (responseContext != null) {
                        context.merge(ResponseContext.deserialize(responseContext, objectMapper));
                    }
                    continueReading = enqueue(response.getContent(), 0L);
                } catch (final IOException e) {
                    log.error(e, "Error parsing response context from url [%s]", url);
                    return ClientResponse.finished(new InputStream() {

                        @Override
                        public int read() throws IOException {
                            throw e;
                        }
                    });
                } catch (InterruptedException e) {
                    log.error(e, "Queue appending interrupted");
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                }
                totalByteCount.addAndGet(response.getContent().readableBytes());
                return ClientResponse.finished(new SequenceInputStream(new Enumeration<InputStream>() {

                    @Override
                    public boolean hasMoreElements() {
                        if (fail.get() != null) {
                            throw new RE(fail.get());
                        }
                        checkQueryTimeout();
                        // Then the stream should be spouting good InputStreams.
                        synchronized (done) {
                            return !done.get() || !queue.isEmpty();
                        }
                    }

                    @Override
                    public InputStream nextElement() {
                        if (fail.get() != null) {
                            throw new RE(fail.get());
                        }
                        try {
                            return dequeue();
                        } catch (InterruptedException e) {
                            Thread.currentThread().interrupt();
                            throw new RuntimeException(e);
                        }
                    }
                }), continueReading);
            }

            @Override
            public ClientResponse<InputStream> handleChunk(ClientResponse<InputStream> clientResponse, HttpChunk chunk, long chunkNum) {
                checkQueryTimeout();
                final ChannelBuffer channelBuffer = chunk.getContent();
                final int bytes = channelBuffer.readableBytes();
                checkTotalBytesLimit(bytes);
                boolean continueReading = true;
                if (bytes > 0) {
                    try {
                        continueReading = enqueue(channelBuffer, chunkNum);
                    } catch (InterruptedException e) {
                        log.error(e, "Unable to put finalizing input stream into Sequence queue for url [%s]", url);
                        Thread.currentThread().interrupt();
                        throw new RuntimeException(e);
                    }
                    totalByteCount.addAndGet(bytes);
                }
                return ClientResponse.finished(clientResponse.getObj(), continueReading);
            }

            @Override
            public ClientResponse<InputStream> done(ClientResponse<InputStream> clientResponse) {
                long stopTimeNs = System.nanoTime();
                long nodeTimeNs = stopTimeNs - requestStartTimeNs;
                final long nodeTimeMs = TimeUnit.NANOSECONDS.toMillis(nodeTimeNs);
                log.debug("Completed queryId[%s] request to url[%s] with %,d bytes returned in %,d millis [%,f b/s].", query.getId(), url, totalByteCount.get(), nodeTimeMs, // Floating math; division by zero will yield Inf, not exception
                totalByteCount.get() / (0.001 * nodeTimeMs));
                QueryMetrics<? super Query<T>> responseMetrics = acquireResponseMetrics();
                responseMetrics.reportNodeTime(nodeTimeNs);
                responseMetrics.reportNodeBytes(totalByteCount.get());
                if (usingBackpressure) {
                    responseMetrics.reportBackPressureTime(channelSuspendedTime.get());
                }
                responseMetrics.emit(emitter);
                synchronized (done) {
                    try {
                        // An empty buffer is put at the end to give SequenceInputStream.close() something to close out
                        // after done is set to true, regardless of the rest of the stream's state.
                        queue.put(InputStreamHolder.fromChannelBuffer(ChannelBuffers.EMPTY_BUFFER, Long.MAX_VALUE));
                    } catch (InterruptedException e) {
                        log.error(e, "Unable to put finalizing input stream into Sequence queue for url [%s]", url);
                        Thread.currentThread().interrupt();
                        throw new RuntimeException(e);
                    } finally {
                        done.set(true);
                    }
                }
                return ClientResponse.finished(clientResponse.getObj());
            }

            @Override
            public void exceptionCaught(final ClientResponse<InputStream> clientResponse, final Throwable e) {
                String msg = StringUtils.format("Query[%s] url[%s] failed with exception msg [%s]", query.getId(), url, e.getMessage());
                setupResponseReadFailure(msg, e);
            }

            private void setupResponseReadFailure(String msg, Throwable th) {
                fail.set(msg);
                queue.clear();
                queue.offer(InputStreamHolder.fromStream(new InputStream() {

                    @Override
                    public int read() throws IOException {
                        if (th != null) {
                            throw new IOException(msg, th);
                        } else {
                            throw new IOException(msg);
                        }
                    }
                }, -1, 0));
            }

            // Returns remaining timeout or throws exception if timeout already elapsed.
            private long checkQueryTimeout() {
                long timeLeft = timeoutAt - System.currentTimeMillis();
                if (timeLeft <= 0) {
                    String msg = StringUtils.format("Query[%s] url[%s] timed out.", query.getId(), url);
                    setupResponseReadFailure(msg, null);
                    throw new QueryTimeoutException(msg);
                } else {
                    return timeLeft;
                }
            }

            private void checkTotalBytesLimit(long bytes) {
                if (maxScatterGatherBytes < Long.MAX_VALUE && totalBytesGathered.addAndGet(bytes) > maxScatterGatherBytes) {
                    String msg = StringUtils.format("Query[%s] url[%s] max scatter-gather bytes limit reached.", query.getId(), url);
                    setupResponseReadFailure(msg, null);
                    throw new ResourceLimitExceededException(msg);
                }
            }
        };
        long timeLeft = timeoutAt - System.currentTimeMillis();
        if (timeLeft <= 0) {
            throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query[%s] url[%s] timed out.", query.getId(), url));
        }
        future = httpClient.go(
            new Request(HttpMethod.POST, new URL(url))
                .setContent(objectMapper.writeValueAsBytes(QueryContexts.withTimeout(query, timeLeft)))
                .setHeader(
                    HttpHeaders.Names.CONTENT_TYPE,
                    isSmile ? SmileMediaTypes.APPLICATION_JACKSON_SMILE : MediaType.APPLICATION_JSON
                ),
            responseHandler,
            Duration.millis(timeLeft)
        );
        queryWatcher.registerQueryFuture(query, future);
        openConnections.getAndIncrement();
        Futures.addCallback(future, new FutureCallback<InputStream>() {

            @Override
            public void onSuccess(InputStream result) {
                openConnections.getAndDecrement();
            }

            @Override
            public void onFailure(Throwable t) {
                openConnections.getAndDecrement();
                if (future.isCancelled()) {
                    cancelQuery(query, cancelUrl);
                }
            }
        }, // The callback is non-blocking and quick, so it's OK to schedule it using directExecutor()
        Execs.directExecutor());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    Sequence<T> retVal = new BaseSequence<>(new BaseSequence.IteratorMaker<T, JsonParserIterator<T>>() {

        @Override
        public JsonParserIterator<T> make() {
            return new JsonParserIterator<T>(queryResultType, future, url, query, host, toolChest.decorateObjectMapper(objectMapper, query));
        }

        @Override
        public void cleanup(JsonParserIterator<T> iterFromMake) {
            CloseableUtils.closeAndWrapExceptions(iterFromMake);
        }
    });
    // avoid the cost of de-serializing and then re-serializing again when adding to cache
    if (!isBySegment) {
        retVal = Sequences.map(retVal, toolChest.makePreComputeManipulatorFn(query, MetricManipulatorFns.deserializing()));
    }
    return retVal;
}
Also used : ClientResponse(org.apache.druid.java.util.http.client.response.ClientResponse) Query(org.apache.druid.query.Query) URL(java.net.URL) ChannelBuffer(org.jboss.netty.buffer.ChannelBuffer) QueryTimeoutException(org.apache.druid.query.QueryTimeoutException) BlockingQueue(java.util.concurrent.BlockingQueue) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Enumeration(java.util.Enumeration) SequenceInputStream(java.io.SequenceInputStream) InputStream(java.io.InputStream) Request(org.apache.druid.java.util.http.client.Request) HttpResponse(org.jboss.netty.handler.codec.http.HttpResponse) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) BaseSequence(org.apache.druid.java.util.common.guava.BaseSequence) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) JavaType(com.fasterxml.jackson.databind.JavaType) AtomicLong(java.util.concurrent.atomic.AtomicLong) RE(org.apache.druid.java.util.common.RE) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) QueryMetrics(org.apache.druid.query.QueryMetrics) HttpResponseHandler(org.apache.druid.java.util.http.client.response.HttpResponseHandler) HttpChunk(org.jboss.netty.handler.codec.http.HttpChunk)
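
Across these examples, RE is constructed in two shapes: a format string with arguments (as in the SequenceInputStream enumeration above), and a cause plus a format string (as in the later inventory-view and S3 examples). A minimal sketch of both forms; the class and method names below are illustrative, not part of Druid:

import org.apache.druid.java.util.common.RE;

public class ReUsageSketch
{
    // Format-string constructor: the message is built from the template and arguments.
    static void failFast(String queryId, String url)
    {
        throw new RE("Query[%s] url[%s] failed", queryId, url);
    }

    // Cause-wrapping constructor: the original exception is preserved as the cause.
    static void wrapCause(Exception cause, String bucket, String prefix)
    {
        throw new RE(cause, "Failed to list bucket[%s], prefix[%s]", bucket, prefix);
    }
}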

Example 7 with RE

Use of org.apache.druid.java.util.common.RE in project druid by druid-io.

Class HttpServerInventoryView, method serverInventoryInitialized:

// Best-effort wait for the first segment listing fetch from all servers, then call
// segmentViewInitialized on all registered segment callbacks.
private void serverInventoryInitialized() {
    long start = System.currentTimeMillis();
    long serverSyncWaitTimeout = config.getServerTimeout() + 2 * ChangeRequestHttpSyncer.HTTP_TIMEOUT_EXTRA_MS;
    List<DruidServerHolder> uninitializedServers = new ArrayList<>();
    for (DruidServerHolder server : servers.values()) {
        if (!server.isSyncedSuccessfullyAtleastOnce()) {
            uninitializedServers.add(server);
        }
    }
    while (!uninitializedServers.isEmpty() && ((System.currentTimeMillis() - start) < serverSyncWaitTimeout)) {
        try {
            Thread.sleep(5000);
        } catch (InterruptedException ex) {
            throw new RE(ex, "Interrupted while waiting for queryable server initial successful sync.");
        }
        log.info("Checking whether all servers have been synced at least once yet....");
        Iterator<DruidServerHolder> iter = uninitializedServers.iterator();
        while (iter.hasNext()) {
            if (iter.next().isSyncedSuccessfullyAtleastOnce()) {
                iter.remove();
            }
        }
    }
    if (uninitializedServers.isEmpty()) {
        log.info("All servers have been synced successfully at least once.");
    } else {
        for (DruidServerHolder server : uninitializedServers) {
            log.warn("Server[%s] might not yet be synced successfully. We will continue to retry that in the background.", server.druidServer.getName());
        }
    }
    log.info("Calling SegmentCallback.segmentViewInitialized() for all callbacks.");
    runSegmentCallbacks(new Function<SegmentCallback, CallbackAction>() {

        @Override
        public CallbackAction apply(SegmentCallback input) {
            return input.segmentViewInitialized();
        }
    });
}
Also used : RE(org.apache.druid.java.util.common.RE) ArrayList(java.util.ArrayList)
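
serverInventoryInitialized is essentially a bounded poll: sleep, re-check every server, give up once the timeout elapses, and convert an InterruptedException into an RE. A generic sketch of that pattern under the same convention (the helper name and signature are hypothetical, not Druid APIs):

import java.util.function.BooleanSupplier;
import org.apache.druid.java.util.common.RE;

public class BoundedPoll
{
    // Polls the condition every pollMillis until it is true or timeoutMillis elapses.
    // Returns true if the condition was met within the timeout.
    static boolean waitFor(BooleanSupplier condition, long timeoutMillis, long pollMillis)
    {
        final long start = System.currentTimeMillis();
        while (!condition.getAsBoolean()) {
            if (System.currentTimeMillis() - start >= timeoutMillis) {
                return false;
            }
            try {
                Thread.sleep(pollMillis);
            } catch (InterruptedException ex) {
                Thread.currentThread().interrupt();
                throw new RE(ex, "Interrupted while waiting for condition.");
            }
        }
        return true;
    }
}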

Example 8 with RE

Use of org.apache.druid.java.util.common.RE in project druid by druid-io.

Class ObjectSummaryIterator, method fetchNextBatch:

private void fetchNextBatch() {
    try {
        result = S3Utils.retryS3Operation(() -> s3Client.listObjectsV2(request), maxRetries);
        request.setContinuationToken(result.getNextContinuationToken());
        objectSummaryIterator = result.getObjectSummaries().iterator();
    } catch (AmazonS3Exception e) {
        throw new RE(e, "Failed to get object summaries from S3 bucket[%s], prefix[%s]; S3 error: %s", request.getBucketName(), request.getPrefix(), e.getMessage());
    } catch (Exception e) {
        throw new RE(e, "Failed to get object summaries from S3 bucket[%s], prefix[%s]", request.getBucketName(), request.getPrefix());
    }
}
Also used : RE(org.apache.druid.java.util.common.RE) AmazonS3Exception(com.amazonaws.services.s3.model.AmazonS3Exception) NoSuchElementException(java.util.NoSuchElementException)
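
fetchNextBatch advances a continuation-token cursor and swaps in a fresh page iterator; the enclosing iterator presumably calls it whenever the current page runs dry. A self-contained sketch of that paging shape, with the S3 client replaced by a hypothetical PageSource so nothing below is a real AWS or Druid API:

import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

public class PagingIterator<T> implements Iterator<T>
{
    // Hypothetical page source standing in for the listObjectsV2 + continuation-token calls.
    public interface PageSource<T>
    {
        // Returns the next page, or an empty list when there is nothing left.
        List<T> nextPage();
    }

    private final PageSource<T> source;
    private Iterator<T> current = Collections.emptyIterator();
    private boolean exhausted = false;

    public PagingIterator(PageSource<T> source)
    {
        this.source = source;
    }

    @Override
    public boolean hasNext()
    {
        while (!current.hasNext() && !exhausted) {
            final List<T> page = source.nextPage(); // analogous to fetchNextBatch()
            exhausted = page.isEmpty();
            current = page.iterator();
        }
        return current.hasNext();
    }

    @Override
    public T next()
    {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        return current.next();
    }
}

Error handling (wrapping the fetch failure in an RE, as the Druid code above does) is omitted to keep the paging shape visible.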

Example 9 with RE

Use of org.apache.druid.java.util.common.RE in project druid by druid-io.

Class IndexGeneratorJobTest, method verifyJob:

private void verifyJob(IndexGeneratorJob job) throws IOException {
    Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job)));
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    IndexGeneratorJob.getPublishedSegmentAndIndexZipFilePaths(config).forEach(segmentAndIndexZipFilePath -> intervalToSegments.computeIfAbsent(segmentAndIndexZipFilePath.getSegment().getInterval(), k -> new ArrayList<>()).add(segmentAndIndexZipFilePath.getSegment()));
    List<DataSegmentAndIndexZipFilePath> dataSegmentAndIndexZipFilePaths = IndexGeneratorJob.getPublishedSegmentAndIndexZipFilePaths(config);
    JobHelper.renameIndexFilesForSegments(config.getSchema(), dataSegmentAndIndexZipFilePaths);
    JobHelper.maybeDeleteIntermediatePath(true, config.getSchema());
    File workingPath = new File(config.makeIntermediatePath().toUri().getPath());
    Assert.assertTrue(workingPath.exists());
    final Map<Interval, List<File>> intervalToIndexFiles = new HashMap<>();
    int segmentNum = 0;
    for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) {
        Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
        File segmentOutputFolder = new File(StringUtils.format("%s/%s/%s_%s/%s", config.getSchema().getIOConfig().getSegmentOutputPath(), config.getSchema().getDataSchema().getDataSource(), currTime.toString(), currTime.plusDays(1).toString(), config.getSchema().getTuningConfig().getVersion()));
        Assert.assertTrue(segmentOutputFolder.exists());
        Assert.assertEquals(shardInfo.length, segmentOutputFolder.list().length);
        for (int partitionNum = 0; partitionNum < shardInfo.length; ++partitionNum) {
            File individualSegmentFolder = new File(segmentOutputFolder, Integer.toString(partitionNum));
            Assert.assertTrue(individualSegmentFolder.exists());
            File indexZip = new File(individualSegmentFolder, "index.zip");
            Assert.assertTrue(indexZip.exists());
            intervalToIndexFiles.computeIfAbsent(new Interval(currTime, currTime.plusDays(1)), k -> new ArrayList<>()).add(indexZip);
        }
    }
    Assert.assertEquals(intervalToSegments.size(), intervalToIndexFiles.size());
    segmentNum = 0;
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final Interval interval = entry.getKey();
        final List<DataSegment> segments = entry.getValue();
        final List<File> indexFiles = intervalToIndexFiles.get(interval);
        // Assert presence before sorting so a missing interval fails the assertion instead of throwing an NPE.
        Assert.assertNotNull(indexFiles);
        Collections.sort(segments);
        indexFiles.sort(Comparator.comparing(File::getAbsolutePath));
        Assert.assertEquals(segments.size(), indexFiles.size());
        Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
        for (int i = 0; i < segments.size(); i++) {
            final DataSegment dataSegment = segments.get(i);
            final File indexZip = indexFiles.get(i);
            Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
            Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
            Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
            Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
            if ("website".equals(datasourceName)) {
                Assert.assertEquals("website", dataSegment.getDataSource());
                Assert.assertEquals("host", dataSegment.getDimensions().get(0));
                Assert.assertEquals("visited_num", dataSegment.getMetrics().get(0));
                Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
            } else if ("inherit_dims".equals(datasourceName)) {
                Assert.assertEquals("inherit_dims", dataSegment.getDataSource());
                Assert.assertEquals(ImmutableList.of("X", "Y", "M", "Q", "B", "F"), dataSegment.getDimensions());
                Assert.assertEquals("count", dataSegment.getMetrics().get(0));
            } else if ("inherit_dims2".equals(datasourceName)) {
                Assert.assertEquals("inherit_dims2", dataSegment.getDataSource());
                Assert.assertEquals(ImmutableList.of("B", "F", "M", "Q", "X", "Y"), dataSegment.getDimensions());
                Assert.assertEquals("count", dataSegment.getMetrics().get(0));
            } else {
                Assert.fail("Test did not specify supported datasource name");
            }
            if (forceExtendableShardSpecs) {
                NumberedShardSpec spec = (NumberedShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals(i, spec.getPartitionNum());
                Assert.assertEquals(shardInfo.length, spec.getNumCorePartitions());
            } else if ("hashed".equals(partitionType)) {
                Integer[] hashShardInfo = (Integer[]) shardInfo[i];
                HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals((int) hashShardInfo[0], spec.getPartitionNum());
                Assert.assertEquals((int) hashShardInfo[1], spec.getNumCorePartitions());
            } else if ("single".equals(partitionType)) {
                String[] singleDimensionShardInfo = (String[]) shardInfo[i];
                SingleDimensionShardSpec spec = (SingleDimensionShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals(singleDimensionShardInfo[0], spec.getStart());
                Assert.assertEquals(singleDimensionShardInfo[1], spec.getEnd());
            } else {
                throw new RE("Invalid partition type:[%s]", partitionType);
            }
        }
    }
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) SequenceFile(org.apache.hadoop.io.SequenceFile) ByteBuffer(java.nio.ByteBuffer) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MRJobConfig(org.apache.hadoop.mapreduce.MRJobConfig) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) Path(org.apache.hadoop.fs.Path) Parameterized(org.junit.runners.Parameterized) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) StringUtils(org.apache.druid.java.util.common.StringUtils) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) DateTimeComparator(org.joda.time.DateTimeComparator) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) Intervals(org.apache.druid.java.util.common.Intervals) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) BytesWritable(org.apache.hadoop.io.BytesWritable) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction) Before(org.junit.Before) RE(org.apache.druid.java.util.common.RE) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) FileUtils(org.apache.commons.io.FileUtils) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Rule(org.junit.Rule) TreeMap(java.util.TreeMap) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Writer(org.apache.hadoop.io.SequenceFile.Writer) Assert(org.junit.Assert) Comparator(java.util.Comparator) DataSchema(org.apache.druid.segment.indexing.DataSchema) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder)
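
Both maps in the test above (intervalToSegments and intervalToIndexFiles) are built with the same grouping idiom: computeIfAbsent lazily creates the per-key list the first time a key is seen. Isolated with hypothetical data, the idiom looks like this:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class GroupingSketch
{
    // Groups values by a derived key, creating each bucket lazily via computeIfAbsent.
    static Map<String, List<Integer>> groupByParity(List<Integer> values)
    {
        final Map<String, List<Integer>> grouped = new HashMap<>();
        for (Integer value : values) {
            final String key = (value % 2 == 0) ? "even" : "odd";
            grouped.computeIfAbsent(key, k -> new ArrayList<>()).add(value);
        }
        return grouped;
    }
}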

Example 10 with RE

Use of org.apache.druid.java.util.common.RE in project druid by druid-io.

Class CompactionTask, method decideRollupAndQueryGranularityCarryOver:

/**
 * Decide which rollup & queryGranularity values to propagate for the compacted segment based on
 * the data segments given
 *
 * @param rollup                    Reference to update with the rollup value
 * @param queryGranularity          Reference to update with the queryGranularity value
 * @param queryableIndexAndSegments The segments to compact
 */
private static void decideRollupAndQueryGranularityCarryOver(SettableSupplier<Boolean> rollup, SettableSupplier<Granularity> queryGranularity, List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments) {
    final SettableSupplier<Boolean> rollupIsValid = new SettableSupplier<>(true);
    for (NonnullPair<QueryableIndex, DataSegment> pair : queryableIndexAndSegments) {
        final QueryableIndex index = pair.lhs;
        if (index.getMetadata() == null) {
            throw new RE("Index metadata doesn't exist for segment[%s]", pair.rhs.getId());
        }
        // Pick rollup value if all segments being compacted have the same, non-null, value otherwise set it to false
        if (rollupIsValid.get()) {
            Boolean isRollup = index.getMetadata().isRollup();
            if (isRollup == null) {
                rollupIsValid.set(false);
                rollup.set(false);
            } else if (rollup.get() == null) {
                rollup.set(isRollup);
            } else if (!rollup.get().equals(isRollup.booleanValue())) {
                rollupIsValid.set(false);
                rollup.set(false);
            }
        }
        // Pick the finer, non-null, of the query granularities of the segments being compacted
        Granularity current = index.getMetadata().getQueryGranularity();
        queryGranularity.set(compareWithCurrent(queryGranularity.get(), current));
    }
}
Also used : SettableSupplier(org.apache.druid.common.guava.SettableSupplier) RE(org.apache.druid.java.util.common.RE) QueryableIndex(org.apache.druid.segment.QueryableIndex) LockGranularity(org.apache.druid.indexing.common.LockGranularity) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DataSegment(org.apache.druid.timeline.DataSegment)
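
The method writes its decisions into the two SettableSupplier arguments rather than returning them, so a caller reads the results back with get() afterwards. A hedged sketch of that call shape as it might appear inside CompactionTask; the surrounding setup is hypothetical, and only the one-argument SettableSupplier constructor and the set/get calls are confirmed by the snippet above:

// Hypothetical caller; queryableIndexAndSegments would hold the segments being compacted.
// Assumes SettableSupplier tolerates a null initial value (the method above checks rollup.get() == null).
final SettableSupplier<Boolean> rollup = new SettableSupplier<>(null);
final SettableSupplier<Granularity> queryGranularity = new SettableSupplier<>(null);
decideRollupAndQueryGranularityCarryOver(rollup, queryGranularity, queryableIndexAndSegments);

// After the call, read the carried-over values back.
final boolean carryRollup = Boolean.TRUE.equals(rollup.get());
final Granularity carryGranularity = queryGranularity.get(); // may be null if no segment had one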

Aggregations

RE (org.apache.druid.java.util.common.RE): 46 usages
IOException (java.io.IOException): 11 usages
Request (org.apache.druid.java.util.http.client.Request): 10 usages
URL (java.net.URL): 8 usages
ArrayList (java.util.ArrayList): 6 usages
Test (org.junit.Test): 6 usages
InputStream (java.io.InputStream): 5 usages
ExecutionException (java.util.concurrent.ExecutionException): 5 usages
ISE (org.apache.druid.java.util.common.ISE): 5 usages
Map (java.util.Map): 4 usages
StatusResponseHolder (org.apache.druid.java.util.http.client.response.StatusResponseHolder): 4 usages
JavaType (com.fasterxml.jackson.databind.JavaType): 3 usages
HashMap (java.util.HashMap): 3 usages
DataSegment (org.apache.druid.timeline.DataSegment): 3 usages
OSSException (com.aliyun.oss.OSSException): 2 usages
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2 usages
NamedType (com.fasterxml.jackson.databind.jsontype.NamedType): 2 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 2 usages
ApiException (io.kubernetes.client.openapi.ApiException): 2 usages
File (java.io.File): 2 usages