Use of org.apache.druid.java.util.common.RE in project druid by druid-io.
The class DirectDruidClient, method run.
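For context before the excerpts: RE is Druid's RuntimeException subclass that builds its message from a format string plus arguments, optionally wrapping a cause. The sketch below is illustrative only (the helper name and parameters are not part of Druid); it shows the two constructor shapes used throughout the excerpts that follow, starting with DirectDruidClient.run below.

import org.apache.druid.java.util.common.RE;

class ReUsageSketch
{
  // Illustrative only: the two constructor shapes that appear in the excerpts below.
  static void rethrowWithContext(String queryId, Exception cause)
  {
    if (cause == null) {
      // message-only form, with format-style arguments
      throw new RE("Query[%s] failed", queryId);
    }
    // cause-plus-message form
    throw new RE(cause, "Query[%s] failed: %s", queryId, cause.getMessage());
  }
}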
@Override
public Sequence<T> run(final QueryPlus<T> queryPlus, final ResponseContext context) {
final Query<T> query = queryPlus.getQuery();
QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
boolean isBySegment = QueryContexts.isBySegment(query);
final JavaType queryResultType = isBySegment ? toolChest.getBySegmentResultType() : toolChest.getBaseResultType();
final ListenableFuture<InputStream> future;
final String url = scheme + "://" + host + "/druid/v2/";
final String cancelUrl = url + query.getId();
try {
log.debug("Querying queryId[%s] url[%s]", query.getId(), url);
final long requestStartTimeNs = System.nanoTime();
final long timeoutAt = query.getContextValue(QUERY_FAIL_TIME);
final long maxScatterGatherBytes = QueryContexts.getMaxScatterGatherBytes(query);
final AtomicLong totalBytesGathered = context.getTotalBytes();
final long maxQueuedBytes = QueryContexts.getMaxQueuedBytes(query, 0);
final boolean usingBackpressure = maxQueuedBytes > 0;
final HttpResponseHandler<InputStream, InputStream> responseHandler = new HttpResponseHandler<InputStream, InputStream>() {
private final AtomicLong totalByteCount = new AtomicLong(0);
private final AtomicLong queuedByteCount = new AtomicLong(0);
private final AtomicLong channelSuspendedTime = new AtomicLong(0);
private final BlockingQueue<InputStreamHolder> queue = new LinkedBlockingQueue<>();
private final AtomicBoolean done = new AtomicBoolean(false);
private final AtomicReference<String> fail = new AtomicReference<>();
private final AtomicReference<TrafficCop> trafficCopRef = new AtomicReference<>();
private QueryMetrics<? super Query<T>> queryMetrics;
private long responseStartTimeNs;
private QueryMetrics<? super Query<T>> acquireResponseMetrics() {
if (queryMetrics == null) {
queryMetrics = toolChest.makeMetrics(query);
queryMetrics.server(host);
}
return queryMetrics;
}
/**
* Queue a buffer. Returns true if we should keep reading, false otherwise.
*/
private boolean enqueue(ChannelBuffer buffer, long chunkNum) throws InterruptedException {
// Increment queuedByteCount before queueing the object, so queuedByteCount is at least as high as
// the actual number of queued bytes at any particular time.
final InputStreamHolder holder = InputStreamHolder.fromChannelBuffer(buffer, chunkNum);
final long currentQueuedByteCount = queuedByteCount.addAndGet(holder.getLength());
queue.put(holder);
// True if we should keep reading.
return !usingBackpressure || currentQueuedByteCount < maxQueuedBytes;
}
private InputStream dequeue() throws InterruptedException {
final InputStreamHolder holder = queue.poll(checkQueryTimeout(), TimeUnit.MILLISECONDS);
if (holder == null) {
throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query[%s] url[%s] timed out.", query.getId(), url));
}
final long currentQueuedByteCount = queuedByteCount.addAndGet(-holder.getLength());
if (usingBackpressure && currentQueuedByteCount < maxQueuedBytes) {
long backPressureTime = Preconditions.checkNotNull(trafficCopRef.get(), "No TrafficCop, how can this be?").resume(holder.getChunkNum());
channelSuspendedTime.addAndGet(backPressureTime);
}
return holder.getStream();
}
@Override
public ClientResponse<InputStream> handleResponse(HttpResponse response, TrafficCop trafficCop) {
trafficCopRef.set(trafficCop);
checkQueryTimeout();
checkTotalBytesLimit(response.getContent().readableBytes());
log.debug("Initial response from url[%s] for queryId[%s]", url, query.getId());
responseStartTimeNs = System.nanoTime();
acquireResponseMetrics().reportNodeTimeToFirstByte(responseStartTimeNs - requestStartTimeNs).emit(emitter);
final boolean continueReading;
try {
log.trace("Got a response from [%s] for query ID[%s], subquery ID[%s]", url, query.getId(), query.getSubQueryId());
final String responseContext = response.headers().get(QueryResource.HEADER_RESPONSE_CONTEXT);
context.addRemainingResponse(query.getMostSpecificId(), VAL_TO_REDUCE_REMAINING_RESPONSES);
// responseContext header may be null in case of error or query timeout
if (responseContext != null) {
context.merge(ResponseContext.deserialize(responseContext, objectMapper));
}
continueReading = enqueue(response.getContent(), 0L);
} catch (final IOException e) {
log.error(e, "Error parsing response context from url [%s]", url);
return ClientResponse.finished(new InputStream() {
@Override
public int read() throws IOException {
throw e;
}
});
} catch (InterruptedException e) {
log.error(e, "Queue appending interrupted");
Thread.currentThread().interrupt();
throw new RuntimeException(e);
}
totalByteCount.addAndGet(response.getContent().readableBytes());
return ClientResponse.finished(new SequenceInputStream(new Enumeration<InputStream>() {
@Override
public boolean hasMoreElements() {
if (fail.get() != null) {
throw new RE(fail.get());
}
checkQueryTimeout();
// Otherwise the stream should keep spouting good InputStreams until done is set and the queue is drained.
synchronized (done) {
return !done.get() || !queue.isEmpty();
}
}
@Override
public InputStream nextElement() {
if (fail.get() != null) {
throw new RE(fail.get());
}
try {
return dequeue();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new RuntimeException(e);
}
}
}), continueReading);
}
@Override
public ClientResponse<InputStream> handleChunk(ClientResponse<InputStream> clientResponse, HttpChunk chunk, long chunkNum) {
checkQueryTimeout();
final ChannelBuffer channelBuffer = chunk.getContent();
final int bytes = channelBuffer.readableBytes();
checkTotalBytesLimit(bytes);
boolean continueReading = true;
if (bytes > 0) {
try {
continueReading = enqueue(channelBuffer, chunkNum);
} catch (InterruptedException e) {
log.error(e, "Unable to put finalizing input stream into Sequence queue for url [%s]", url);
Thread.currentThread().interrupt();
throw new RuntimeException(e);
}
totalByteCount.addAndGet(bytes);
}
return ClientResponse.finished(clientResponse.getObj(), continueReading);
}
@Override
public ClientResponse<InputStream> done(ClientResponse<InputStream> clientResponse) {
long stopTimeNs = System.nanoTime();
long nodeTimeNs = stopTimeNs - requestStartTimeNs;
final long nodeTimeMs = TimeUnit.NANOSECONDS.toMillis(nodeTimeNs);
log.debug("Completed queryId[%s] request to url[%s] with %,d bytes returned in %,d millis [%,f b/s].", query.getId(), url, totalByteCount.get(), nodeTimeMs, // Floating math; division by zero will yield Inf, not exception
totalByteCount.get() / (0.001 * nodeTimeMs));
QueryMetrics<? super Query<T>> responseMetrics = acquireResponseMetrics();
responseMetrics.reportNodeTime(nodeTimeNs);
responseMetrics.reportNodeBytes(totalByteCount.get());
if (usingBackpressure) {
responseMetrics.reportBackPressureTime(channelSuspendedTime.get());
}
responseMetrics.emit(emitter);
synchronized (done) {
try {
// An empty buffer is put at the end so that SequenceInputStream.close() has something to close out
// after done is set to true, regardless of the rest of the stream's state.
queue.put(InputStreamHolder.fromChannelBuffer(ChannelBuffers.EMPTY_BUFFER, Long.MAX_VALUE));
} catch (InterruptedException e) {
log.error(e, "Unable to put finalizing input stream into Sequence queue for url [%s]", url);
Thread.currentThread().interrupt();
throw new RuntimeException(e);
} finally {
done.set(true);
}
}
return ClientResponse.finished(clientResponse.getObj());
}
@Override
public void exceptionCaught(final ClientResponse<InputStream> clientResponse, final Throwable e) {
String msg = StringUtils.format("Query[%s] url[%s] failed with exception msg [%s]", query.getId(), url, e.getMessage());
setupResponseReadFailure(msg, e);
}
private void setupResponseReadFailure(String msg, Throwable th) {
fail.set(msg);
queue.clear();
queue.offer(InputStreamHolder.fromStream(new InputStream() {
@Override
public int read() throws IOException {
if (th != null) {
throw new IOException(msg, th);
} else {
throw new IOException(msg);
}
}
}, -1, 0));
}
// Returns remaining timeout or throws exception if timeout already elapsed.
private long checkQueryTimeout() {
long timeLeft = timeoutAt - System.currentTimeMillis();
if (timeLeft <= 0) {
String msg = StringUtils.format("Query[%s] url[%s] timed out.", query.getId(), url);
setupResponseReadFailure(msg, null);
throw new QueryTimeoutException(msg);
} else {
return timeLeft;
}
}
private void checkTotalBytesLimit(long bytes) {
if (maxScatterGatherBytes < Long.MAX_VALUE && totalBytesGathered.addAndGet(bytes) > maxScatterGatherBytes) {
String msg = StringUtils.format("Query[%s] url[%s] max scatter-gather bytes limit reached.", query.getId(), url);
setupResponseReadFailure(msg, null);
throw new ResourceLimitExceededException(msg);
}
}
};
long timeLeft = timeoutAt - System.currentTimeMillis();
if (timeLeft <= 0) {
throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query[%s] url[%s] timed out.", query.getId(), url));
}
future = httpClient.go(
new Request(HttpMethod.POST, new URL(url))
.setContent(objectMapper.writeValueAsBytes(QueryContexts.withTimeout(query, timeLeft)))
.setHeader(HttpHeaders.Names.CONTENT_TYPE, isSmile ? SmileMediaTypes.APPLICATION_JACKSON_SMILE : MediaType.APPLICATION_JSON),
responseHandler,
Duration.millis(timeLeft));
queryWatcher.registerQueryFuture(query, future);
openConnections.getAndIncrement();
Futures.addCallback(future, new FutureCallback<InputStream>() {
@Override
public void onSuccess(InputStream result) {
openConnections.getAndDecrement();
}
@Override
public void onFailure(Throwable t) {
openConnections.getAndDecrement();
if (future.isCancelled()) {
cancelQuery(query, cancelUrl);
}
}
}, // The callback is non-blocking and quick, so it's OK to schedule it using directExecutor()
Execs.directExecutor());
} catch (IOException e) {
throw new RuntimeException(e);
}
Sequence<T> retVal = new BaseSequence<>(new BaseSequence.IteratorMaker<T, JsonParserIterator<T>>() {
@Override
public JsonParserIterator<T> make() {
return new JsonParserIterator<T>(queryResultType, future, url, query, host, toolChest.decorateObjectMapper(objectMapper, query));
}
@Override
public void cleanup(JsonParserIterator<T> iterFromMake) {
CloseableUtils.closeAndWrapExceptions(iterFromMake);
}
});
// avoid the cost of de-serializing and then re-serializing again when adding to cache
if (!isBySegment) {
retVal = Sequences.map(retVal, toolChest.makePreComputeManipulatorFn(query, MetricManipulatorFns.deserializing()));
}
return retVal;
}
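The RE throws inside hasMoreElements() and nextElement() above surface a failure that the response handler recorded asynchronously via setupResponseReadFailure(). A stripped-down sketch of that hand-off, with hypothetical class and method names:

import java.io.InputStream;
import java.util.Enumeration;
import java.util.NoSuchElementException;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.druid.java.util.common.RE;

// Hypothetical illustration: a network thread records a failure message, and the
// consuming thread rethrows it as an RE on the next element access.
class FailableStreamEnumeration implements Enumeration<InputStream>
{
  private final AtomicReference<String> fail = new AtomicReference<>();
  private final BlockingQueue<InputStream> queue = new LinkedBlockingQueue<>();

  void recordFailure(String message)
  {
    fail.set(message);
  }

  void offer(InputStream stream)
  {
    queue.offer(stream);
  }

  @Override
  public boolean hasMoreElements()
  {
    if (fail.get() != null) {
      throw new RE(fail.get());   // surface the asynchronous failure to the reader
    }
    return !queue.isEmpty();
  }

  @Override
  public InputStream nextElement()
  {
    if (fail.get() != null) {
      throw new RE(fail.get());
    }
    final InputStream next = queue.poll();
    if (next == null) {
      throw new NoSuchElementException();
    }
    return next;
  }
}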
Use of org.apache.druid.java.util.common.RE in project druid by druid-io.
The class HttpServerInventoryView, method serverInventoryInitialized.
// Best-effort wait for the first segment-listing fetch from all servers, then call
// segmentViewInitialized() on all registered segment callbacks.
private void serverInventoryInitialized() {
long start = System.currentTimeMillis();
long serverSyncWaitTimeout = config.getServerTimeout() + 2 * ChangeRequestHttpSyncer.HTTP_TIMEOUT_EXTRA_MS;
List<DruidServerHolder> uninitializedServers = new ArrayList<>();
for (DruidServerHolder server : servers.values()) {
if (!server.isSyncedSuccessfullyAtleastOnce()) {
uninitializedServers.add(server);
}
}
while (!uninitializedServers.isEmpty() && ((System.currentTimeMillis() - start) < serverSyncWaitTimeout)) {
try {
Thread.sleep(5000);
} catch (InterruptedException ex) {
throw new RE(ex, "Interrupted while waiting for queryable server initial successful sync.");
}
log.info("Checking whether all servers have been synced at least once yet....");
Iterator<DruidServerHolder> iter = uninitializedServers.iterator();
while (iter.hasNext()) {
if (iter.next().isSyncedSuccessfullyAtleastOnce()) {
iter.remove();
}
}
}
if (uninitializedServers.isEmpty()) {
log.info("All servers have been synced successfully at least once.");
} else {
for (DruidServerHolder server : uninitializedServers) {
log.warn("Server[%s] might not yet be synced successfully. We will continue to retry that in the background.", server.druidServer.getName());
}
}
log.info("Calling SegmentCallback.segmentViewInitialized() for all callbacks.");
runSegmentCallbacks(new Function<SegmentCallback, CallbackAction>() {
@Override
public CallbackAction apply(SegmentCallback input) {
return input.segmentViewInitialized();
}
});
}
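The single RE here wraps the InterruptedException so the bounded wait loop fails fast rather than swallowing the interrupt. A standalone sketch of the same loop shape, with a hypothetical SyncedCheck interface standing in for DruidServerHolder:

import java.util.List;
import org.apache.druid.java.util.common.RE;

class InitialSyncWaitSketch
{
  interface SyncedCheck
  {
    boolean isSyncedAtLeastOnce();
  }

  // Poll until every server has synced once or the timeout elapses; an interrupt
  // is converted into an RE, as in serverInventoryInitialized() above.
  static void waitForInitialSync(List<SyncedCheck> uninitialized, long timeoutMillis)
  {
    final long start = System.currentTimeMillis();
    while (!uninitialized.isEmpty() && (System.currentTimeMillis() - start) < timeoutMillis) {
      try {
        Thread.sleep(5000);
      }
      catch (InterruptedException ex) {
        throw new RE(ex, "Interrupted while waiting for initial successful sync.");
      }
      uninitialized.removeIf(SyncedCheck::isSyncedAtLeastOnce);
    }
  }
}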
Use of org.apache.druid.java.util.common.RE in project druid by druid-io.
The class ObjectSummaryIterator, method fetchNextBatch.
private void fetchNextBatch() {
try {
result = S3Utils.retryS3Operation(() -> s3Client.listObjectsV2(request), maxRetries);
request.setContinuationToken(result.getNextContinuationToken());
objectSummaryIterator = result.getObjectSummaries().iterator();
} catch (AmazonS3Exception e) {
throw new RE(e, "Failed to get object summaries from S3 bucket[%s], prefix[%s]; S3 error: %s", request.getBucketName(), request.getPrefix(), e.getMessage());
} catch (Exception e) {
throw new RE(e, "Failed to get object summaries from S3 bucket[%s], prefix[%s]", request.getBucketName(), request.getPrefix());
}
}
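Both catch blocks convert a checked failure into an RE whose message carries the bucket and prefix being listed; the AmazonS3Exception branch additionally embeds the S3 error message. A generic sketch of that wrapping pattern (the helper and its parameters are hypothetical, not part of Druid or the AWS SDK):

import java.util.concurrent.Callable;
import org.apache.druid.java.util.common.RE;

class S3ListingSketch
{
  // Run a listing operation and, on failure, rethrow as an RE that names the
  // bucket and prefix, so the caller can see exactly which listing failed.
  static <T> T listWithContext(Callable<T> operation, String bucket, String prefix)
  {
    try {
      return operation.call();
    }
    catch (Exception e) {
      throw new RE(e, "Failed to get object summaries from S3 bucket[%s], prefix[%s]", bucket, prefix);
    }
  }
}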
Use of org.apache.druid.java.util.common.RE in project druid by druid-io.
The class IndexGeneratorJobTest, method verifyJob.
private void verifyJob(IndexGeneratorJob job) throws IOException {
Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job)));
final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
IndexGeneratorJob.getPublishedSegmentAndIndexZipFilePaths(config).forEach(
segmentAndIndexZipFilePath -> intervalToSegments
.computeIfAbsent(segmentAndIndexZipFilePath.getSegment().getInterval(), k -> new ArrayList<>())
.add(segmentAndIndexZipFilePath.getSegment()));
List<DataSegmentAndIndexZipFilePath> dataSegmentAndIndexZipFilePaths = IndexGeneratorJob.getPublishedSegmentAndIndexZipFilePaths(config);
JobHelper.renameIndexFilesForSegments(config.getSchema(), dataSegmentAndIndexZipFilePaths);
JobHelper.maybeDeleteIntermediatePath(true, config.getSchema());
File workingPath = new File(config.makeIntermediatePath().toUri().getPath());
Assert.assertTrue(workingPath.exists());
final Map<Interval, List<File>> intervalToIndexFiles = new HashMap<>();
int segmentNum = 0;
for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) {
Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
File segmentOutputFolder = new File(StringUtils.format(
"%s/%s/%s_%s/%s",
config.getSchema().getIOConfig().getSegmentOutputPath(),
config.getSchema().getDataSchema().getDataSource(),
currTime.toString(),
currTime.plusDays(1).toString(),
config.getSchema().getTuningConfig().getVersion()));
Assert.assertTrue(segmentOutputFolder.exists());
Assert.assertEquals(shardInfo.length, segmentOutputFolder.list().length);
for (int partitionNum = 0; partitionNum < shardInfo.length; ++partitionNum) {
File individualSegmentFolder = new File(segmentOutputFolder, Integer.toString(partitionNum));
Assert.assertTrue(individualSegmentFolder.exists());
File indexZip = new File(individualSegmentFolder, "index.zip");
Assert.assertTrue(indexZip.exists());
intervalToIndexFiles.computeIfAbsent(new Interval(currTime, currTime.plusDays(1)), k -> new ArrayList<>()).add(indexZip);
}
}
Assert.assertEquals(intervalToSegments.size(), intervalToIndexFiles.size());
segmentNum = 0;
for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
final Interval interval = entry.getKey();
final List<DataSegment> segments = entry.getValue();
final List<File> indexFiles = intervalToIndexFiles.get(interval);
Assert.assertNotNull(indexFiles);
Collections.sort(segments);
indexFiles.sort(Comparator.comparing(File::getAbsolutePath));
Assert.assertEquals(segments.size(), indexFiles.size());
Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
for (int i = 0; i < segments.size(); i++) {
final DataSegment dataSegment = segments.get(i);
final File indexZip = indexFiles.get(i);
Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
if ("website".equals(datasourceName)) {
Assert.assertEquals("website", dataSegment.getDataSource());
Assert.assertEquals("host", dataSegment.getDimensions().get(0));
Assert.assertEquals("visited_num", dataSegment.getMetrics().get(0));
Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
} else if ("inherit_dims".equals(datasourceName)) {
Assert.assertEquals("inherit_dims", dataSegment.getDataSource());
Assert.assertEquals(ImmutableList.of("X", "Y", "M", "Q", "B", "F"), dataSegment.getDimensions());
Assert.assertEquals("count", dataSegment.getMetrics().get(0));
} else if ("inherit_dims2".equals(datasourceName)) {
Assert.assertEquals("inherit_dims2", dataSegment.getDataSource());
Assert.assertEquals(ImmutableList.of("B", "F", "M", "Q", "X", "Y"), dataSegment.getDimensions());
Assert.assertEquals("count", dataSegment.getMetrics().get(0));
} else {
Assert.fail("Test did not specify supported datasource name");
}
if (forceExtendableShardSpecs) {
NumberedShardSpec spec = (NumberedShardSpec) dataSegment.getShardSpec();
Assert.assertEquals(i, spec.getPartitionNum());
Assert.assertEquals(shardInfo.length, spec.getNumCorePartitions());
} else if ("hashed".equals(partitionType)) {
Integer[] hashShardInfo = (Integer[]) shardInfo[i];
HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
Assert.assertEquals((int) hashShardInfo[0], spec.getPartitionNum());
Assert.assertEquals((int) hashShardInfo[1], spec.getNumCorePartitions());
} else if ("single".equals(partitionType)) {
String[] singleDimensionShardInfo = (String[]) shardInfo[i];
SingleDimensionShardSpec spec = (SingleDimensionShardSpec) dataSegment.getShardSpec();
Assert.assertEquals(singleDimensionShardInfo[0], spec.getStart());
Assert.assertEquals(singleDimensionShardInfo[1], spec.getEnd());
} else {
throw new RE("Invalid partition type:[%s]", partitionType);
}
}
}
}
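At the end of the loop, RE marks the unreachable branch: an unrecognized partition type means the test fixture itself is misconfigured, so the test fails loudly instead of silently passing. The shape of that guard in isolation (hypothetical class, assertions elided):

import org.apache.druid.java.util.common.RE;

class PartitionTypeGuardSketch
{
  static void verifyShardSpec(String partitionType)
  {
    switch (partitionType) {
      case "hashed":
      case "single":
        // type-specific ShardSpec assertions would go here
        break;
      default:
        // unexpected value: fail loudly rather than skipping the checks
        throw new RE("Invalid partition type:[%s]", partitionType);
    }
  }
}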
Use of org.apache.druid.java.util.common.RE in project druid by druid-io.
The class CompactionTask, method decideRollupAndQueryGranularityCarryOver.
/**
* Decide which rollup and query granularity values to propagate to the compacted segment, based on
* the data segments given.
*
* @param rollup Reference to update with the rollup value
* @param queryGranularity Reference to update with the queryGranularity value
* @param queryableIndexAndSegments The segments to compact
*/
private static void decideRollupAndQueryGranularityCarryOver(SettableSupplier<Boolean> rollup, SettableSupplier<Granularity> queryGranularity, List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments) {
final SettableSupplier<Boolean> rollupIsValid = new SettableSupplier<>(true);
for (NonnullPair<QueryableIndex, DataSegment> pair : queryableIndexAndSegments) {
final QueryableIndex index = pair.lhs;
if (index.getMetadata() == null) {
throw new RE("Index metadata doesn't exist for segment[%s]", pair.rhs.getId());
}
// Pick the rollup value only if all segments being compacted share the same non-null value; otherwise set it to false
if (rollupIsValid.get()) {
Boolean isRollup = index.getMetadata().isRollup();
if (isRollup == null) {
rollupIsValid.set(false);
rollup.set(false);
} else if (rollup.get() == null) {
rollup.set(isRollup);
} else if (!rollup.get().equals(isRollup.booleanValue())) {
rollupIsValid.set(false);
rollup.set(false);
}
}
// Pick the finer, non-null, of the query granularities of the segments being compacted
Granularity current = index.getMetadata().getQueryGranularity();
queryGranularity.set(compareWithCurrent(queryGranularity.get(), current));
}
}
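Besides the RE guard for missing metadata, the method's main job is the carry-over rule for rollup: keep a shared value only when every segment agrees on a non-null value, otherwise fall back to false. A standalone sketch of just that rule (hypothetical helper, independent of Druid's SettableSupplier):

import java.util.List;

class RollupCarryOverSketch
{
  // Returns the rollup value to carry over: the common value if all segments
  // agree on a non-null value, false if any value is null or they disagree,
  // and null only when the list is empty.
  static Boolean decideRollup(List<Boolean> segmentRollups)
  {
    Boolean decided = null;
    for (Boolean isRollup : segmentRollups) {
      if (isRollup == null) {
        return false;
      }
      if (decided == null) {
        decided = isRollup;
      } else if (!decided.equals(isRollup)) {
        return false;
      }
    }
    return decided;
  }
}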