use of io.confluent.ksql.internal.ScalablePushQueryMetrics in project ksql by confluentinc.
the class PushRouting method connectToHosts.
/**
 * Connects to all of the hosts provided.
 *
 * <p>Every host is first registered on {@code pushConnectionsHandle} with an
 * {@code IN_PROGRESS} placeholder result and is then handed to {@code executeOrRouteQuery}.
 * Once all per-host futures have finished, each placeholder is replaced with the host's real
 * routing result. If any host fails, the failed hosts are marked {@code FAILED}, the results
 * of the hosts that did connect are still recorded under their own node, and, unless the hosts
 * were added dynamically, the handle itself is completed exceptionally.
 *
 * @param hosts the nodes to connect to
 * @param pushConnectionsHandle the handle on which per-host results are recorded
 * @param dynamicallyAddedNode when true, a connection failure is only logged and does not
 *     complete the handle exceptionally
 * @param catchupHosts hosts for which the catchup flag is passed to
 *     {@code executeOrRouteQuery}
 * @return A future for a PushConnectionsHandle, which can be used to terminate connections.
 */
@SuppressWarnings("checkstyle:ParameterNumber")
private CompletableFuture<PushConnectionsHandle> connectToHosts(
    final ServiceContext serviceContext,
    final PushPhysicalPlanManager pushPhysicalPlanManager,
    final ConfiguredStatement<Query> statement,
    final Collection<KsqlNode> hosts,
    final LogicalSchema outputSchema,
    final TransientQueryQueue transientQueryQueue,
    final PushConnectionsHandle pushConnectionsHandle,
    final boolean dynamicallyAddedNode,
    final Optional<ScalablePushQueryMetrics> scalablePushQueryMetrics,
    final Set<KsqlNode> catchupHosts,
    final PushRoutingOptions pushRoutingOptions,
    final String thisHostName
) {
  // Insertion-ordered so that the "first failed host" reported below is deterministic.
  final Map<KsqlNode, CompletableFuture<RoutingResult>> futureMap = new LinkedHashMap<>();
  for (final KsqlNode node : hosts) {
    // Placeholder entry; it is replaced with the real result once the connection is made.
    pushConnectionsHandle.add(node, new RoutingResult(RoutingResultStatus.IN_PROGRESS, () -> {
    }));
    // Completed by the per-host execution when the request on that host ends.
    final CompletableFuture<Void> callback = new CompletableFuture<>();
    callback.handle((v, t) -> {
      if (t == null) {
        pushConnectionsHandle.get(node).ifPresent(result -> {
          result.close();
          result.updateStatus(RoutingResultStatus.COMPLETE);
        });
        LOG.info("Host {} completed request {}.", node, pushPhysicalPlanManager.getQueryId());
      } else if (t instanceof GapFoundException) {
        pushConnectionsHandle.get(node).ifPresent(result -> {
          result.close();
          result.updateStatus(RoutingResultStatus.OFFSET_GAP_FOUND);
        });
      } else {
        pushConnectionsHandle.completeExceptionally(t);
      }
      return null;
    });
    futureMap.put(node, executeOrRouteQuery(node, statement, serviceContext,
        pushPhysicalPlanManager, outputSchema, transientQueryQueue, callback,
        scalablePushQueryMetrics, pushConnectionsHandle.getOffsetsTracker(),
        catchupHosts.contains(node), pushRoutingOptions, thisHostName));
  }
  return CompletableFuture.allOf(futureMap.values().toArray(new CompletableFuture[0]))
      .thenApply(v -> {
        for (final KsqlNode node : hosts) {
          final CompletableFuture<RoutingResult> future = futureMap.get(node);
          final RoutingResult routingResult = future.join();
          pushConnectionsHandle.add(node, routingResult);
        }
        return pushConnectionsHandle;
      }).exceptionally(t -> {
        // Used for logging only: the first host whose future failed (null if none is found).
        final KsqlNode failedNode = futureMap.entrySet().stream()
            .filter(e -> e.getValue().isCompletedExceptionally())
            .map(Entry::getKey)
            .findFirst()
            .orElse(null);
        for (final KsqlNode n : hosts) {
          final CompletableFuture<RoutingResult> future = futureMap.get(n);
          // Take whatever completed exceptionally and mark it as failed
          if (future.isCompletedExceptionally()) {
            pushConnectionsHandle.get(n)
                .ifPresent(result -> result.updateStatus(RoutingResultStatus.FAILED));
          } else {
            // Record the successful result under the host it belongs to, not under the
            // failed host.
            final RoutingResult routingResult = future.join();
            pushConnectionsHandle.add(n, routingResult);
          }
        }
        // Fall back to the throwable itself if it carries no cause, so that reporting the
        // failure cannot itself fail with a NullPointerException.
        final Throwable cause = t.getCause() != null ? t.getCause() : t;
        LOG.warn("Error routing query {} id {} to host {} at timestamp {} with exception {}",
            statement.getStatementText(), pushPhysicalPlanManager.getQueryId(), failedNode,
            System.currentTimeMillis(), cause);
        // Only fail the original request when the hosts were part of it from the start; a
        // failure for dynamically added hosts is logged and the handle is left open.
        if (!dynamicallyAddedNode) {
          pushConnectionsHandle.completeExceptionally(new KsqlException(String.format(
              "Unable to execute push query \"%s\". %s",
              statement.getStatementText(), cause.getMessage())));
        }
        return pushConnectionsHandle;
      }).exceptionally(t -> {
        // Last line of defence: the failure handler above threw.
        LOG.error("Unexpected error handing exception", t);
        return pushConnectionsHandle;
      });
}
use of io.confluent.ksql.internal.ScalablePushQueryMetrics in project ksql by confluentinc.
the class PushRouting method checkForNewHosts.
/**
 * Reconciles the hosts this query is connected to with the nodes currently reported for the
 * query's scalable push registry, then schedules itself to run again after
 * {@code clusterCheckInterval}.
 *
 * <p>Hosts that appeared are connected via {@code connectToHosts} (flagged as dynamically
 * added); hosts that disappeared are removed from the handle, closed and marked
 * {@code REMOVED}. Nothing is done, and no further check is scheduled, once the handle is
 * closed.
 */
private void checkForNewHosts(
    final ServiceContext serviceContext,
    final PushPhysicalPlanManager pushPhysicalPlanManager,
    final ConfiguredStatement<Query> statement,
    final LogicalSchema outputSchema,
    final TransientQueryQueue transientQueryQueue,
    final PushConnectionsHandle pushConnectionsHandle,
    final Optional<ScalablePushQueryMetrics> scalablePushQueryMetrics,
    final PushRoutingOptions pushRoutingOptions,
    final String thisHostName
) {
  VertxUtils.checkContext(pushPhysicalPlanManager.getContext());
  if (pushConnectionsHandle.isClosed()) {
    return;
  }

  final Set<KsqlNode> clusterNodes =
      registryToNodes.apply(pushPhysicalPlanManager.getScalablePushRegistry());
  final Set<KsqlNode> connectedNodes = pushConnectionsHandle.getActiveHosts();

  // A node is a candidate unless a connection attempt to it is already in progress.
  final Set<KsqlNode> nodesToAdd = Sets.difference(clusterNodes, connectedNodes).stream()
      .filter(candidate -> !pushConnectionsHandle.get(candidate)
          .filter(known -> known.getStatus() == RoutingResultStatus.IN_PROGRESS)
          .isPresent())
      .collect(Collectors.toSet());
  final Set<KsqlNode> nodesToDrop = Sets.difference(connectedNodes, clusterNodes);

  if (!nodesToAdd.isEmpty()) {
    LOG.info("Dynamically adding new hosts {} for {}", nodesToAdd, pushPhysicalPlanManager.getQueryId());
    // Nodes whose previous connection ended with an offset gap are passed on as catchup hosts.
    final Set<KsqlNode> nodesNeedingCatchup = nodesToAdd.stream()
        .filter(candidate -> pushConnectionsHandle.get(candidate)
            .filter(known -> known.getStatus() == RoutingResultStatus.OFFSET_GAP_FOUND)
            .isPresent())
        .collect(Collectors.toSet());
    connectToHosts(serviceContext, pushPhysicalPlanManager, statement, nodesToAdd, outputSchema,
        transientQueryQueue, pushConnectionsHandle, true, scalablePushQueryMetrics,
        nodesNeedingCatchup, pushRoutingOptions, thisHostName);
  }

  if (!nodesToDrop.isEmpty()) {
    LOG.info("Dynamically removing hosts {} for {}", nodesToDrop, pushPhysicalPlanManager.getQueryId());
    for (final KsqlNode departed : nodesToDrop) {
      final RoutingResult dropped = pushConnectionsHandle.remove(departed);
      dropped.close();
      dropped.updateStatus(RoutingResultStatus.REMOVED);
    }
  }

  // Re-arm the timer so the cluster membership is checked again later.
  pushPhysicalPlanManager.getContext().owner().setTimer(
      clusterCheckInterval,
      ignoredTimerId -> checkForNewHosts(serviceContext, pushPhysicalPlanManager, statement,
          outputSchema, transientQueryQueue, pushConnectionsHandle, scalablePushQueryMetrics,
          pushRoutingOptions, thisHostName));
}
use of io.confluent.ksql.internal.ScalablePushQueryMetrics in project ksql by confluentinc.
the class ScalablePushQueryMetricsTest method setUp.
// Stubs the engine's service id and the clock, then builds the metrics instance under test.
@Before
public void setUp() {
  when(ksqlEngine.getServiceId()).thenReturn(KSQL_SERVICE_ID);
  when(time.nanoseconds()).thenReturn(6000L);

  final String serviceId = ksqlEngine.getServiceId();
  final Metrics metricsRegistry = new Metrics();
  scalablePushQueryMetrics =
      new ScalablePushQueryMetrics(serviceId, CUSTOM_TAGS, time, metricsRegistry);
}
use of io.confluent.ksql.internal.ScalablePushQueryMetrics in project ksql by confluentinc.
the class EngineExecutor method executeScalablePushQuery.
/**
 * Plans a scalable push query and returns the metadata used to run it.
 *
 * <p>Builds and validates the logical plan, creates the physical plan through a
 * {@code PushPhysicalPlanManager}, and wires a {@code TransientQueryQueue} together with the
 * populator and preparer callbacks that delegate to {@code pushRouting}.
 *
 * <p>On any failure an error metric is recorded (if metrics are present), the failure is
 * logged, and a {@code KsqlStatementException} wrapping the original exception is thrown.
 *
 * @param scalablePushQueryMetrics metrics to record errors on, if present
 * @return the metadata describing the prepared scalable push query
 * @throws KsqlStatementException if planning or setup fails for any reason
 */
ScalablePushQueryMetadata executeScalablePushQuery(
    final ImmutableAnalysis analysis,
    final ConfiguredStatement<Query> statement,
    final PushRouting pushRouting,
    final PushRoutingOptions pushRoutingOptions,
    final QueryPlannerOptions queryPlannerOptions,
    final Context context,
    final Optional<ScalablePushQueryMetrics> scalablePushQueryMetrics
) {
  final SessionConfig sessionConfig = statement.getSessionConfig();
  // If we ever change how many hops a request can do, we'll need to update this for correct
  // metrics.
  final RoutingNodeType routingNodeType = pushRoutingOptions.getHasBeenForwarded()
      ? RoutingNodeType.REMOTE_NODE
      : RoutingNodeType.SOURCE_NODE;
  // Stays null until the physical plan has been built; the catch block uses this to decide
  // which error metric to record.
  PushPhysicalPlan plan = null;
  try {
    final KsqlConfig ksqlConfig = sessionConfig.getConfig(false);
    final LogicalPlanNode logicalPlan = buildAndValidateLogicalPlan(
        statement, analysis, ksqlConfig, queryPlannerOptions, true);
    final PushPhysicalPlanCreator pushPhysicalPlanCreator =
        (offsetRange, catchupConsumerGroup) -> buildScalablePushPhysicalPlan(
            logicalPlan, analysis, context, offsetRange, catchupConsumerGroup);
    final Optional<PushOffsetRange> offsetRange = pushRoutingOptions.getContinuationToken()
        .map(PushOffsetRange::deserialize);
    final Optional<String> catchupConsumerGroup = pushRoutingOptions.getCatchupConsumerGroup();
    final PushPhysicalPlanManager physicalPlanManager = new PushPhysicalPlanManager(
        pushPhysicalPlanCreator, catchupConsumerGroup, offsetRange);
    final PushPhysicalPlan physicalPlan = physicalPlanManager.getPhysicalPlan();
    plan = physicalPlan;
    final TransientQueryQueue transientQueryQueue =
        new TransientQueryQueue(analysis.getLimitClause());
    final PushQueryMetadata.ResultType resultType =
        physicalPlan.getScalablePushRegistry().isTable()
            ? physicalPlan.getScalablePushRegistry().isWindowed()
                ? ResultType.WINDOWED_TABLE
                : ResultType.TABLE
            : ResultType.STREAM;
    final PushQueryQueuePopulator populator = () -> pushRouting.handlePushQuery(
        serviceContext, physicalPlanManager, statement, pushRoutingOptions,
        physicalPlan.getOutputSchema(), transientQueryQueue, scalablePushQueryMetrics,
        offsetRange);
    final PushQueryPreparer preparer = () -> pushRouting.preparePushQuery(
        physicalPlanManager, statement, pushRoutingOptions);
    final ScalablePushQueryMetadata metadata = new ScalablePushQueryMetadata(
        physicalPlan.getOutputSchema(), physicalPlan.getQueryId(), transientQueryQueue,
        scalablePushQueryMetrics, resultType, populator, preparer,
        physicalPlan.getSourceType(), routingNodeType,
        physicalPlan::getRowsReadFromDataSource);
    return metadata;
  } catch (final Exception e) {
    if (plan == null) {
      scalablePushQueryMetrics.ifPresent(m -> m.recordErrorRateForNoResult(1));
    } else {
      final PushPhysicalPlan pushPhysicalPlan = plan;
      scalablePushQueryMetrics.ifPresent(metrics -> metrics.recordErrorRate(
          1, pushPhysicalPlan.getSourceType(), routingNodeType));
    }
    // Exceptions may carry no message; treat that as an empty one so that the error handling
    // below cannot itself fail with a NullPointerException.
    final String message = e.getMessage();
    final String stmtLower = statement.getStatementText().toLowerCase(Locale.ROOT);
    final String messageLower = message == null ? "" : message.toLowerCase(Locale.ROOT);
    final String stackLower = Throwables.getStackTraceAsString(e).toLowerCase(Locale.ROOT);
    // Do not log the exception when it contains the statement text, since the query may
    // contain sensitive information; log only where the root cause was thrown instead.
    if (messageLower.contains(stmtLower) || stackLower.contains(stmtLower)) {
      final StackTraceElement loc =
          Iterables.getLast(Throwables.getCausalChain(e)).getStackTrace()[0];
      LOG.error("Failure to execute push query V2 {} {}, not logging the error message since it " + "contains the query string, which may contain sensitive information." + " If you see this LOG message, please submit a GitHub ticket and" + " we will scrub the statement text from the error at {}", pushRoutingOptions.debugString(), queryPlannerOptions.debugString(), loc);
    } else {
      LOG.error("Failure to execute push query V2. {} {}", pushRoutingOptions.debugString(), queryPlannerOptions.debugString(), e);
    }
    LOG.debug("Failed push query V2 text {}, {}", statement.getStatementText(), e);
    throw new KsqlStatementException(
        message == null ? "Server Error" + Arrays.toString(e.getStackTrace()) : message,
        statement.getStatementText(),
        e);
  }
}
use of io.confluent.ksql.internal.ScalablePushQueryMetrics in project ksql by confluentinc.
the class QueryMetricsUtil method initializeScalablePushMetricsCallback.
/**
 * Builds the callback that records per-request metrics for a scalable push query.
 *
 * <p>When no metrics object is present the callback does nothing. Otherwise it records the
 * status code and request size, then either the error variants of the response metrics (when
 * {@code resultForMetrics} holds no metadata) or the variants tagged with the query's source
 * type and routing node type, and finally adds the response size to the rate limiter.
 *
 * @param scalablePushQueryMetrics the metrics to record on, if present
 * @param scalablePushBandRateLimiter the limiter that is fed the response size
 * @param resultForMetrics holder of the query metadata; read when the callback is invoked
 * @return the metrics callback
 */
public static MetricsCallback initializeScalablePushMetricsCallback(
    final Optional<ScalablePushQueryMetrics> scalablePushQueryMetrics,
    final SlidingWindowRateLimiter scalablePushBandRateLimiter,
    final AtomicReference<ScalablePushQueryMetadata> resultForMetrics
) {
  return (statusCode, requestBytes, responseBytes, startTimeNanos) -> {
    // NOTE(review): the rate limiter below is only updated when metrics are present.
    // Confirm that this is intended.
    if (!scalablePushQueryMetrics.isPresent()) {
      return;
    }
    final ScalablePushQueryMetrics recorder = scalablePushQueryMetrics.get();
    recorder.recordStatusCode(statusCode);
    recorder.recordRequestSize(requestBytes);

    final ScalablePushQueryMetadata queryMetadata = resultForMetrics.get();
    if (queryMetadata != null) {
      final QuerySourceType source = queryMetadata.getSourceType();
      final RoutingNodeType nodeType = queryMetadata.getRoutingNodeType();
      recorder.recordResponseSize(responseBytes, source, nodeType);
      recorder.recordConnectionDuration(startTimeNanos, source, nodeType);
      recorder.recordRowsReturned(queryMetadata.getTotalRowsReturned(), source, nodeType);
      recorder.recordRowsProcessed(queryMetadata.getTotalRowsProcessed(), source, nodeType);
    } else {
      // No metadata was produced, so record the error variants.
      recorder.recordResponseSizeForError(responseBytes);
      recorder.recordConnectionDurationForError(startTimeNanos);
      recorder.recordZeroRowsReturnedForError();
      recorder.recordZeroRowsProcessedForError();
    }
    scalablePushBandRateLimiter.add(responseBytes);
  };
}
Aggregations