Use of io.crate.execution.jobs.kill.KillJobsRequest in project crate by crate.
In class RemoteCollector, the method killRemoteContext:
private void killRemoteContext() {
    KillJobsRequest killRequest = new KillJobsRequest(List.of(jobId), sessionSettings.userName(), null);
    transportKillJobsNodeAction.broadcast(killRequest, new ActionListener<>() {

        @Override
        public void onResponse(Long numKilled) {
            context.kill(null);
        }

        @Override
        public void onFailure(Exception e) {
            context.kill(e.getMessage());
        }
    });
}
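
The examples on this page all follow the same pattern: build a KillJobsRequest from the ids of the jobs to kill, the name of the user issuing the kill, and an optional human-readable reason (null in the snippet above), then pass it to TransportKillJobsNodeAction.broadcast together with an ActionListener<Long> that is called back with the number of tasks that were killed. A minimal sketch of that pattern, assuming the surrounding class already has transportKillJobsNodeAction, sessionSettings, and LOGGER fields; the killJob helper itself is illustrative, not part of the CrateDB API:

private void killJob(UUID jobId, @Nullable String reason) {
    // Job ids to kill, the user issuing the kill, and an optional reason.
    KillJobsRequest killRequest = new KillJobsRequest(List.of(jobId), sessionSettings.userName(), reason);
    // Broadcast the kill to the cluster; the listener receives the number of killed tasks.
    transportKillJobsNodeAction.broadcast(killRequest, new ActionListener<>() {

        @Override
        public void onResponse(Long numKilled) {
            LOGGER.debug("Killed {} tasks for job={}", numKilled, jobId);
        }

        @Override
        public void onFailure(Exception e) {
            LOGGER.warn("Failed to broadcast kill for job=" + jobId, e);
        }
    });
}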
Use of io.crate.execution.jobs.kill.KillJobsRequest in project crate by crate.
In class InterceptingRowConsumer, the method tryForwardResult:
private void tryForwardResult(Throwable throwable) {
    if (throwable != null && (failure == null || failure instanceof InterruptedException)) {
        failure = SQLExceptions.unwrap(throwable);
    }
    if (consumerInvokedAndJobInitialized.decrementAndGet() > 0) {
        return;
    }
    if (failure == null) {
        assert iterator != null : "iterator must be present";
        ThreadPools.forceExecute(executor, () -> consumer.accept(iterator, null));
    } else {
        consumer.accept(null, failure);
        KillJobsRequest killRequest = new KillJobsRequest(
            List.of(jobId),
            User.CRATE_USER.name(),
            "An error was encountered: " + failure);
        transportKillJobsNodeAction.broadcast(killRequest, new ActionListener<>() {

            @Override
            public void onResponse(Long numKilled) {
                if (LOGGER.isTraceEnabled()) {
                    LOGGER.trace("Killed {} contexts for jobId={} forwarding the failure={}", numKilled, jobId, failure);
                }
            }

            @Override
            public void onFailure(Exception e) {
                if (LOGGER.isTraceEnabled()) {
                    LOGGER.trace("Failed to kill jobId={}, forwarding failure={} anyway", jobId, failure);
                }
            }
        });
    }
}
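
The decrementAndGet guard acts as a small latch: tryForwardResult is expected to run once per precondition named by the counter (the consumer being invoked and the job being initialized), and only the final call forwards either the buffered iterator or the recorded failure. A stripped-down sketch of that gating pattern, with illustrative names; the initial value of 2 is an assumption derived from the counter's name, not taken from the snippet:

import java.util.concurrent.atomic.AtomicInteger;

class ForwardOnLastCall {

    // One permit per precondition: consumer invoked + job initialized (assumed initial value).
    private final AtomicInteger pending = new AtomicInteger(2);
    private volatile Throwable failure;

    void tryForward(Throwable throwable) {
        if (throwable != null && failure == null) {
            failure = throwable;        // remember the first failure seen
        }
        if (pending.decrementAndGet() > 0) {
            return;                     // other precondition still outstanding
        }
        // Exactly one caller reaches this point and forwards the result or the failure.
        forwardResult(failure);
    }

    void forwardResult(Throwable failure) {
        // stands in for consumer.accept(...) and the KillJobsRequest broadcast in the real class
    }
}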
Use of io.crate.execution.jobs.kill.KillJobsRequest in project crate by crate.
In class TransportDistributedResultActionTest, the method testKillIsInvokedIfContextIsNotFound:
@Test
public void testKillIsInvokedIfContextIsNotFound() throws Exception {
    TasksService tasksService = new TasksService(clusterService, new JobsLogs(() -> false));
    AtomicInteger numBroadcasts = new AtomicInteger(0);
    TransportKillJobsNodeAction killJobsAction =
        new TransportKillJobsNodeAction(tasksService, clusterService, mock(TransportService.class)) {

            @Override
            public void broadcast(KillJobsRequest request,
                                  ActionListener<Long> listener,
                                  Collection<String> excludedNodeIds) {
                numBroadcasts.incrementAndGet();
            }
        };
    TransportDistributedResultAction transportDistributedResultAction = new TransportDistributedResultAction(
        mock(Transports.class),
        tasksService,
        THREAD_POOL,
        mock(TransportService.class),
        clusterService,
        killJobsAction,
        BackoffPolicy.exponentialBackoff(TimeValue.ZERO, 0)
    );
    StreamBucket.Builder builder = new StreamBucket.Builder(new Streamer[0], RamAccounting.NO_ACCOUNTING);
    try {
        transportDistributedResultAction
            .nodeOperation(new DistributedResultRequest(UUID.randomUUID(), 0, (byte) 0, 0, builder.build(), true))
            .get(5, TimeUnit.SECONDS);
        fail("nodeOperation call should fail with TaskMissing");
    } catch (ExecutionException e) {
        assertThat(e.getCause(), Matchers.instanceOf(TaskMissing.class));
    }
    assertThat(numBroadcasts.get(), is(1));
}
Use of io.crate.execution.jobs.kill.KillJobsRequest in project crate by crate.
In class RemoteCollectorTest, the method prepare:
@Before
public void prepare() {
    MockitoAnnotations.initMocks(this);
    UUID jobId = UUID.randomUUID();
    RoutedCollectPhase collectPhase = new RoutedCollectPhase(
        jobId,
        0,
        "remoteCollect",
        new Routing(Map.of("remoteNode", Map.of("dummyTable", IntArrayList.from(1)))),
        RowGranularity.DOC,
        Collections.singletonList(createReference("name", DataTypes.STRING)),
        Collections.emptyList(),
        WhereClause.MATCH_ALL.queryOrFallback(),
        DistributionInfo.DEFAULT_BROADCAST
    );
    transportJobAction = mock(TransportJobAction.class);
    TasksService tasksService = new TasksService(clusterService, new JobsLogs(() -> true));
    numBroadcastCalls = new AtomicInteger(0);
    transportKillJobsNodeAction =
        new TransportKillJobsNodeAction(tasksService, clusterService, mock(TransportService.class)) {

            @Override
            public void broadcast(KillJobsRequest request, ActionListener<Long> listener) {
                numBroadcastCalls.incrementAndGet();
            }
        };
    consumer = new TestingRowConsumer();
    remoteCollector = new RemoteCollector(
        jobId,
        new SessionSettings("dummyUser", SearchPath.createSearchPathFrom("dummySchema")),
        "localNode",
        "remoteNode",
        transportJobAction,
        transportKillJobsNodeAction,
        Runnable::run,
        tasksService,
        RamAccounting.NO_ACCOUNTING,
        consumer,
        collectPhase
    );
}
Use of io.crate.execution.jobs.kill.KillJobsRequest in project crate by crate.
In class TransportDistributedResultAction, the method retryOrFailureResponse:
private CompletableFuture<DistributedResultResponse> retryOrFailureResponse(DistributedResultRequest request,
                                                                            @Nullable Iterator<TimeValue> retryDelay) {
    if (retryDelay == null) {
        retryDelay = backoffPolicy.iterator();
    }
    if (retryDelay.hasNext()) {
        TimeValue delay = retryDelay.next();
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("scheduling retry to start node operation for jobId: {} in {}ms", request.jobId(), delay.getMillis());
        }
        NodeOperationRunnable operationRunnable = new NodeOperationRunnable(request, retryDelay);
        scheduler.schedule(operationRunnable::run, delay.getMillis(), TimeUnit.MILLISECONDS);
        return operationRunnable;
    } else {
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("Received a result for job={} but couldn't find a RootTask for it", request.jobId());
        }
        List<String> excludedNodeIds = Collections.singletonList(clusterService.localNode().getId());

        /* The upstream (DistributingConsumer) forwards failures to other downstreams and eventually considers its job done.
         * But it cannot inform the handler-merge about a failure because the JobResponse is sent eagerly.
         *
         * The handler local-merge would get stuck if not all its upstreams send their requests, so we need to invoke
         * a kill to make sure that doesn't happen.
         */
        KillJobsRequest killRequest = new KillJobsRequest(
            List.of(request.jobId()),
            User.CRATE_USER.name(),
            "Received data for job=" + request.jobId() + " but there is no job context present. " +
            "This can happen due to bad network latency or if individual nodes are unresponsive due to high load"
        );
        killJobsAction.broadcast(killRequest, new ActionListener<>() {

            @Override
            public void onResponse(Long numKilled) {
            }

            @Override
            public void onFailure(Exception e) {
                LOGGER.debug("Could not kill " + request.jobId(), e);
            }
        }, excludedNodeIds);
        return CompletableFuture.failedFuture(new TaskMissing(TaskMissing.Type.ROOT, request.jobId()));
    }
}
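
retryOrFailureResponse keeps rescheduling the node operation while the BackoffPolicy iterator still has delays left and only falls back to the kill broadcast once retries are exhausted; the excludedNodeIds argument keeps the local node, which already knows the task is missing, out of the kill broadcast. A small sketch of how such a finite backoff iterator behaves, assuming the BackoffPolicy and TimeValue classes used in the test above (the import paths shown follow upstream Elasticsearch and may differ in the CrateDB tree):

import java.util.Iterator;

import org.elasticsearch.action.bulk.BackoffPolicy;
import org.elasticsearch.common.unit.TimeValue;

public final class BackoffDemo {

    public static void main(String[] args) {
        // Three retries with exponentially growing delays, starting at 50ms.
        Iterator<TimeValue> retryDelay =
            BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(50), 3).iterator();
        while (retryDelay.hasNext()) {
            // TransportDistributedResultAction schedules the next nodeOperation attempt after this delay.
            System.out.println("retry after " + retryDelay.next().getMillis() + "ms");
        }
        // Once the iterator is exhausted, the action broadcasts the KillJobsRequest and
        // fails the original request with TaskMissing instead of retrying further.
    }
}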