use of io.crate.execution.jobs.kill.KillJobsRequest in project crate by crate.
the class NodeDisconnectJobMonitorService method broadcastKillToParticipatingNodes.
/**
 * Broadcast the kill if *this* node is the coordinator and a participating node died.
 * The information which nodes are participating is only available on the coordinator, so other nodes
 * can not kill the jobs on their own.
 *
 * <pre>
 *              n1                      n2                  n3
 *               |                      |                   |
 *           startJob 1 (n1,n2,n3)      |                   |
 *               |                      |                   |
 *               |                    *dies*                |
 *               |                                          |
 *           onNodeDisc(n2)                            onNodeDisc(n2)
 *            broadcast kill job1                   does not know which jobs involve n2
 *                  |
 *      kill job1 <-+---------------------------------->  kill job1
 *
 * </pre>
 *
 * @param deadNode the node that disconnected; its id is used to look up the affected jobs
 *                 and it is passed to the broadcast as an excluded node
 */
private void broadcastKillToParticipatingNodes(DiscoveryNode deadNode) {
    List<UUID> affectedJobs = tasksService
        .getJobIdsByParticipatingNodes(deadNode.getId())
        .collect(Collectors.toList());
    if (affectedJobs.isEmpty()) {
        // No job known to this node involves the dead node: nothing to broadcast.
        return;
    }
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("Broadcasting kill for {} jobs because they involved disconnected node={}", affectedJobs.size(), deadNode.getId());
    }
    // The dead node itself is excluded from the broadcast.
    List<String> excludedNodeIds = Collections.singletonList(deadNode.getId());
    KillJobsRequest killRequest = new KillJobsRequest(
        affectedJobs,
        User.CRATE_USER.name(),
        "Participating node=" + deadNode.getName() + " disconnected."
    );
    killJobsNodeAction.broadcast(killRequest, new ActionListener<>() {
        @Override
        public void onResponse(Long numKilled) {
            // The number of killed jobs is intentionally not used here.
        }

        @Override
        public void onFailure(Exception e) {
            // Pass the exception along so the cause of the failed broadcast is not lost.
            LOGGER.warn("failed to send kill request to nodes", e);
        }
    }, excludedNodeIds);
}
use of io.crate.execution.jobs.kill.KillJobsRequest in project crate by crate.
the class NodeDisconnectJobMonitorServiceTest method testOnParticipatingNodeDisconnectedKillsJob.
/**
 * Verifies that disconnecting a node which participates in a job created on this
 * TasksService triggers exactly one kill broadcast.
 */
@Test
public void testOnParticipatingNodeDisconnectedKillsJob() throws Exception {
    TasksService tasksService = tasksInstance();
    DiscoveryNode coordinator = newNode("coordinator");
    DiscoveryNode dataNode = newNode("dataNode");

    // first job: created with the "coordinator" node id and both nodes in the node list
    RootTask.Builder builder = tasksService.newBuilder(
        UUID.randomUUID(),
        "dummy-user",
        coordinator.getId(),
        Arrays.asList(coordinator.getId(), dataNode.getId())
    );
    builder.addTask(new DummyTask());
    tasksService.createTask(builder);

    // add a second job that is coordinated by the other node to make sure the broadcast logic is run
    // even though there are jobs coordinated by the disconnected node
    builder = tasksService.newBuilder(UUID.randomUUID(), "dummy-user", dataNode.getId(), Collections.emptySet());
    builder.addTask(new DummyTask());
    tasksService.createTask(builder);

    // count broadcasts instead of sending anything over the (mocked) transport
    AtomicInteger broadcasts = new AtomicInteger(0);
    TransportKillJobsNodeAction killAction = new TransportKillJobsNodeAction(tasksService, clusterService, mock(TransportService.class)) {
        @Override
        public void broadcast(KillJobsRequest request, ActionListener<Long> listener, Collection<String> excludedNodeIds) {
            broadcasts.incrementAndGet();
        }
    };
    NodeDisconnectJobMonitorService monitorService = new NodeDisconnectJobMonitorService(
        tasksService,
        new NodeLimits(new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)),
        mock(TransportService.class),
        killAction
    );
    try {
        monitorService.onNodeDisconnected(dataNode, mock(Transport.Connection.class));
        assertThat(broadcasts.get(), is(1));
    } finally {
        // close the service even if the call or the assertion above throws
        monitorService.close();
    }
}
Aggregations