use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.
the class LeaderCheckerTests method testLeaderBehaviour.
public void testLeaderBehaviour() {
    final DiscoveryNode localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
    final DiscoveryNode otherNode = new DiscoveryNode("other-node", buildNewFakeTransportAddress(), Version.CURRENT);
    final Settings settings = Settings.builder().put(NODE_NAME_SETTING.getKey(), localNode.getId()).build();
    final DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue(settings, random());
    final CapturingTransport capturingTransport = new CapturingTransport();
    final TransportService transportService = capturingTransport.createTransportService(
        settings, deterministicTaskQueue.getThreadPool(), boundTransportAddress -> localNode, null);
    transportService.start();
    transportService.acceptIncomingRequests();
    final LeaderChecker leaderChecker = new LeaderChecker(settings, transportService, e -> fail("shouldn't be checking anything"));
    final DiscoveryNodes discoveryNodes = DiscoveryNodes.builder()
        .add(localNode)
        .localNodeId(localNode.getId())
        .masterNodeId(localNode.getId())
        .build();
    {
        leaderChecker.setCurrentNodes(discoveryNodes);
        final CapturingTransportResponseHandler handler = new CapturingTransportResponseHandler();
        transportService.sendRequest(localNode, LEADER_CHECK_ACTION_NAME, new LeaderCheckRequest(otherNode), handler);
        deterministicTaskQueue.runAllTasks();
        assertFalse(handler.successfulResponseReceived);
        assertThat(handler.transportException.getRootCause(), instanceOf(CoordinationStateRejectedException.class));
        CoordinationStateRejectedException cause = (CoordinationStateRejectedException) handler.transportException.getRootCause();
        assertThat(cause.getMessage(), equalTo("leader check from unknown node"));
    }
    {
        leaderChecker.setCurrentNodes(DiscoveryNodes.builder(discoveryNodes).add(otherNode).build());
        final CapturingTransportResponseHandler handler = new CapturingTransportResponseHandler();
        transportService.sendRequest(localNode, LEADER_CHECK_ACTION_NAME, new LeaderCheckRequest(otherNode), handler);
        deterministicTaskQueue.runAllTasks();
        assertTrue(handler.successfulResponseReceived);
        assertThat(handler.transportException, nullValue());
    }
    {
        leaderChecker.setCurrentNodes(DiscoveryNodes.builder(discoveryNodes).add(otherNode).masterNodeId(null).build());
        final CapturingTransportResponseHandler handler = new CapturingTransportResponseHandler();
        transportService.sendRequest(localNode, LEADER_CHECK_ACTION_NAME, new LeaderCheckRequest(otherNode), handler);
        deterministicTaskQueue.runAllTasks();
        assertFalse(handler.successfulResponseReceived);
        assertThat(handler.transportException.getRootCause(), instanceOf(CoordinationStateRejectedException.class));
        CoordinationStateRejectedException cause = (CoordinationStateRejectedException) handler.transportException.getRootCause();
        assertThat(cause.getMessage(), equalTo("non-leader rejecting leader check"));
    }
}
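The three blocks above differ only in the DiscoveryNodes snapshot passed to setCurrentNodes, and that snapshot alone determines how the leader check is answered. As a quick reference, a sketch of the three configurations, reusing localNode and otherNode from the test (the variable names here are illustrative, not from the source):

// 1. otherNode absent from the node set -> "leader check from unknown node"
DiscoveryNodes senderUnknown = DiscoveryNodes.builder()
    .add(localNode).localNodeId(localNode.getId()).masterNodeId(localNode.getId()).build();

// 2. otherNode present and the local node is master -> check succeeds
DiscoveryNodes senderKnown = DiscoveryNodes.builder(senderUnknown).add(otherNode).build();

// 3. otherNode present but no master set -> "non-leader rejecting leader check"
DiscoveryNodes noLeader = DiscoveryNodes.builder(senderUnknown).add(otherNode).masterNodeId(null).build();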
use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.
the class JoinTaskExecutor method becomeMasterAndTrimConflictingNodes.
protected ClusterState.Builder becomeMasterAndTrimConflictingNodes(ClusterState currentState, List<Task> joiningNodes) {
    assert currentState.nodes().getMasterNodeId() == null : currentState;
    DiscoveryNodes currentNodes = currentState.nodes();
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentNodes);
    nodesBuilder.masterNodeId(currentState.nodes().getLocalNodeId());
    for (final Task joinTask : joiningNodes) {
        if (joinTask.isBecomeMasterTask() || joinTask.isFinishElectionTask()) {
            // noop
        } else {
            final DiscoveryNode joiningNode = joinTask.node();
            final DiscoveryNode nodeWithSameId = nodesBuilder.get(joiningNode.getId());
            if (nodeWithSameId != null && nodeWithSameId.equals(joiningNode) == false) {
                logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameId, joiningNode);
                nodesBuilder.remove(nodeWithSameId.getId());
            }
            final DiscoveryNode nodeWithSameAddress = currentNodes.findByAddress(joiningNode.getAddress());
            if (nodeWithSameAddress != null && nodeWithSameAddress.equals(joiningNode) == false) {
                logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameAddress, joiningNode);
                nodesBuilder.remove(nodeWithSameAddress.getId());
            }
        }
    }
    // now trim any left-over dead nodes - either left there when the previous master stepped down
    // or removed by us above
    ClusterState tmpState = ClusterState.builder(currentState)
        .nodes(nodesBuilder)
        .blocks(ClusterBlocks.builder()
            .blocks(currentState.blocks())
            .removeGlobalBlock(NoMasterBlockService.NO_MASTER_BLOCK_ID))
        .build();
    logger.trace("becomeMasterAndTrimConflictingNodes: {}", tmpState.nodes());
    allocationService.cleanCaches();
    return ClusterState.builder(allocationService.disassociateDeadNodes(tmpState, false, "removed dead nodes on election"));
}
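Both conflict checks above remove a pre-existing node that collides with an incoming join on either the node id or the transport address while not being equal to the joining node (a restarted node typically reuses both, but carries a fresh ephemeral id, so equals() fails). A minimal sketch of the same predicate in isolation; the helper name is hypothetical and merely restates the two if-blocks above:

// returns true if `existing` must be removed before `joining` can join
static boolean conflictsWithJoin(DiscoveryNode existing, DiscoveryNode joining) {
    if (existing == null || existing.equals(joining)) {
        return false; // nothing there, or it is the same node instance
    }
    // same id or same address, but a different node (e.g. restarted with a new ephemeral id)
    return existing.getId().equals(joining.getId())
        || existing.getAddress().equals(joining.getAddress());
}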
use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.
the class PublicationTransportHandler method newPublicationContext.
public PublicationContext newPublicationContext(ClusterChangedEvent clusterChangedEvent) {
    final DiscoveryNodes nodes = clusterChangedEvent.state().nodes();
    final ClusterState newState = clusterChangedEvent.state();
    final ClusterState previousState = clusterChangedEvent.previousState();
    final boolean sendFullVersion = clusterChangedEvent.previousState().getBlocks().disableStatePersistence();
    final Map<Version, BytesReference> serializedStates = new HashMap<>();
    final Map<Version, BytesReference> serializedDiffs = new HashMap<>();
    // we build these early as a best effort not to commit in the case of error.
    // Sadly this is not watertight, as a failed diff-based publication to a node
    // may cause a full serialization based on an older version, which can fail after the
    // change has been committed.
    buildDiffAndSerializeStates(clusterChangedEvent.state(), clusterChangedEvent.previousState(),
        nodes, sendFullVersion, serializedStates, serializedDiffs);
    return new PublicationContext() {

        @Override
        public void sendPublishRequest(DiscoveryNode destination, PublishRequest publishRequest,
                                       ActionListener<PublishWithJoinResponse> originalListener) {
            assert publishRequest.getAcceptedState() == clusterChangedEvent.state() : "state got switched on us";
            final ActionListener<PublishWithJoinResponse> responseActionListener;
            if (destination.equals(nodes.getLocalNode())) {
                // if publishing to self, use original request instead (see currentPublishRequestToSelf for explanation)
                final PublishRequest previousRequest = currentPublishRequestToSelf.getAndSet(publishRequest);
                // an in-flight publication to self may be overridden if we lost and regained mastership
                // and the new publication started before the previous one completed (which fails anyhow because of higher current term)
                assert previousRequest == null
                    || previousRequest.getAcceptedState().term() < publishRequest.getAcceptedState().term();
                responseActionListener = new ActionListener<PublishWithJoinResponse>() {

                    @Override
                    public void onResponse(PublishWithJoinResponse publishWithJoinResponse) {
                        // only clean up our mess
                        currentPublishRequestToSelf.compareAndSet(publishRequest, null);
                        originalListener.onResponse(publishWithJoinResponse);
                    }

                    @Override
                    public void onFailure(Exception e) {
                        // only clean up our mess
                        currentPublishRequestToSelf.compareAndSet(publishRequest, null);
                        originalListener.onFailure(e);
                    }
                };
            } else {
                responseActionListener = originalListener;
            }
            if (sendFullVersion || !previousState.nodes().nodeExists(destination)) {
                LOGGER.trace("sending full cluster state version {} to {}", newState.version(), destination);
                PublicationTransportHandler.this.sendFullClusterState(newState, serializedStates, destination, responseActionListener);
            } else {
                LOGGER.trace("sending cluster state diff for version {} to {}", newState.version(), destination);
                PublicationTransportHandler.this.sendClusterStateDiff(newState, serializedDiffs, serializedStates, destination, responseActionListener);
            }
        }

        @Override
        public void sendApplyCommit(DiscoveryNode destination, ApplyCommitRequest applyCommitRequest,
                                    ActionListener<TransportResponse.Empty> responseActionListener) {
            transportService.sendRequest(destination, COMMIT_STATE_ACTION_NAME, applyCommitRequest, stateRequestOptions,
                new TransportResponseHandler<TransportResponse.Empty>() {

                    @Override
                    public TransportResponse.Empty read(StreamInput in) {
                        return TransportResponse.Empty.INSTANCE;
                    }

                    @Override
                    public void handleResponse(TransportResponse.Empty response) {
                        responseActionListener.onResponse(response);
                    }

                    @Override
                    public void handleException(TransportException exp) {
                        responseActionListener.onFailure(exp);
                    }

                    @Override
                    public String executor() {
                        return ThreadPool.Names.GENERIC;
                    }
                });
        }
    };
}
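Whether a destination receives the full state or a diff hinges on a single predicate in sendPublishRequest above: a node gets the full state when state persistence was disabled in the previous state (sendFullVersion) or when it was not part of the previous cluster state, since such a node has no base state to apply a diff to. A sketch of that predicate pulled out on its own; the helper name is hypothetical:

// full state needed when no usable base state exists on the destination
static boolean needsFullState(boolean sendFullVersion, ClusterState previousState, DiscoveryNode destination) {
    return sendFullVersion || previousState.nodes().nodeExists(destination) == false;
}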
use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.
the class DiskThresholdDeciderTests method testForSingleDataNode.
@Test
public void testForSingleDataNode() {
    Settings diskSettings = Settings.builder()
        .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true)
        .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS_SETTING.getKey(), true)
        .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "60%")
        .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), "70%")
        .build();
    ImmutableOpenMap.Builder<String, DiskUsage> usagesBuilder = ImmutableOpenMap.builder();
    // 0% used
    usagesBuilder.put("node1", new DiskUsage("node1", "n1", "/dev/null", 100, 100));
    // 80% used
    usagesBuilder.put("node2", new DiskUsage("node2", "n2", "/dev/null", 100, 20));
    // 0% used
    usagesBuilder.put("node3", new DiskUsage("node3", "n3", "/dev/null", 100, 100));
    ImmutableOpenMap<String, DiskUsage> usages = usagesBuilder.build();
    // We have an index with two primary shards, each taking 40 bytes. Each node has 100 bytes available.
    ImmutableOpenMap.Builder<String, Long> shardSizes = ImmutableOpenMap.builder();
    shardSizes.put("[test][0][p]", 40L);
    shardSizes.put("[test][1][p]", 40L);
    final ClusterInfo clusterInfo = new DevNullClusterInfo(usages, usages, shardSizes.build());
    DiskThresholdDecider diskThresholdDecider = makeDecider(diskSettings);
    Metadata metadata = Metadata.builder()
        .put(IndexMetadata.builder("test")
            .settings(Settings.builder()
                .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
                // two shards, matching the shard sizes above and the per-shard assertions below
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2)
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
                .put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "false")))
        .build();
    RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
    logger.info("--> adding one master node, one data node");
    DiscoveryNode discoveryNode1 = new DiscoveryNode("", "node1", buildNewFakeTransportAddress(), emptyMap(),
        singleton(DiscoveryNodeRole.MASTER_ROLE), Version.CURRENT);
    DiscoveryNode discoveryNode2 = new DiscoveryNode("", "node2", buildNewFakeTransportAddress(), emptyMap(),
        singleton(DiscoveryNodeRole.DATA_ROLE), Version.CURRENT);
    DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(discoveryNode1).add(discoveryNode2).build();
    ClusterState baseClusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(metadata)
        .routingTable(initialRoutingTable)
        .nodes(discoveryNodes)
        .build();
    // The two shards consume 80% of the disk space on the data node, but since it is the only data node, the shards should remain.
    ShardRouting firstRouting = TestShardRouting.newShardRouting("test", 0, "node2", null, true, ShardRoutingState.STARTED);
    ShardRouting secondRouting = TestShardRouting.newShardRouting("test", 1, "node2", null, true, ShardRoutingState.STARTED);
    RoutingNode firstRoutingNode = new RoutingNode("node2", discoveryNode2, firstRouting, secondRouting);
    RoutingTable.Builder builder = RoutingTable.builder()
        .add(IndexRoutingTable.builder(firstRouting.index())
            .addIndexShard(new IndexShardRoutingTable.Builder(firstRouting.shardId()).addShard(firstRouting).build())
            .addIndexShard(new IndexShardRoutingTable.Builder(secondRouting.shardId()).addShard(secondRouting).build()));
    ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build();
    RoutingAllocation routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime());
    routingAllocation.debugDecision(true);
    Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation);
    // Two shards should start happily
    assertThat(decision.type(), equalTo(Decision.Type.YES));
    assertThat(decision.getExplanation(), containsString("there is only a single data node present"));
    ClusterInfoService cis = () -> {
        logger.info("--> calling fake getClusterInfo");
        return clusterInfo;
    };
    AllocationDeciders deciders = new AllocationDeciders(new HashSet<>(Arrays.asList(
        new SameShardAllocationDecider(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)),
        diskThresholdDecider)));
    AllocationService strategy = new AllocationService(deciders, new TestGatewayAllocator(), new BalancedShardsAllocator(Settings.EMPTY), cis);
    ClusterState result = strategy.reroute(clusterState, "reroute");
    assertThat(result.routingTable().index("test").getShards().get(0).primaryShard().state(), equalTo(STARTED));
    assertThat(result.routingTable().index("test").getShards().get(0).primaryShard().currentNodeId(), equalTo("node2"));
    assertThat(result.routingTable().index("test").getShards().get(0).primaryShard().relocatingNodeId(), nullValue());
    assertThat(result.routingTable().index("test").getShards().get(1).primaryShard().state(), equalTo(STARTED));
    assertThat(result.routingTable().index("test").getShards().get(1).primaryShard().currentNodeId(), equalTo("node2"));
    assertThat(result.routingTable().index("test").getShards().get(1).primaryShard().relocatingNodeId(), nullValue());
    // Add another data node; one shard should relocate to it.
logger.info("--> adding node3");
DiscoveryNode discoveryNode3 = new DiscoveryNode("", "node3", buildNewFakeTransportAddress(), emptyMap(), singleton(DiscoveryNodeRole.DATA_ROLE), Version.CURRENT);
ClusterState updateClusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(discoveryNode3)).build();
firstRouting = TestShardRouting.newShardRouting("test", 0, "node2", null, true, ShardRoutingState.STARTED);
secondRouting = TestShardRouting.newShardRouting("test", 1, "node2", "node3", true, ShardRoutingState.RELOCATING);
firstRoutingNode = new RoutingNode("node2", discoveryNode2, firstRouting, secondRouting);
builder = RoutingTable.builder().add(IndexRoutingTable.builder(firstRouting.index()).addIndexShard(new IndexShardRoutingTable.Builder(firstRouting.shardId()).addShard(firstRouting).build()).addIndexShard(new IndexShardRoutingTable.Builder(secondRouting.shardId()).addShard(secondRouting).build()));
clusterState = ClusterState.builder(updateClusterState).routingTable(builder.build()).build();
routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime());
routingAllocation.debugDecision(true);
decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation);
assertThat(decision.type(), equalTo(Decision.Type.YES));
assertThat(decision.getExplanation(), containsString("there is enough disk on this node for the shard to remain, free: [60b]"));
result = strategy.reroute(clusterState, "reroute");
assertThat(result.routingTable().index("test").getShards().get(0).primaryShard().state(), equalTo(STARTED));
assertThat(result.routingTable().index("test").getShards().get(0).primaryShard().currentNodeId(), equalTo("node2"));
assertThat(result.routingTable().index("test").getShards().get(0).primaryShard().relocatingNodeId(), nullValue());
assertThat(result.routingTable().index("test").getShards().get(1).primaryShard().state(), equalTo(RELOCATING));
assertThat(result.routingTable().index("test").getShards().get(1).primaryShard().currentNodeId(), equalTo("node2"));
assertThat(result.routingTable().index("test").getShards().get(1).primaryShard().relocatingNodeId(), equalTo("node3"));
}
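The "free: [60b]" in the final canRemain explanation follows from the fixture numbers, given that CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS is enabled: the decider credits the bytes of a shard relocating away from node2 back to its free space. A sketch of the arithmetic, using only values defined in this test:

long totalBytes = 100;     // node2 capacity from the DiskUsage fixture
long freeBytes = 20;       // node2 free space with both 40-byte shards on it
long outgoingShard = 40;   // [test][1][p] is RELOCATING from node2 to node3
// with relocations taken into account: 20 + 40 = 60 bytes free, i.e. "free: [60b]"
long effectiveFree = freeBytes + outgoingShard;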
use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.
the class PublicationTests method testClusterStatePublishingWithFaultyNodeBeforeCommit.
public void testClusterStatePublishingWithFaultyNodeBeforeCommit() throws InterruptedException {
    VotingConfiguration singleNodeConfig = new VotingConfiguration(Set.of(n1.getId()));
    initializeCluster(singleNodeConfig);
    AssertingAckListener ackListener = new AssertingAckListener(nodes.size());
    DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(n1).add(n2).add(n3).localNodeId(n1.getId()).build();
    // number of publish actions + initial faulty nodes injection
    AtomicInteger remainingActions = new AtomicInteger(4);
    int injectFaultAt = randomInt(remainingActions.get() - 1);
    logger.info("Injecting fault at: {}", injectFaultAt);
    Set<DiscoveryNode> initialFaultyNodes = remainingActions.decrementAndGet() == injectFaultAt
        ? Collections.singleton(n2)
        : Collections.emptySet();
    MockPublication publication = node1.publish(
        CoordinationStateTests.clusterState(1L, 2L, discoveryNodes, singleNodeConfig, singleNodeConfig, 42L),
        ackListener, initialFaultyNodes);
    publication.pendingPublications.entrySet().stream().collect(shuffle()).forEach(e -> {
        if (remainingActions.decrementAndGet() == injectFaultAt) {
            publication.onFaultyNode(n2);
        }
        if (e.getKey().equals(n2) == false || randomBoolean()) {
            PublishResponse publishResponse = nodeResolver.apply(e.getKey()).coordinationState.handlePublishRequest(publication.publishRequest);
            e.getValue().onResponse(new PublishWithJoinResponse(publishResponse, Optional.empty()));
        }
    });
    publication.pendingCommits.entrySet().stream().collect(shuffle()).forEach(e -> {
        nodeResolver.apply(e.getKey()).coordinationState.handleCommit(publication.applyCommit);
        e.getValue().onResponse(TransportResponse.Empty.INSTANCE);
    });
    assertTrue(publication.completed);
    assertTrue(publication.committed);
    // has no influence
    publication.onFaultyNode(randomFrom(n1, n3));
    List<Tuple<DiscoveryNode, Throwable>> errors = ackListener.awaitErrors(0L, TimeUnit.SECONDS);
    assertThat(errors.size(), equalTo(1));
    assertThat(errors.get(0).v1(), equalTo(n2));
    assertThat(errors.get(0).v2().getMessage(), containsString("faulty node"));
}
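The bookkeeping around remainingActions gives the fault exactly four equally likely injection points: one before the publication starts (via initialFaultyNodes) and one before each of the three publish responses. A sketch of just that counting scheme, with the same names as above:

AtomicInteger remainingActions = new AtomicInteger(4);
int injectFaultAt = randomInt(remainingActions.get() - 1); // uniform in 0..3
// the first decrement yields 3: inject up front as an initial faulty node if it matches
boolean injectUpFront = remainingActions.decrementAndGet() == injectFaultAt;
// the three decrements inside the publish loop then yield 2, 1 and 0,
// so exactly one of the four candidate points fires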