Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.
From the class ZenDiscoveryIT, method testHandleNodeJoin_incompatibleClusterState:
public void testHandleNodeJoin_incompatibleClusterState() throws InterruptedException, ExecutionException, TimeoutException {
    String masterNode = internalCluster().startMasterOnlyNode();
    String node1 = internalCluster().startNode();
    ClusterService clusterService = internalCluster().getInstance(ClusterService.class, node1);
    Coordinator coordinator = (Coordinator) internalCluster().getInstance(Discovery.class, masterNode);
    final ClusterState state = clusterService.state();
    Metadata.Builder mdBuilder = Metadata.builder(state.metadata());
    mdBuilder.putCustom(CustomMetadata.TYPE, new CustomMetadata("data"));
    ClusterState stateWithCustomMetadata = ClusterState.builder(state).metadata(mdBuilder).build();
    final CompletableFuture<Throwable> future = new CompletableFuture<>();
    DiscoveryNode node = state.nodes().getLocalNode();
    coordinator.sendValidateJoinRequest(stateWithCustomMetadata, new JoinRequest(node, 0L, Optional.empty()), new JoinHelper.JoinCallback() {
        @Override
        public void onSuccess() {
            future.completeExceptionally(new AssertionError("onSuccess should not be called"));
        }

        @Override
        public void onFailure(Exception e) {
            future.complete(e);
        }
    });
    Throwable t = future.get(10, TimeUnit.SECONDS);
    assertTrue(t instanceof IllegalStateException);
    assertTrue(t.getCause() instanceof RemoteTransportException);
    assertTrue(t.getCause().getCause() instanceof IllegalArgumentException);
    assertThat(t.getCause().getCause().getMessage(), containsString("Unknown NamedWriteable"));
}
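The CustomMetadata type used above is a test-only fixture that is deliberately not registered with the joining node's NamedWriteableRegistry, which is what produces the "Unknown NamedWriteable" failure the test asserts on. A minimal sketch of what such a fixture might look like, assuming it extends the TestCustomMetadata helper; only the TYPE constant and the single-string constructor are visible in the test, so the other overrides here are illustrative:

// Hedged sketch of a test-only custom metadata fixture. TestCustomMetadata is
// assumed to carry a single string payload; context() value is an assumption.
public static class CustomMetadata extends TestCustomMetadata {
    public static final String TYPE = "custom_md";

    CustomMetadata(String data) {
        super(data);
    }

    @Override
    public String getWriteableName() {
        return TYPE;
    }

    @Override
    public Version getMinimalSupportedVersion() {
        return Version.CURRENT;
    }

    @Override
    public EnumSet<Metadata.XContentContext> context() {
        // assumption: persisted with the on-disk cluster state and in snapshots
        return EnumSet.of(Metadata.XContentContext.GATEWAY, Metadata.XContentContext.SNAPSHOT);
    }
}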
Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.
From the class AllocationIdIT, method testFailedRecoveryOnAllocateStalePrimaryRequiresAnotherAllocateStalePrimary:
public void testFailedRecoveryOnAllocateStalePrimaryRequiresAnotherAllocateStalePrimary() throws Exception {
    /*
     * The allocation id is assigned when the shard starts, while the historyUUID is adjusted only after recovery is done.
     *
     * If, during execution of AllocateStalePrimary, a proper allocation id is stored in the allocation id set and recovery then fails,
     * the shard restart skips the stage where the historyUUID is changed.
     *
     * That leads to a situation where the allocated stale primary and its replica share the same historyUUID, so the
     * replica will receive operations after the local checkpoint even though the documents before the checkpoints could differ significantly.
     *
     * Therefore, on AllocateStalePrimary we put a fake allocation id (no real one could be generated like that),
     * and any failure during recovery then requires an extra AllocateStalePrimary command to be executed.
     */
    // initial set up
    final String indexName = "index42";
    final String master = internalCluster().startMasterOnlyNode();
    String node1 = internalCluster().startNode();
    createIndex(indexName, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), "checksum").build());
    final int numDocs = indexDocs(indexName, "foo", "bar");
    final IndexSettings indexSettings = getIndexSettings(indexName, node1);
    final Set<String> allocationIds = getAllocationIds(indexName);
    final ShardId shardId = new ShardId(resolveIndex(indexName), 0);
    final Path indexPath = getIndexPath(node1, shardId);
    assertThat(allocationIds, hasSize(1));
    final String historyUUID = historyUUID(node1, indexName);
    String node2 = internalCluster().startNode();
    ensureGreen(indexName);
    internalCluster().assertSameDocIdsOnShards();
    // initial set up is done
    Settings node1DataPathSettings = internalCluster().dataPathSettings(node1);
    Settings node2DataPathSettings = internalCluster().dataPathSettings(node2);
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node1));
    // index more docs to node2; this marks node1 as stale
    int numExtraDocs = indexDocs(indexName, "foo", "bar2");
    assertHitCount(client(node2).prepareSearch(indexName).setQuery(matchAllQuery()).get(), numDocs + numExtraDocs);
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node2));
    // create a fake corruption marker on node1
    putFakeCorruptionMarker(indexSettings, shardId, indexPath);
    // as far as the master is concerned, node1 is out of sync
    node1 = internalCluster().startNode(node1DataPathSettings);
    // there is only a _stale_ primary
    checkNoValidShardCopy(indexName, shardId);
    // allocate the stale primary
    client(node1).admin().cluster().prepareReroute().add(new AllocateStalePrimaryAllocationCommand(indexName, 0, node1, true)).get();
    // allocation fails due to the corruption marker
    assertBusy(() -> {
        final ClusterState state = client().admin().cluster().prepareState().get().getState();
        final ShardRouting shardRouting = state.routingTable().index(indexName).shard(shardId.id()).primaryShard();
        assertThat(shardRouting.state(), equalTo(ShardRoutingState.UNASSIGNED));
        assertThat(shardRouting.unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.ALLOCATION_FAILED));
    });
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node1));
    try (Store store = new Store(shardId, indexSettings, new NIOFSDirectory(indexPath), new DummyShardLock(shardId))) {
        store.removeCorruptionMarker();
    }
    node1 = internalCluster().startNode(node1DataPathSettings);
    // the index is red: no shard is allocated (the allocation id is a fake id that does not match anything)
    checkHealthStatus(indexName, ClusterHealthStatus.RED);
    checkNoValidShardCopy(indexName, shardId);
    // no valid shard copy is there; we have to invoke AllocateStalePrimary again
    client().admin().cluster().prepareReroute().add(new AllocateStalePrimaryAllocationCommand(indexName, 0, node1, true)).get();
    ensureYellow(indexName);
    // bring node2 back
    node2 = internalCluster().startNode(node2DataPathSettings);
    ensureGreen(indexName);
    assertThat(historyUUID(node1, indexName), not(equalTo(historyUUID)));
    assertThat(historyUUID(node1, indexName), equalTo(historyUUID(node2, indexName)));
    internalCluster().assertSameDocIdsOnShards();
}
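The putFakeCorruptionMarker helper is not shown above. Given that the test later opens a Store over the same path to call removeCorruptionMarker, a plausible sketch is the mirror operation; markStoreCorrupted is an existing Store method, and the exception message here is illustrative:

// Hedged sketch: write a corruption marker into the shard's store so that the
// next recovery attempt fails the "checksum" integrity check on startup.
private void putFakeCorruptionMarker(IndexSettings indexSettings, ShardId shardId, Path indexPath) throws IOException {
    try (Store store = new Store(shardId, indexSettings, new NIOFSDirectory(indexPath), new DummyShardLock(shardId))) {
        store.markStoreCorrupted(new IOException("fake ioexception"));
    }
}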
Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.
From the class ClusterServiceIT, method testPendingUpdateTask:
public void testPendingUpdateTask() throws Exception {
    String node_0 = internalCluster().startNode();
    internalCluster().startCoordinatingOnlyNode(Settings.EMPTY);
    final ClusterService clusterService = internalCluster().getInstance(ClusterService.class, node_0);
    final CountDownLatch block1 = new CountDownLatch(1);
    final CountDownLatch invoked1 = new CountDownLatch(1);
    clusterService.submitStateUpdateTask("1", new ClusterStateUpdateTask() {
        @Override
        public ClusterState execute(ClusterState currentState) {
            invoked1.countDown();
            try {
                block1.await();
            } catch (InterruptedException e) {
                fail();
            }
            return currentState;
        }

        @Override
        public void onFailure(String source, Exception e) {
            invoked1.countDown();
            fail();
        }
    });
    invoked1.await();
    final CountDownLatch invoked2 = new CountDownLatch(9);
    for (int i = 2; i <= 10; i++) {
        clusterService.submitStateUpdateTask(Integer.toString(i), new ClusterStateUpdateTask() {
            @Override
            public ClusterState execute(ClusterState currentState) {
                return currentState;
            }

            @Override
            public void onFailure(String source, Exception e) {
                fail();
            }

            @Override
            public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
                invoked2.countDown();
            }
        });
    }
    // there might be other tasks on this node; make sure to take only the ones we added into account in this test
    // the tasks can be re-ordered, so we need to check them out of order
    Set<String> controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"));
    List<PendingClusterTask> pendingClusterTasks = clusterService.getMasterService().pendingTasks();
    assertThat(pendingClusterTasks.size(), greaterThanOrEqualTo(10));
    assertThat(pendingClusterTasks.get(0).getSource().string(), equalTo("1"));
    assertThat(pendingClusterTasks.get(0).isExecuting(), equalTo(true));
    for (PendingClusterTask task : pendingClusterTasks) {
        controlSources.remove(task.getSource().string());
    }
    assertTrue(controlSources.isEmpty());
    controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"));
    PendingClusterTasksResponse response = internalCluster().coordOnlyNodeClient().admin().cluster().preparePendingClusterTasks().get();
    assertThat(response.pendingTasks().size(), greaterThanOrEqualTo(10));
    assertThat(response.pendingTasks().get(0).getSource().string(), equalTo("1"));
    assertThat(response.pendingTasks().get(0).isExecuting(), equalTo(true));
    for (PendingClusterTask task : response) {
        controlSources.remove(task.getSource().string());
    }
    assertTrue(controlSources.isEmpty());
    block1.countDown();
    invoked2.await();
    // whenever we test for no tasks, we need to wait, since this is a live node
    assertBusy(() -> assertTrue("Pending tasks not empty", clusterService.getMasterService().pendingTasks().isEmpty()));
    waitNoPendingTasksOnAll();
    final CountDownLatch block2 = new CountDownLatch(1);
    final CountDownLatch invoked3 = new CountDownLatch(1);
    clusterService.submitStateUpdateTask("1", new ClusterStateUpdateTask() {
        @Override
        public ClusterState execute(ClusterState currentState) {
            invoked3.countDown();
            try {
                block2.await();
            } catch (InterruptedException e) {
                fail();
            }
            return currentState;
        }

        @Override
        public void onFailure(String source, Exception e) {
            invoked3.countDown();
            fail();
        }
    });
    invoked3.await();
    for (int i = 2; i <= 5; i++) {
        clusterService.submitStateUpdateTask(Integer.toString(i), new ClusterStateUpdateTask() {
            @Override
            public ClusterState execute(ClusterState currentState) {
                return currentState;
            }

            @Override
            public void onFailure(String source, Exception e) {
                fail();
            }
        });
    }
    Thread.sleep(100);
    pendingClusterTasks = clusterService.getMasterService().pendingTasks();
    assertThat(pendingClusterTasks.size(), greaterThanOrEqualTo(5));
    controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5"));
    for (PendingClusterTask task : pendingClusterTasks) {
        controlSources.remove(task.getSource().string());
    }
    assertTrue(controlSources.isEmpty());
    response = internalCluster().coordOnlyNodeClient().admin().cluster().preparePendingClusterTasks().get();
    assertThat(response.pendingTasks().size(), greaterThanOrEqualTo(5));
    controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5"));
    for (PendingClusterTask task : response) {
        if (controlSources.remove(task.getSource().string())) {
            assertThat(task.getTimeInQueueInMillis(), greaterThan(0L));
        }
    }
    assertTrue(controlSources.isEmpty());
    block2.countDown();
}
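The blocking first task is submitted twice with exactly the same shape. That pattern can be factored into a helper; the following is a sketch of such a refactoring, grounded in the test body above but not part of the original test (the helper name is illustrative):

// Hedged sketch: submit a cluster state update task named `source` that signals
// `started` once it begins executing and then blocks until `release` is counted down.
private static void submitBlockingTask(ClusterService clusterService, String source, CountDownLatch started, CountDownLatch release) {
    clusterService.submitStateUpdateTask(source, new ClusterStateUpdateTask() {
        @Override
        public ClusterState execute(ClusterState currentState) {
            started.countDown();
            try {
                release.await();
            } catch (InterruptedException e) {
                throw new AssertionError(e);
            }
            // no-op update: returning the same state instance publishes nothing
            return currentState;
        }

        @Override
        public void onFailure(String source, Exception e) {
            started.countDown();
            throw new AssertionError(e);
        }
    });
}

While this task occupies the single-threaded cluster state update executor, every subsequently submitted task stays queued, which is what lets the test inspect pendingTasks() deterministically.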
Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.
From the class ClusterShardLimitIT, method testIndexCreationOverLimit:
public void testIndexCreationOverLimit() {
    int dataNodes = client().admin().cluster().prepareState().get().getState().getNodes().getDataNodes().size();
    ShardCounts counts = ShardCounts.forDataNodeCount(dataNodes);
    setShardsPerNode(counts.getShardsPerNode());
    // create an index that will bring us up to the limit
    createIndex("test", Settings.builder().put(indexSettings()).put(SETTING_NUMBER_OF_SHARDS, counts.getFirstIndexShards()).put(SETTING_NUMBER_OF_REPLICAS, counts.getFirstIndexReplicas()).build());
    try {
        prepareCreate("should-fail", Settings.builder().put(indexSettings()).put(SETTING_NUMBER_OF_SHARDS, counts.getFailingIndexShards()).put(SETTING_NUMBER_OF_REPLICAS, counts.getFailingIndexReplicas())).get();
        fail("Should not have been able to go over the limit");
    } catch (IllegalArgumentException e) {
        verifyException(dataNodes, counts, e);
    }
    ClusterState clusterState = client().admin().cluster().prepareState().get().getState();
    assertFalse(clusterState.getMetadata().hasIndex("should-fail"));
}
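setShardsPerNode is a helper of the test class and is not shown here. A minimal sketch, assuming it simply applies the cluster-wide cluster.max_shards_per_node setting (the real helper may randomize between persistent and transient settings):

// Hedged sketch: raise or lower the cluster-wide shard limit for the test.
private void setShardsPerNode(int shardsPerNode) {
    assertAcked(
        client().admin()
            .cluster()
            .prepareUpdateSettings()
            .setPersistentSettings(Settings.builder().put("cluster.max_shards_per_node", shardsPerNode))
            .get()
    );
}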
Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.
From the class ClusterShardLimitIT, method testIndexCreationOverLimitFromTemplate:
public void testIndexCreationOverLimitFromTemplate() {
    int dataNodes = client().admin().cluster().prepareState().get().getState().getNodes().getDataNodes().size();
    final ShardCounts counts = ShardCounts.forDataNodeCount(dataNodes);
    setShardsPerNode(counts.getShardsPerNode());
    if (counts.getFirstIndexShards() > 0) {
        createIndex("test", Settings.builder().put(indexSettings()).put(SETTING_NUMBER_OF_SHARDS, counts.getFirstIndexShards()).put(SETTING_NUMBER_OF_REPLICAS, counts.getFirstIndexReplicas()).build());
    }
    assertAcked(
        client().admin()
            .indices()
            .preparePutTemplate("should-fail")
            .setPatterns(Collections.singletonList("should-fail"))
            .setOrder(1)
            .setSettings(Settings.builder().put(SETTING_NUMBER_OF_SHARDS, counts.getFailingIndexShards()).put(SETTING_NUMBER_OF_REPLICAS, counts.getFailingIndexReplicas()))
            .get()
    );
    final IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> client().admin().indices().prepareCreate("should-fail").get());
    verifyException(dataNodes, counts, e);
    ClusterState clusterState = client().admin().cluster().prepareState().get().getState();
    assertFalse(clusterState.getMetadata().hasIndex("should-fail"));
}
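Both shard-limit tests delegate to verifyException, which checks the validation message produced when the limit would be exceeded. A sketch of the expected arithmetic follows; the exact message text is an assumption modeled on the usual shard-limit validation error:

// Hedged sketch: the failing request would add shards * (1 + replicas) shards,
// against a cluster-wide cap of shardsPerNode * dataNodes.
private void verifyException(int dataNodes, ShardCounts counts, IllegalArgumentException e) {
    int totalShards = counts.getFailingIndexShards() * (1 + counts.getFailingIndexReplicas());
    int currentShards = counts.getFirstIndexShards() * (1 + counts.getFirstIndexReplicas());
    int maxShards = counts.getShardsPerNode() * dataNodes;
    // assumed message format, matching the shard-limit validation error
    String expectedError = "Validation Failed: 1: this action would add [" + totalShards
        + "] total shards, but this cluster currently has [" + currentShards
        + "]/[" + maxShards + "] maximum shards open;";
    assertEquals(expectedError, e.getMessage());
}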