Use of org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse in project OpenSearch by opensearch-project.
From the class SnapshotResiliencyTests, method testConcurrentSnapshotRestoreAndDeleteOther:
public void testConcurrentSnapshotRestoreAndDeleteOther() {
    setupTestCluster(randomFrom(1, 3, 5), randomIntBetween(2, 10));

    String repoName = "repo";
    String snapshotName = "snapshot";
    final String index = "test";
    final int shards = randomIntBetween(1, 10);

    TestClusterNodes.TestClusterNode clusterManagerNode = testClusterNodes.currentClusterManager(
        testClusterNodes.nodes.values().iterator().next().clusterService.state()
    );

    final StepListener<CreateSnapshotResponse> createSnapshotResponseStepListener = new StepListener<>();
    final int documentsFirstSnapshot = randomIntBetween(0, 100);

    continueOrDie(
        createRepoAndIndex(repoName, index, shards),
        createIndexResponse -> indexNDocuments(
            documentsFirstSnapshot,
            index,
            () -> client().admin()
                .cluster()
                .prepareCreateSnapshot(repoName, snapshotName)
                .setWaitForCompletion(true)
                .execute(createSnapshotResponseStepListener)
        )
    );

    final int documentsSecondSnapshot = randomIntBetween(0, 100);
    final StepListener<CreateSnapshotResponse> createOtherSnapshotResponseStepListener = new StepListener<>();
    final String secondSnapshotName = "snapshot-2";

    continueOrDie(
        createSnapshotResponseStepListener,
        createSnapshotResponse -> indexNDocuments(
            documentsSecondSnapshot,
            index,
            () -> client().admin()
                .cluster()
                .prepareCreateSnapshot(repoName, secondSnapshotName)
                .setWaitForCompletion(true)
                .execute(createOtherSnapshotResponseStepListener)
        )
    );

    final StepListener<AcknowledgedResponse> deleteSnapshotStepListener = new StepListener<>();
    final StepListener<RestoreSnapshotResponse> restoreSnapshotResponseListener = new StepListener<>();

    continueOrDie(createOtherSnapshotResponseStepListener, createSnapshotResponse -> {
        scheduleNow(() -> client().admin().cluster().prepareDeleteSnapshot(repoName, snapshotName).execute(deleteSnapshotStepListener));
        scheduleNow(
            () -> client().admin()
                .cluster()
                .restoreSnapshot(
                    new RestoreSnapshotRequest(repoName, secondSnapshotName).waitForCompletion(true)
                        .renamePattern("(.+)")
                        .renameReplacement("restored_$1"),
                    restoreSnapshotResponseListener
                )
        );
    });

    final StepListener<SearchResponse> searchResponseListener = new StepListener<>();

    continueOrDie(restoreSnapshotResponseListener, restoreSnapshotResponse -> {
        assertEquals(shards, restoreSnapshotResponse.getRestoreInfo().totalShards());
        client().search(
            new SearchRequest("restored_" + index).source(new SearchSourceBuilder().size(0).trackTotalHits(true)),
            searchResponseListener
        );
    });

    deterministicTaskQueue.runAllRunnableTasks();

    assertEquals(
        documentsFirstSnapshot + documentsSecondSnapshot,
        Objects.requireNonNull(searchResponseListener.result().getHits().getTotalHits()).value
    );
    assertThat(deleteSnapshotStepListener.result().isAcknowledged(), is(true));
    assertThat(restoreSnapshotResponseListener.result().getRestoreInfo().failedShards(), is(0));

    final Repository repository = clusterManagerNode.repositoriesService.repository(repoName);
    Collection<SnapshotId> snapshotIds = getRepositoryData(repository).getSnapshotIds();
    assertThat(snapshotIds, contains(createOtherSnapshotResponseStepListener.result().getSnapshotInfo().snapshotId()));
    for (SnapshotId snapshotId : snapshotIds) {
        final SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotId);
        assertEquals(SnapshotState.SUCCESS, snapshotInfo.state());
        assertThat(snapshotInfo.indices(), containsInAnyOrder(index));
        assertEquals(shards, snapshotInfo.successfulShards());
        assertEquals(0, snapshotInfo.failedShards());
    }
}
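As a point of reference, the same rename-on-restore call can be issued outside the deterministic test harness with a plain ActionListener. The following is a minimal sketch, not part of the test above: the helper name is hypothetical, and it assumes an already-connected Client plus an existing repository "repo" and snapshot "snapshot-2".

import org.opensearch.action.ActionListener;
import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotRequest;
import org.opensearch.client.Client;

// Minimal sketch (hypothetical helper): restore every index of "snapshot-2" from "repo"
// under a "restored_" prefix and react to the RestoreSnapshotResponse asynchronously.
static void restoreRenamed(Client client) {
    RestoreSnapshotRequest request = new RestoreSnapshotRequest("repo", "snapshot-2")
        .waitForCompletion(true) // otherwise getRestoreInfo() in the response is null
        .renamePattern("(.+)")
        .renameReplacement("restored_$1");
    client.admin().cluster().restoreSnapshot(request, ActionListener.wrap(
        response -> System.out.println("restored shards: " + response.getRestoreInfo().totalShards()),
        Exception::printStackTrace
    ));
}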
Use of org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse in project OpenSearch by opensearch-project.
From the class DiskThresholdDeciderIT, method testRestoreSnapshotAllocationDoesNotExceedWatermark:
public void testRestoreSnapshotAllocationDoesNotExceedWatermark() throws Exception {
    internalCluster().startClusterManagerOnlyNode();
    internalCluster().startDataOnlyNode();
    final String dataNodeName = internalCluster().startDataOnlyNode();
    ensureStableCluster(3);

    assertAcked(
        client().admin()
            .cluster()
            .preparePutRepository("repo")
            .setType(FsRepository.TYPE)
            .setSettings(Settings.builder().put("location", randomRepoPath()).put("compress", randomBoolean()))
    );

    final InternalClusterInfoService clusterInfoService = (InternalClusterInfoService) internalCluster()
        .getCurrentMasterNodeInstance(ClusterInfoService.class);
    internalCluster().getCurrentMasterNodeInstance(ClusterService.class).addListener(event -> clusterInfoService.refresh());

    final String dataNode0Id = internalCluster().getInstance(NodeEnvironment.class, dataNodeName).nodeId();
    final Path dataNode0Path = internalCluster().getInstance(Environment.class, dataNodeName).dataFiles()[0];

    final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);
    createIndex(
        indexName,
        Settings.builder()
            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 6)
            .put(INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING.getKey(), "0ms")
            .build()
    );
    final long minShardSize = createReasonableSizedShards(indexName);

    final CreateSnapshotResponse createSnapshotResponse = client().admin()
        .cluster()
        .prepareCreateSnapshot("repo", "snap")
        .setWaitForCompletion(true)
        .get();
    final SnapshotInfo snapshotInfo = createSnapshotResponse.getSnapshotInfo();
    assertThat(snapshotInfo.successfulShards(), is(snapshotInfo.totalShards()));
    assertThat(snapshotInfo.state(), is(SnapshotState.SUCCESS));

    assertAcked(client().admin().indices().prepareDelete(indexName).get());

    // Reduce the disk size of node 0 so that no shards fit below the low watermark,
    // forcing all shards to be assigned to the other data node.
    fileSystemProvider.getTestFileStore(dataNode0Path).setTotalSpace(minShardSize + WATERMARK_BYTES - 1L);
    refreshDiskUsage();

    assertAcked(
        client().admin()
            .cluster()
            .prepareUpdateSettings()
            .setTransientSettings(
                Settings.builder()
                    .put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), Rebalance.NONE.toString())
                    .build()
            )
            .get()
    );

    final RestoreSnapshotResponse restoreSnapshotResponse = client().admin()
        .cluster()
        .prepareRestoreSnapshot("repo", "snap")
        .setWaitForCompletion(true)
        .get();
    final RestoreInfo restoreInfo = restoreSnapshotResponse.getRestoreInfo();
    assertThat(restoreInfo.successfulShards(), is(snapshotInfo.totalShards()));
    assertThat(restoreInfo.failedShards(), is(0));
    assertBusy(() -> assertThat(getShardRoutings(dataNode0Id, indexName), empty()));

    assertAcked(
        client().admin()
            .cluster()
            .prepareUpdateSettings()
            .setTransientSettings(
                Settings.builder().putNull(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey()).build()
            )
            .get()
    );

    // Increase the disk size of node 0 to allow just enough room for one shard,
    // then check that one shard is rebalanced back onto it.
    fileSystemProvider.getTestFileStore(dataNode0Path).setTotalSpace(minShardSize + WATERMARK_BYTES + 1L);
    assertBusyWithDiskUsageRefresh(dataNode0Id, indexName, hasSize(1));
}
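The synchronous builder form used above returns the RestoreSnapshotResponse directly. Below is a minimal sketch of the usual RestoreInfo checks outside a test harness; the helper name is hypothetical, and it assumes a connected Client and the repository/snapshot names shown above.

import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
import org.opensearch.client.Client;
import org.opensearch.snapshots.RestoreInfo;

// Minimal sketch (hypothetical helper): block until the restore finishes, then verify shard counts.
static void restoreAndVerify(Client client) {
    RestoreSnapshotResponse response = client.admin()
        .cluster()
        .prepareRestoreSnapshot("repo", "snap")
        .setWaitForCompletion(true) // required for getRestoreInfo() to be populated
        .get();
    RestoreInfo info = response.getRestoreInfo();
    if (info.failedShards() > 0 || info.successfulShards() != info.totalShards()) {
        throw new IllegalStateException("restore incomplete: " + info.failedShards() + " failed shards");
    }
}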
Use of org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse in project OpenSearch by opensearch-project.
From the class DedicatedClusterSnapshotRestoreIT, method testRestoreShrinkIndex:
/**
 * Tests that a shrunken index (created via the shrink APIs) that was subsequently
 * snapshotted can be restored when the node the shrunken index was created on is
 * no longer part of the cluster.
 */
public void testRestoreShrinkIndex() throws Exception {
    logger.info("--> starting a cluster-manager node and a data node");
    internalCluster().startClusterManagerOnlyNode();
    internalCluster().startDataOnlyNode();

    final String repo = "test-repo";
    final String snapshot = "test-snap";
    final String sourceIdx = "test-idx";
    final String shrunkIdx = "test-idx-shrunk";

    createRepository(repo, "fs");
    assertAcked(prepareCreate(sourceIdx, 0, indexSettingsNoReplicas(between(2, 10))));
    ensureGreen();
    indexRandomDocs(sourceIdx, randomIntBetween(10, 100));

    logger.info("--> shrink the index");
    assertAcked(
        client().admin().indices().prepareUpdateSettings(sourceIdx).setSettings(Settings.builder().put("index.blocks.write", true)).get()
    );
    assertAcked(client().admin().indices().prepareResizeIndex(sourceIdx, shrunkIdx).get());

    logger.info("--> snapshot the shrunk index");
    createSnapshot(repo, snapshot, Collections.singletonList(shrunkIdx));

    logger.info("--> delete index and stop the data node");
    assertAcked(client().admin().indices().prepareDelete(sourceIdx).get());
    assertAcked(client().admin().indices().prepareDelete(shrunkIdx).get());
    internalCluster().stopRandomDataNode();
    clusterAdmin().prepareHealth().setTimeout("30s").setWaitForNodes("1");

    logger.info("--> start a new data node");
    final Settings dataSettings = Settings.builder()
        .put(Node.NODE_NAME_SETTING.getKey(), randomAlphaOfLength(5))
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) // to get a new node id
        .build();
    internalCluster().startDataOnlyNode(dataSettings);
    clusterAdmin().prepareHealth().setTimeout("30s").setWaitForNodes("2");

    logger.info("--> restore the shrunk index and ensure all shards are allocated");
    RestoreSnapshotResponse restoreResponse = clusterAdmin().prepareRestoreSnapshot(repo, snapshot)
        .setWaitForCompletion(true)
        .setIndices(shrunkIdx)
        .get();
    assertEquals(restoreResponse.getRestoreInfo().totalShards(), restoreResponse.getRestoreInfo().successfulShards());
    ensureYellow();
}
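The shrink step in this test follows the standard two-phase resize flow, which the snapshot then captures like any other index. A minimal sketch of just that flow, with a hypothetical helper name and assuming a connected Client:

import org.opensearch.client.Client;
import org.opensearch.common.settings.Settings;

// Minimal sketch (hypothetical helper): a shrink source must be write-blocked before resizing.
static void shrink(Client client, String sourceIndex, String targetIndex) {
    client.admin().indices().prepareUpdateSettings(sourceIndex)
        .setSettings(Settings.builder().put("index.blocks.write", true))
        .get();
    client.admin().indices().prepareResizeIndex(sourceIndex, targetIndex).get();
}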
Use of org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse in project OpenSearch by opensearch-project.
From the class DedicatedClusterSnapshotRestoreIT, method testRetentionLeasesClearedOnRestore:
public void testRetentionLeasesClearedOnRestore() throws Exception {
    final String repoName = "test-repo-retention-leases";
    createRepository(repoName, "fs");

    final String indexName = "index-retention-leases";
    final int shardCount = randomIntBetween(1, 5);
    assertAcked(client().admin().indices().prepareCreate(indexName).setSettings(indexSettingsNoReplicas(shardCount)));
    final ShardId shardId = new ShardId(resolveIndex(indexName), randomIntBetween(0, shardCount - 1));

    final int snapshotDocCount = iterations(10, 1000);
    logger.debug("--> indexing {} docs into {}", snapshotDocCount, indexName);
    IndexRequestBuilder[] indexRequestBuilders = new IndexRequestBuilder[snapshotDocCount];
    for (int i = 0; i < snapshotDocCount; i++) {
        indexRequestBuilders[i] = client().prepareIndex(indexName).setSource("field", "value");
    }
    indexRandom(true, indexRequestBuilders);
    assertDocCount(indexName, snapshotDocCount);

    final String leaseId = randomAlphaOfLength(randomIntBetween(1, 10)).toLowerCase(Locale.ROOT);
    logger.debug("--> adding retention lease with id {} to {}", leaseId, shardId);
    client().execute(RetentionLeaseActions.Add.INSTANCE, new RetentionLeaseActions.AddRequest(shardId, leaseId, RETAIN_ALL, "test"))
        .actionGet();

    final ShardStats shardStats = Arrays.stream(client().admin().indices().prepareStats(indexName).get().getShards())
        .filter(s -> s.getShardRouting().shardId().equals(shardId))
        .findFirst()
        .get();
    final RetentionLeases retentionLeases = shardStats.getRetentionLeaseStats().retentionLeases();
    assertTrue(shardStats + ": " + retentionLeases, retentionLeases.contains(leaseId));

    final String snapshotName = "snapshot-retention-leases";
    createSnapshot(repoName, snapshotName, Collections.singletonList(indexName));

    if (randomBoolean()) {
        final int extraDocCount = iterations(10, 1000);
        logger.debug("--> indexing {} extra docs into {}", extraDocCount, indexName);
        indexRequestBuilders = new IndexRequestBuilder[extraDocCount];
        for (int i = 0; i < extraDocCount; i++) {
            indexRequestBuilders[i] = client().prepareIndex(indexName).setSource("field", "value");
        }
        indexRandom(true, indexRequestBuilders);
    }

    // Wait for green so the close does not fail in the edge case of coinciding with a shard recovery that hasn't fully synced yet.
    ensureGreen();
    logger.debug("--> close index {}", indexName);
    assertAcked(client().admin().indices().prepareClose(indexName));

    logger.debug("--> restore index {} from snapshot", indexName);
    RestoreSnapshotResponse restoreResponse = clusterAdmin().prepareRestoreSnapshot(repoName, snapshotName)
        .setWaitForCompletion(true)
        .get();
    assertThat(restoreResponse.getRestoreInfo().successfulShards(), equalTo(shardCount));
    assertThat(restoreResponse.getRestoreInfo().failedShards(), equalTo(0));
    ensureGreen();
    assertDocCount(indexName, snapshotDocCount);

    final RetentionLeases restoredRetentionLeases = Arrays.stream(client().admin().indices().prepareStats(indexName).get().getShards())
        .filter(s -> s.getShardRouting().shardId().equals(shardId))
        .findFirst()
        .get()
        .getRetentionLeaseStats()
        .retentionLeases();
    assertFalse(restoredRetentionLeases.toString() + " has no " + leaseId, restoredRetentionLeases.contains(leaseId));
}
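Note the ordering this test relies on: a restore cannot target an open index, so the existing index is closed before the restore is issued. A minimal close-then-restore sketch, with a hypothetical helper name and assuming a connected Client:

import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
import org.opensearch.client.Client;

// Minimal sketch (hypothetical helper): restoring over an existing index requires closing it first.
static RestoreSnapshotResponse closeAndRestore(Client client, String repo, String snapshot, String index) {
    client.admin().indices().prepareClose(index).get();
    return client.admin()
        .cluster()
        .prepareRestoreSnapshot(repo, snapshot)
        .setIndices(index)
        .setWaitForCompletion(true)
        .get();
}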
Use of org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse in project OpenSearch by opensearch-project.
From the class DedicatedClusterSnapshotRestoreIT, method testRestoreIndexWithMissingShards:
public void testRestoreIndexWithMissingShards() throws Exception {
    disableRepoConsistencyCheck("This test leaves behind a purposely broken repository");
    logger.info("--> start 2 nodes");
    internalCluster().startNode();
    internalCluster().startNode();
    cluster().wipeIndices("_all");

    logger.info("--> create an index that will have some unallocated shards");
    assertAcked(prepareCreate("test-idx-some", 2, indexSettingsNoReplicas(6)));
    ensureGreen();
    indexRandomDocs("test-idx-some", 100);

    logger.info("--> shutdown one of the nodes");
    internalCluster().stopRandomDataNode();
    assertThat(
        clusterAdmin().prepareHealth()
            .setWaitForEvents(Priority.LANGUID)
            .setTimeout("1m")
            .setWaitForNodes("<2")
            .execute()
            .actionGet()
            .isTimedOut(),
        equalTo(false)
    );

    logger.info("--> create an index that will have all allocated shards");
    assertAcked(prepareCreate("test-idx-all", 1, indexSettingsNoReplicas(6)));
    ensureGreen("test-idx-all");

    logger.info("--> create an index that will be closed");
    assertAcked(prepareCreate("test-idx-closed", 1, indexSettingsNoReplicas(4)));
    indexRandomDocs("test-idx-all", 100);
    indexRandomDocs("test-idx-closed", 100);
    assertAcked(client().admin().indices().prepareClose("test-idx-closed"));

    logger.info("--> create an index that will have no allocated shards");
    assertAcked(
        prepareCreate("test-idx-none", 1, indexSettingsNoReplicas(6).put("index.routing.allocation.include.tag", "nowhere"))
            .setWaitForActiveShards(ActiveShardCount.NONE)
            .get()
    );
    assertTrue(indexExists("test-idx-none"));

    createRepository("test-repo", "fs");

    logger.info("--> start snapshot with default settings without a closed index - should fail");
    final SnapshotException sne = expectThrows(
        SnapshotException.class,
        () -> clusterAdmin().prepareCreateSnapshot("test-repo", "test-snap-1")
            .setIndices("test-idx-all", "test-idx-none", "test-idx-some", "test-idx-closed")
            .setWaitForCompletion(true)
            .execute()
            .actionGet()
    );
    assertThat(sne.getMessage(), containsString("Indices don't have primary shards"));

    if (randomBoolean()) {
        logger.info("checking snapshot completion using status");
        clusterAdmin().prepareCreateSnapshot("test-repo", "test-snap-2")
            .setIndices("test-idx-all", "test-idx-none", "test-idx-some", "test-idx-closed")
            .setWaitForCompletion(false)
            .setPartial(true)
            .execute()
            .actionGet();
        assertBusy(() -> {
            SnapshotsStatusResponse snapshotsStatusResponse = clusterAdmin().prepareSnapshotStatus("test-repo")
                .setSnapshots("test-snap-2")
                .get();
            List<SnapshotStatus> snapshotStatuses = snapshotsStatusResponse.getSnapshots();
            assertEquals(snapshotStatuses.size(), 1);
            logger.trace("current snapshot status [{}]", snapshotStatuses.get(0));
            assertTrue(snapshotStatuses.get(0).getState().completed());
        }, 1, TimeUnit.MINUTES);
        SnapshotsStatusResponse snapshotsStatusResponse = clusterAdmin().prepareSnapshotStatus("test-repo")
            .setSnapshots("test-snap-2")
            .get();
        List<SnapshotStatus> snapshotStatuses = snapshotsStatusResponse.getSnapshots();
        assertThat(snapshotStatuses.size(), equalTo(1));
        SnapshotStatus snapshotStatus = snapshotStatuses.get(0);
        assertThat(snapshotStatus.getShardsStats().getTotalShards(), equalTo(22));
        assertThat(snapshotStatus.getShardsStats().getDoneShards(), lessThan(16));
        assertThat(snapshotStatus.getShardsStats().getDoneShards(), greaterThan(10));

        // There is a slight delay between the snapshot being marked as completed in the cluster state and on the file system.
        // After it has been marked as completed in the cluster state, check that it is completed on the file system as well.
        assertBusy(() -> {
            SnapshotInfo snapshotInfo = getSnapshot("test-repo", "test-snap-2");
            assertTrue(snapshotInfo.state().completed());
            assertEquals(SnapshotState.PARTIAL, snapshotInfo.state());
        }, 1, TimeUnit.MINUTES);
    } else {
        logger.info("checking snapshot completion using wait_for_completion flag");
        final CreateSnapshotResponse createSnapshotResponse = clusterAdmin().prepareCreateSnapshot("test-repo", "test-snap-2")
            .setIndices("test-idx-all", "test-idx-none", "test-idx-some", "test-idx-closed")
            .setWaitForCompletion(true)
            .setPartial(true)
            .execute()
            .actionGet();
        logger.info(
            "State: [{}], Reason: [{}]",
            createSnapshotResponse.getSnapshotInfo().state(),
            createSnapshotResponse.getSnapshotInfo().reason()
        );
        assertThat(createSnapshotResponse.getSnapshotInfo().totalShards(), equalTo(22));
        assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), lessThan(16));
        assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(10));
        assertThat(getSnapshot("test-repo", "test-snap-2").state(), equalTo(SnapshotState.PARTIAL));
    }

    assertAcked(client().admin().indices().prepareClose("test-idx-all"));

    logger.info("--> restore incomplete snapshot - should fail");
    assertFutureThrows(
        clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap-2")
            .setRestoreGlobalState(false)
            .setWaitForCompletion(true)
            .execute(),
        SnapshotRestoreException.class
    );

    logger.info("--> restore snapshot for the index that was snapshotted completely");
    RestoreSnapshotResponse restoreSnapshotResponse = clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap-2")
        .setRestoreGlobalState(false)
        .setIndices("test-idx-all")
        .setWaitForCompletion(true)
        .execute()
        .actionGet();
    assertThat(restoreSnapshotResponse.getRestoreInfo(), notNullValue());
    assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), equalTo(6));
    assertThat(restoreSnapshotResponse.getRestoreInfo().successfulShards(), equalTo(6));
    assertThat(restoreSnapshotResponse.getRestoreInfo().failedShards(), equalTo(0));
    assertDocCount("test-idx-all", 100L);

    logger.info("--> restore snapshot for the partial index");
    cluster().wipeIndices("test-idx-some");
    restoreSnapshotResponse = clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap-2")
        .setRestoreGlobalState(false)
        .setIndices("test-idx-some")
        .setPartial(true)
        .setWaitForCompletion(true)
        .get();
    assertThat(restoreSnapshotResponse.getRestoreInfo(), notNullValue());
    assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), equalTo(6));
    assertThat(restoreSnapshotResponse.getRestoreInfo().successfulShards(), allOf(greaterThan(0), lessThan(6)));
    assertThat(restoreSnapshotResponse.getRestoreInfo().failedShards(), greaterThan(0));
    assertThat(getCountForIndex("test-idx-some"), allOf(greaterThan(0L), lessThan(100L)));

    logger.info("--> restore snapshot for the index that didn't have any shards snapshotted successfully");
    cluster().wipeIndices("test-idx-none");
    restoreSnapshotResponse = clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap-2")
        .setRestoreGlobalState(false)
        .setIndices("test-idx-none")
        .setPartial(true)
        .setWaitForCompletion(true)
        .get();
    assertThat(restoreSnapshotResponse.getRestoreInfo(), notNullValue());
    assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), equalTo(6));
    assertThat(restoreSnapshotResponse.getRestoreInfo().successfulShards(), equalTo(0));
    assertThat(restoreSnapshotResponse.getRestoreInfo().failedShards(), equalTo(6));
    assertThat(getCountForIndex("test-idx-some"), allOf(greaterThan(0L), lessThan(100L)));

    logger.info("--> restore snapshot for the closed index that was snapshotted completely");
    restoreSnapshotResponse = clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap-2")
        .setRestoreGlobalState(false)
        .setIndices("test-idx-closed")
        .setWaitForCompletion(true)
        .execute()
        .actionGet();
    assertThat(restoreSnapshotResponse.getRestoreInfo(), notNullValue());
    assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), equalTo(4));
    assertThat(restoreSnapshotResponse.getRestoreInfo().successfulShards(), equalTo(4));
    assertThat(restoreSnapshotResponse.getRestoreInfo().failedShards(), equalTo(0));
    assertDocCount("test-idx-closed", 100L);
}
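This last example exercises both failure surfaces of a restore: restoring a partial snapshot without setPartial(true) fails fast with a SnapshotRestoreException, while with setPartial(true) the unrecoverable shards are reported through RestoreInfo.failedShards() instead. A minimal sketch of handling both, with a hypothetical helper name and assuming a connected Client:

import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
import org.opensearch.client.Client;
import org.opensearch.snapshots.RestoreInfo;
import org.opensearch.snapshots.SnapshotRestoreException;

// Minimal sketch (hypothetical helper): tolerate a partial snapshot and report what was lost.
static void restorePartial(Client client, String repo, String snapshot) {
    try {
        RestoreSnapshotResponse response = client.admin()
            .cluster()
            .prepareRestoreSnapshot(repo, snapshot)
            .setRestoreGlobalState(false)
            .setPartial(true) // without this, restoring a partial snapshot is rejected outright
            .setWaitForCompletion(true)
            .get();
        RestoreInfo info = response.getRestoreInfo();
        System.out.println(info.failedShards() + " of " + info.totalShards() + " shards could not be restored");
    } catch (SnapshotRestoreException e) {
        // Thrown, for example, when the snapshot is partial and setPartial(true) was not set.
        e.printStackTrace();
    }
}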