Example use of org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus in the OpenSearch project (opensearch-project).
Class SnapshotStatusApisIT, method testCorrectCountsForDoneShards:
/**
 * Verifies that the snapshot status APIs keep reporting the right file counts and sizes for a
 * shard whose snapshot has already finished.
 * <p>
 * Sequence exercised:
 * <ol>
 * <li>Snapshot two shards, each located on its own data node.</li>
 * <li>Let one shard snapshot run to completion while the other one is blocked.</li>
 * <li>Restart the data node whose shard snapshot has completed.</li>
 * <li>Check that the status APIs still return correct file counts and sizes.</li>
 * </ol>
 * A second snapshot is then taken to check that the untouched shard is reported with the same
 * totals and with no incremental files.
 *
 * @throws Exception on failure
 */
public void testCorrectCountsForDoneShards() throws Exception {
    final String repoName = "test-repo";
    final String indexOne = "index-1";
    final String indexTwo = "index-2";
    final String snapshotOne = "snap-1";
    final String snapshotTwo = "snap-2";

    final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
    final String dataNodeOne = dataNodes.get(0);
    final String dataNodeTwo = dataNodes.get(1);

    createIndex(indexOne, singleShardOneNode(dataNodeOne));
    index(indexOne, "_doc", "some_doc_id", "foo", "bar");
    createIndex(indexTwo, singleShardOneNode(dataNodeTwo));
    index(indexTwo, "_doc", "some_doc_id", "foo", "bar");

    createRepository(repoName, "mock");
    blockDataNode(repoName, dataNodeOne);

    // A data node gets restarted further down, hence the request goes through a cluster-manager client.
    final ActionFuture<CreateSnapshotResponse> firstSnapshotFuture = internalCluster().masterClient()
        .admin()
        .cluster()
        .prepareCreateSnapshot(repoName, snapshotOne)
        .setWaitForCompletion(true)
        .execute();

    // Wait until the shard of the second index is DONE while the snapshot as a whole is still STARTED.
    assertBusy(() -> {
        final SnapshotStatus status = getSnapshotStatus(repoName, snapshotOne);
        assertThat(status.getState(), is(SnapshotsInProgress.State.STARTED));
        final SnapshotIndexShardStatus shard = stateFirstShard(status, indexTwo);
        assertThat(shard.getStage(), is(SnapshotIndexShardStage.DONE));
        assertThat(shard.getStats().getTotalFileCount(), greaterThan(0));
        assertThat(shard.getStats().getTotalSize(), greaterThan(0L));
    }, 30L, TimeUnit.SECONDS);

    // Remember the numbers reported before the restart so they can be compared afterwards.
    final SnapshotStatus statusBeforeRestart = getSnapshotStatus(repoName, snapshotOne);
    final SnapshotStats statsBeforeRestart = stateFirstShard(statusBeforeRestart, indexTwo).getStats();
    final int totalFiles = statsBeforeRestart.getTotalFileCount();
    final long totalFileSize = statsBeforeRestart.getTotalSize();

    internalCluster().restartNode(dataNodeTwo);

    final SnapshotStatus statusAfterRestart = getSnapshotStatus(repoName, snapshotOne);
    final SnapshotIndexShardStatus shardAfterRestart = stateFirstShard(statusAfterRestart, indexTwo);
    assertThat(shardAfterRestart.getStage(), is(SnapshotIndexShardStage.DONE));
    assertThat(shardAfterRestart.getStats().getTotalFileCount(), equalTo(totalFiles));
    assertThat(shardAfterRestart.getStats().getTotalSize(), equalTo(totalFileSize));

    unblockAllDataNodes(repoName);
    assertThat(firstSnapshotFuture.get().getSnapshotInfo().state(), is(SnapshotState.SUCCESS));

    // Add a document to the second index so the next snapshot has to write data for it,
    // which gives the block on that node something to trigger on.
    index(indexTwo, "_doc", "some_other_doc_id", "foo", "other_bar");
    blockDataNode(repoName, dataNodeTwo);

    final ActionFuture<CreateSnapshotResponse> secondSnapshotFuture = client().admin()
        .cluster()
        .prepareCreateSnapshot(repoName, snapshotTwo)
        .setWaitForCompletion(true)
        .execute();
    waitForBlock(dataNodeTwo, repoName, TimeValue.timeValueSeconds(30L));

    // The first index did not change between the snapshots: both must report identical totals
    // for its shard and the second snapshot must not report any incremental files for it.
    assertBusy(() -> {
        final SnapshotStatus firstStatus = getSnapshotStatus(repoName, snapshotOne);
        final SnapshotStatus secondStatus = getSnapshotStatus(repoName, snapshotTwo);
        final SnapshotIndexShardStatus shardInFirst = stateFirstShard(firstStatus, indexOne);
        final SnapshotIndexShardStatus shardInSecond = stateFirstShard(secondStatus, indexOne);
        assertThat(shardInFirst.getStage(), is(SnapshotIndexShardStage.DONE));
        assertThat(shardInSecond.getStage(), is(SnapshotIndexShardStage.DONE));

        final SnapshotStats statsInFirst = shardInFirst.getStats();
        final SnapshotStats statsInSecond = shardInSecond.getStats();
        final int fileCountInFirst = statsInFirst.getTotalFileCount();
        final long sizeInFirst = statsInFirst.getTotalSize();
        assertThat(fileCountInFirst, greaterThan(0));
        assertThat(sizeInFirst, greaterThan(0L));
        assertThat(fileCountInFirst, equalTo(statsInSecond.getTotalFileCount()));
        assertThat(sizeInFirst, equalTo(statsInSecond.getTotalSize()));
        assertThat(statsInSecond.getIncrementalFileCount(), equalTo(0));
        assertThat(statsInSecond.getIncrementalSize(), equalTo(0L));
    }, 30L, TimeUnit.SECONDS);

    unblockAllDataNodes(repoName);
    assertThat(secondSnapshotFuture.get().getSnapshotInfo().state(), is(SnapshotState.SUCCESS));
}
Example use of org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus in the OpenSearch project (opensearch-project).
Class SharedClusterSnapshotRestoreIT, method testSnapshotStatus:
/**
 * Follows a single snapshot through the status APIs: first while one node is blocked and the
 * snapshot is still running, then after it has completed, and finally for a snapshot name that
 * does not exist (with and without {@code ignoreUnavailable}).
 *
 * @throws Exception on failure
 */
public void testSnapshotStatus() throws Exception {
    final Client client = client();
    final Settings.Builder repoSettings = Settings.builder()
        .put("location", randomRepoPath())
        .put("random", randomAlphaOfLength(10))
        .put("wait_after_unblock", 200);
    createRepository("test-repo", "mock", repoSettings);

    // Index spread over 2 nodes with no replicas, so that each node holds a primary.
    assertAcked(prepareCreate("test-idx", 2, Settings.builder().put("number_of_replicas", 0)));
    indexRandomDocs("test-idx", 100);

    // Choose one node and block it.
    final String blockedNode = blockNodeWithIndex("test-repo", "test-idx");

    logger.info("--> snapshot");
    client.admin()
        .cluster()
        .prepareCreateSnapshot("test-repo", "test-snap")
        .setWaitForCompletion(false)
        .setIncludeGlobalState(false)
        .setIndices("test-idx")
        .get();

    logger.info("--> waiting for block to kick in");
    waitForBlock(blockedNode, "test-repo", TimeValue.timeValueSeconds(60));

    logger.info("--> execution was blocked on node [{}], checking snapshot status with specified repository and snapshot", blockedNode);
    final SnapshotsStatusResponse runningInRepo = client.admin().cluster().prepareSnapshotStatus("test-repo").execute().actionGet();
    assertThat(runningInRepo.getSnapshots().size(), equalTo(1));
    final SnapshotStatus statusInRepo = runningInRepo.getSnapshots().get(0);
    assertThat(statusInRepo.getState(), equalTo(State.STARTED));
    assertThat(statusInRepo.includeGlobalState(), equalTo(false));
    // The node was blocked in the middle of a data write, so one or more shard snapshots must still be STARTED.
    assertThat(statusInRepo.getShardsStats().getStartedShards(), greaterThan(0));
    for (SnapshotIndexShardStatus shard : statusInRepo.getIndices().get("test-idx")) {
        if (shard.getStage() != SnapshotIndexShardStage.STARTED) {
            continue;
        }
        assertThat(shard.getNodeId(), notNullValue());
    }

    logger.info("--> checking snapshot status for all currently running and snapshot with empty repository");
    final SnapshotsStatusResponse runningAnywhere = client.admin().cluster().prepareSnapshotStatus().execute().actionGet();
    assertThat(runningAnywhere.getSnapshots().size(), equalTo(1));
    final SnapshotStatus statusAnywhere = runningAnywhere.getSnapshots().get(0);
    assertThat(statusAnywhere.getState(), equalTo(State.STARTED));
    assertThat(statusAnywhere.includeGlobalState(), equalTo(false));
    // Same reasoning as above: the blocked write keeps one or more shard snapshots in STARTED.
    assertThat(statusAnywhere.getShardsStats().getStartedShards(), greaterThan(0));
    for (SnapshotIndexShardStatus shard : statusAnywhere.getIndices().get("test-idx")) {
        if (shard.getStage() != SnapshotIndexShardStage.STARTED) {
            continue;
        }
        assertThat(shard.getNodeId(), notNullValue());
    }

    logger.info("--> checking that _current returns the currently running snapshot");
    final GetSnapshotsResponse currentSnapshots = client.admin()
        .cluster()
        .prepareGetSnapshots("test-repo")
        .setCurrentSnapshot()
        .execute()
        .actionGet();
    assertThat(currentSnapshots.getSnapshots().size(), equalTo(1));
    final SnapshotInfo inProgressInfo = currentSnapshots.getSnapshots().get(0);
    assertThat(inProgressInfo.state(), equalTo(SnapshotState.IN_PROGRESS));

    logger.info("--> unblocking blocked node");
    unblockNode("test-repo", blockedNode);

    final SnapshotInfo finishedInfo = waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(600));
    logger.info("Number of failed shards [{}]", finishedInfo.shardFailures().size());
    logger.info("--> done");

    logger.info("--> checking snapshot status again after snapshot is done");
    final SnapshotsStatusResponse finishedResponse = client.admin()
        .cluster()
        .prepareSnapshotStatus("test-repo")
        .addSnapshots("test-snap")
        .execute()
        .actionGet();
    final SnapshotStatus finishedStatus = finishedResponse.getSnapshots().get(0);
    assertThat(finishedStatus.getIndices().size(), equalTo(1));
    assertThat(finishedStatus.includeGlobalState(), equalTo(false));

    // The per-index shard statistics must agree with what the completed snapshot info reports.
    final SnapshotIndexStatus indexStatus = finishedStatus.getIndices().get("test-idx");
    assertThat(indexStatus, notNullValue());
    assertThat(indexStatus.getShardsStats().getInitializingShards(), equalTo(0));
    assertThat(indexStatus.getShardsStats().getFailedShards(), equalTo(finishedInfo.failedShards()));
    assertThat(indexStatus.getShardsStats().getDoneShards(), equalTo(finishedInfo.successfulShards()));
    assertThat(indexStatus.getShards().size(), equalTo(finishedInfo.totalShards()));

    logger.info("--> checking snapshot status after it is done with empty repository");
    final SnapshotsStatusResponse nothingRunning = client.admin().cluster().prepareSnapshotStatus().execute().actionGet();
    assertThat(nothingRunning.getSnapshots().size(), equalTo(0));

    logger.info("--> checking that _current no longer returns the snapshot");
    assertThat(
        client.admin().cluster().prepareGetSnapshots("test-repo").addSnapshots("_current").execute().actionGet().getSnapshots().isEmpty(),
        equalTo(true)
    );

    // With ignoreUnavailable left at false, asking for the status of an unknown snapshot must throw.
    final SnapshotMissingException missing = expectThrows(
        SnapshotMissingException.class,
        () -> client.admin().cluster().prepareSnapshotStatus("test-repo").addSnapshots("test-snap-doesnt-exist").get()
    );
    assertEquals("[test-repo:test-snap-doesnt-exist] is missing", missing.getMessage());

    // With ignoreUnavailable set to true, the same request must return an empty result instead.
    final SnapshotsStatusResponse unknownIgnored = client.admin()
        .cluster()
        .prepareSnapshotStatus("test-repo")
        .addSnapshots("test-snap-doesnt-exist")
        .setIgnoreUnavailable(true)
        .get();
    assertTrue(unknownIgnored.getSnapshots().isEmpty());

    // Mixing an existing and an unknown snapshot with ignoreUnavailable set to true
    // must return only the existing one.
    final SnapshotsStatusResponse mixedRequest = client.admin()
        .cluster()
        .prepareSnapshotStatus("test-repo")
        .addSnapshots("test-snap", "test-snap-doesnt-exist")
        .setIgnoreUnavailable(true)
        .get();
    assertEquals(1, mixedRequest.getSnapshots().size());
    assertEquals("test-snap", mixedRequest.getSnapshots().get(0).getSnapshot().getSnapshotId().getName());
}
Example use of org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus in the OpenSearch project (opensearch-project).
Class SharedClusterSnapshotRestoreIT, method testSnapshotMoreThanOnce:
/**
 * Takes three consecutive snapshots of the same single-shard index and checks how many files
 * each of them reports as processed:
 * <ul>
 * <li>the first snapshot must process more than one file,</li>
 * <li>a second snapshot taken without any change in between must process no files,</li>
 * <li>a third snapshot taken after deleting a document must process more than two files.</li>
 * </ul>
 *
 * @throws InterruptedException declared by the original signature; kept for compatibility
 */
public void testSnapshotMoreThanOnce() throws InterruptedException {
    Client client = client();
    createRepository("test-repo", "fs");

    // only one shard
    final Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build();
    assertAcked(prepareCreate("test").setSettings(indexSettings));
    ensureGreen();

    indexRandomDocs("test", randomIntBetween(10, 100));
    // Flush and merge down to a single segment before the first snapshot is taken.
    assertNoFailures(client().admin().indices().prepareForceMerge("test").setFlush(true).setMaxNumSegments(1).get());

    createSnapshot("test-repo", "test", Collections.singletonList("test"));
    assertThat(getSnapshot("test-repo", "test").state(), equalTo(SnapshotState.SUCCESS));
    {
        SnapshotStatus snapshotStatus = client.admin()
            .cluster()
            .prepareSnapshotStatus("test-repo")
            .setSnapshots("test")
            .get()
            .getSnapshots()
            .get(0);
        List<SnapshotIndexShardStatus> shards = snapshotStatus.getShards();
        for (SnapshotIndexShardStatus status : shards) {
            assertThat(status.getStats().getProcessedFileCount(), greaterThan(1));
        }
    }

    // Nothing was indexed since the previous snapshot, so no file should be processed this time.
    createSnapshot("test-repo", "test-1", Collections.singletonList("test"));
    assertThat(getSnapshot("test-repo", "test-1").state(), equalTo(SnapshotState.SUCCESS));
    {
        SnapshotStatus snapshotStatus = client.admin()
            .cluster()
            .prepareSnapshotStatus("test-repo")
            .setSnapshots("test-1")
            .get()
            .getSnapshots()
            .get(0);
        List<SnapshotIndexShardStatus> shards = snapshotStatus.getShards();
        for (SnapshotIndexShardStatus status : shards) {
            assertThat(status.getStats().getProcessedFileCount(), equalTo(0));
        }
    }

    client().prepareDelete("test", "1").get();
    createSnapshot("test-repo", "test-2", Collections.singletonList("test"));
    assertThat(getSnapshot("test-repo", "test-2").state(), equalTo(SnapshotState.SUCCESS));
    {
        SnapshotStatus snapshotStatus = client.admin()
            .cluster()
            .prepareSnapshotStatus("test-repo")
            .setSnapshots("test-2")
            .get()
            .getSnapshots()
            .get(0);
        // The index settings used to be fetched here into a local that was never read;
        // that dead lookup (and its cluster round-trip) has been dropped.
        List<SnapshotIndexShardStatus> shards = snapshotStatus.getShards();
        for (SnapshotIndexShardStatus status : shards) {
            // we flush before the snapshot such that we have to process the segments_N files plus the .del file
            // soft-delete generates DV files.
            assertThat(status.getStats().getProcessedFileCount(), greaterThan(2));
        }
    }
}
Example use of org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus in the OpenSearch project (opensearch-project).
Class CloneSnapshotIT, method testCloneSnapshotIndex:
/**
 * Clones one index out of an existing snapshot into a new snapshot and checks that the status
 * API reports the same total file count and total size for that index in both snapshots.
 * The live index is randomly deleted before the clone is started.
 *
 * @throws Exception on failure
 */
public void testCloneSnapshotIndex() throws Exception {
    internalCluster().startClusterManagerOnlyNode();
    internalCluster().startDataOnlyNode();

    final String repoName = "repo-name";
    final String indexName = "index-1";
    final String sourceSnapshot = "source-snapshot";
    final String targetSnapshot = "target-snapshot";

    createRepository(repoName, "fs");
    createIndexWithRandomDocs(indexName, randomIntBetween(5, 10));
    createFullSnapshot(repoName, sourceSnapshot);

    // More documents are indexed after the source snapshot was taken.
    indexRandomDocs(indexName, randomIntBetween(20, 100));
    final boolean deleteIndexBeforeClone = randomBoolean();
    if (deleteIndexBeforeClone) {
        assertAcked(admin().indices().prepareDelete(indexName));
    }

    assertAcked(startClone(repoName, sourceSnapshot, targetSnapshot, indexName).get());

    final List<SnapshotStatus> statuses = clusterAdmin().prepareSnapshotStatus(repoName)
        .setSnapshots(sourceSnapshot, targetSnapshot)
        .get()
        .getSnapshots();
    assertThat(statuses, hasSize(2));

    // Both returned snapshots must describe the index with identical totals.
    final SnapshotIndexStatus firstIndexStatus = statuses.get(0).getIndices().get(indexName);
    final SnapshotIndexStatus secondIndexStatus = statuses.get(1).getIndices().get(indexName);
    assertEquals(firstIndexStatus.getStats().getTotalFileCount(), secondIndexStatus.getStats().getTotalFileCount());
    assertEquals(firstIndexStatus.getStats().getTotalSize(), secondIndexStatus.getStats().getTotalSize());
}
Example use of org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus in the OpenSearch project (opensearch-project).
Class DedicatedClusterSnapshotRestoreIT, method testRestoreIndexWithMissingShards:
/**
 * Snapshots a mix of indices (fully allocated, partially allocated, not allocated at all, and
 * closed) and verifies which of them can be restored from the resulting partial snapshot.
 * <p>
 * Outline:
 * <ol>
 * <li>Start two nodes, create {@code test-idx-some}, then stop one data node.</li>
 * <li>Create {@code test-idx-all}, {@code test-idx-closed} (closed after indexing) and
 * {@code test-idx-none} (routed to a non-existent tag, created without waiting for active shards).</li>
 * <li>A non-partial snapshot of all four indices must fail.</li>
 * <li>A partial snapshot must end in state {@code PARTIAL}; completion is detected either by
 * polling the status API or via {@code wait_for_completion}, chosen at random.</li>
 * <li>Restore each index individually and check the reported shard counts and document counts.</li>
 * </ol>
 *
 * @throws Exception on failure
 */
public void testRestoreIndexWithMissingShards() throws Exception {
    disableRepoConsistencyCheck("This test leaves behind a purposely broken repository");
    logger.info("--> start 2 nodes");
    internalCluster().startNode();
    internalCluster().startNode();
    cluster().wipeIndices("_all");

    logger.info("--> create an index that will have some unallocated shards");
    assertAcked(prepareCreate("test-idx-some", 2, indexSettingsNoReplicas(6)));
    ensureGreen();
    indexRandomDocs("test-idx-some", 100);

    logger.info("--> shutdown one of the nodes");
    internalCluster().stopRandomDataNode();
    assertThat(
        clusterAdmin().prepareHealth()
            .setWaitForEvents(Priority.LANGUID)
            .setTimeout("1m")
            .setWaitForNodes("<2")
            .execute()
            .actionGet()
            .isTimedOut(),
        equalTo(false)
    );

    logger.info("--> create an index that will have all allocated shards");
    assertAcked(prepareCreate("test-idx-all", 1, indexSettingsNoReplicas(6)));
    ensureGreen("test-idx-all");

    logger.info("--> create an index that will be closed");
    assertAcked(prepareCreate("test-idx-closed", 1, indexSettingsNoReplicas(4)));
    indexRandomDocs("test-idx-all", 100);
    indexRandomDocs("test-idx-closed", 100);
    assertAcked(client().admin().indices().prepareClose("test-idx-closed"));

    logger.info("--> create an index that will have no allocated shards");
    assertAcked(
        prepareCreate("test-idx-none", 1, indexSettingsNoReplicas(6).put("index.routing.allocation.include.tag", "nowhere"))
            .setWaitForActiveShards(ActiveShardCount.NONE)
            .get()
    );
    assertTrue(indexExists("test-idx-none"));

    createRepository("test-repo", "fs");

    logger.info("--> start snapshot with default settings without a closed index - should fail");
    final SnapshotException sne = expectThrows(
        SnapshotException.class,
        () -> clusterAdmin().prepareCreateSnapshot("test-repo", "test-snap-1")
            .setIndices("test-idx-all", "test-idx-none", "test-idx-some", "test-idx-closed")
            .setWaitForCompletion(true)
            .execute()
            .actionGet()
    );
    assertThat(sne.getMessage(), containsString("Indices don't have primary shards"));

    // Expected total of 22 shards = 6 + 6 + 6 + 4, matching the values passed to
    // indexSettingsNoReplicas above (presumably the per-index shard counts).
    if (randomBoolean()) {
        logger.info("checking snapshot completion using status");
        clusterAdmin().prepareCreateSnapshot("test-repo", "test-snap-2")
            .setIndices("test-idx-all", "test-idx-none", "test-idx-some", "test-idx-closed")
            .setWaitForCompletion(false)
            .setPartial(true)
            .execute()
            .actionGet();
        assertBusy(() -> {
            SnapshotsStatusResponse snapshotsStatusResponse = clusterAdmin().prepareSnapshotStatus("test-repo")
                .setSnapshots("test-snap-2")
                .get();
            List<SnapshotStatus> snapshotStatuses = snapshotsStatusResponse.getSnapshots();
            // Expected value goes first so that a failure message reads correctly.
            assertEquals(1, snapshotStatuses.size());
            logger.trace("current snapshot status [{}]", snapshotStatuses.get(0));
            assertTrue(snapshotStatuses.get(0).getState().completed());
        }, 1, TimeUnit.MINUTES);
        SnapshotsStatusResponse snapshotsStatusResponse = clusterAdmin().prepareSnapshotStatus("test-repo")
            .setSnapshots("test-snap-2")
            .get();
        List<SnapshotStatus> snapshotStatuses = snapshotsStatusResponse.getSnapshots();
        assertThat(snapshotStatuses.size(), equalTo(1));
        SnapshotStatus snapshotStatus = snapshotStatuses.get(0);
        assertThat(snapshotStatus.getShardsStats().getTotalShards(), equalTo(22));
        assertThat(snapshotStatus.getShardsStats().getDoneShards(), lessThan(16));
        assertThat(snapshotStatus.getShardsStats().getDoneShards(), greaterThan(10));

        // There is slight delay between snapshot being marked as completed in the cluster state and on the file system
        // After it was marked as completed in the cluster state - we need to check if it's completed on the file system as well
        assertBusy(() -> {
            SnapshotInfo snapshotInfo = getSnapshot("test-repo", "test-snap-2");
            assertTrue(snapshotInfo.state().completed());
            assertEquals(SnapshotState.PARTIAL, snapshotInfo.state());
        }, 1, TimeUnit.MINUTES);
    } else {
        logger.info("checking snapshot completion using wait_for_completion flag");
        final CreateSnapshotResponse createSnapshotResponse = clusterAdmin().prepareCreateSnapshot("test-repo", "test-snap-2")
            .setIndices("test-idx-all", "test-idx-none", "test-idx-some", "test-idx-closed")
            .setWaitForCompletion(true)
            .setPartial(true)
            .execute()
            .actionGet();
        logger.info(
            "State: [{}], Reason: [{}]",
            createSnapshotResponse.getSnapshotInfo().state(),
            createSnapshotResponse.getSnapshotInfo().reason()
        );
        assertThat(createSnapshotResponse.getSnapshotInfo().totalShards(), equalTo(22));
        assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), lessThan(16));
        assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(10));
        assertThat(getSnapshot("test-repo", "test-snap-2").state(), equalTo(SnapshotState.PARTIAL));
    }

    assertAcked(client().admin().indices().prepareClose("test-idx-all"));

    logger.info("--> restore incomplete snapshot - should fail");
    assertFutureThrows(
        clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap-2")
            .setRestoreGlobalState(false)
            .setWaitForCompletion(true)
            .execute(),
        SnapshotRestoreException.class
    );

    logger.info("--> restore snapshot for the index that was snapshotted completely");
    RestoreSnapshotResponse restoreSnapshotResponse = clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap-2")
        .setRestoreGlobalState(false)
        .setIndices("test-idx-all")
        .setWaitForCompletion(true)
        .execute()
        .actionGet();
    assertThat(restoreSnapshotResponse.getRestoreInfo(), notNullValue());
    assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), equalTo(6));
    assertThat(restoreSnapshotResponse.getRestoreInfo().successfulShards(), equalTo(6));
    assertThat(restoreSnapshotResponse.getRestoreInfo().failedShards(), equalTo(0));
    assertDocCount("test-idx-all", 100L);

    logger.info("--> restore snapshot for the partial index");
    cluster().wipeIndices("test-idx-some");
    restoreSnapshotResponse = clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap-2")
        .setRestoreGlobalState(false)
        .setIndices("test-idx-some")
        .setPartial(true)
        .setWaitForCompletion(true)
        .get();
    assertThat(restoreSnapshotResponse.getRestoreInfo(), notNullValue());
    assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), equalTo(6));
    assertThat(restoreSnapshotResponse.getRestoreInfo().successfulShards(), allOf(greaterThan(0), lessThan(6)));
    assertThat(restoreSnapshotResponse.getRestoreInfo().failedShards(), greaterThan(0));
    assertThat(getCountForIndex("test-idx-some"), allOf(greaterThan(0L), lessThan(100L)));

    logger.info("--> restore snapshot for the index that didn't have any shards snapshotted successfully");
    cluster().wipeIndices("test-idx-none");
    restoreSnapshotResponse = clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap-2")
        .setRestoreGlobalState(false)
        .setIndices("test-idx-none")
        .setPartial(true)
        .setWaitForCompletion(true)
        .get();
    assertThat(restoreSnapshotResponse.getRestoreInfo(), notNullValue());
    assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), equalTo(6));
    assertThat(restoreSnapshotResponse.getRestoreInfo().successfulShards(), equalTo(0));
    assertThat(restoreSnapshotResponse.getRestoreInfo().failedShards(), equalTo(6));
    // Re-checks the document count of the partially restored index, not of test-idx-none.
    assertThat(getCountForIndex("test-idx-some"), allOf(greaterThan(0L), lessThan(100L)));

    logger.info("--> restore snapshot for the closed index that was snapshotted completely");
    restoreSnapshotResponse = clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap-2")
        .setRestoreGlobalState(false)
        .setIndices("test-idx-closed")
        .setWaitForCompletion(true)
        .execute()
        .actionGet();
    assertThat(restoreSnapshotResponse.getRestoreInfo(), notNullValue());
    assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), equalTo(4));
    assertThat(restoreSnapshotResponse.getRestoreInfo().successfulShards(), equalTo(4));
    assertThat(restoreSnapshotResponse.getRestoreInfo().failedShards(), equalTo(0));
    assertDocCount("test-idx-closed", 100L);
}
Aggregations