Use of org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse in the OpenSearch project by opensearch-project.
From the class SharedClusterSnapshotRestoreIT, method testDataFileFailureDuringRestore.
/**
 * Snapshots an index into an "fs" repository, re-registers the same location as a "mock" repository with
 * {@code random_data_file_io_exception_rate} set to 0.3, deletes the index and restores it.
 * <p>
 * The restore must report every primary shard. If all shards were restored the document count must be
 * exactly 100; otherwise the cluster must have a single data node and the number of failed shards must
 * match the number of unassigned shards in the routing table.
 *
 * @throws Exception if any cluster operation fails unexpectedly
 */
public void testDataFileFailureDuringRestore() throws Exception {
    disableRepoConsistencyCheck("This test intentionally leaves a broken repository");
    Path repoPath = randomRepoPath();
    Client client = client();
    createRepository("test-repo", "fs", repoPath);

    prepareCreate("test-idx")
        .setSettings(Settings.builder().put("index.allocation.max_retries", Integer.MAX_VALUE))
        .get();
    ensureGreen();

    final NumShards shardCounts = getNumShards("test-idx");
    indexRandomDocs("test-idx", 100);
    createSnapshot("test-repo", "test-snap", Collections.singletonList("test-idx"));

    // Replace the repository definition with a "mock" one pointing at the same location.
    createRepository(
        "test-repo",
        "mock",
        Settings.builder()
            .put("location", repoPath)
            .put("random", randomAlphaOfLength(10))
            .put("random_data_file_io_exception_rate", 0.3)
    );

    // Test restore after index deletion
    logger.info("--> delete index");
    cluster().wipeIndices("test-idx");

    logger.info("--> restore index after deletion");
    final RestoreSnapshotResponse response = client.admin()
        .cluster()
        .prepareRestoreSnapshot("test-repo", "test-snap")
        .setWaitForCompletion(true)
        .get();
    logger.info("--> total number of simulated failures during restore: [{}]", getFailureCount("test-repo"));

    final RestoreInfo info = response.getRestoreInfo();
    assertThat(info.totalShards(), equalTo(shardCounts.numPrimaries));

    if (info.successfulShards() == info.totalShards()) {
        // Every shard came back, so the exact number of hits must be present.
        assertDocCount("test-idx", 100L);
        return;
    }

    // At least one shard could not be restored. This can happen when there is only 1 data node:
    // a shard failed because of the random IO exceptions during restore and is not allowed to be
    // assigned to the same node again during the same reroute operation. Another reroute is then
    // scheduled, but the RestoreInProgressAllocationDecider blocks the shard from being assigned
    // again because it failed during restore.
    final ClusterStateResponse stateResponse = client.admin().cluster().prepareState().get();
    assertEquals(1, stateResponse.getState().getNodes().getDataNodes().size());
    assertEquals(
        info.failedShards(),
        stateResponse.getState().getRoutingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size()
    );
}
Use of org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse in the OpenSearch project by opensearch-project.
From the class SharedClusterSnapshotRestoreIT, method testDeleteSnapshot.
/**
 * Creates between 5 and 15 incremental snapshots of one index (10 new documents per snapshot), deletes
 * every snapshot except the first and the last, and checks that:
 * <ul>
 *   <li>the repository holds fewer files after the deletion than before it,</li>
 *   <li>the last snapshot still restores the full document set, and</li>
 *   <li>after the last snapshot is deleted too, the repository file count equals the count recorded
 *       right after the first snapshot.</li>
 * </ul>
 * The intermediate snapshots are deleted either one request per snapshot or in a single request,
 * chosen at random.
 *
 * @throws Exception if any cluster operation fails unexpectedly
 */
public void testDeleteSnapshot() throws Exception {
    final int numberOfSnapshots = between(5, 15);
    Client client = client();
    Path repo = randomRepoPath();
    createRepository(
        "test-repo",
        "fs",
        Settings.builder()
            .put("location", repo)
            .put("compress", false)
            .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)
    );
    createIndex("test-idx");
    ensureGreen();

    // File count of the repository as observed right after each snapshot was taken.
    int[] filesAfterSnapshot = new int[numberOfSnapshots];
    logger.info("--> creating {} snapshots ", numberOfSnapshots);
    for (int i = 0; i < numberOfSnapshots; i++) {
        for (int j = 0; j < 10; j++) {
            // Compute the document number once so the id and the field value agree. Writing
            // "bar" + i * 10 + j would append i * 10 and j as separate text pieces instead.
            final int docNumber = i * 10 + j;
            index("test-idx", "_doc", Integer.toString(docNumber), "foo", "bar" + docNumber);
        }
        refresh();
        createSnapshot("test-repo", "test-snap-" + i, Collections.singletonList("test-idx"));
        // Store number of files after each snapshot
        filesAfterSnapshot[i] = numberOfFiles(repo);
    }
    assertDocCount("test-idx", 10L * numberOfSnapshots);
    int numberOfFilesBeforeDeletion = numberOfFiles(repo);

    logger.info("--> delete all snapshots except the first one and last one");
    if (randomBoolean()) {
        // One delete request per intermediate snapshot.
        for (int i = 1; i < numberOfSnapshots - 1; i++) {
            client.admin().cluster().prepareDeleteSnapshot("test-repo", new String[] { "test-snap-" + i }).get();
        }
    } else {
        // A single delete request naming all intermediate snapshots.
        client.admin()
            .cluster()
            .prepareDeleteSnapshot(
                "test-repo",
                IntStream.range(1, numberOfSnapshots - 1).mapToObj(i -> "test-snap-" + i).toArray(String[]::new)
            )
            .get();
    }

    int numberOfFilesAfterDeletion = numberOfFiles(repo);
    assertThat(numberOfFilesAfterDeletion, lessThan(numberOfFilesBeforeDeletion));

    logger.info("--> delete index");
    cluster().wipeIndices("test-idx");

    logger.info("--> restore index");
    String lastSnapshot = "test-snap-" + (numberOfSnapshots - 1);
    RestoreSnapshotResponse restoreSnapshotResponse = client.admin()
        .cluster()
        .prepareRestoreSnapshot("test-repo", lastSnapshot)
        .setWaitForCompletion(true)
        .execute()
        .actionGet();
    assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), greaterThan(0));
    assertDocCount("test-idx", 10L * numberOfSnapshots);

    startDeleteSnapshot("test-repo", lastSnapshot).get();
    logger.info("--> make sure that number of files is back to what it was when the first snapshot was made");
    assertFileCount(repo, filesAfterSnapshot[0]);
}
Use of org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse in the OpenSearch project by opensearch-project.
From the class SharedClusterSnapshotRestoreIT, method testDeleteRepositoryWhileSnapshotting.
/**
 * Starts a snapshot against a "mock" repository while one node is blocked, then checks that, while the
 * snapshot is in flight:
 * <ul>
 *   <li>deleting the repository (by exact name or wildcard) fails with the "currently used" message,</li>
 *   <li>re-registering the repository under the same name fails, and</li>
 *   <li>registering a repository under a different name is acknowledged.</li>
 * </ul>
 * After the node is unblocked the snapshot must finish as {@code SUCCESS} with no shard failures, and
 * restoring it through an "fs" repository at the same location must bring back all 100 documents.
 *
 * @throws Exception if any cluster operation fails unexpectedly
 */
public void testDeleteRepositoryWhileSnapshotting() throws Exception {
    disableRepoConsistencyCheck("This test uses a purposely broken repository so it would fail consistency checks");
    Client client = client();
    Path repoPath = randomRepoPath();
    createRepository(
        "test-repo",
        "mock",
        Settings.builder()
            .put("location", repoPath)
            .put("random", randomAlphaOfLength(10))
            .put("wait_after_unblock", 200)
    );

    // Create index on 2 nodes and make sure each node has a primary by setting no replicas
    assertAcked(prepareCreate("test-idx", 2, Settings.builder().put("number_of_replicas", 0)));
    indexRandomDocs("test-idx", 100);

    // Pick one node and block it
    String nodeUnderBlock = blockNodeWithIndex("test-repo", "test-idx");

    logger.info("--> snapshot");
    client.admin()
        .cluster()
        .prepareCreateSnapshot("test-repo", "test-snap")
        .setWaitForCompletion(false)
        .setIndices("test-idx")
        .get();

    logger.info("--> waiting for block to kick in");
    waitForBlock(nodeUnderBlock, "test-repo", TimeValue.timeValueSeconds(60));

    logger.info("--> execution was blocked on node [{}], trying to delete repository", nodeUnderBlock);
    try {
        client.admin()
            .cluster()
            .prepareDeleteRepository(randomFrom("test-repo", "test-*", "*"))
            .execute()
            .actionGet();
        fail("shouldn't be able to delete in-use repository");
    } catch (Exception deleteFailure) {
        logger.info("--> in-use repository deletion failed");
        assertThat(
            deleteFailure.getMessage(),
            containsString("trying to modify or unregister repository that is currently used")
        );
    }

    logger.info("--> trying to move repository to another location");
    try {
        client.admin()
            .cluster()
            .preparePutRepository("test-repo")
            .setType("fs")
            .setSettings(Settings.builder().put("location", repoPath.resolve("test")))
            .get();
        fail("shouldn't be able to replace in-use repository");
    } catch (Exception replaceFailure) {
        // Only the fact that the request failed is checked here; the message is not inspected.
        logger.info("--> in-use repository replacement failed");
    }

    logger.info("--> trying to create a repository with different name");
    // do not do verification itself as snapshot threads could be fully blocked
    assertAcked(
        client.admin()
            .cluster()
            .preparePutRepository("test-repo-2")
            .setVerify(false)
            .setType("fs")
            .setSettings(Settings.builder().put("location", repoPath.resolve("test")))
    );

    logger.info("--> unblocking blocked node");
    unblockNode("test-repo", nodeUnderBlock);

    logger.info("--> waiting for completion");
    logger.info(
        "Number of failed shards [{}]",
        waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(600)).shardFailures().size()
    );
    logger.info("--> done");

    final SnapshotInfo finishedSnapshot = getSnapshot("test-repo", "test-snap");
    assertThat(finishedSnapshot.state(), equalTo(SnapshotState.SUCCESS));
    assertThat(finishedSnapshot.shardFailures().size(), equalTo(0));

    logger.info("--> delete index");
    cluster().wipeIndices("test-idx");

    logger.info("--> replace mock repository with real one at the same location");
    createRepository("test-repo", "fs", repoPath);

    logger.info("--> restore index");
    RestoreSnapshotResponse restoreResponse = client.admin()
        .cluster()
        .prepareRestoreSnapshot("test-repo", "test-snap")
        .setWaitForCompletion(true)
        .execute()
        .actionGet();
    assertThat(restoreResponse.getRestoreInfo().totalShards(), greaterThan(0));
    assertDocCount("test-idx", 100);
}
Use of org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse in the OpenSearch project by opensearch-project.
From the class IndexRecoveryIT, method testSnapshotRecovery.
/**
 * Snapshots an index on a single node, closes the index, restores it from the snapshot and then
 * inspects the recovery API output: every reported shard recovery must belong to the test index, be in
 * stage {@code DONE}, and carry a snapshot recovery source that matches the snapshot just taken.
 *
 * @throws Exception if any cluster operation fails unexpectedly
 */
public void testSnapshotRecovery() throws Exception {
    logger.info("--> start node A");
    String nodeA = internalCluster().startNode();

    logger.info("--> create repository");
    assertAcked(
        client().admin()
            .cluster()
            .preparePutRepository(REPO_NAME)
            .setType("fs")
            .setSettings(Settings.builder().put("location", randomRepoPath()).put("compress", false))
            .get()
    );
    ensureGreen();

    logger.info("--> create index on node: {}", nodeA);
    createAndPopulateIndex(INDEX_NAME, 1, SHARD_COUNT, REPLICA_COUNT);

    logger.info("--> snapshot");
    CreateSnapshotResponse snapshotResponse = client().admin()
        .cluster()
        .prepareCreateSnapshot(REPO_NAME, SNAP_NAME)
        .setWaitForCompletion(true)
        .setIndices(INDEX_NAME)
        .get();
    assertThat(snapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));
    assertThat(
        snapshotResponse.getSnapshotInfo().successfulShards(),
        equalTo(snapshotResponse.getSnapshotInfo().totalShards())
    );
    assertThat(
        client().admin().cluster().prepareGetSnapshots(REPO_NAME).setSnapshots(SNAP_NAME).get().getSnapshots().get(0).state(),
        equalTo(SnapshotState.SUCCESS)
    );

    // The index is closed (not deleted) before the restore is requested.
    client().admin().indices().prepareClose(INDEX_NAME).execute().actionGet();

    logger.info("--> restore");
    RestoreSnapshotResponse restoreResponse = client().admin()
        .cluster()
        .prepareRestoreSnapshot(REPO_NAME, SNAP_NAME)
        .setWaitForCompletion(true)
        .execute()
        .actionGet();
    int totalShards = restoreResponse.getRestoreInfo().totalShards();
    assertThat(totalShards, greaterThan(0));
    ensureGreen();

    logger.info("--> request recoveries");
    RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();

    Repository repo = internalCluster().getMasterNodeInstance(RepositoriesService.class).repository(REPO_NAME);
    final RepositoryData repoData = PlainActionFuture.get(repo::getRepositoryData);

    for (Map.Entry<String, List<RecoveryState>> entry : recoveryResponse.shardRecoveryStates().entrySet()) {
        assertThat(entry.getKey(), equalTo(INDEX_NAME));

        List<RecoveryState> shardStates = entry.getValue();
        assertThat(shardStates.size(), equalTo(totalShards));

        for (RecoveryState shardState : shardStates) {
            // Build the expected source, reusing the restore UUID reported by the shard itself.
            SnapshotRecoverySource expectedSource = new SnapshotRecoverySource(
                ((SnapshotRecoverySource) shardState.getRecoverySource()).restoreUUID(),
                new Snapshot(REPO_NAME, snapshotResponse.getSnapshotInfo().snapshotId()),
                Version.CURRENT,
                repoData.resolveIndexId(INDEX_NAME)
            );
            assertRecoveryState(shardState, 0, expectedSource, true, Stage.DONE, null, nodeA);
            validateIndexRecoveryState(shardState.getIndex());
        }
    }
}
Use of org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse in the OpenSearch project by opensearch-project.
From the class AbortedRestoreIT, method testAbortedRestoreAlsoAbortFileRestores.
/**
 * Starts a restore against a blocked "mock" repository, waits until the single shard recovery is in the
 * {@code INDEX} stage and the snapshot thread pool on the data node is at its maximum, then aborts the
 * restore by deleting the index. After unblocking the repository the restore response must report one
 * failed and zero successful shards, and the snapshot thread pool must drain to zero active threads.
 *
 * @throws Exception if any cluster operation fails unexpectedly
 */
public void testAbortedRestoreAlsoAbortFileRestores() throws Exception {
    final String indexName = "test-abort-restore";
    final String repositoryName = "repository";
    final String snapshotName = "snapshot";

    internalCluster().startClusterManagerOnlyNode();
    final String dataNode = internalCluster().startDataOnlyNode();

    createIndex(indexName, indexSettingsNoReplicas(1).build());
    indexRandomDocs(indexName, scaledRandomIntBetween(10, 1_000));
    ensureGreen();
    forceMerge();

    createRepository(repositoryName, "mock");
    createFullSnapshot(repositoryName, snapshotName);
    assertAcked(client().admin().indices().prepareDelete(indexName));

    logger.info("--> blocking all data nodes for repository [{}]", repositoryName);
    blockAllDataNodes(repositoryName);
    failReadsAllDataNodes(repositoryName);

    logger.info("--> starting restore");
    final ActionFuture<RestoreSnapshotResponse> restoreFuture = client().admin()
        .cluster()
        .prepareRestoreSnapshot(repositoryName, snapshotName)
        .setWaitForCompletion(true)
        .setIndices(indexName)
        .execute();

    // Poll until exactly one active snapshot recovery for the index is visible and sits in the INDEX
    // stage, while the restore request itself has not completed.
    assertBusy(() -> {
        final RecoveryResponse activeRecoveries = client().admin()
            .indices()
            .prepareRecoveries(indexName)
            .setIndicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN)
            .setActiveOnly(true)
            .get();
        assertThat(activeRecoveries.hasRecoveries(), is(true));

        final List<RecoveryState> states = activeRecoveries.shardRecoveryStates().get(indexName);
        assertThat(states, hasSize(1));
        assertThat(restoreFuture.isDone(), is(false));

        for (RecoveryState state : states) {
            assertThat(state.getRecoverySource().getType(), equalTo(RecoverySource.Type.SNAPSHOT));
            assertThat(state.getStage(), equalTo(RecoveryState.Stage.INDEX));
        }
    });

    final ThreadPool.Info snapshotPoolInfo = threadPool(dataNode).info(ThreadPool.Names.SNAPSHOT);
    assertThat(snapshotPoolInfo.getMax(), greaterThan(0));

    logger.info("--> waiting for snapshot thread [max={}] pool to be full", snapshotPoolInfo.getMax());
    waitForMaxActiveSnapshotThreads(dataNode, equalTo(snapshotPoolInfo.getMax()));

    logger.info("--> aborting restore by deleting the index");
    assertAcked(client().admin().indices().prepareDelete(indexName));

    logger.info("--> unblocking repository [{}]", repositoryName);
    unblockAllDataNodes(repositoryName);

    logger.info("--> restore should have failed");
    final RestoreSnapshotResponse restoreResponse = restoreFuture.get();
    assertThat(restoreResponse.getRestoreInfo().failedShards(), equalTo(1));
    assertThat(restoreResponse.getRestoreInfo().successfulShards(), equalTo(0));

    logger.info("--> waiting for snapshot thread pool to be empty");
    waitForMaxActiveSnapshotThreads(dataNode, equalTo(0));
}
Aggregations