use of org.opensearch.cluster.routing.RecoverySource.SnapshotRecoverySource in project OpenSearch by opensearch-project.
the class IndexRecoveryIT method testSnapshotRecovery.
public void testSnapshotRecovery() throws Exception {
logger.info("--> start node A");
String nodeA = internalCluster().startNode();
logger.info("--> create repository");
assertAcked(client().admin().cluster().preparePutRepository(REPO_NAME).setType("fs").setSettings(Settings.builder().put("location", randomRepoPath()).put("compress", false)).get());
ensureGreen();
logger.info("--> create index on node: {}", nodeA);
createAndPopulateIndex(INDEX_NAME, 1, SHARD_COUNT, REPLICA_COUNT);
logger.info("--> snapshot");
CreateSnapshotResponse createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot(REPO_NAME, SNAP_NAME).setWaitForCompletion(true).setIndices(INDEX_NAME).get();
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), equalTo(createSnapshotResponse.getSnapshotInfo().totalShards()));
assertThat(client().admin().cluster().prepareGetSnapshots(REPO_NAME).setSnapshots(SNAP_NAME).get().getSnapshots().get(0).state(), equalTo(SnapshotState.SUCCESS));
client().admin().indices().prepareClose(INDEX_NAME).execute().actionGet();
logger.info("--> restore");
RestoreSnapshotResponse restoreSnapshotResponse = client().admin().cluster().prepareRestoreSnapshot(REPO_NAME, SNAP_NAME).setWaitForCompletion(true).execute().actionGet();
int totalShards = restoreSnapshotResponse.getRestoreInfo().totalShards();
assertThat(totalShards, greaterThan(0));
ensureGreen();
logger.info("--> request recoveries");
RecoveryResponse response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
Repository repository = internalCluster().getMasterNodeInstance(RepositoriesService.class).repository(REPO_NAME);
final RepositoryData repositoryData = PlainActionFuture.get(repository::getRepositoryData);
for (Map.Entry<String, List<RecoveryState>> indexRecoveryStates : response.shardRecoveryStates().entrySet()) {
assertThat(indexRecoveryStates.getKey(), equalTo(INDEX_NAME));
List<RecoveryState> recoveryStates = indexRecoveryStates.getValue();
assertThat(recoveryStates.size(), equalTo(totalShards));
for (RecoveryState recoveryState : recoveryStates) {
SnapshotRecoverySource recoverySource = new SnapshotRecoverySource(((SnapshotRecoverySource) recoveryState.getRecoverySource()).restoreUUID(), new Snapshot(REPO_NAME, createSnapshotResponse.getSnapshotInfo().snapshotId()), Version.CURRENT, repositoryData.resolveIndexId(INDEX_NAME));
assertRecoveryState(recoveryState, 0, recoverySource, true, Stage.DONE, null, nodeA);
validateIndexRecoveryState(recoveryState.getIndex());
}
}
}
use of org.opensearch.cluster.routing.RecoverySource.SnapshotRecoverySource in project OpenSearch by opensearch-project.
the class IndexShard method startRecovery.
public void startRecovery(RecoveryState recoveryState, PeerRecoveryTargetService recoveryTargetService, PeerRecoveryTargetService.RecoveryListener recoveryListener, RepositoriesService repositoriesService, Consumer<MappingMetadata> mappingUpdateConsumer, IndicesService indicesService) {
// }
assert recoveryState.getRecoverySource().equals(shardRouting.recoverySource());
switch(recoveryState.getRecoverySource().getType()) {
case EMPTY_STORE:
case EXISTING_STORE:
executeRecovery("from store", recoveryState, recoveryListener, this::recoverFromStore);
break;
case PEER:
try {
markAsRecovering("from " + recoveryState.getSourceNode(), recoveryState);
recoveryTargetService.startRecovery(this, recoveryState.getSourceNode(), recoveryListener);
} catch (Exception e) {
failShard("corrupted preexisting index", e);
recoveryListener.onRecoveryFailure(recoveryState, new RecoveryFailedException(recoveryState, null, e), true);
}
break;
case SNAPSHOT:
final String repo = ((SnapshotRecoverySource) recoveryState.getRecoverySource()).snapshot().getRepository();
executeRecovery("from snapshot", recoveryState, recoveryListener, l -> restoreFromRepository(repositoriesService.repository(repo), l));
break;
case LOCAL_SHARDS:
final IndexMetadata indexMetadata = indexSettings().getIndexMetadata();
final Index resizeSourceIndex = indexMetadata.getResizeSourceIndex();
final List<IndexShard> startedShards = new ArrayList<>();
final IndexService sourceIndexService = indicesService.indexService(resizeSourceIndex);
final Set<ShardId> requiredShards;
final int numShards;
if (sourceIndexService != null) {
requiredShards = IndexMetadata.selectRecoverFromShards(shardId().id(), sourceIndexService.getMetadata(), indexMetadata.getNumberOfShards());
for (IndexShard shard : sourceIndexService) {
if (shard.state() == IndexShardState.STARTED && requiredShards.contains(shard.shardId())) {
startedShards.add(shard);
}
}
numShards = requiredShards.size();
} else {
numShards = -1;
requiredShards = Collections.emptySet();
}
if (numShards == startedShards.size()) {
assert requiredShards.isEmpty() == false;
executeRecovery("from local shards", recoveryState, recoveryListener, l -> recoverFromLocalShards(mappingUpdateConsumer, startedShards.stream().filter((s) -> requiredShards.contains(s.shardId())).collect(Collectors.toList()), l));
} else {
final RuntimeException e;
if (numShards == -1) {
e = new IndexNotFoundException(resizeSourceIndex);
} else {
e = new IllegalStateException("not all required shards of index " + resizeSourceIndex + " are started yet, expected " + numShards + " found " + startedShards.size() + " can't recover shard " + shardId());
}
throw e;
}
break;
default:
throw new IllegalArgumentException("Unknown recovery source " + recoveryState.getRecoverySource());
}
}
use of org.opensearch.cluster.routing.RecoverySource.SnapshotRecoverySource in project OpenSearch by opensearch-project.
the class NodeVersionAllocationDeciderTests method testMessages.
public void testMessages() {
Metadata metadata = Metadata.builder().put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)).build();
RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
RoutingNode newNode = new RoutingNode("newNode", newNode("newNode", Version.CURRENT));
RoutingNode oldNode = new RoutingNode("oldNode", newNode("oldNode", VersionUtils.getPreviousVersion()));
final org.opensearch.cluster.ClusterName clusterName = org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY);
ClusterState clusterState = ClusterState.builder(clusterName).metadata(metadata).routingTable(initialRoutingTable).nodes(DiscoveryNodes.builder().add(newNode.node()).add(oldNode.node())).build();
final ShardId shardId = clusterState.routingTable().index("test").shard(0).getShardId();
final ShardRouting primaryShard = clusterState.routingTable().shardRoutingTable(shardId).primaryShard();
final ShardRouting replicaShard = clusterState.routingTable().shardRoutingTable(shardId).replicaShards().get(0);
RoutingAllocation routingAllocation = new RoutingAllocation(null, clusterState.getRoutingNodes(), clusterState, null, null, 0);
routingAllocation.debugDecision(true);
final NodeVersionAllocationDecider allocationDecider = new NodeVersionAllocationDecider();
Decision decision = allocationDecider.canAllocate(primaryShard, newNode, routingAllocation);
assertThat(decision.type(), is(Decision.Type.YES));
assertThat(decision.getExplanation(), is("the primary shard is new or already existed on the node"));
decision = allocationDecider.canAllocate(ShardRoutingHelper.initialize(primaryShard, "oldNode"), newNode, routingAllocation);
assertThat(decision.type(), is(Decision.Type.YES));
assertThat(decision.getExplanation(), is("can relocate primary shard from a node with version [" + oldNode.node().getVersion() + "] to a node with equal-or-newer version [" + newNode.node().getVersion() + "]"));
decision = allocationDecider.canAllocate(ShardRoutingHelper.initialize(primaryShard, "newNode"), oldNode, routingAllocation);
assertThat(decision.type(), is(Decision.Type.NO));
assertThat(decision.getExplanation(), is("cannot relocate primary shard from a node with version [" + newNode.node().getVersion() + "] to a node with older version [" + oldNode.node().getVersion() + "]"));
final IndexId indexId = new IndexId("test", UUIDs.randomBase64UUID(random()));
final SnapshotRecoverySource newVersionSnapshot = new SnapshotRecoverySource(UUIDs.randomBase64UUID(), new Snapshot("rep1", new SnapshotId("snp1", UUIDs.randomBase64UUID())), newNode.node().getVersion(), indexId);
final SnapshotRecoverySource oldVersionSnapshot = new SnapshotRecoverySource(UUIDs.randomBase64UUID(), new Snapshot("rep1", new SnapshotId("snp1", UUIDs.randomBase64UUID())), oldNode.node().getVersion(), indexId);
decision = allocationDecider.canAllocate(ShardRoutingHelper.newWithRestoreSource(primaryShard, newVersionSnapshot), oldNode, routingAllocation);
assertThat(decision.type(), is(Decision.Type.NO));
assertThat(decision.getExplanation(), is("node version [" + oldNode.node().getVersion() + "] is older than the snapshot version [" + newNode.node().getVersion() + "]"));
decision = allocationDecider.canAllocate(ShardRoutingHelper.newWithRestoreSource(primaryShard, oldVersionSnapshot), newNode, routingAllocation);
assertThat(decision.type(), is(Decision.Type.YES));
assertThat(decision.getExplanation(), is("node version [" + newNode.node().getVersion() + "] is the same or newer than snapshot version [" + oldNode.node().getVersion() + "]"));
final RoutingChangesObserver routingChangesObserver = new RoutingChangesObserver.AbstractRoutingChangesObserver();
final RoutingNodes routingNodes = new RoutingNodes(clusterState, false);
final ShardRouting startedPrimary = routingNodes.startShard(logger, routingNodes.initializeShard(primaryShard, "newNode", null, 0, routingChangesObserver), routingChangesObserver);
routingAllocation = new RoutingAllocation(null, routingNodes, clusterState, null, null, 0);
routingAllocation.debugDecision(true);
decision = allocationDecider.canAllocate(replicaShard, oldNode, routingAllocation);
assertThat(decision.type(), is(Decision.Type.NO));
assertThat(decision.getExplanation(), is("cannot allocate replica shard to a node with version [" + oldNode.node().getVersion() + "] since this is older than the primary version [" + newNode.node().getVersion() + "]"));
routingNodes.startShard(logger, routingNodes.relocateShard(startedPrimary, "oldNode", 0, routingChangesObserver).v2(), routingChangesObserver);
routingAllocation = new RoutingAllocation(null, routingNodes, clusterState, null, null, 0);
routingAllocation.debugDecision(true);
decision = allocationDecider.canAllocate(replicaShard, newNode, routingAllocation);
assertThat(decision.type(), is(Decision.Type.YES));
assertThat(decision.getExplanation(), is("can allocate replica shard to a node with version [" + newNode.node().getVersion() + "] since this is equal-or-newer than the primary version [" + oldNode.node().getVersion() + "]"));
}
use of org.opensearch.cluster.routing.RecoverySource.SnapshotRecoverySource in project OpenSearch by opensearch-project.
the class RestRecoveryActionTests method testRestRecoveryAction.
public void testRestRecoveryAction() {
final RestCatRecoveryAction action = new RestCatRecoveryAction();
final int totalShards = randomIntBetween(1, 32);
final int successfulShards = Math.max(0, totalShards - randomIntBetween(1, 2));
final int failedShards = totalShards - successfulShards;
final Map<String, List<RecoveryState>> shardRecoveryStates = new HashMap<>();
final List<RecoveryState> recoveryStates = new ArrayList<>();
for (int i = 0; i < successfulShards; i++) {
final RecoveryState state = mock(RecoveryState.class);
when(state.getShardId()).thenReturn(new ShardId(new Index("index", "_na_"), i));
final RecoveryState.Timer timer = mock(RecoveryState.Timer.class);
final long startTime = randomLongBetween(0, new Date().getTime());
when(timer.startTime()).thenReturn(startTime);
final long time = randomLongBetween(1000000, 10 * 1000000);
when(timer.time()).thenReturn(time);
when(timer.stopTime()).thenReturn(startTime + time);
when(state.getTimer()).thenReturn(timer);
when(state.getRecoverySource()).thenReturn(TestShardRouting.randomRecoverySource());
when(state.getStage()).thenReturn(randomFrom(RecoveryState.Stage.values()));
final DiscoveryNode sourceNode = randomBoolean() ? mock(DiscoveryNode.class) : null;
if (sourceNode != null) {
when(sourceNode.getHostName()).thenReturn(randomAlphaOfLength(8));
}
when(state.getSourceNode()).thenReturn(sourceNode);
final DiscoveryNode targetNode = mock(DiscoveryNode.class);
when(targetNode.getHostName()).thenReturn(randomAlphaOfLength(8));
when(state.getTargetNode()).thenReturn(targetNode);
RecoveryState.Index index = mock(RecoveryState.Index.class);
final int totalRecoveredFiles = randomIntBetween(1, 64);
when(index.totalRecoverFiles()).thenReturn(totalRecoveredFiles);
final int recoveredFileCount = randomIntBetween(0, totalRecoveredFiles);
when(index.recoveredFileCount()).thenReturn(recoveredFileCount);
when(index.recoveredFilesPercent()).thenReturn((100f * recoveredFileCount) / totalRecoveredFiles);
when(index.totalFileCount()).thenReturn(randomIntBetween(totalRecoveredFiles, 2 * totalRecoveredFiles));
final int totalRecoveredBytes = randomIntBetween(1, 1 << 24);
when(index.totalRecoverBytes()).thenReturn((long) totalRecoveredBytes);
final int recoveredBytes = randomIntBetween(0, totalRecoveredBytes);
when(index.recoveredBytes()).thenReturn((long) recoveredBytes);
when(index.recoveredBytesPercent()).thenReturn((100f * recoveredBytes) / totalRecoveredBytes);
when(index.totalRecoverBytes()).thenReturn((long) randomIntBetween(totalRecoveredBytes, 2 * totalRecoveredBytes));
when(state.getIndex()).thenReturn(index);
final RecoveryState.Translog translog = mock(RecoveryState.Translog.class);
final int translogOps = randomIntBetween(0, 1 << 18);
when(translog.totalOperations()).thenReturn(translogOps);
final int translogOpsRecovered = randomIntBetween(0, translogOps);
when(translog.recoveredOperations()).thenReturn(translogOpsRecovered);
when(translog.recoveredPercent()).thenReturn(translogOps == 0 ? 100f : (100f * translogOpsRecovered / translogOps));
when(state.getTranslog()).thenReturn(translog);
recoveryStates.add(state);
}
final List<RecoveryState> shuffle = new ArrayList<>(recoveryStates);
Randomness.shuffle(shuffle);
shardRecoveryStates.put("index", shuffle);
final List<DefaultShardOperationFailedException> shardFailures = new ArrayList<>();
final RecoveryResponse response = new RecoveryResponse(totalShards, successfulShards, failedShards, shardRecoveryStates, shardFailures);
final Table table = action.buildRecoveryTable(null, response);
assertNotNull(table);
List<Table.Cell> headers = table.getHeaders();
final List<String> expectedHeaders = Arrays.asList("index", "shard", "start_time", "start_time_millis", "stop_time", "stop_time_millis", "time", "type", "stage", "source_host", "source_node", "target_host", "target_node", "repository", "snapshot", "files", "files_recovered", "files_percent", "files_total", "bytes", "bytes_recovered", "bytes_percent", "bytes_total", "translog_ops", "translog_ops_recovered", "translog_ops_percent");
for (int i = 0; i < expectedHeaders.size(); i++) {
assertThat(headers.get(i).value, equalTo(expectedHeaders.get(i)));
}
assertThat(table.getRows().size(), equalTo(successfulShards));
for (int i = 0; i < successfulShards; i++) {
final RecoveryState state = recoveryStates.get(i);
final List<Object> expectedValues = Arrays.asList("index", i, XContentOpenSearchExtension.DEFAULT_DATE_PRINTER.print(state.getTimer().startTime()), state.getTimer().startTime(), XContentOpenSearchExtension.DEFAULT_DATE_PRINTER.print(state.getTimer().stopTime()), state.getTimer().stopTime(), new TimeValue(state.getTimer().time()), state.getRecoverySource().getType().name().toLowerCase(Locale.ROOT), state.getStage().name().toLowerCase(Locale.ROOT), state.getSourceNode() == null ? "n/a" : state.getSourceNode().getHostName(), state.getSourceNode() == null ? "n/a" : state.getSourceNode().getName(), state.getTargetNode().getHostName(), state.getTargetNode().getName(), state.getRecoverySource() == null || state.getRecoverySource().getType() != RecoverySource.Type.SNAPSHOT ? "n/a" : ((SnapshotRecoverySource) state.getRecoverySource()).snapshot().getRepository(), state.getRecoverySource() == null || state.getRecoverySource().getType() != RecoverySource.Type.SNAPSHOT ? "n/a" : ((SnapshotRecoverySource) state.getRecoverySource()).snapshot().getSnapshotId().getName(), state.getIndex().totalRecoverFiles(), state.getIndex().recoveredFileCount(), percent(state.getIndex().recoveredFilesPercent()), state.getIndex().totalFileCount(), state.getIndex().totalRecoverBytes(), state.getIndex().recoveredBytes(), percent(state.getIndex().recoveredBytesPercent()), state.getIndex().totalBytes(), state.getTranslog().totalOperations(), state.getTranslog().recoveredOperations(), percent(state.getTranslog().recoveredPercent()));
final List<Table.Cell> cells = table.getRows().get(i);
for (int j = 0; j < expectedValues.size(); j++) {
assertThat(cells.get(j).value, equalTo(expectedValues.get(j)));
}
}
}
use of org.opensearch.cluster.routing.RecoverySource.SnapshotRecoverySource in project OpenSearch by opensearch-project.
the class UnassignedInfoTests method testNewIndexRestored.
public void testNewIndexRestored() {
Metadata metadata = Metadata.builder().put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(randomIntBetween(1, 3)).numberOfReplicas(randomIntBetween(0, 3))).build();
ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metadata(metadata).routingTable(RoutingTable.builder().addAsNewRestore(metadata.index("test"), new SnapshotRecoverySource(UUIDs.randomBase64UUID(), new Snapshot("rep1", new SnapshotId("snp1", UUIDs.randomBase64UUID())), Version.CURRENT, new IndexId("test", UUIDs.randomBase64UUID(random()))), new IntHashSet()).build()).build();
for (ShardRouting shard : clusterState.getRoutingNodes().shardsWithState(UNASSIGNED)) {
assertThat(shard.unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.NEW_INDEX_RESTORED));
}
}
Aggregations