use of org.elasticsearch.action.admin.indices.recovery.RecoveryRequest in project elasticsearch by elastic.
the class RestRecoveryAction method prepareRequest.
@Override
public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {
final RecoveryRequest recoveryRequest = new RecoveryRequest(Strings.splitStringByCommaToArray(request.param("index")));
recoveryRequest.detailed(request.paramAsBoolean("detailed", false));
recoveryRequest.activeOnly(request.paramAsBoolean("active_only", false));
recoveryRequest.indicesOptions(IndicesOptions.fromRequest(request, recoveryRequest.indicesOptions()));
return channel -> client.admin().indices().recoveries(recoveryRequest, new RestBuilderListener<RecoveryResponse>(channel) {
@Override
public RestResponse buildResponse(RecoveryResponse response, XContentBuilder builder) throws Exception {
response.detailed(recoveryRequest.detailed());
builder.startObject();
response.toXContent(builder, request);
builder.endObject();
return new BytesRestResponse(OK, builder);
}
});
}
use of org.elasticsearch.action.admin.indices.recovery.RecoveryRequest in project elasticsearch by elastic.
the class IndicesRequestIT method testRecovery.
public void testRecovery() {
String recoveryAction = RecoveryAction.NAME + "[n]";
interceptTransportActions(recoveryAction);
RecoveryRequest recoveryRequest = new RecoveryRequest(randomIndicesOrAliases());
internalCluster().coordOnlyNodeClient().admin().indices().recoveries(recoveryRequest).actionGet();
clearInterceptedActions();
assertSameIndices(recoveryRequest, recoveryAction);
}
use of org.elasticsearch.action.admin.indices.recovery.RecoveryRequest in project crate by crate.
the class IndexRecoveryIT method testCancelNewShardRecoveryAndUsesExistingShardCopy.
@Test
public void testCancelNewShardRecoveryAndUsesExistingShardCopy() throws Exception {
logger.info("--> start node A");
final String nodeA = internalCluster().startNode();
logger.info("--> create index on node: {}", nodeA);
createAndPopulateIndex(INDEX_NAME, 1, SHARD_COUNT, REPLICA_COUNT);
logger.info("--> start node B");
// force a shard recovery from nodeA to nodeB
final String nodeB = internalCluster().startNode();
logger.info("--> add replica for {} on node: {}", INDEX_NAME, nodeB);
execute("ALTER TABLE " + INDEX_NAME + " SET (number_of_replicas=1, \"unassigned.node_left.delayed_timeout\"=0)");
ensureGreen();
logger.info("--> start node C");
final String nodeC = internalCluster().startNode();
// do sync flush to gen sync id
execute("OPTIMIZE TABLE " + INDEX_NAME);
// assertThat(client().admin().indices().prepareSyncedFlush(INDEX_NAME).get().failedShards(), equalTo(0));
// hold peer recovery on phase 2 after nodeB down
CountDownLatch phase1ReadyBlocked = new CountDownLatch(1);
CountDownLatch allowToCompletePhase1Latch = new CountDownLatch(1);
MockTransportService transportService = (MockTransportService) internalCluster().getInstance(TransportService.class, nodeA);
transportService.addSendBehavior((connection, requestId, action, request, options) -> {
if (PeerRecoveryTargetService.Actions.CLEAN_FILES.equals(action)) {
phase1ReadyBlocked.countDown();
try {
allowToCompletePhase1Latch.await();
} catch (InterruptedException e) {
throw new AssertionError(e);
}
}
connection.sendRequest(requestId, action, request, options);
});
logger.info("--> restart node B");
internalCluster().restartNode(nodeB, new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
phase1ReadyBlocked.await();
// nodeB stopped, peer recovery from nodeA to nodeC, it will be cancelled after nodeB get started.
var indexName = IndexParts.toIndexName(sqlExecutor.getCurrentSchema(), INDEX_NAME, null);
RecoveryResponse response = client().execute(RecoveryAction.INSTANCE, new RecoveryRequest(indexName)).actionGet();
List<RecoveryState> recoveryStates = response.shardRecoveryStates().get(indexName);
List<RecoveryState> nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
assertThat(nodeCRecoveryStates.size(), equalTo(1));
assertOnGoingRecoveryState(nodeCRecoveryStates.get(0), 0, RecoverySource.PeerRecoverySource.INSTANCE, false, nodeA, nodeC);
validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());
return super.onNodeStopped(nodeName);
}
});
// wait for peer recovery from nodeA to nodeB which is a no-op recovery so it skips the CLEAN_FILES stage and hence is not blocked
ensureGreen();
allowToCompletePhase1Latch.countDown();
transportService.clearAllRules();
// make sure nodeA has primary and nodeB has replica
ClusterState state = client().admin().cluster().prepareState().get().getState();
List<ShardRouting> startedShards = state.routingTable().shardsWithState(ShardRoutingState.STARTED);
assertThat(startedShards.size(), equalTo(2));
for (ShardRouting shardRouting : startedShards) {
if (shardRouting.primary()) {
assertThat(state.nodes().get(shardRouting.currentNodeId()).getName(), equalTo(nodeA));
} else {
assertThat(state.nodes().get(shardRouting.currentNodeId()).getName(), equalTo(nodeB));
}
}
}
use of org.elasticsearch.action.admin.indices.recovery.RecoveryRequest in project crate by crate.
the class IndexRecoveryIT method testRecoverLocallyUpToGlobalCheckpoint.
@Test
public void testRecoverLocallyUpToGlobalCheckpoint() throws Exception {
internalCluster().ensureAtLeastNumDataNodes(2);
List<String> nodes = randomSubsetOf(2, StreamSupport.stream(clusterService().state().nodes().getDataNodes().spliterator(), false).map(node -> node.value.getName()).collect(Collectors.toSet()));
String indexName = "test";
execute("CREATE TABLE doc.test (num INT)" + " CLUSTERED INTO 1 SHARDS" + " WITH (" + " number_of_replicas = 1," + " \"global_checkpoint_sync.interval\"='24h'," + " \"routing.allocation.include._name\"='" + String.join(",", nodes) + "'" + " )");
ensureGreen(indexName);
int numDocs = randomIntBetween(1, 100);
var args = new Object[numDocs][];
for (int i = 0; i < numDocs; i++) {
args[i] = new Object[] { i };
}
execute("INSERT INTO doc.test (num) VALUES (?)", args);
refresh(indexName);
String failingNode = randomFrom(nodes);
PlainActionFuture<StartRecoveryRequest> startRecoveryRequestFuture = new PlainActionFuture<>();
// Peer recovery fails if the primary does not see the recovering replica in the replication group (when the cluster state
// update on the primary is delayed). To verify the local recovery stats, we have to manually remember this value in the
// first try because the local recovery happens once and its stats is reset when the recovery fails.
SetOnce<Integer> localRecoveredOps = new SetOnce<>();
for (String node : nodes) {
MockTransportService transportService = (MockTransportService) internalCluster().getInstance(TransportService.class, node);
transportService.addSendBehavior((connection, requestId, action, request, options) -> {
if (action.equals(PeerRecoverySourceService.Actions.START_RECOVERY)) {
final RecoveryState recoveryState = internalCluster().getInstance(IndicesService.class, failingNode).getShardOrNull(new ShardId(resolveIndex(indexName), 0)).recoveryState();
assertThat(recoveryState.getTranslog().recoveredOperations(), equalTo(recoveryState.getTranslog().totalLocal()));
if (startRecoveryRequestFuture.isDone()) {
assertThat(recoveryState.getTranslog().totalLocal(), equalTo(0));
recoveryState.getTranslog().totalLocal(localRecoveredOps.get());
recoveryState.getTranslog().incrementRecoveredOperations(localRecoveredOps.get());
} else {
localRecoveredOps.set(recoveryState.getTranslog().totalLocal());
startRecoveryRequestFuture.onResponse((StartRecoveryRequest) request);
}
}
if (action.equals(PeerRecoveryTargetService.Actions.FILE_CHUNK)) {
RetentionLeases retentionLeases = internalCluster().getInstance(IndicesService.class, node).indexServiceSafe(resolveIndex(indexName)).getShard(0).getRetentionLeases();
throw new AssertionError("expect an operation-based recovery:" + "retention leases" + Strings.toString(retentionLeases) + "]");
}
connection.sendRequest(requestId, action, request, options);
});
}
IndexShard shard = internalCluster().getInstance(IndicesService.class, failingNode).getShardOrNull(new ShardId(resolveIndex(indexName), 0));
final long lastSyncedGlobalCheckpoint = shard.getLastSyncedGlobalCheckpoint();
final long localCheckpointOfSafeCommit;
try (Engine.IndexCommitRef safeCommitRef = shard.acquireSafeIndexCommit()) {
localCheckpointOfSafeCommit = SequenceNumbers.loadSeqNoInfoFromLuceneCommit(safeCommitRef.getIndexCommit().getUserData().entrySet()).localCheckpoint;
}
final long maxSeqNo = shard.seqNoStats().getMaxSeqNo();
shard.failShard("test", new IOException("simulated"));
StartRecoveryRequest startRecoveryRequest = startRecoveryRequestFuture.actionGet();
logger.info("--> start recovery request: starting seq_no {}, commit {}", startRecoveryRequest.startingSeqNo(), startRecoveryRequest.metadataSnapshot().getCommitUserData());
SequenceNumbers.CommitInfo commitInfoAfterLocalRecovery = SequenceNumbers.loadSeqNoInfoFromLuceneCommit(startRecoveryRequest.metadataSnapshot().getCommitUserData().entrySet());
assertThat(commitInfoAfterLocalRecovery.localCheckpoint, equalTo(lastSyncedGlobalCheckpoint));
assertThat(commitInfoAfterLocalRecovery.maxSeqNo, equalTo(lastSyncedGlobalCheckpoint));
assertThat(startRecoveryRequest.startingSeqNo(), equalTo(lastSyncedGlobalCheckpoint + 1));
ensureGreen(indexName);
assertThat((long) localRecoveredOps.get(), equalTo(lastSyncedGlobalCheckpoint - localCheckpointOfSafeCommit));
for (var recoveryState : client().execute(RecoveryAction.INSTANCE, new RecoveryRequest()).get().shardRecoveryStates().get(indexName)) {
if (startRecoveryRequest.targetNode().equals(recoveryState.getTargetNode())) {
assertThat("expect an operation-based recovery", recoveryState.getIndex().fileDetails().values(), empty());
assertThat("total recovered translog operations must include both local and remote recovery", recoveryState.getTranslog().recoveredOperations(), greaterThanOrEqualTo(Math.toIntExact(maxSeqNo - localCheckpointOfSafeCommit)));
}
}
for (String node : nodes) {
MockTransportService transportService = (MockTransportService) internalCluster().getInstance(TransportService.class, node);
transportService.clearAllRules();
}
}
use of org.elasticsearch.action.admin.indices.recovery.RecoveryRequest in project crate by crate.
the class IndexRecoveryIT method testHistoryRetention.
@Test
public void testHistoryRetention() throws Exception {
internalCluster().startNodes(3);
final String indexName = IndexParts.toIndexName(sqlExecutor.getCurrentSchema(), "test", null);
execute("CREATE TABLE test (id int) CLUSTERED INTO 1 SHARDS " + "WITH (" + " number_of_replicas=2," + " \"recovery.file_based_threshold\" = 1.0" + ")");
// Perform some replicated operations so the replica isn't simply empty, because ops-based recovery isn't better in that case
int numDocs = scaledRandomIntBetween(25, 250);
var args = new Object[numDocs][];
for (int i = 0; i < numDocs; i++) {
args[i] = new Object[] { i };
}
execute("INSERT INTO test (id) VALUES (?)", args);
if (randomBoolean()) {
execute("OPTIMIZE TABLE test");
}
ensureGreen();
String firstNodeToStop = randomFrom(internalCluster().getNodeNames());
Settings firstNodeToStopDataPathSettings = internalCluster().dataPathSettings(firstNodeToStop);
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(firstNodeToStop));
String secondNodeToStop = randomFrom(internalCluster().getNodeNames());
Settings secondNodeToStopDataPathSettings = internalCluster().dataPathSettings(secondNodeToStop);
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(secondNodeToStop));
final long desyncNanoTime = System.nanoTime();
// noinspection StatementWithEmptyBody
while (System.nanoTime() <= desyncNanoTime) {
// time passes
}
final int numNewDocs = scaledRandomIntBetween(25, 250);
args = new Object[numNewDocs][];
for (int i = 0; i < numNewDocs; i++) {
args[i] = new Object[] { i };
}
execute("INSERT INTO test (id) VALUES (?)", args);
refresh();
// Flush twice to update the safe commit's local checkpoint
execute("OPTIMIZE TABLE test");
execute("OPTIMIZE TABLE test");
execute("ALTER TABLE test SET (number_of_replicas = 1)");
internalCluster().startNode(randomFrom(firstNodeToStopDataPathSettings, secondNodeToStopDataPathSettings));
ensureGreen(indexName);
final RecoveryResponse recoveryResponse = client().admin().indices().recoveries(new RecoveryRequest(indexName)).get();
final List<RecoveryState> recoveryStates = recoveryResponse.shardRecoveryStates().get(indexName);
recoveryStates.removeIf(r -> r.getTimer().getStartNanoTime() <= desyncNanoTime);
assertThat(recoveryStates, hasSize(1));
assertThat(recoveryStates.get(0).getIndex().totalFileCount(), is(0));
assertThat(recoveryStates.get(0).getTranslog().recoveredOperations(), greaterThan(0));
}
Aggregations