Example use of org.opensearch.cluster.routing.allocation.ShardAllocationDecision in the OpenSearch project (by opensearch-project).
From class AllocationIdIT, method checkNoValidShardCopy.
/**
 * Polls the cluster allocation explain API for the primary copy of the given shard until
 * an allocation decision has been taken and that decision is
 * {@link AllocationDecision#NO_VALID_SHARD_COPY}.
 *
 * @param indexName name of the index that owns the shard
 * @param shardId   identifier of the shard; only its numeric id is sent in the request
 * @throws Exception if the assertions keep failing and {@code assertBusy} gives up
 */
private void checkNoValidShardCopy(String indexName, ShardId shardId) throws Exception {
    final int shardNumber = shardId.id();
    assertBusy(() -> {
        final ClusterAllocationExplanation primaryExplanation = client().admin()
            .cluster()
            .prepareAllocationExplain()
            .setIndex(indexName)
            .setShard(shardNumber)
            .setPrimary(true)
            .get()
            .getExplanation();
        final ShardAllocationDecision decision = primaryExplanation.getShardAllocationDecision();

        // A decision must exist before its outcome can be inspected.
        assertThat(decision.isDecisionTaken(), equalTo(true));
        assertThat(
            decision.getAllocateDecision().getAllocationDecision(),
            equalTo(AllocationDecision.NO_VALID_SHARD_COPY)
        );
    });
}
Example use of org.opensearch.cluster.routing.allocation.ShardAllocationDecision in the OpenSearch project (by opensearch-project).
From class BalancedShardsAllocator, method decideShardAllocation.
/**
 * Computes the allocation decision for a single shard.
 * <p>
 * An unassigned shard only receives an allocate-unassigned decision. Any other shard
 * receives a move decision; when that decision was taken and the shard can remain where
 * it is, a rebalance decision is computed instead and is combined with the
 * "can remain" decision obtained from the move evaluation.
 *
 * @param shard      the shard to decide on
 * @param allocation the routing allocation handed to the balancer
 * @return the combined decision; the part that does not apply is {@code NOT_TAKEN}
 */
@Override
public ShardAllocationDecision decideShardAllocation(final ShardRouting shard, final RoutingAllocation allocation) {
    final Balancer shardBalancer = new Balancer(logger, allocation, movePrimaryFirst, weightFunction, threshold);

    if (shard.unassigned()) {
        return new ShardAllocationDecision(shardBalancer.decideAllocateUnassigned(shard), MoveDecision.NOT_TAKEN);
    }

    final MoveDecision initialMove = shardBalancer.decideMove(shard);
    final boolean shardMayStay = initialMove.isDecisionTaken() && initialMove.canRemain();
    // Only a shard that may stay on its node is evaluated for rebalancing.
    final MoveDecision effectiveMove = shardMayStay
        ? shardBalancer.decideRebalance(shard).withRemainDecision(initialMove.getCanRemainDecision())
        : initialMove;

    return new ShardAllocationDecision(AllocateUnassignedDecision.NOT_TAKEN, effectiveMove);
}
Example use of org.opensearch.cluster.routing.allocation.ShardAllocationDecision in the OpenSearch project (by opensearch-project).
From class ClusterAllocationExplanationTests, method randomClusterAllocationExplanation.
/**
 * Builds a {@link ClusterAllocationExplanation} fixture for the primary of shard 0 of index "idx".
 * <p>
 * For an assigned shard the routing is STARTED on "node-0", a matching node is supplied, and the
 * decision carries only a move decision (cannot rebalance, can remain). For an unassigned shard
 * there is no node and the decision carries only an allocate-unassigned "no" decision with status
 * DECIDERS_NO.
 *
 * @param assignedShard whether the explained shard should be assigned
 * @return the explanation fixture
 */
private static ClusterAllocationExplanation randomClusterAllocationExplanation(boolean assignedShard) {
    final ShardId shardId = new ShardId(new Index("idx", "123"), 0);
    final String currentNodeId = assignedShard ? "node-0" : null;
    final ShardRoutingState routingState = assignedShard ? ShardRoutingState.STARTED : ShardRoutingState.UNASSIGNED;
    final ShardRouting routing = TestShardRouting.newShardRouting(shardId, currentNodeId, true, routingState);

    final DiscoveryNode currentNode;
    final ShardAllocationDecision decision;
    if (assignedShard) {
        currentNode = new DiscoveryNode("node-0", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT);
        final MoveDecision stayPut = MoveDecision.cannotRebalance(Decision.YES, AllocationDecision.NO, 3, null)
            .withRemainDecision(Decision.YES);
        decision = new ShardAllocationDecision(AllocateUnassignedDecision.NOT_TAKEN, stayPut);
    } else {
        currentNode = null;
        final AllocateUnassignedDecision refused = AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.DECIDERS_NO, null);
        decision = new ShardAllocationDecision(refused, MoveDecision.NOT_TAKEN);
    }

    return new ClusterAllocationExplanation(routing, currentNode, null, null, decision);
}
Example use of org.opensearch.cluster.routing.allocation.ShardAllocationDecision in the OpenSearch project (by opensearch-project).
From class RemoveCorruptedShardDataCommandIT, method testCorruptTranslogTruncation.
/**
 * End-to-end test of {@link RemoveCorruptedShardDataCommand} against a corrupted translog.
 * <p>
 * Steps, in order:
 * <ol>
 *   <li>Start two nodes and create index "test" with one shard and one replica, refresh disabled,
 *       flush-on-close disabled, and the second node initially excluded by name; the exclusion is
 *       removed once the index is yellow so the replica can be assigned.</li>
 *   <li>Index a batch of documents that must survive, flush, call {@code disableTranslogFlush},
 *       then index a second batch (possibly empty) that is expected to be truncated.</li>
 *   <li>Optionally flush the replica, stop the second node, and corrupt a random translog file
 *       while a data node is restarted.</li>
 *   <li>Assert the primary is unassigned with reason ALLOCATION_FAILED caused by a
 *       {@link TranslogCorruptedException}.</li>
 *   <li>While the first node is stopped, wait until the index write lock can be obtained and run
 *       the command with {@code -d <translogDir>}, answering "y" on the terminal.</li>
 *   <li>Check the command output, verify allocation explain reports NO_VALID_SHARD_COPY, reroute
 *       with an allocate-stale-primary command and wait for the primary to be STARTED.</li>
 *   <li>Verify the kept documents are searchable, start the replica node again, and check the
 *       replica recovery and the sequence number stats.</li>
 * </ol>
 *
 * @throws Exception if any cluster operation or busy-wait assertion fails
 */
public void testCorruptTranslogTruncation() throws Exception {
internalCluster().startNodes(2);
final String node1 = internalCluster().getNodeNames()[0];
final String node2 = internalCluster().getNodeNames()[1];
final String indexName = "test";
// node2 is excluded at creation time, so the index can only reach yellow at first
assertAcked(prepareCreate(indexName).setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "-1").put(MockEngineSupport.DISABLE_FLUSH_ON_CLOSE.getKey(), // never flush - always recover from translog
true).put("index.routing.allocation.exclude._name", node2)));
ensureYellow();
// lift the exclusion so the replica can be allocated and the index turns green
assertAcked(client().admin().indices().prepareUpdateSettings(indexName).setSettings(Settings.builder().putNull("index.routing.allocation.exclude._name")));
ensureGreen();
// Index some documents
int numDocsToKeep = randomIntBetween(10, 100);
logger.info("--> indexing [{}] docs to be kept", numDocsToKeep);
IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocsToKeep];
for (int i = 0; i < builders.length; i++) {
builders[i] = client().prepareIndex(indexName).setSource("foo", "bar");
}
indexRandom(false, false, false, Arrays.asList(builders));
// flush the kept documents before disableTranslogFlush is called for the second batch
flush(indexName);
disableTranslogFlush(indexName);
// having no extra docs is an interesting case for seq no based recoveries - test it more often
int numDocsToTruncate = randomBoolean() ? 0 : randomIntBetween(0, 100);
logger.info("--> indexing [{}] more doc to be truncated", numDocsToTruncate);
builders = new IndexRequestBuilder[numDocsToTruncate];
for (int i = 0; i < builders.length; i++) {
builders[i] = client().prepareIndex(indexName).setSource("foo", "bar");
}
indexRandom(false, false, false, Arrays.asList(builders));
RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
MockTerminal terminal = new MockTerminal();
OptionParser parser = command.getParser();
if (randomBoolean() && numDocsToTruncate > 0) {
// flush the replica, so it will have more docs than what the primary will have
Index index = resolveIndex(indexName);
IndexShard replica = internalCluster().getInstance(IndicesService.class, node2).getShardOrNull(new ShardId(index, 0));
replica.flush(new FlushRequest());
logger.info("--> performed extra flushing on replica");
}
// capture data path settings for both nodes; node1's build the command environment, node2's restart the replica node
final Settings node1PathSettings = internalCluster().dataPathSettings(node1);
final Settings node2PathSettings = internalCluster().dataPathSettings(node2);
// shut down the replica node to be tested later
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node2));
final Path translogDir = getPathToShardData(indexName, ShardPath.TRANSLOG_FOLDER_NAME);
final Path indexDir = getPathToShardData(indexName, ShardPath.INDEX_FOLDER_NAME);
// Restart the single node
logger.info("--> restarting node");
internalCluster().restartRandomDataNode(new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
// the translog is corrupted inside the callback, i.e. while the node is stopped
logger.info("--> corrupting translog on node {}", nodeName);
TestTranslog.corruptRandomTranslogFile(logger, random(), translogDir);
return super.onNodeStopped(nodeName);
}
});
// all shards should be failed due to a corrupted translog
assertBusy(() -> {
final UnassignedInfo unassignedInfo = client().admin().cluster().prepareAllocationExplain().setIndex(indexName).setShard(0).setPrimary(true).get().getExplanation().getUnassignedInfo();
assertThat(unassignedInfo.getReason(), equalTo(UnassignedInfo.Reason.ALLOCATION_FAILED));
assertThat(ExceptionsHelper.unwrap(unassignedInfo.getFailure(), TranslogCorruptedException.class), not(nullValue()));
});
// have to shut down primary node - otherwise node lock is present
internalCluster().restartNode(node1, new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
// wait until the index write lock can be obtained before running the command
assertBusy(() -> {
logger.info("--> checking that lock has been released for {}", indexDir);
// noinspection EmptyTryBlock since we're just trying to obtain the lock
try (Directory dir = FSDirectory.open(indexDir, NativeFSLockFactory.INSTANCE);
Lock ignored = dir.obtainLock(IndexWriter.WRITE_LOCK_NAME)) {
} catch (LockObtainFailedException lofe) {
logger.info("--> failed acquiring lock for {}", indexDir);
// rethrown as AssertionError so that assertBusy retries
throw new AssertionError("still waiting for lock release at [" + indexDir + "]", lofe);
} catch (IOException ioe) {
throw new AssertionError("unexpected IOException [" + indexDir + "]", ioe);
}
});
final Environment environment = TestEnvironment.newEnvironment(Settings.builder().put(internalCluster().getDefaultSettings()).put(node1PathSettings).build());
// queue a "y" answer on the mock terminal before running the command
terminal.addTextInput("y");
OptionSet options = parser.parse("-d", translogDir.toAbsolutePath().toString());
logger.info("--> running command for [{}]", translogDir.toAbsolutePath());
command.execute(terminal, options, environment);
logger.info("--> output:\n{}", terminal.getOutput());
return super.onNodeStopped(nodeName);
}
});
// look up the node id of node1; it is needed for the output check and the reroute command
String primaryNodeId = null;
final ClusterState state = client().admin().cluster().prepareState().get().getState();
final DiscoveryNodes nodes = state.nodes();
for (ObjectObjectCursor<String, DiscoveryNode> cursor : nodes.getNodes()) {
final String name = cursor.value.getName();
if (name.equals(node1)) {
primaryNodeId = cursor.key;
break;
}
}
assertThat(primaryNodeId, notNullValue());
// the command output is expected to mention allocate_stale_primary together with this node's id
assertThat(terminal.getOutput(), containsString("allocate_stale_primary"));
assertThat(terminal.getOutput(), containsString("\"node\" : \"" + primaryNodeId + "\""));
// there is only _stale_ primary (due to new allocation id)
assertBusy(() -> {
final ClusterAllocationExplanation explanation = client().admin().cluster().prepareAllocationExplain().setIndex(indexName).setShard(0).setPrimary(true).get().getExplanation();
final ShardAllocationDecision shardAllocationDecision = explanation.getShardAllocationDecision();
assertThat(shardAllocationDecision.isDecisionTaken(), equalTo(true));
assertThat(shardAllocationDecision.getAllocateDecision().getAllocationDecision(), equalTo(AllocationDecision.NO_VALID_SHARD_COPY));
});
// explicitly allocate the stale primary on node1 (last constructor argument is true)
client().admin().cluster().prepareReroute().add(new AllocateStalePrimaryAllocationCommand(indexName, 0, primaryNodeId, true)).get();
assertBusy(() -> {
final ClusterAllocationExplanation explanation = client().admin().cluster().prepareAllocationExplain().setIndex(indexName).setShard(0).setPrimary(true).get().getExplanation();
assertThat(explanation.getCurrentNode(), notNullValue());
assertThat(explanation.getShardState(), equalTo(ShardRoutingState.STARTED));
});
// node2 has not been started again yet, so only yellow is awaited here
ensureYellow(indexName);
// Run a search and make sure it succeeds
assertHitCount(client().prepareSearch(indexName).setQuery(matchAllQuery()).get(), numDocsToKeep);
logger.info("--> starting the replica node to test recovery");
internalCluster().startNode(node2PathSettings);
ensureGreen(indexName);
// every node holding the index must return exactly the kept documents
for (String node : internalCluster().nodesInclude(indexName)) {
SearchRequestBuilder q = client().prepareSearch(indexName).setPreference("_only_nodes:" + node).setQuery(matchAllQuery());
assertHitCount(q.get(), numDocsToKeep);
}
final RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries(indexName).setActiveOnly(false).get();
final RecoveryState replicaRecoveryState = recoveryResponse.shardRecoveryStates().get(indexName).stream().filter(recoveryState -> recoveryState.getPrimary() == false).findFirst().get();
// the replica recovery must have recovered at least one file
assertThat(replicaRecoveryState.getIndex().toString(), replicaRecoveryState.getIndex().recoveredFileCount(), greaterThan(0));
// Ensure that the global checkpoint and local checkpoint are restored from the max seqno of the last commit.
final SeqNoStats seqNoStats = getSeqNoStats(indexName, 0);
assertThat(seqNoStats.getGlobalCheckpoint(), equalTo(seqNoStats.getMaxSeqNo()));
assertThat(seqNoStats.getLocalCheckpoint(), equalTo(seqNoStats.getMaxSeqNo()));
}
Example use of org.opensearch.cluster.routing.allocation.ShardAllocationDecision in the OpenSearch project (by opensearch-project).
From class RemoveCorruptedShardDataCommandIT, method testCorruptIndex.
/**
 * End-to-end test of {@link RemoveCorruptedShardDataCommand} against a corrupted Lucene index.
 * <p>
 * Steps, in order:
 * <ol>
 *   <li>Start one node and create index "index42" with one shard, no replicas, merging and
 *       refresh disabled, flush-on-close disabled, and check-on-startup set to "checksum".</li>
 *   <li>Index several batches of documents, flushing after each batch.</li>
 *   <li>Run the command while the node is still running and expect a failure about the node
 *       directory lock.</li>
 *   <li>Restart the node; while it is stopped, run the command again and expect a failure saying
 *       the shard does not seem corrupted, then corrupt the index.</li>
 *   <li>Wait for allocation explain to report NO_VALID_SHARD_COPY, restart the node once more and
 *       run the command with a "y" answer while the node is stopped.</li>
 *   <li>Check the command output, reroute with an allocate-stale-primary command, wait for the
 *       primary to be STARTED, and verify the hit count equals the indexed documents minus the
 *       number of lost documents reported in the command output.</li>
 * </ol>
 *
 * @throws Exception if any cluster operation or busy-wait assertion fails
 */
public void testCorruptIndex() throws Exception {
final String node = internalCluster().startNode();
final String indexName = "index42";
assertAcked(prepareCreate(indexName).setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(MergePolicyConfig.INDEX_MERGE_ENABLED, false).put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "-1").put(MockEngineSupport.DISABLE_FLUSH_ON_CLOSE.getKey(), true).put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), "checksum")));
// index some docs in several segments
int numDocs = 0;
for (int k = 0, attempts = randomIntBetween(5, 10); k < attempts; k++) {
final int numExtraDocs = between(10, 100);
IndexRequestBuilder[] builders = new IndexRequestBuilder[numExtraDocs];
for (int i = 0; i < builders.length; i++) {
builders[i] = client().prepareIndex(indexName).setSource("foo", "bar");
}
numDocs += numExtraDocs;
indexRandom(false, false, false, Arrays.asList(builders));
// flush after every batch
flush(indexName);
}
logger.info("--> indexed {} docs", numDocs);
final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
final MockTerminal terminal = new MockTerminal();
final OptionParser parser = command.getParser();
final Settings nodePathSettings = internalCluster().dataPathSettings(node);
final Environment environment = TestEnvironment.newEnvironment(Settings.builder().put(internalCluster().getDefaultSettings()).put(nodePathSettings).build());
// here the shard is addressed by index name and shard id
final OptionSet options = parser.parse("-index", indexName, "-shard-id", "0");
// Try running it before the node is stopped (and shard is closed)
try {
command.execute(terminal, options, environment);
fail("expected the command to fail as node is locked");
} catch (Exception e) {
assertThat(e.getMessage(), allOf(containsString("failed to lock node's directory"), containsString("is OpenSearch still running?")));
}
final Path indexDir = getPathToShardData(indexName, ShardPath.INDEX_FOLDER_NAME);
internalCluster().restartNode(node, new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
// Try running it before the shard is corrupted, it should flip out because there is no corruption file marker
try {
command.execute(terminal, options, environment);
fail("expected the command to fail as there is no corruption file marker");
} catch (Exception e) {
assertThat(e.getMessage(), startsWith("Shard does not seem to be corrupted at"));
}
// corrupt the index files while the node is still stopped
CorruptionUtils.corruptIndex(random(), indexDir, false);
return super.onNodeStopped(nodeName);
}
});
// shard should be failed due to a corrupted index
assertBusy(() -> {
final ClusterAllocationExplanation explanation = client().admin().cluster().prepareAllocationExplain().setIndex(indexName).setShard(0).setPrimary(true).get().getExplanation();
final ShardAllocationDecision shardAllocationDecision = explanation.getShardAllocationDecision();
assertThat(shardAllocationDecision.isDecisionTaken(), equalTo(true));
assertThat(shardAllocationDecision.getAllocateDecision().getAllocationDecision(), equalTo(AllocationDecision.NO_VALID_SHARD_COPY));
});
// second restart: this time the command is run for real while the node is stopped
internalCluster().restartNode(node, new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
// queue a "y" answer on the mock terminal before running the command
terminal.addTextInput("y");
command.execute(terminal, options, environment);
return super.onNodeStopped(nodeName);
}
});
waitNoPendingTasksOnAll();
// look up the node id; it is needed for the output check and the reroute command
String nodeId = null;
final ClusterState state = client().admin().cluster().prepareState().get().getState();
final DiscoveryNodes nodes = state.nodes();
for (ObjectObjectCursor<String, DiscoveryNode> cursor : nodes.getNodes()) {
final String name = cursor.value.getName();
if (name.equals(node)) {
nodeId = cursor.key;
break;
}
}
assertThat(nodeId, notNullValue());
logger.info("--> output:\n{}", terminal.getOutput());
// the command output is expected to mention allocate_stale_primary together with this node's id
assertThat(terminal.getOutput(), containsString("allocate_stale_primary"));
assertThat(terminal.getOutput(), containsString("\"node\" : \"" + nodeId + "\""));
// there is only _stale_ primary (due to new allocation id)
assertBusy(() -> {
final ClusterAllocationExplanation explanation = client().admin().cluster().prepareAllocationExplain().setIndex(indexName).setShard(0).setPrimary(true).get().getExplanation();
final ShardAllocationDecision shardAllocationDecision = explanation.getShardAllocationDecision();
assertThat(shardAllocationDecision.isDecisionTaken(), equalTo(true));
assertThat(shardAllocationDecision.getAllocateDecision().getAllocationDecision(), equalTo(AllocationDecision.NO_VALID_SHARD_COPY));
});
// explicitly allocate the stale primary on this node (last constructor argument is true)
client().admin().cluster().prepareReroute().add(new AllocateStalePrimaryAllocationCommand(indexName, 0, nodeId, true)).get();
assertBusy(() -> {
final ClusterAllocationExplanation explanation = client().admin().cluster().prepareAllocationExplain().setIndex(indexName).setShard(0).setPrimary(true).get().getExplanation();
assertThat(explanation.getCurrentNode(), notNullValue());
assertThat(explanation.getShardState(), equalTo(ShardRoutingState.STARTED));
});
// extract the number of lost documents reported in the command output
final Pattern pattern = Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");
final Matcher matcher = pattern.matcher(terminal.getOutput());
assertThat(matcher.find(), equalTo(true));
final int expectedNumDocs = numDocs - Integer.parseInt(matcher.group("docs"));
ensureGreen(indexName);
// the documents that remain searchable must equal indexed minus reported-lost
assertHitCount(client().prepareSearch(indexName).setQuery(matchAllQuery()).get(), expectedNumDocs);
}
Aggregations