Use of org.opensearch.env.ShardLockObtainFailedException in project OpenSearch by opensearch-project.
From the class IndicesService, method deleteShardStore:
/**
 * This method deletes the shard contents on disk for the given shard ID. This method will fail if the shard deletion
 * is prevented by {@link #canDeleteShardContent(ShardId, IndexSettings)}
 * or if the shard's lock can not be acquired.
 *
 * On data nodes, if the deleted shard is the last shard folder in its index, the method will attempt to remove
 * the index folder as well.
 *
 * @param reason the reason for the shard deletion
 * @param shardId the shard ID to delete
 * @param clusterState the current cluster state. This is required to access the index settings etc.
 * @throws IOException if an IOException occurs
 */
public void deleteShardStore(String reason, ShardId shardId, ClusterState clusterState) throws IOException, ShardLockObtainFailedException {
    final IndexMetadata metadata = clusterState.getMetadata().indices().get(shardId.getIndexName());
    final IndexSettings indexSettings = buildIndexSettings(metadata);
    ShardDeletionCheckResult shardDeletionCheckResult = canDeleteShardContent(shardId, indexSettings);
    if (shardDeletionCheckResult != ShardDeletionCheckResult.FOLDER_FOUND_CAN_DELETE) {
        throw new IllegalStateException("Can't delete shard " + shardId + " (cause: " + shardDeletionCheckResult + ")");
    }
    nodeEnv.deleteShardDirectorySafe(shardId, indexSettings);
    logger.debug("{} deleted shard reason [{}]", shardId, reason);
    if (canDeleteIndexContents(shardId.getIndex(), indexSettings)) {
        if (nodeEnv.findAllShardIds(shardId.getIndex()).isEmpty()) {
            try {
                // note that deleteIndexStore has more safety checks and may throw an exception if the index was concurrently created.
                deleteIndexStore("no longer used", metadata);
            } catch (Exception e) {
                // wrap the exception to indicate we already deleted the shard
                throw new OpenSearchException("failed to delete unused index after deleting its last shard (" + shardId + ")", e);
            }
        } else {
            logger.trace("[{}] still has shard stores, leaving as is", shardId.getIndex());
        }
    }
}
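For orientation, here is a minimal, hypothetical caller sketch. The class name ShardStoreCleaner and the injected IndicesService/ClusterService fields are assumptions rather than part of the snippet above, and the package locations follow the pre-2.x layout, which may differ between versions. It simply forwards the checked exceptions that deleteShardStore declares, including ShardLockObtainFailedException when the shard lock is held.

import java.io.IOException;

import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.env.ShardLockObtainFailedException;
import org.opensearch.index.shard.ShardId;
import org.opensearch.indices.IndicesService;

public class ShardStoreCleaner {
    private final IndicesService indicesService; // assumed to be injected by the caller
    private final ClusterService clusterService; // assumed to be injected by the caller

    public ShardStoreCleaner(IndicesService indicesService, ClusterService clusterService) {
        this.indicesService = indicesService;
        this.clusterService = clusterService;
    }

    // Deletes the on-disk store of the given shard; ShardLockObtainFailedException
    // propagates when the shard lock is currently held, e.g. by an in-flight recovery.
    public void cleanShard(ShardId shardId) throws IOException, ShardLockObtainFailedException {
        final ClusterState state = clusterService.state();
        indicesService.deleteShardStore("manual cleanup", shardId, state);
    }
}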
Use of org.opensearch.env.ShardLockObtainFailedException in project OpenSearch by opensearch-project.
From the class IndicesClusterStateService, method deleteIndices:
/**
* Deletes indices (with shard data).
*
* @param event cluster change event
*/
private void deleteIndices(final ClusterChangedEvent event) {
    final ClusterState previousState = event.previousState();
    final ClusterState state = event.state();
    final String localNodeId = state.nodes().getLocalNodeId();
    assert localNodeId != null;
    for (Index index : event.indicesDeleted()) {
        if (logger.isDebugEnabled()) {
            logger.debug("[{}] cleaning index, no longer part of the metadata", index);
        }
        AllocatedIndex<? extends Shard> indexService = indicesService.indexService(index);
        final IndexSettings indexSettings;
        if (indexService != null) {
            indexSettings = indexService.getIndexSettings();
            indicesService.removeIndex(index, DELETED, "index no longer part of the metadata");
        } else if (previousState.metadata().hasIndex(index)) {
            // The deleted index was part of the previous cluster state, but not loaded on the local node
            final IndexMetadata metadata = previousState.metadata().index(index);
            indexSettings = new IndexSettings(metadata, settings);
            indicesService.deleteUnassignedIndex("deleted index was not assigned to local node", metadata, state);
        } else {
            // previous cluster state is not initialized/recovered.
            assert state.metadata().indexGraveyard().containsIndex(index) || previousState.blocks().hasGlobalBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK);
            final IndexMetadata metadata = indicesService.verifyIndexIsDeleted(index, event.state());
            if (metadata != null) {
                indexSettings = new IndexSettings(metadata, settings);
            } else {
                indexSettings = null;
            }
        }
        if (indexSettings != null) {
            threadPool.generic().execute(new AbstractRunnable() {
                @Override
                public void onFailure(Exception e) {
                    logger.warn(() -> new ParameterizedMessage("[{}] failed to complete pending deletion for index", index), e);
                }

                @Override
                protected void doRun() throws Exception {
                    try {
                        // we are waiting until we can lock the index / all shards on the node and then we ack the delete of the store
                        // to the master. If we can't acquire the locks here immediately there might be a shard of this index still
                        // holding on to the lock due to a "currently canceled recovery" or so. The shard will delete itself BEFORE the
                        // lock is released so it's guaranteed to be deleted by the time we get the lock
                        indicesService.processPendingDeletes(index, indexSettings, new TimeValue(30, TimeUnit.MINUTES));
                    } catch (ShardLockObtainFailedException exc) {
                        logger.warn("[{}] failed to lock all shards for index - timed out after 30 seconds", index);
                    } catch (InterruptedException e) {
                        logger.warn("[{}] failed to lock all shards for index - interrupted", index);
                    }
                }
            });
        }
    }
}
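The long comment inside doRun() relies on NodeEnvironment's shard-lock semantics: a shard releases its lock only after it has finished deleting itself, so whoever obtains the lock afterwards sees a fully cleaned-up shard. Below is a minimal sketch of taking such a lock directly, using the shardLock(ShardId, String, long) overload that also appears in the InternalTestCluster snippet further down; the class name ShardLockExample and the 30-second timeout are assumptions for illustration.

import java.io.Closeable;
import java.io.IOException;
import java.util.concurrent.TimeUnit;

import org.opensearch.env.NodeEnvironment;
import org.opensearch.env.ShardLockObtainFailedException;
import org.opensearch.index.shard.ShardId;

public final class ShardLockExample {
    private ShardLockExample() {}

    // Tries to obtain the shard lock for up to 30 seconds; while the lock is held,
    // no other component (recovery, store deletion) can touch the shard's files.
    public static void withShardLock(NodeEnvironment nodeEnv, ShardId shardId) throws IOException {
        try (Closeable lock = nodeEnv.shardLock(shardId, "example cleanup", TimeUnit.SECONDS.toMillis(30))) {
            // the shard directory can be inspected or deleted safely here
        } catch (ShardLockObtainFailedException e) {
            // another component still holds the lock, e.g. a recovery that has not finished cancelling yet
        }
    }
}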
Use of org.opensearch.env.ShardLockObtainFailedException in project OpenSearch by opensearch-project.
From the class PrimaryShardAllocatorTests, method testShardLockObtainFailedException:
/**
 * Tests that when a node returns a ShardLockObtainFailedException, the shard copy is still considered valid
 */
public void testShardLockObtainFailedException() {
    final RoutingAllocation allocation = routingAllocationWithOnePrimaryNoReplicas(yesAllocationDeciders(), CLUSTER_RECOVERED, "allocId1");
    testAllocator.addData(node1, "allocId1", randomBoolean(), new ShardLockObtainFailedException(shardId, "test"));
    allocateAllUnassigned(allocation);
    assertThat(allocation.routingNodesChanged(), equalTo(true));
    assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true));
    assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
    assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), equalTo(node1.getId()));
    // check that allocation id is reused
    assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).allocationId().getId(), equalTo("allocId1"));
    assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
}
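The rationale behind the test: a ShardLockObtainFailedException reported alongside a node's shard metadata indicates a transient condition (the on-disk lock was briefly held, for example by a recovery that was being cancelled), not a damaged copy, so the allocator still trusts the copy and reuses its allocation id. The helper below is purely illustrative of that distinction; the class and method names are hypothetical and not taken from PrimaryShardAllocator.

import org.opensearch.env.ShardLockObtainFailedException;

public final class StoreExceptionPolicy {
    private StoreExceptionPolicy() {}

    // A shard copy stays eligible for primary allocation when it reported no store
    // exception at all, or only a ShardLockObtainFailedException: the latter means the
    // files were temporarily locked, not that they are corrupted.
    public static boolean isUsableCopy(Exception storeException) {
        return storeException == null || storeException instanceof ShardLockObtainFailedException;
    }
}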
Use of org.opensearch.env.ShardLockObtainFailedException in project OpenSearch by opensearch-project.
From the class InternalTestCluster, method assertAfterTest:
@Override
public synchronized void assertAfterTest() throws Exception {
    super.assertAfterTest();
    assertRequestsFinished();
    assertNoInFlightDocsInEngine();
    for (NodeAndClient nodeAndClient : nodes.values()) {
        NodeEnvironment env = nodeAndClient.node().getNodeEnvironment();
        Set<ShardId> shardIds = env.lockedShards();
        for (ShardId id : shardIds) {
            try {
                env.shardLock(id, "InternalTestCluster assert after test", TimeUnit.SECONDS.toMillis(5)).close();
            } catch (ShardLockObtainFailedException ex) {
                fail("Shard " + id + " is still locked after 5 sec waiting");
            }
        }
    }
}
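A small, hypothetical diagnostic helper built on the same NodeEnvironment.lockedShards() call, handy when the assertion above trips and you want to see which shards are still held; the class and method names are assumptions.

import java.util.Set;
import java.util.stream.Collectors;

import org.opensearch.env.NodeEnvironment;
import org.opensearch.index.shard.ShardId;

public final class LockedShardsReport {
    private LockedShardsReport() {}

    // Formats the set of shard IDs whose locks are currently held on this node.
    public static String describe(NodeEnvironment env) {
        Set<ShardId> locked = env.lockedShards();
        if (locked.isEmpty()) {
            return "no shard locks held";
        }
        return locked.stream()
            .map(ShardId::toString)
            .collect(Collectors.joining(", ", "locked shards: [", "]"));
    }
}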
Use of org.opensearch.env.ShardLockObtainFailedException in project OpenSearch by opensearch-project.
From the class ExceptionSerializationTests, method testShardLockObtainFailedException:
public void testShardLockObtainFailedException() throws IOException {
    ShardId shardId = new ShardId("foo", "_na_", 1);
    ShardLockObtainFailedException orig = new ShardLockObtainFailedException(shardId, "boom");
    Version version = VersionUtils.randomIndexCompatibleVersion(random());
    ShardLockObtainFailedException ex = serialize(orig, version);
    assertEquals(orig.getMessage(), ex.getMessage());
    assertEquals(orig.getShardId(), ex.getShardId());
}
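The serialize(...) call above is a helper from the test's base class. As a rough standalone sketch of the same round trip, assuming the pre-2.x stream API packages, the exception can be written with StreamOutput.writeException and read back with StreamInput.readException; the class name and main method are illustrative only.

import java.io.IOException;

import org.opensearch.common.io.stream.BytesStreamOutput;
import org.opensearch.common.io.stream.StreamInput;
import org.opensearch.env.ShardLockObtainFailedException;
import org.opensearch.index.shard.ShardId;

public final class ExceptionRoundTripExample {
    private ExceptionRoundTripExample() {}

    public static void main(String[] args) throws IOException {
        ShardLockObtainFailedException orig =
            new ShardLockObtainFailedException(new ShardId("foo", "_na_", 1), "boom");
        try (BytesStreamOutput out = new BytesStreamOutput()) {
            // write the exception into an in-memory stream ...
            out.writeException(orig);
            try (StreamInput in = out.bytes().streamInput()) {
                // ... and read it back; message and shard id survive the round trip
                ShardLockObtainFailedException copy = in.readException();
                assert orig.getMessage().equals(copy.getMessage());
                assert orig.getShardId().equals(copy.getShardId());
            }
        }
    }
}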