Use of org.opensearch.cluster.ClusterInfo in project OpenSearch by opensearch-project: class DiskThresholdMonitorTests, method testMarkFloodStageIndicesReadOnly.
public void testMarkFloodStageIndicesReadOnly() {
AllocationService allocation = createAllocationService(Settings.builder().put("cluster.routing.allocation.node_concurrent_recoveries", 10).build());
Metadata metadata = Metadata.builder()
    .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT).put("index.routing.allocation.require._id", "node2")).numberOfShards(1).numberOfReplicas(0))
    .put(IndexMetadata.builder("test_1").settings(settings(Version.CURRENT).put("index.routing.allocation.require._id", "node1")).numberOfShards(1).numberOfReplicas(0))
    .put(IndexMetadata.builder("test_2").settings(settings(Version.CURRENT).put("index.routing.allocation.require._id", "node1")).numberOfShards(1).numberOfReplicas(0))
    .build();
RoutingTable routingTable = RoutingTable.builder().addAsNew(metadata.index("test")).addAsNew(metadata.index("test_1")).addAsNew(metadata.index("test_2")).build();
final ClusterState clusterState = applyStartedShardsUntilNoChange(
    ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(metadata)
        .routingTable(routingTable)
        .nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")))
        .build(),
    allocation);
AtomicBoolean reroute = new AtomicBoolean(false);
AtomicReference<Set<String>> indices = new AtomicReference<>();
AtomicLong currentTime = new AtomicLong();
DiskThresholdMonitor monitor = new DiskThresholdMonitor(Settings.EMPTY, () -> clusterState, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null, currentTime::get, (reason, priority, listener) -> {
assertTrue(reroute.compareAndSet(false, true));
assertThat(priority, equalTo(Priority.HIGH));
listener.onResponse(null);
}) {
@Override
protected void updateIndicesReadOnly(Set<String> indicesToMarkReadOnly, ActionListener<Void> listener, boolean readOnly) {
assertTrue(indices.compareAndSet(null, indicesToMarkReadOnly));
assertTrue(readOnly);
listener.onResponse(null);
}
};
ImmutableOpenMap.Builder<String, DiskUsage> builder = ImmutableOpenMap.builder();
builder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, 4));
builder.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, 30));
monitor.onNewInfo(clusterInfo(builder.build()));
assertFalse(reroute.get());
assertEquals(new HashSet<>(Arrays.asList("test_1", "test_2")), indices.get());
indices.set(null);
builder = ImmutableOpenMap.builder();
builder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, 4));
builder.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, 5));
currentTime.addAndGet(randomLongBetween(60001, 120000));
monitor.onNewInfo(clusterInfo(builder.build()));
assertTrue(reroute.get());
assertEquals(new HashSet<>(Arrays.asList("test_1", "test_2")), indices.get());
IndexMetadata indexMetadata = IndexMetadata.builder(clusterState.metadata().index("test_2"))
    .settings(Settings.builder()
        .put(clusterState.metadata().index("test_2").getSettings())
        .put(IndexMetadata.INDEX_BLOCKS_READ_ONLY_ALLOW_DELETE_SETTING.getKey(), true))
    .build();
// now we mark one index as read-only and assert that we don't mark it as such again
final ClusterState anotherFinalClusterState = ClusterState.builder(clusterState)
    .metadata(Metadata.builder(clusterState.metadata())
        .put(clusterState.metadata().index("test"), false)
        .put(clusterState.metadata().index("test_1"), false)
        .put(indexMetadata, true)
        .build())
    .blocks(ClusterBlocks.builder().addBlocks(indexMetadata).build())
    .build();
assertTrue(anotherFinalClusterState.blocks().indexBlocked(ClusterBlockLevel.WRITE, "test_2"));
monitor = new DiskThresholdMonitor(Settings.EMPTY, () -> anotherFinalClusterState, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null, currentTime::get, (reason, priority, listener) -> {
assertTrue(reroute.compareAndSet(false, true));
assertThat(priority, equalTo(Priority.HIGH));
listener.onResponse(null);
}) {
@Override
protected void updateIndicesReadOnly(Set<String> indicesToMarkReadOnly, ActionListener<Void> listener, boolean readOnly) {
assertTrue(indices.compareAndSet(null, indicesToMarkReadOnly));
assertTrue(readOnly);
listener.onResponse(null);
}
};
indices.set(null);
reroute.set(false);
builder = ImmutableOpenMap.builder();
builder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, 4));
builder.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, 5));
monitor.onNewInfo(clusterInfo(builder.build()));
assertTrue(reroute.get());
assertEquals(Collections.singleton("test_1"), indices.get());
}
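The clusterInfo(...) helper called throughout these tests is defined elsewhere in the test class and is not part of this excerpt. A minimal sketch of what it plausibly looks like, assuming the five-argument ClusterInfo constructor (least-available usages, most-available usages, shard sizes, routing-to-data-path, reserved space) and reusing the same usage map for both availability views:

private static ClusterInfo clusterInfo(ImmutableOpenMap<String, DiskUsage> diskUsages) {
    return clusterInfo(diskUsages, ImmutableOpenMap.of());
}

private static ClusterInfo clusterInfo(
    ImmutableOpenMap<String, DiskUsage> diskUsages,
    ImmutableOpenMap<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> reservedSpace
) {
    // The monitor only reads disk usages and reserved space in these tests, so the
    // shard-size and routing-to-path maps can be left empty in this sketch.
    return new ClusterInfo(diskUsages, diskUsages, ImmutableOpenMap.of(), ImmutableOpenMap.of(), reservedSpace);
}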
Use of org.opensearch.cluster.ClusterInfo in project OpenSearch by opensearch-project: class DiskThresholdMonitorTests, method testDoesNotSubmitRerouteTaskTooFrequently.
public void testDoesNotSubmitRerouteTaskTooFrequently() {
final ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
    .nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")))
    .build();
AtomicLong currentTime = new AtomicLong();
AtomicReference<ActionListener<ClusterState>> listenerReference = new AtomicReference<>();
DiskThresholdMonitor monitor = new DiskThresholdMonitor(Settings.EMPTY, () -> clusterState, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null, currentTime::get, (reason, priority, listener) -> {
assertNotNull(listener);
assertThat(priority, equalTo(Priority.HIGH));
assertTrue(listenerReference.compareAndSet(null, listener));
}) {
@Override
protected void updateIndicesReadOnly(Set<String> indicesToMarkReadOnly, ActionListener<Void> listener, boolean readOnly) {
throw new AssertionError("unexpected");
}
};
final ImmutableOpenMap.Builder<String, DiskUsage> allDisksOkBuilder;
allDisksOkBuilder = ImmutableOpenMap.builder();
allDisksOkBuilder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, 50));
allDisksOkBuilder.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, 50));
final ImmutableOpenMap<String, DiskUsage> allDisksOk = allDisksOkBuilder.build();
final ImmutableOpenMap.Builder<String, DiskUsage> oneDiskAboveWatermarkBuilder = ImmutableOpenMap.builder();
oneDiskAboveWatermarkBuilder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(5, 9)));
oneDiskAboveWatermarkBuilder.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, 50));
final ImmutableOpenMap<String, DiskUsage> oneDiskAboveWatermark = oneDiskAboveWatermarkBuilder.build();
// should not reroute when all disks are ok
currentTime.addAndGet(randomLongBetween(0, 120000));
monitor.onNewInfo(clusterInfo(allDisksOk));
assertNull(listenerReference.get());
// should reroute when one disk goes over the watermark
currentTime.addAndGet(randomLongBetween(0, 120000));
monitor.onNewInfo(clusterInfo(oneDiskAboveWatermark));
assertNotNull(listenerReference.get());
listenerReference.getAndSet(null).onResponse(clusterState);
if (randomBoolean()) {
// should not re-route again within the reroute interval
currentTime.addAndGet(randomLongBetween(0, DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.get(Settings.EMPTY).millis()));
monitor.onNewInfo(clusterInfo(allDisksOk));
assertNull(listenerReference.get());
}
// should reroute again when one disk is still over the watermark
currentTime.addAndGet(randomLongBetween(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.get(Settings.EMPTY).millis() + 1, 120000));
monitor.onNewInfo(clusterInfo(oneDiskAboveWatermark));
assertNotNull(listenerReference.get());
final ActionListener<ClusterState> rerouteListener1 = listenerReference.getAndSet(null);
// should not re-route again before reroute has completed
currentTime.addAndGet(randomLongBetween(0, 120000));
monitor.onNewInfo(clusterInfo(allDisksOk));
assertNull(listenerReference.get());
// complete reroute
rerouteListener1.onResponse(clusterState);
if (randomBoolean()) {
// should not re-route again within the reroute interval
currentTime.addAndGet(randomLongBetween(0, DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.get(Settings.EMPTY).millis()));
monitor.onNewInfo(clusterInfo(allDisksOk));
assertNull(listenerReference.get());
}
// should reroute again after the reroute interval
currentTime.addAndGet(randomLongBetween(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.get(Settings.EMPTY).millis() + 1, 120000));
monitor.onNewInfo(clusterInfo(allDisksOk));
assertNotNull(listenerReference.get());
listenerReference.getAndSet(null).onResponse(null);
// should not reroute again when it is not required
currentTime.addAndGet(randomLongBetween(0, 120000));
monitor.onNewInfo(clusterInfo(allDisksOk));
assertNull(listenerReference.get());
// should reroute again when one disk has reserved space that pushes it over the high watermark
final ImmutableOpenMap.Builder<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> builder = ImmutableOpenMap.builder(1);
builder.put(new ClusterInfo.NodeAndPath("node1", "/foo/bar"), new ClusterInfo.ReservedSpace.Builder().add(new ShardId("baz", "quux", 0), between(41, 100)).build());
final ImmutableOpenMap<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> reservedSpaces = builder.build();
currentTime.addAndGet(randomLongBetween(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.get(Settings.EMPTY).millis() + 1, 120000));
monitor.onNewInfo(clusterInfo(allDisksOk, reservedSpaces));
assertNotNull(listenerReference.get());
listenerReference.getAndSet(null).onResponse(null);
}
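A note on the final step: node1 reports 50 of 100 bytes free, and with default settings the high watermark is 90% used, i.e. 10 bytes free on a 100-byte disk. The monitor counts reserved space (bytes committed to in-flight shard recoveries) as used, so reserving between(41, 100) bytes drops the effective free space to 9 bytes or fewer and pushes node1 over the watermark. A sketch of that arithmetic, assuming the default watermark:

long totalBytes = 100, freeBytes = 50;                 // node1's DiskUsage above
long reservedBytes = 41;                               // lower bound of between(41, 100)
long effectiveFreeBytes = freeBytes - reservedBytes;   // 9 bytes
long highWatermarkFreeBytes = totalBytes / 10;         // 10 bytes free at the default 90% used
assert effectiveFreeBytes < highWatermarkFreeBytes;    // node1 is now over the high watermark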
Use of org.opensearch.cluster.ClusterInfo in project OpenSearch by opensearch-project: class IndexShardConstraintDeciderOverlapTests, method testHighWatermarkBreachWithLowShardCount.
/**
 * A high watermark breach blocks new shard allocations to the affected nodes. If the shard count on such
 * nodes is low, this can cause the IndexShardPerNodeConstraint to be breached.
 *
 * This test verifies that this does not lead to unassigned shards and that there are no hot spots on
 * eligible nodes.
 */
public void testHighWatermarkBreachWithLowShardCount() {
setupInitialCluster(3, 15, 10, 1);
addNodesWithIndexing(1, "high_watermark_node_", 6, 5, 1);
// Disk threshold settings enabled
Settings settings = Settings.builder()
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true)
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), 0.7)
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), 0.8)
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.getKey(), 0.95)
    .put("cluster.routing.allocation.node_concurrent_recoveries", 1)
    .put("cluster.routing.allocation.cluster_concurrent_recoveries", 1)
    .build();
// Build shard sizes and disk usages
ImmutableOpenMap.Builder<String, DiskUsage> usagesBuilder = ImmutableOpenMap.builder();
usagesBuilder.put("node_0", new DiskUsage("node_0", "node_0", "/dev/null", 100, 80)); // 20% used
usagesBuilder.put("node_1", new DiskUsage("node_1", "node_1", "/dev/null", 100, 55)); // 45% used
usagesBuilder.put("node_2", new DiskUsage("node_2", "node_2", "/dev/null", 100, 35)); // 65% used
usagesBuilder.put("high_watermark_node_0", new DiskUsage("high_watermark_node_0", "high_watermark_node_0", "/dev/null", 100, 10)); // 90% used
ImmutableOpenMap<String, DiskUsage> usages = usagesBuilder.build();
ImmutableOpenMap.Builder<String, Long> shardSizesBuilder = ImmutableOpenMap.builder();
// Each shard is 1 byte
clusterState.getRoutingTable().allShards().forEach(shard -> shardSizesBuilder.put(shardIdentifierFromRouting(shard), 1L));
ImmutableOpenMap<String, Long> shardSizes = shardSizesBuilder.build();
final ImmutableOpenMap<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> reservedSpace =
    new ImmutableOpenMap.Builder<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace>()
        .fPut(getNodeAndDevNullPath("node_0"), getReservedSpace())
        .fPut(getNodeAndDevNullPath("node_1"), getReservedSpace())
        .fPut(getNodeAndDevNullPath("node_2"), getReservedSpace())
        .fPut(getNodeAndDevNullPath("high_watermark_node_0"), getReservedSpace())
        .build();
final ClusterInfo clusterInfo = new DevNullClusterInfo(usages, usages, shardSizes, reservedSpace);
ClusterInfoService cis = () -> clusterInfo;
allocation = createAllocationService(settings, cis);
allocateAndCheckIndexShardHotSpots(false, 3, "node_0", "node_1", "node_2");
assertForIndexShardHotSpots(true, 4);
assertTrue(clusterState.getRoutingTable().shardsWithState(UNASSIGNED).isEmpty());
assertTrue(clusterState.getRoutingNodes().node("high_watermark_node_0").isEmpty());
/* Shard sizes that would breach the high watermark on node_2 if allocated. */
addIndices("big_index_", 1, 10, 0);
ImmutableOpenMap.Builder<String, Long> bigIndexShardSizeBuilder = ImmutableOpenMap.builder(shardSizes);
clusterState.getRoutingNodes().unassigned().forEach(shard -> bigIndexShardSizeBuilder.put(shardIdentifierFromRouting(shard), 20L));
shardSizes = bigIndexShardSizeBuilder.build();
final ClusterInfo bigIndexClusterInfo = new DevNullClusterInfo(usages, usages, shardSizes, reservedSpace);
cis = () -> bigIndexClusterInfo;
allocation = createAllocationService(settings, cis);
allocateAndCheckIndexShardHotSpots(false, 2, "node_0", "node_1");
assertForIndexShardHotSpots(true, 4);
assertTrue(clusterState.getRoutingTable().shardsWithState(UNASSIGNED).isEmpty());
for (ShardRouting shard : clusterState.getRoutingTable().index("big_index_0").shardsWithState(STARTED)) {
assertNotEquals("node_2", shard.currentNodeId());
}
}
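DevNullClusterInfo is a test-only ClusterInfo subclass defined in the shared test utilities rather than in this excerpt. Its essential trick is to report the same fake data path for every shard, so the per-path disk usages and reserved space registered above apply uniformly. A rough sketch, consistent with the four-argument construction used in this test:

static class DevNullClusterInfo extends ClusterInfo {
    DevNullClusterInfo(
        ImmutableOpenMap<String, DiskUsage> leastAvailableSpaceUsage,
        ImmutableOpenMap<String, DiskUsage> mostAvailableSpaceUsage,
        ImmutableOpenMap<String, Long> shardSizes,
        ImmutableOpenMap<NodeAndPath, ReservedSpace> reservedSpace
    ) {
        super(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, null, reservedSpace);
    }

    @Override
    public String getDataPath(ShardRouting shardRouting) {
        return "/dev/null"; // every shard maps to the same fake path
    }
}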
Use of org.opensearch.cluster.ClusterInfo in project OpenSearch by opensearch-project: class ExpectedShardSizeAllocationTests, method testExpectedSizeOnMove.
public void testExpectedSizeOnMove() {
final long byteSize = randomIntBetween(0, Integer.MAX_VALUE);
final AllocationService allocation = createAllocationService(Settings.EMPTY, () -> new ClusterInfo() {
@Override
public Long getShardSize(ShardRouting shardRouting) {
if (shardRouting.getIndexName().equals("test") && shardRouting.shardId().getId() == 0) {
return byteSize;
}
return null;
}
});
logger.info("creating an index with 1 shard, no replica");
Metadata metadata = Metadata.builder().put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)).build();
RoutingTable routingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
ClusterState clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metadata(metadata).routingTable(routingTable).build();
logger.info("adding two nodes and performing rerouting");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build();
clusterState = allocation.reroute(clusterState, "reroute");
logger.info("start primary shard");
clusterState = startInitializingShardsAndReroute(allocation, clusterState);
logger.info("move the shard");
String existingNodeId = clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId();
String toNodeId;
if ("node1".equals(existingNodeId)) {
toNodeId = "node2";
} else {
toNodeId = "node1";
}
AllocationService.CommandsResult commandsResult = allocation.reroute(
    clusterState,
    new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId)),
    false,
    false);
assertThat(commandsResult.getClusterState(), not(equalTo(clusterState)));
clusterState = commandsResult.getClusterState();
assertEquals(clusterState.getRoutingNodes().node(existingNodeId).iterator().next().state(), ShardRoutingState.RELOCATING);
assertEquals(clusterState.getRoutingNodes().node(toNodeId).iterator().next().state(), ShardRoutingState.INITIALIZING);
assertEquals(clusterState.getRoutingNodes().node(existingNodeId).iterator().next().getExpectedShardSize(), byteSize);
assertEquals(clusterState.getRoutingNodes().node(toNodeId).iterator().next().getExpectedShardSize(), byteSize);
logger.info("finish moving the shard");
clusterState = startInitializingShardsAndReroute(allocation, clusterState);
assertThat(clusterState.getRoutingNodes().node(existingNodeId).isEmpty(), equalTo(true));
assertThat(clusterState.getRoutingNodes().node(toNodeId).iterator().next().state(), equalTo(ShardRoutingState.STARTED));
assertEquals(clusterState.getRoutingNodes().node(toNodeId).iterator().next().getExpectedShardSize(), -1);
}
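The last assertion expects -1 because, once the relocation finishes and the shard is STARTED, the routing entry no longer carries an expected size. Assuming this matches the upstream sentinel constant, the relationship can be stated as:

// -1 is the "no expected size" sentinel used for started shards.
assert ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE == -1L;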
Use of org.opensearch.cluster.ClusterInfo in project OpenSearch by opensearch-project: class DiskThresholdDecider, method canAllocate.
@Override
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
ClusterInfo clusterInfo = allocation.clusterInfo();
ImmutableOpenMap<String, DiskUsage> usages = clusterInfo.getNodeMostAvailableDiskUsages();
final Decision decision = earlyTerminate(allocation, usages);
if (decision != null) {
return decision;
}
final double usedDiskThresholdLow = 100.0 - diskThresholdSettings.getFreeDiskThresholdLow();
final double usedDiskThresholdHigh = 100.0 - diskThresholdSettings.getFreeDiskThresholdHigh();
// subtractLeavingShards is passed as false here, because they still use disk space, and therefore we should be extra careful
// and take the size into account
final DiskUsageWithRelocations usage = getDiskUsage(node, allocation, usages, false);
// First, check whether the node is currently over the low watermark
double freeDiskPercentage = usage.getFreeDiskAsPercentage();
// Cache the used disk percentage for displaying disk percentages consistent with documentation
double usedDiskPercentage = usage.getUsedDiskAsPercentage();
long freeBytes = usage.getFreeBytes();
if (freeBytes < 0L) {
final long sizeOfRelocatingShards = sizeOfRelocatingShards(node, false, usage.getPath(), allocation.clusterInfo(), allocation.metadata(), allocation.routingTable());
logger.debug("fewer free bytes remaining than the size of all incoming shards: " + "usage {} on node {} including {} bytes of relocations, preventing allocation", usage, node.nodeId(), sizeOfRelocatingShards);
return allocation.decision(Decision.NO, NAME, "the node has fewer free bytes remaining than the total size of all incoming shards: " + "free space [%sB], relocating shards [%sB]", freeBytes + sizeOfRelocatingShards, sizeOfRelocatingShards);
}
ByteSizeValue freeBytesValue = new ByteSizeValue(freeBytes);
if (logger.isTraceEnabled()) {
logger.trace("node [{}] has {}% used disk", node.nodeId(), usedDiskPercentage);
}
// flag that determines whether the low threshold checks below can be skipped. We use this for a primary shard that is freshly
// allocated and empty.
boolean skipLowThresholdChecks = shardRouting.primary() && shardRouting.active() == false && shardRouting.recoverySource().getType() == RecoverySource.Type.EMPTY_STORE;
// checks for exact byte comparisons
if (freeBytes < diskThresholdSettings.getFreeBytesThresholdLow().getBytes()) {
if (skipLowThresholdChecks == false) {
if (logger.isDebugEnabled()) {
logger.debug("less than the required {} free bytes threshold ({} free) on node {}, preventing allocation", diskThresholdSettings.getFreeBytesThresholdLow(), freeBytesValue, node.nodeId());
}
return allocation.decision(Decision.NO, NAME, "the node is above the low watermark cluster setting [%s=%s], having less than the minimum required [%s] free " + "space, actual free: [%s]", CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getLowWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdLow(), freeBytesValue);
} else if (freeBytes > diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
// allow the shard: it is a primary that has never been allocated, and the node is still under the high watermark
if (logger.isDebugEnabled()) {
logger.debug("less than the required {} free bytes threshold ({} free) on node {}, " + "but allowing allocation because primary has never been allocated", diskThresholdSettings.getFreeBytesThresholdLow(), freeBytesValue, node.nodeId());
}
return allocation.decision(Decision.YES, NAME, "the node is above the low watermark, but less than the high watermark, and this primary shard has " + "never been allocated before");
} else {
// above the high watermark, so don't allow allocating the shard
if (logger.isDebugEnabled()) {
logger.debug("less than the required {} free bytes threshold ({} free) on node {}, " + "preventing allocation even though primary has never been allocated", diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesValue, node.nodeId());
}
return allocation.decision(Decision.NO, NAME, "the node is above the high watermark cluster setting [%s=%s], having less than the minimum required [%s] free " + "space, actual free: [%s]", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesValue);
}
}
// checks for percentage comparisons
if (freeDiskPercentage < diskThresholdSettings.getFreeDiskThresholdLow()) {
// If the shard is a replica or is a non-empty primary, check the low threshold
if (skipLowThresholdChecks == false) {
if (logger.isDebugEnabled()) {
logger.debug("more than the allowed {} used disk threshold ({} used) on node [{}], preventing allocation", Strings.format1Decimals(usedDiskThresholdLow, "%"), Strings.format1Decimals(usedDiskPercentage, "%"), node.nodeId());
}
return allocation.decision(Decision.NO, NAME, "the node is above the low watermark cluster setting [%s=%s], using more disk space than the maximum allowed " + "[%s%%], actual free: [%s%%]", CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getLowWatermarkRaw(), usedDiskThresholdLow, freeDiskPercentage);
} else if (freeDiskPercentage > diskThresholdSettings.getFreeDiskThresholdHigh()) {
// allow the shard: it is a primary that has never been allocated, and the node is still under the high watermark
if (logger.isDebugEnabled()) {
logger.debug("more than the allowed {} used disk threshold ({} used) on node [{}], " + "but allowing allocation because primary has never been allocated", Strings.format1Decimals(usedDiskThresholdLow, "%"), Strings.format1Decimals(usedDiskPercentage, "%"), node.nodeId());
}
return allocation.decision(Decision.YES, NAME, "the node is above the low watermark, but less than the high watermark, and this primary shard has " + "never been allocated before");
} else {
// above the high watermark, so don't allow allocating the shard
if (logger.isDebugEnabled()) {
logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, " + "preventing allocation even though primary has never been allocated", Strings.format1Decimals(diskThresholdSettings.getFreeDiskThresholdHigh(), "%"), Strings.format1Decimals(freeDiskPercentage, "%"), node.nodeId());
}
return allocation.decision(Decision.NO, NAME, "the node is above the high watermark cluster setting [%s=%s], using more disk space than the maximum allowed " + "[%s%%], actual free: [%s%%]", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), usedDiskThresholdHigh, freeDiskPercentage);
}
}
// Secondly, check that allocating the shard to this node doesn't put it above the high watermark
final long shardSize = getExpectedShardSize(shardRouting, 0L, allocation.clusterInfo(), allocation.snapshotShardSizeInfo(), allocation.metadata(), allocation.routingTable());
assert shardSize >= 0 : shardSize;
double freeSpaceAfterShard = freeDiskPercentageAfterShardAssigned(usage, shardSize);
long freeBytesAfterShard = freeBytes - shardSize;
if (freeBytesAfterShard < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
logger.warn("after allocating [{}] node [{}] would have less than the required threshold of " + "{} free (currently {} free, estimated shard size is {}), preventing allocation", shardRouting, node.nodeId(), diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesValue, new ByteSizeValue(shardSize));
return allocation.decision(Decision.NO, NAME, "allocating the shard to this node will bring the node above the high watermark cluster setting [%s=%s] " + "and cause it to have less than the minimum required [%s] of free space (free: [%s], estimated shard size: [%s])", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesValue, new ByteSizeValue(shardSize));
}
if (freeSpaceAfterShard < diskThresholdSettings.getFreeDiskThresholdHigh()) {
logger.warn("after allocating [{}] node [{}] would have more than the allowed " + "{} free disk threshold ({} free), preventing allocation", shardRouting, node.nodeId(), Strings.format1Decimals(diskThresholdSettings.getFreeDiskThresholdHigh(), "%"), Strings.format1Decimals(freeSpaceAfterShard, "%"));
return allocation.decision(Decision.NO, NAME, "allocating the shard to this node will bring the node above the high watermark cluster setting [%s=%s] " + "and cause it to use more disk space than the maximum allowed [%s%%] (free space after shard added: [%s%%])", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), usedDiskThresholdHigh, freeSpaceAfterShard);
}
assert freeBytesAfterShard >= 0 : freeBytesAfterShard;
return allocation.decision(Decision.YES, NAME, "enough disk for shard on node, free: [%s], shard size: [%s], free after allocating shard: [%s]", freeBytesValue, new ByteSizeValue(shardSize), new ByteSizeValue(freeBytesAfterShard));
}
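To make the two final high-watermark checks concrete, here is a worked example with hypothetical numbers (not drawn from the decider itself): a node with 100 bytes total and 30 bytes free, a high watermark of 90% used (10% free), and an incoming 25-byte shard.

long totalBytes = 100, freeBytes = 30, shardSize = 25;                  // hypothetical
long freeBytesAfterShard = freeBytes - shardSize;                       // 5 bytes
double freeSpaceAfterShard = 100.0 * freeBytesAfterShard / totalBytes;  // 5.0%
// Both the byte check (5 < 10 bytes free) and the percentage check (5.0% < 10% free)
// fall below the high watermark, so canAllocate returns Decision.NO for this node.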