Use of org.elasticsearch.cluster.DiskUsage in project crate by crate.
From class DiskThresholdDeciderTests, method testDiskThreshold:
@Test
public void testDiskThreshold() {
Settings diskSettings = Settings.builder()
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true)
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), 0.7)
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), 0.8)
    .build();
ImmutableOpenMap.Builder<String, DiskUsage> usagesBuilder = ImmutableOpenMap.builder();
// 90% used
usagesBuilder.put("node1", new DiskUsage("node1", "node1", "/dev/null", 100, 10));
// 65% used
usagesBuilder.put("node2", new DiskUsage("node2", "node2", "/dev/null", 100, 35));
// 40% used
usagesBuilder.put("node3", new DiskUsage("node3", "node3", "/dev/null", 100, 60));
// 20% used
usagesBuilder.put("node4", new DiskUsage("node4", "node4", "/dev/null", 100, 80));
ImmutableOpenMap<String, DiskUsage> usages = usagesBuilder.build();
ImmutableOpenMap.Builder<String, Long> shardSizesBuilder = ImmutableOpenMap.builder();
// 10 bytes
shardSizesBuilder.put("[test][0][p]", 10L);
shardSizesBuilder.put("[test][0][r]", 10L);
ImmutableOpenMap<String, Long> shardSizes = shardSizesBuilder.build();
final ClusterInfo clusterInfo = new DevNullClusterInfo(usages, usages, shardSizes);
ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
AllocationDeciders deciders = new AllocationDeciders(new HashSet<>(Arrays.asList(
    new SameShardAllocationDecider(Settings.EMPTY, clusterSettings),
    makeDecider(diskSettings))));
ClusterInfoService cis = () -> {
logger.info("--> calling fake getClusterInfo");
return clusterInfo;
};
AllocationService strategy = new AllocationService(deciders, new TestGatewayAllocator(),
    new BalancedShardsAllocator(Settings.EMPTY), cis);
Metadata metadata = Metadata.builder()
    .put(IndexMetadata.builder("test").settings(Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
        .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
        .put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "false")))
    .build();
final RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
    .metadata(metadata).routingTable(initialRoutingTable).build();
logger.info("--> adding two nodes");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
logShardStates(clusterState);
// Primary shard should be initializing, replica should not
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
logger.info("--> start the shards (primaries)");
clusterState = startInitializingShardsAndReroute(strategy, clusterState);
logShardStates(clusterState);
// Assert that we're able to start the primary
assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));
// Assert that node1 didn't get any shards because its disk usage is too high
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
logger.info("--> start the shards (replicas)");
clusterState = startInitializingShardsAndReroute(strategy, clusterState);
logShardStates(clusterState);
// Assert that the replica couldn't be started since node1 doesn't have enough space
assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));
logger.info("--> adding node3");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node3"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
logShardStates(clusterState);
// Assert that the replica is initialized now that node3 is available with enough space
assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
logger.info("--> start the shards (replicas)");
clusterState = startInitializingShardsAndReroute(strategy, clusterState);
logShardStates(clusterState);
// Assert that the replica is now started on node3; node1 still holds no shards because its disk usage is too high
assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2));
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
logger.info("--> changing decider settings");
// Set the low threshold to 60 instead of 70
// Set the high threshold to 70 instead of 80
// node2 now should not have new shards allocated to it, but shards can remain
diskSettings = Settings.builder()
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true)
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "60%")
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), 0.7)
    .build();
deciders = new AllocationDeciders(new HashSet<>(Arrays.asList(
    new SameShardAllocationDecider(Settings.EMPTY, clusterSettings),
    makeDecider(diskSettings))));
strategy = new AllocationService(deciders, new TestGatewayAllocator(),
    new BalancedShardsAllocator(Settings.EMPTY), cis);
clusterState = strategy.reroute(clusterState, "reroute");
logShardStates(clusterState);
// Shards remain started
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2));
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
logger.info("--> changing settings again");
// Set the low threshold to 50 instead of 60
// Set the high threshold to 60 instead of 70
// node2 now should not have new shards allocated to it, and shards cannot remain
diskSettings = Settings.builder()
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true)
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), 0.5)
    .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), 0.6)
    .build();
deciders = new AllocationDeciders(new HashSet<>(Arrays.asList(
    new SameShardAllocationDecider(Settings.EMPTY, clusterSettings),
    makeDecider(diskSettings))));
strategy = new AllocationService(deciders, new TestGatewayAllocator(),
    new BalancedShardsAllocator(Settings.EMPTY), cis);
clusterState = strategy.reroute(clusterState, "reroute");
logShardStates(clusterState);
// Shards remain started
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2));
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
// Shard hasn't been moved off of node2 yet because there's nowhere for it to go
assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
logger.info("--> adding node4");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node4"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
logShardStates(clusterState);
// The shard on node2 starts relocating to node4: one shard remains started, the relocation target is initializing
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
logger.info("--> apply INITIALIZING shards");
clusterState = startInitializingShardsAndReroute(strategy, clusterState);
logShardStates(clusterState);
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
// Node4 is available now, so the shard is moved off of node2
assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0));
assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1));
}
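The "90% used" style comments above follow from the DiskUsage(nodeId, nodeName, path, totalBytes, freeBytes) constructor: the percentages are derived from the two byte counts, not stored. A minimal standalone sketch, assuming only the standard DiskUsage accessors (the class name and main method are ours):
import org.elasticsearch.cluster.DiskUsage;

public class DiskUsageSketch {
    public static void main(String[] args) {
        // 100 bytes total, 10 free: the "90% used" node1 entry from the test above
        DiskUsage usage = new DiskUsage("node1", "node1", "/dev/null", 100, 10);
        System.out.println(usage.getUsedBytes());             // 90  (totalBytes - freeBytes)
        System.out.println(usage.getUsedDiskAsPercentage());  // 90.0, hence "90% used"
        System.out.println(usage.getFreeDiskAsPercentage());  // 10.0
    }
}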
Use of org.elasticsearch.cluster.DiskUsage in project crate by crate.
From class DiskThresholdDeciderUnitTests, method testCannotAllocateDueToLackOfDiskResources:
@Test
public void testCannotAllocateDueToLackOfDiskResources() {
ClusterSettings nss = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
DiskThresholdDecider decider = new DiskThresholdDecider(Settings.EMPTY, nss);
Metadata metadata = Metadata.builder()
    .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
    .build();
final Index index = metadata.index("test").getIndex();
ShardRouting test_0 = ShardRouting.newUnassigned(new ShardId(index, 0), true,
    EmptyStoreRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
DiscoveryNode node_0 = new DiscoveryNode("node_0", buildNewFakeTransportAddress(), Collections.emptyMap(),
    new HashSet<>(DiscoveryNodeRole.BUILT_IN_ROLES), Version.CURRENT);
DiscoveryNode node_1 = new DiscoveryNode("node_1", buildNewFakeTransportAddress(), Collections.emptyMap(),
    new HashSet<>(DiscoveryNodeRole.BUILT_IN_ROLES), Version.CURRENT);
RoutingTable routingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
    .metadata(metadata).routingTable(routingTable).build();
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(node_0).add(node_1)).build();
// actual test -- after all that bloat :)
ImmutableOpenMap.Builder<String, DiskUsage> leastAvailableUsages = ImmutableOpenMap.builder();
// all full
leastAvailableUsages.put("node_0", new DiskUsage("node_0", "node_0", "_na_", 100, 0));
ImmutableOpenMap.Builder<String, DiskUsage> mostAvailableUsage = ImmutableOpenMap.builder();
final int freeBytes = randomIntBetween(20, 100);
mostAvailableUsage.put("node_0", new DiskUsage("node_0", "node_0", "_na_", 100, freeBytes));
ImmutableOpenMap.Builder<String, Long> shardSizes = ImmutableOpenMap.builder();
// way bigger than available space
final long shardSize = randomIntBetween(110, 1000);
shardSizes.put("[test][0][p]", shardSize);
ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(),
    shardSizes.build(), ImmutableOpenMap.of());
RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Collections.singleton(decider)),
    clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime());
allocation.debugDecision(true);
Decision decision = decider.canAllocate(test_0, new RoutingNode("node_0", node_0), allocation);
assertEquals(Decision.Type.NO, decision.type());
assertThat(decision.getExplanation(), containsString(
    "allocating the shard to this node will bring the node above the high watermark cluster setting " +
    "[cluster.routing.allocation.disk.watermark.high=90%] " +
    "and cause it to have less than the minimum required [0b] of free space " +
    "(free: [" + freeBytes + "b], estimated shard size: [" + shardSize + "b])"));
}
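For reference, the arithmetic behind the NO decision asserted above, restated as a self-contained sketch; the names and concrete values are ours (the test draws them randomly within the bounds shown):
public class WatermarkArithmeticSketch {
    public static void main(String[] args) {
        long totalBytes = 100;
        long freeBytes = 20;        // lower bound of randomIntBetween(20, 100)
        long shardSize = 110;       // lower bound of randomIntBetween(110, 1000)
        // Default high watermark is 90% used, i.e. at least 10% of the disk must stay free.
        long minFreeBytes = totalBytes / 10;
        long freeBytesAfterShard = freeBytes - shardSize;   // negative: shard is bigger than free space
        boolean denied = freeBytesAfterShard < minFreeBytes;
        System.out.println("allocation denied: " + denied); // true -> Decision.NO
    }
}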
Use of org.elasticsearch.cluster.DiskUsage in project crate by crate.
From class DiskThresholdDecider, method getDiskUsage:
private DiskUsageWithRelocations getDiskUsage(RoutingNode node, RoutingAllocation allocation,
                                              ImmutableOpenMap<String, DiskUsage> usages,
                                              boolean subtractLeavingShards) {
DiskUsage usage = usages.get(node.nodeId());
if (usage == null) {
// If there is no usage, and we have other nodes in the cluster,
// use the average usage for all nodes as the usage for this node
usage = averageUsage(node, usages);
LOGGER.debug("unable to determine disk usage for {}, defaulting to average across nodes [{} total] [{} free] [{}% free]",
    node.nodeId(), usage.getTotalBytes(), usage.getFreeBytes(), usage.getFreeDiskAsPercentage());
}
final DiskUsageWithRelocations diskUsageWithRelocations = new DiskUsageWithRelocations(usage,
    sizeOfRelocatingShards(node, subtractLeavingShards, usage.getPath(),
        allocation.clusterInfo(), allocation.metadata(), allocation.routingTable()));
LOGGER.trace("getDiskUsage(subtractLeavingShards={}) returning {}", subtractLeavingShards, diskUsageWithRelocations);
return diskUsageWithRelocations;
}
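DiskUsageWithRelocations pairs the raw usage with the net size of shards relocating relative to the node. A sketch of the adjustment this is presumed to enable, assuming the wrapper simply offsets the free-bytes figure by the relocating-shard size; the helper name is hypothetical:
// Hypothetical helper, not the crate implementation: inbound relocations consume
// space not yet reflected in the usage snapshot, so treat them as spent free bytes.
static long freeBytesAfterRelocations(org.elasticsearch.cluster.DiskUsage usage, long relocatingShardsSize) {
    return usage.getFreeBytes() - relocatingShardsSize;
}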
Use of org.elasticsearch.cluster.DiskUsage in project elasticsearch by elastic.
From class DiskThresholdDecider, method canAllocate:
@Override
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
ClusterInfo clusterInfo = allocation.clusterInfo();
ImmutableOpenMap<String, DiskUsage> usages = clusterInfo.getNodeMostAvailableDiskUsages();
final Decision decision = earlyTerminate(allocation, usages);
if (decision != null) {
return decision;
}
final double usedDiskThresholdLow = 100.0 - diskThresholdSettings.getFreeDiskThresholdLow();
final double usedDiskThresholdHigh = 100.0 - diskThresholdSettings.getFreeDiskThresholdHigh();
// subtractLeavingShards is passed as false here, because leaving shards still use disk space,
// so we should be extra careful and take their size into account
DiskUsage usage = getDiskUsage(node, allocation, usages, false);
// First, check whether the node is currently over the low watermark
double freeDiskPercentage = usage.getFreeDiskAsPercentage();
// Cache the used disk percentage for displaying disk percentages consistent with documentation
double usedDiskPercentage = usage.getUsedDiskAsPercentage();
long freeBytes = usage.getFreeBytes();
if (logger.isTraceEnabled()) {
logger.trace("node [{}] has {}% used disk", node.nodeId(), usedDiskPercentage);
}
// flag that determines whether the low threshold checks below can be skipped. We use this for a primary shard that is freshly
// allocated and empty.
boolean skipLowTresholdChecks = shardRouting.primary() &&
    shardRouting.active() == false &&
    shardRouting.recoverySource().getType() == RecoverySource.Type.EMPTY_STORE;
// checks for exact byte comparisons
if (freeBytes < diskThresholdSettings.getFreeBytesThresholdLow().getBytes()) {
if (skipLowTresholdChecks == false) {
if (logger.isDebugEnabled()) {
logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, preventing allocation", diskThresholdSettings.getFreeBytesThresholdLow(), freeBytes, node.nodeId());
}
return allocation.decision(Decision.NO, NAME,
    "the node is above the low watermark cluster setting [%s=%s], having less than the minimum required [%s] free " +
    "space, actual free: [%s]",
    CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(),
    diskThresholdSettings.getLowWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdLow(),
    new ByteSizeValue(freeBytes));
} else if (freeBytes > diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
// a never-allocated empty primary may be allocated as long as the node is under the high watermark
if (logger.isDebugEnabled()) {
logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, " + "but allowing allocation because primary has never been allocated", diskThresholdSettings.getFreeBytesThresholdLow(), freeBytes, node.nodeId());
}
return allocation.decision(Decision.YES, NAME,
    "the node is above the low watermark, but less than the high watermark, and this primary shard has " +
    "never been allocated before");
} else {
// above the high watermark, so don't allow allocating the shard
if (logger.isDebugEnabled()) {
logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, " + "preventing allocation even though primary has never been allocated", diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytes, node.nodeId());
}
return allocation.decision(Decision.NO, NAME,
    "the node is above the high watermark cluster setting [%s=%s], having less than the minimum required [%s] free " +
    "space, actual free: [%s]",
    CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(),
    diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdHigh(),
    new ByteSizeValue(freeBytes));
}
}
// checks for percentage comparisons
if (freeDiskPercentage < diskThresholdSettings.getFreeDiskThresholdLow()) {
// If the shard is a replica or is a non-empty primary, check the low threshold
if (skipLowTresholdChecks == false) {
if (logger.isDebugEnabled()) {
logger.debug("more than the allowed {} used disk threshold ({} used) on node [{}], preventing allocation", Strings.format1Decimals(usedDiskThresholdLow, "%"), Strings.format1Decimals(usedDiskPercentage, "%"), node.nodeId());
}
return allocation.decision(Decision.NO, NAME,
    "the node is above the low watermark cluster setting [%s=%s], using more disk space than the maximum allowed " +
    "[%s%%], actual free: [%s%%]",
    CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(),
    diskThresholdSettings.getLowWatermarkRaw(), usedDiskThresholdLow, freeDiskPercentage);
} else if (freeDiskPercentage > diskThresholdSettings.getFreeDiskThresholdHigh()) {
// a never-allocated empty primary may be allocated as long as the node is under the high watermark
if (logger.isDebugEnabled()) {
logger.debug("more than the allowed {} used disk threshold ({} used) on node [{}], " + "but allowing allocation because primary has never been allocated", Strings.format1Decimals(usedDiskThresholdLow, "%"), Strings.format1Decimals(usedDiskPercentage, "%"), node.nodeId());
}
return allocation.decision(Decision.YES, NAME,
    "the node is above the low watermark, but less than the high watermark, and this primary shard has " +
    "never been allocated before");
} else {
// above the high watermark, so don't allow allocating the shard
if (logger.isDebugEnabled()) {
logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, " + "preventing allocation even though primary has never been allocated", Strings.format1Decimals(diskThresholdSettings.getFreeDiskThresholdHigh(), "%"), Strings.format1Decimals(freeDiskPercentage, "%"), node.nodeId());
}
return allocation.decision(Decision.NO, NAME,
    "the node is above the high watermark cluster setting [%s=%s], using more disk space than the maximum allowed " +
    "[%s%%], actual free: [%s%%]",
    CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(),
    diskThresholdSettings.getHighWatermarkRaw(), usedDiskThresholdHigh, freeDiskPercentage);
}
}
// Secondly, check that allocating the shard to this node doesn't put it above the high watermark
final long shardSize = getExpectedShardSize(shardRouting, allocation, 0);
double freeSpaceAfterShard = freeDiskPercentageAfterShardAssigned(usage, shardSize);
long freeBytesAfterShard = freeBytes - shardSize;
if (freeBytesAfterShard < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
logger.warn("after allocating, node [{}] would have less than the required " + "{} free bytes threshold ({} bytes free), preventing allocation", node.nodeId(), diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesAfterShard);
return allocation.decision(Decision.NO, NAME, "allocating the shard to this node will bring the node above the high watermark cluster setting [%s=%s] " + "and cause it to have less than the minimum required [%s] of free space (free bytes after shard added: [%s])", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytesAfterShard));
}
if (freeSpaceAfterShard < diskThresholdSettings.getFreeDiskThresholdHigh()) {
logger.warn("after allocating, node [{}] would have more than the allowed " + "{} free disk threshold ({} free), preventing allocation", node.nodeId(), Strings.format1Decimals(diskThresholdSettings.getFreeDiskThresholdHigh(), "%"), Strings.format1Decimals(freeSpaceAfterShard, "%"));
return allocation.decision(Decision.NO, NAME, "allocating the shard to this node will bring the node above the high watermark cluster setting [%s=%s] " + "and cause it to use more disk space than the maximum allowed [%s%%] (free space after shard added: [%s%%])", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), usedDiskThresholdHigh, freeSpaceAfterShard);
}
return allocation.decision(Decision.YES, NAME,
    "enough disk for shard on node, free: [%s], shard size: [%s], free after allocating shard: [%s]",
    new ByteSizeValue(freeBytes), new ByteSizeValue(shardSize), new ByteSizeValue(freeBytesAfterShard));
}
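The freeDiskPercentageAfterShardAssigned call above projects the node's free-space percentage as if the shard were already on disk. A hedged restatement of that arithmetic, consistent with the byte-level check beside it (this standalone helper is ours, not the DiskThresholdDecider method itself):
// Sketch only: mirrors the percentage math implied by the decision messages above.
static double freeDiskPercentageAfterShardAssigned(org.elasticsearch.cluster.DiskUsage usage, long shardSize) {
    double totalBytes = usage.getTotalBytes();
    if (totalBytes == 0) {
        return 100.0;    // an empty snapshot is treated as fully free, matching DiskUsage's convention
    }
    double freeBytesAfterShard = usage.getFreeBytes() - shardSize;
    return 100.0 * freeBytesAfterShard / totalBytes;
}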
Use of org.elasticsearch.cluster.DiskUsage in project elasticsearch by elastic.
From class DiskThresholdDecider, method getDiskUsage:
private DiskUsage getDiskUsage(RoutingNode node, RoutingAllocation allocation,
                               ImmutableOpenMap<String, DiskUsage> usages, boolean subtractLeavingShards) {
DiskUsage usage = usages.get(node.nodeId());
if (usage == null) {
// If there is no usage, and we have other nodes in the cluster,
// use the average usage for all nodes as the usage for this node
usage = averageUsage(node, usages);
if (logger.isDebugEnabled()) {
logger.debug("unable to determine disk usage for {}, defaulting to average across nodes [{} total] [{} free] [{}% free]", node.nodeId(), usage.getTotalBytes(), usage.getFreeBytes(), usage.getFreeDiskAsPercentage());
}
}
if (diskThresholdSettings.includeRelocations()) {
long relocatingShardsSize = sizeOfRelocatingShards(node, allocation, subtractLeavingShards, usage.getPath());
DiskUsage usageIncludingRelocations = new DiskUsage(node.nodeId(), node.node().getName(),
    usage.getPath(), usage.getTotalBytes(), usage.getFreeBytes() - relocatingShardsSize);
if (logger.isTraceEnabled()) {
logger.trace("usage without relocations: {}", usage);
logger.trace("usage with relocations: [{} bytes] {}", relocatingShardsSize, usageIncludingRelocations);
}
usage = usageIncludingRelocations;
}
return usage;
}
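Both getDiskUsage variants fall back to averageUsage when a node has no reported usage. A sketch of that fallback, inferred from the debug messages above (sum the known totals and free bytes, divide by the node count, and mark the synthetic entry with path "_na_"); it assumes imports of RoutingNode, ImmutableOpenMap, and hppc's ObjectCursor, and should be read as an approximation rather than the shipped helper:
// Approximation of the fallback referenced by the debug logging above.
static DiskUsage averageUsage(RoutingNode node, ImmutableOpenMap<String, DiskUsage> usages) {
    if (usages.size() == 0) {
        // no data at all: report an empty synthetic usage for this node
        return new DiskUsage(node.nodeId(), node.node().getName(), "_na_", 0, 0);
    }
    long totalBytes = 0;
    long freeBytes = 0;
    for (ObjectCursor<DiskUsage> cursor : usages.values()) {
        totalBytes += cursor.value.getTotalBytes();
        freeBytes += cursor.value.getFreeBytes();
    }
    return new DiskUsage(node.nodeId(), node.node().getName(), "_na_",
        totalBytes / usages.size(), freeBytes / usages.size());
}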