use of org.elasticsearch.cluster.DiskUsage in project elasticsearch by elastic.
the class DiskThresholdMonitor method onNewInfo.
@Override
public void onNewInfo(ClusterInfo info) {
ImmutableOpenMap<String, DiskUsage> usages = info.getNodeLeastAvailableDiskUsages();
if (usages != null) {
boolean reroute = false;
String explanation = "";
// Garbage collect nodes that have been removed from the cluster
// from the map that tracks watermark crossing
ObjectLookupContainer<String> nodes = usages.keys();
for (String node : nodeHasPassedWatermark) {
if (nodes.contains(node) == false) {
nodeHasPassedWatermark.remove(node);
}
}
for (ObjectObjectCursor<String, DiskUsage> entry : usages) {
String node = entry.key;
DiskUsage usage = entry.value;
warnAboutDiskIfNeeded(usage);
if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes() || usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdHigh()) {
if ((System.nanoTime() - lastRunNS) > diskThresholdSettings.getRerouteInterval().nanos()) {
lastRunNS = System.nanoTime();
reroute = true;
explanation = "high disk watermark exceeded on one or more nodes";
} else {
logger.debug("high disk watermark exceeded on {} but an automatic reroute has occurred " + "in the last [{}], skipping reroute", node, diskThresholdSettings.getRerouteInterval());
}
nodeHasPassedWatermark.add(node);
} else if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdLow().getBytes() || usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdLow()) {
nodeHasPassedWatermark.add(node);
} else {
if (nodeHasPassedWatermark.contains(node)) {
// if they are able to be
if ((System.nanoTime() - lastRunNS) > diskThresholdSettings.getRerouteInterval().nanos()) {
lastRunNS = System.nanoTime();
reroute = true;
explanation = "one or more nodes has gone under the high or low watermark";
nodeHasPassedWatermark.remove(node);
} else {
logger.debug("{} has gone below a disk threshold, but an automatic reroute has occurred " + "in the last [{}], skipping reroute", node, diskThresholdSettings.getRerouteInterval());
}
}
}
}
if (reroute) {
logger.info("rerouting shards: [{}]", explanation);
// Execute an empty reroute, but don't block on the response
client.admin().cluster().prepareReroute().execute();
}
}
}
use of org.elasticsearch.cluster.DiskUsage in project elasticsearch by elastic.
the class DiskThresholdDeciderTests method testAverageUsage.
public void testAverageUsage() {
RoutingNode rn = new RoutingNode("node1", newNode("node1"));
DiskThresholdDecider decider = makeDecider(Settings.EMPTY);
ImmutableOpenMap.Builder<String, DiskUsage> usages = ImmutableOpenMap.builder();
// 50% used
usages.put("node2", new DiskUsage("node2", "n2", "/dev/null", 100, 50));
// 100% used
usages.put("node3", new DiskUsage("node3", "n3", "/dev/null", 100, 0));
DiskUsage node1Usage = decider.averageUsage(rn, usages.build());
assertThat(node1Usage.getTotalBytes(), equalTo(100L));
assertThat(node1Usage.getFreeBytes(), equalTo(25L));
}
use of org.elasticsearch.cluster.DiskUsage in project elasticsearch by elastic.
the class DiskThresholdDeciderTests method testUnknownDiskUsage.
public void testUnknownDiskUsage() {
Settings diskSettings = Settings.builder().put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), 0.7).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), 0.85).build();
ImmutableOpenMap.Builder<String, DiskUsage> usagesBuilder = ImmutableOpenMap.builder();
// 50% used
usagesBuilder.put("node2", new DiskUsage("node2", "node2", "/dev/null", 100, 50));
// 100% used
usagesBuilder.put("node3", new DiskUsage("node3", "node3", "/dev/null", 100, 0));
ImmutableOpenMap<String, DiskUsage> usages = usagesBuilder.build();
ImmutableOpenMap.Builder<String, Long> shardSizesBuilder = ImmutableOpenMap.builder();
// 10 bytes
shardSizesBuilder.put("[test][0][p]", 10L);
// 10 bytes
shardSizesBuilder.put("[test][0][r]", 10L);
ImmutableOpenMap<String, Long> shardSizes = shardSizesBuilder.build();
final ClusterInfo clusterInfo = new DevNullClusterInfo(usages, usages, shardSizes);
AllocationDeciders deciders = new AllocationDeciders(Settings.EMPTY, new HashSet<>(Arrays.asList(new SameShardAllocationDecider(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), makeDecider(diskSettings))));
ClusterInfoService cis = new ClusterInfoService() {
@Override
public ClusterInfo getClusterInfo() {
logger.info("--> calling fake getClusterInfo");
return clusterInfo;
}
@Override
public void addListener(Listener listener) {
// noop
}
};
AllocationService strategy = new AllocationService(Settings.builder().put("cluster.routing.allocation.node_concurrent_recoveries", 10).put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always").put("cluster.routing.allocation.cluster_concurrent_rebalance", -1).build(), deciders, new TestGatewayAllocator(), new BalancedShardsAllocator(Settings.EMPTY), cis);
MetaData metaData = MetaData.builder().put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)).build();
RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build();
ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(routingTable).build();
logger.info("--> adding node1");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(// node3 is added because DiskThresholdDecider automatically ignore single-node clusters
newNode("node3"))).build();
routingTable = strategy.reroute(clusterState, "reroute").routingTable();
clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
// Shard can be allocated to node1, even though it only has 25% free,
// because it's a primary that's never been allocated before
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
logger.info("--> start the shards (primaries)");
routingTable = strategy.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)).routingTable();
clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
logShardStates(clusterState);
// A single shard is started on node1, even though it normally would not
// be allowed, because it's a primary that hasn't been allocated, and node1
// is still below the high watermark (unlike node3)
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
}
use of org.elasticsearch.cluster.DiskUsage in project elasticsearch by elastic.
the class DiskThresholdDeciderTests method testDiskThreshold.
public void testDiskThreshold() {
Settings diskSettings = Settings.builder().put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), 0.7).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), 0.8).build();
ImmutableOpenMap.Builder<String, DiskUsage> usagesBuilder = ImmutableOpenMap.builder();
// 90% used
usagesBuilder.put("node1", new DiskUsage("node1", "node1", "/dev/null", 100, 10));
// 65% used
usagesBuilder.put("node2", new DiskUsage("node2", "node2", "/dev/null", 100, 35));
// 40% used
usagesBuilder.put("node3", new DiskUsage("node3", "node3", "/dev/null", 100, 60));
// 20% used
usagesBuilder.put("node4", new DiskUsage("node4", "node4", "/dev/null", 100, 80));
ImmutableOpenMap<String, DiskUsage> usages = usagesBuilder.build();
ImmutableOpenMap.Builder<String, Long> shardSizesBuilder = ImmutableOpenMap.builder();
// 10 bytes
shardSizesBuilder.put("[test][0][p]", 10L);
shardSizesBuilder.put("[test][0][r]", 10L);
ImmutableOpenMap<String, Long> shardSizes = shardSizesBuilder.build();
final ClusterInfo clusterInfo = new DevNullClusterInfo(usages, usages, shardSizes);
ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
AllocationDeciders deciders = new AllocationDeciders(Settings.EMPTY, new HashSet<>(Arrays.asList(new SameShardAllocationDecider(Settings.EMPTY, clusterSettings), makeDecider(diskSettings))));
ClusterInfoService cis = new ClusterInfoService() {
@Override
public ClusterInfo getClusterInfo() {
logger.info("--> calling fake getClusterInfo");
return clusterInfo;
}
@Override
public void addListener(Listener listener) {
// noop
}
};
AllocationService strategy = new AllocationService(Settings.builder().put("cluster.routing.allocation.node_concurrent_recoveries", 10).put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always").put("cluster.routing.allocation.cluster_concurrent_rebalance", -1).build(), deciders, new TestGatewayAllocator(), new BalancedShardsAllocator(Settings.EMPTY), cis);
MetaData metaData = MetaData.builder().put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)).build();
final RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build();
ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(initialRoutingTable).build();
logger.info("--> adding two nodes");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
logShardStates(clusterState);
// Primary shard should be initializing, replica should not
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
logger.info("--> start the shards (primaries)");
clusterState = strategy.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
logShardStates(clusterState);
// Assert that we're able to start the primary
assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));
// Assert that node1 didn't get any shards because its disk usage is too high
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
logger.info("--> start the shards (replicas)");
clusterState = strategy.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
logShardStates(clusterState);
// Assert that the replica couldn't be started since node1 doesn't have enough space
assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));
logger.info("--> adding node3");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node3"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
logShardStates(clusterState);
// Assert that the replica is initialized now that node3 is available with enough space
assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
logger.info("--> start the shards (replicas)");
clusterState = strategy.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
logShardStates(clusterState);
// Assert that the replica couldn't be started since node1 doesn't have enough space
assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2));
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
logger.info("--> changing decider settings");
// Set the low threshold to 60 instead of 70
// Set the high threshold to 70 instead of 80
// node2 now should not have new shards allocated to it, but shards can remain
diskSettings = Settings.builder().put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "60%").put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), 0.7).build();
deciders = new AllocationDeciders(Settings.EMPTY, new HashSet<>(Arrays.asList(new SameShardAllocationDecider(Settings.EMPTY, clusterSettings), makeDecider(diskSettings))));
strategy = new AllocationService(Settings.builder().put("cluster.routing.allocation.node_concurrent_recoveries", 10).put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always").put("cluster.routing.allocation.cluster_concurrent_rebalance", -1).build(), deciders, new TestGatewayAllocator(), new BalancedShardsAllocator(Settings.EMPTY), cis);
clusterState = strategy.reroute(clusterState, "reroute");
logShardStates(clusterState);
// Shards remain started
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2));
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
logger.info("--> changing settings again");
// Set the low threshold to 50 instead of 60
// Set the high threshold to 60 instead of 70
// node2 now should not have new shards allocated to it, and shards cannot remain
diskSettings = Settings.builder().put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), 0.5).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), 0.6).build();
deciders = new AllocationDeciders(Settings.EMPTY, new HashSet<>(Arrays.asList(new SameShardAllocationDecider(Settings.EMPTY, clusterSettings), makeDecider(diskSettings))));
strategy = new AllocationService(Settings.builder().put("cluster.routing.allocation.node_concurrent_recoveries", 10).put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always").put("cluster.routing.allocation.cluster_concurrent_rebalance", -1).build(), deciders, new TestGatewayAllocator(), new BalancedShardsAllocator(Settings.EMPTY), cis);
clusterState = strategy.reroute(clusterState, "reroute");
logShardStates(clusterState);
// Shards remain started
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2));
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
// Shard hasn't been moved off of node2 yet because there's nowhere for it to go
assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
logger.info("--> adding node4");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node4"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
logShardStates(clusterState);
// Shards remain started
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
logger.info("--> apply INITIALIZING shards");
clusterState = strategy.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
logShardStates(clusterState);
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
// Node4 is available now, so the shard is moved off of node2
assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0));
assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1));
}
use of org.elasticsearch.cluster.DiskUsage in project elasticsearch by elastic.
the class DiskThresholdDeciderUnitTests method testCanAllocateUsesMaxAvailableSpace.
public void testCanAllocateUsesMaxAvailableSpace() {
ClusterSettings nss = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
DiskThresholdDecider decider = new DiskThresholdDecider(Settings.EMPTY, nss);
MetaData metaData = MetaData.builder().put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)).build();
final Index index = metaData.index("test").getIndex();
ShardRouting test_0 = ShardRouting.newUnassigned(new ShardId(index, 0), true, StoreRecoverySource.EMPTY_STORE_INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
DiscoveryNode node_0 = new DiscoveryNode("node_0", buildNewFakeTransportAddress(), Collections.emptyMap(), new HashSet<>(Arrays.asList(DiscoveryNode.Role.values())), Version.CURRENT);
DiscoveryNode node_1 = new DiscoveryNode("node_1", buildNewFakeTransportAddress(), Collections.emptyMap(), new HashSet<>(Arrays.asList(DiscoveryNode.Role.values())), Version.CURRENT);
RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build();
ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(routingTable).build();
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(node_0).add(node_1)).build();
// actual test -- after all that bloat :)
ImmutableOpenMap.Builder<String, DiskUsage> leastAvailableUsages = ImmutableOpenMap.builder();
// all full
leastAvailableUsages.put("node_0", new DiskUsage("node_0", "node_0", "_na_", 100, 0));
// all full
leastAvailableUsages.put("node_1", new DiskUsage("node_1", "node_1", "_na_", 100, 0));
ImmutableOpenMap.Builder<String, DiskUsage> mostAvailableUsage = ImmutableOpenMap.builder();
// 20 - 99 percent since after allocation there must be at least 10% left and shard is 10byte
mostAvailableUsage.put("node_0", new DiskUsage("node_0", "node_0", "_na_", 100, randomIntBetween(20, 100)));
// this is weird and smells like a bug! it should be up to 20%?
mostAvailableUsage.put("node_1", new DiskUsage("node_1", "node_1", "_na_", 100, randomIntBetween(0, 10)));
ImmutableOpenMap.Builder<String, Long> shardSizes = ImmutableOpenMap.builder();
// 10 bytes
shardSizes.put("[test][0][p]", 10L);
final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), ImmutableOpenMap.of());
RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, Collections.singleton(decider)), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime(), false);
allocation.debugDecision(true);
Decision decision = decider.canAllocate(test_0, new RoutingNode("node_0", node_0), allocation);
assertEquals(mostAvailableUsage.toString(), Decision.Type.YES, decision.type());
assertThat(((Decision.Single) decision).getExplanation(), containsString("enough disk for shard on node"));
decision = decider.canAllocate(test_0, new RoutingNode("node_1", node_1), allocation);
assertEquals(mostAvailableUsage.toString(), Decision.Type.NO, decision.type());
assertThat(((Decision.Single) decision).getExplanation(), containsString("the node is above the high watermark cluster setting [cluster.routing.allocation.disk.watermark.high=90%], using more " + "disk space than the maximum allowed [90.0%]"));
}
Aggregations