use of org.elasticsearch.cluster.DiskUsage in project crate by crate.
the class DiskThresholdMonitor method onNewInfo.
/**
 * Evaluates the least-available disk usage of every node in {@code info} against the
 * flood-stage, high and low disk thresholds.
 * <ul>
 *   <li>Node below a flood-stage threshold: every index with a shard on that node is
 *       collected so it can be marked read-only.</li>
 *   <li>Node below a high threshold: a reroute is requested if the reroute interval has
 *       elapsed since the last run; the node is remembered as having passed a watermark.</li>
 *   <li>Node below a low threshold: the node is remembered as having passed a watermark.</li>
 *   <li>Node above all thresholds but previously remembered: a reroute is requested if the
 *       reroute interval has elapsed, and the node is then forgotten.</li>
 * </ul>
 * A threshold counts as crossed when either the free bytes or the free percentage is below
 * the configured value. The method returns immediately if another check is still in progress.
 *
 * @param info the cluster info whose per-node least-available disk usages are inspected
 */
public void onNewInfo(ClusterInfo info) {
    if (checkInProgress.compareAndSet(false, true) == false) {
        LOGGER.info("skipping monitor as a check is already in progress");
        return;
    }
    final ImmutableOpenMap<String, DiskUsage> usages = info.getNodeLeastAvailableDiskUsages();
    if (usages == null) {
        // Nothing to evaluate; release the in-progress flag again.
        checkFinished();
        return;
    }
    boolean reroute = false;
    String explanation = "";
    final long currentTimeMillis = currentTimeMillisSupplier.getAsLong();

    // Garbage collect nodes that have been removed from the cluster from the map that tracks
    // watermark crossing. removeIf is used instead of removing inside a for-each loop so that
    // the cleanup is safe regardless of the concrete collection type.
    final ObjectLookupContainer<String> nodes = usages.keys();
    nodeHasPassedWatermark.removeIf(node -> nodes.contains(node) == false);

    final ClusterState state = clusterStateSupplier.get();
    final Set<String> indicesToMarkReadOnly = new HashSet<>();
    for (final ObjectObjectCursor<String, DiskUsage> entry : usages) {
        final String node = entry.key;
        final DiskUsage usage = entry.value;
        warnAboutDiskIfNeeded(usage);
        if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdFloodStage().getBytes()
            || usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdFloodStage()) {
            final RoutingNode routingNode = state.getRoutingNodes().node(node);
            // routingNode may be null, e.g. if we haven't got the full cluster-state yet
            if (routingNode != null) {
                for (ShardRouting routing : routingNode) {
                    indicesToMarkReadOnly.add(routing.index().getName());
                }
            }
        } else if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()
            || usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdHigh()) {
            if (lastRunTimeMillis.get() < currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) {
                reroute = true;
                explanation = "high disk watermark exceeded on one or more nodes";
            } else {
                LOGGER.debug(
                    "high disk watermark exceeded on {} but an automatic reroute has occurred in the last [{}], skipping reroute",
                    node,
                    diskThresholdSettings.getRerouteInterval());
            }
            nodeHasPassedWatermark.add(node);
        } else if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdLow().getBytes()
            || usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdLow()) {
            nodeHasPassedWatermark.add(node);
        } else {
            if (nodeHasPassedWatermark.contains(node)) {
                // The node was previously recorded as having passed a watermark but is now above
                // all thresholds, so reroute to give waiting shards a chance to be allocated.
                if (lastRunTimeMillis.get() < currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) {
                    reroute = true;
                    explanation = "one or more nodes has gone under the high or low watermark";
                    nodeHasPassedWatermark.remove(node);
                } else {
                    LOGGER.debug(
                        "{} has gone below a disk threshold, but an automatic reroute has occurred in the last [{}], skipping reroute",
                        node,
                        diskThresholdSettings.getRerouteInterval());
                }
            }
        }
    }

    // The grouped listener is sized for exactly two completions: one from the reroute step and
    // one from the read-only step below. Each step must therefore report exactly once, even
    // when it has nothing to do.
    final ActionListener<Void> listener = new GroupedActionListener<>(ActionListener.wrap(this::checkFinished), 2);

    if (reroute) {
        LOGGER.info("rerouting shards: [{}]", explanation);
        rerouteService.reroute("disk threshold monitor", Priority.HIGH, ActionListener.wrap(r -> {
            setLastRunTimeMillis();
            listener.onResponse(r);
        }, e -> {
            LOGGER.debug("reroute failed", e);
            setLastRunTimeMillis();
            listener.onFailure(e);
        }));
    } else {
        listener.onResponse(null);
    }

    // Skip indices that already carry a WRITE-level block.
    indicesToMarkReadOnly.removeIf(index -> state.getBlocks().indexBlocked(ClusterBlockLevel.WRITE, index));
    if (indicesToMarkReadOnly.isEmpty() == false) {
        markIndicesReadOnly(indicesToMarkReadOnly, ActionListener.wrap(r -> {
            setLastRunTimeMillis();
            listener.onResponse(r);
        }, e -> {
            LOGGER.debug("marking indices readonly failed", e);
            setLastRunTimeMillis();
            listener.onFailure(e);
        }));
    } else {
        listener.onResponse(null);
    }
}
use of org.elasticsearch.cluster.DiskUsage in project crate by crate.
the class DiskThresholdDeciderTests method testDiskUsageWithRelocations.
/**
 * Exercises {@code DiskUsageWithRelocations}: free bytes and free/used percentages must
 * reflect the wrapped disk usage adjusted by the relocating-shard size, including values
 * at the {@code Long.MAX_VALUE} boundary.
 */
@Test
public void testDiskUsageWithRelocations() {
    // Free bytes: the relocation size is subtracted from the reported free bytes.
    assertThat(relocatingUsage(1000L, 1000L, 0).getFreeBytes(), equalTo(1000L));
    assertThat(relocatingUsage(1000L, 1000L, 9).getFreeBytes(), equalTo(991L));
    assertThat(relocatingUsage(1000L, 1000L, -9).getFreeBytes(), equalTo(1009L));

    // Free percentage.
    assertThat(relocatingUsage(1000L, 1000L, 0).getFreeDiskAsPercentage(), equalTo(100.0));
    assertThat(relocatingUsage(1000L, 500L, 0).getFreeDiskAsPercentage(), equalTo(50.0));
    assertThat(relocatingUsage(1000L, 500L, 100).getFreeDiskAsPercentage(), equalTo(40.0));

    // Used percentage.
    assertThat(relocatingUsage(1000L, 1000L, 0).getUsedDiskAsPercentage(), equalTo(0.0));
    assertThat(relocatingUsage(1000L, 500L, 0).getUsedDiskAsPercentage(), equalTo(50.0));
    assertThat(relocatingUsage(1000L, 500L, 100).getUsedDiskAsPercentage(), equalTo(60.0));

    // Boundary values: a negative relocation size on a maximal disk is expected to stay at Long.MAX_VALUE.
    assertThat(relocatingUsage(Long.MAX_VALUE, Long.MAX_VALUE, 0).getFreeBytes(), equalTo(Long.MAX_VALUE));
    assertThat(relocatingUsage(Long.MAX_VALUE, Long.MAX_VALUE, 10).getFreeBytes(), equalTo(Long.MAX_VALUE - 10));
    assertThat(relocatingUsage(Long.MAX_VALUE, Long.MAX_VALUE, -10).getFreeBytes(), equalTo(Long.MAX_VALUE));
}

/** Builds a fresh {@code DiskUsageWithRelocations} for node "n" on path "/dev/null". */
private static DiskThresholdDecider.DiskUsageWithRelocations relocatingUsage(long totalBytes,
                                                                            long freeBytes,
                                                                            long relocatingShardSize) {
    final DiskUsage diskUsage = new DiskUsage("n", "n", "/dev/null", totalBytes, freeBytes);
    return new DiskThresholdDecider.DiskUsageWithRelocations(diskUsage, relocatingShardSize);
}
use of org.elasticsearch.cluster.DiskUsage in project crate by crate.
the class DiskThresholdDeciderTests method testFreeDiskPercentageAfterShardAssigned.
/**
 * Verifies the free-disk percentage the decider predicts after assigning a shard:
 * a disk of 100 bytes with 30 bytes free and no relocations is expected to have
 * 19% free once an 11-byte shard is assigned.
 */
public void testFreeDiskPercentageAfterShardAssigned() {
    DiskThresholdDecider decider = makeDecider(Settings.EMPTY);
    // 70% used, nothing relocating
    DiskThresholdDecider.DiskUsageWithRelocations usage =
        new DiskThresholdDecider.DiskUsageWithRelocations(new DiskUsage("node2", "n2", "/dev/null", 100, 30), 0L);
    Double after = decider.freeDiskPercentageAfterShardAssigned(usage, 11L);
    assertThat(after, equalTo(19.0));
}
use of org.elasticsearch.cluster.DiskUsage in project crate by crate.
the class DiskThresholdDeciderTests method testCanRemainWithShardRelocatingAway.
/**
 * Checks {@code canRemain}/{@code canAllocate} decisions on a node that is above the high
 * watermark, first with both "test" shards started on node1 and then with shard 1
 * relocating to node2, and finally checks that a reroute leaves the routing table untouched.
 */
public void testCanRemainWithShardRelocatingAway() {
    final Settings thresholdSettings = Settings.builder()
        .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true)
        .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS_SETTING.getKey(), true)
        .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "60%")
        .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), "70%")
        .build();

    // Index "test" has 2 primaries of 40 bytes each; every node has a 100 byte disk.
    // node1 has 20 bytes free (80% used), node2 has 100 bytes free (0% used).
    final ImmutableOpenMap.Builder<String, DiskUsage> diskUsages = ImmutableOpenMap.builder();
    diskUsages.put("node1", new DiskUsage("node1", "n1", "/dev/null", 100, 20));
    diskUsages.put("node2", new DiskUsage("node2", "n2", "/dev/null", 100, 100));
    final ImmutableOpenMap<String, DiskUsage> nodeUsages = diskUsages.build();

    final ImmutableOpenMap.Builder<String, Long> sizes = ImmutableOpenMap.builder();
    sizes.put("[test][0][p]", 40L);
    sizes.put("[test][1][p]", 40L);
    sizes.put("[foo][0][p]", 10L);
    final ImmutableOpenMap<String, Long> sizeByShard = sizes.build();

    final ClusterInfo clusterInfo = new DevNullClusterInfo(nodeUsages, nodeUsages, sizeByShard);
    final DiskThresholdDecider decider = makeDecider(thresholdSettings);

    final Metadata metadata = Metadata.builder()
        .put(IndexMetadata.builder("test")
            .settings(Settings.builder()
                .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2)
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
                .put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "false")))
        .put(IndexMetadata.builder("foo")
            .settings(Settings.builder()
                .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
                .put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "false")))
        .build();
    final RoutingTable freshRoutingTable = RoutingTable.builder()
        .addAsNew(metadata.index("test"))
        .addAsNew(metadata.index("foo"))
        .build();

    final DiscoveryNode node1 =
        new DiscoveryNode("node1", buildNewFakeTransportAddress(), emptyMap(), MASTER_DATA_ROLES, Version.CURRENT);
    final DiscoveryNode node2 =
        new DiscoveryNode("node2", buildNewFakeTransportAddress(), emptyMap(), MASTER_DATA_ROLES, Version.CURRENT);
    final DiscoveryNodes bothNodes = DiscoveryNodes.builder().add(node1).add(node2).build();
    final ClusterState baseState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(metadata)
        .routingTable(freshRoutingTable)
        .nodes(bothNodes)
        .build();

    // Scenario 1: both "test" primaries are started on node1, which is 80% used while only
    // 70% is allowed, so shard 0 may not remain there.
    final ShardRouting startedShard0 =
        TestShardRouting.newShardRouting("test", 0, "node1", null, true, ShardRoutingState.STARTED);
    final ShardRouting startedShard1 =
        TestShardRouting.newShardRouting("test", 1, "node1", null, true, ShardRoutingState.STARTED);
    final RoutingNode node1BothStarted = new RoutingNode("node1", node1, startedShard0, startedShard1);
    final RoutingTable.Builder startedTable = RoutingTable.builder()
        .add(IndexRoutingTable.builder(startedShard0.index())
            .addIndexShard(new IndexShardRoutingTable.Builder(startedShard0.shardId()).addShard(startedShard0).build())
            .addIndexShard(new IndexShardRoutingTable.Builder(startedShard1.shardId()).addShard(startedShard1).build()));
    final ClusterState startedState = ClusterState.builder(baseState).routingTable(startedTable.build()).build();
    final RoutingAllocation startedAllocation =
        new RoutingAllocation(null, new RoutingNodes(startedState), startedState, clusterInfo, System.nanoTime());
    startedAllocation.debugDecision(true);

    final Decision cannotRemain = decider.canRemain(startedShard0, node1BothStarted, startedAllocation);
    assertThat(cannotRemain.type(), equalTo(Decision.Type.NO));
    assertThat(cannotRemain.getExplanation(), containsString(
        "the shard cannot remain on this node because it is above the high watermark cluster setting "
            + "[cluster.routing.allocation.disk.watermark.high=70%] and there is less than the required [30.0%] free disk on node, "
            + "actual free: [20.0%]"));

    // Scenario 2: same layout, but shard 1 is relocating from node1 to node2, so shard 0 can stay.
    final ShardRouting stayingShard0 =
        TestShardRouting.newShardRouting("test", 0, "node1", null, true, ShardRoutingState.STARTED);
    final ShardRouting relocatingShard1 =
        TestShardRouting.newShardRouting("test", 1, "node1", "node2", true, ShardRoutingState.RELOCATING);
    final ShardRouting unassignedFoo =
        TestShardRouting.newShardRouting("foo", 0, null, true, ShardRoutingState.UNASSIGNED);
    final RoutingNode node1WithRelocation = new RoutingNode("node1", node1, stayingShard0, relocatingShard1);
    final RoutingTable.Builder relocatingTable = RoutingTable.builder()
        .add(IndexRoutingTable.builder(stayingShard0.index())
            .addIndexShard(new IndexShardRoutingTable.Builder(stayingShard0.shardId()).addShard(stayingShard0).build())
            .addIndexShard(new IndexShardRoutingTable.Builder(relocatingShard1.shardId()).addShard(relocatingShard1).build()));
    final ClusterState relocatingState = ClusterState.builder(baseState).routingTable(relocatingTable.build()).build();
    final RoutingAllocation relocatingAllocation =
        new RoutingAllocation(null, new RoutingNodes(relocatingState), relocatingState, clusterInfo, System.nanoTime());
    relocatingAllocation.debugDecision(true);

    final Decision canRemain = decider.canRemain(stayingShard0, node1WithRelocation, relocatingAllocation);
    assertThat(canRemain.type(), equalTo(Decision.Type.YES));
    assertEquals("there is enough disk on this node for the shard to remain, free: [60b]", canRemain.getExplanation());

    // Allocating the unassigned "foo" shard to node1 is refused; the wording of the
    // explanation depends on the shard's recovery source type.
    final Decision cannotAllocate = decider.canAllocate(unassignedFoo, node1WithRelocation, relocatingAllocation);
    assertThat(cannotAllocate.type(), equalTo(Decision.Type.NO));
    final String expectedExplanation = unassignedFoo.recoverySource().getType() == RecoverySource.Type.EMPTY_STORE
        ? "the node is above the high watermark cluster setting [cluster.routing.allocation.disk.watermark.high=70%], using "
            + "more disk space than the maximum allowed [70.0%], actual free: [20.0%]"
        : "the node is above the low watermark cluster setting [cluster.routing.allocation.disk.watermark.low=60%], using more "
            + "disk space than the maximum allowed [60.0%], actual free: [20.0%]";
    assertThat(cannotAllocate.getExplanation(), containsString(expectedExplanation));

    // Wire up an AllocationService backed by the fixed cluster info from above.
    final ClusterInfoService fixedInfoService = () -> {
        logger.info("--> calling fake getClusterInfo");
        return clusterInfo;
    };
    final AllocationDeciders allDeciders = new AllocationDeciders(new HashSet<>(Arrays.asList(
        new SameShardAllocationDecider(
            Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)),
        decider)));
    final AllocationService allocationService = new AllocationService(
        allDeciders, new TestGatewayAllocator(), new BalancedShardsAllocator(Settings.EMPTY), fixedInfoService);

    // A reroute must not change the routing table: shard 1 is already relocating away,
    // which leaves node1 with sufficient disk space for shard 0.
    final ClusterState rerouted = allocationService.reroute(relocatingState, "reroute");
    assertThat(rerouted, equalTo(relocatingState));

    final ShardRouting primary0 = rerouted.routingTable().index("test").getShards().get(0).primaryShard();
    assertThat(primary0.state(), equalTo(STARTED));
    assertThat(primary0.currentNodeId(), equalTo("node1"));
    assertThat(primary0.relocatingNodeId(), nullValue());

    final ShardRouting primary1 = rerouted.routingTable().index("test").getShards().get(1).primaryShard();
    assertThat(primary1.state(), equalTo(RELOCATING));
    assertThat(primary1.currentNodeId(), equalTo("node1"));
    assertThat(primary1.relocatingNodeId(), equalTo("node2"));
}
use of org.elasticsearch.cluster.DiskUsage in project crate by crate.
the class DiskThresholdDeciderUnitTests method testCanRemainUsesLeastAvailableSpace.
/**
 * Verifies that {@code canRemain} judges a shard against the disk usage of the path it
 * is recorded on in the cluster info:
 * <ul>
 *   <li>{@code test_0} on node_0's least-available path (10% free) may remain;</li>
 *   <li>{@code test_1} on node_1's least-available path (9% free) may not remain;</li>
 *   <li>asking about a shard on a node it is not allocated to throws
 *       {@link IllegalArgumentException};</li>
 *   <li>{@code test_2}, recorded on node_1's most-available path, may remain;</li>
 *   <li>{@code test_3}, for which no path is recorded at all, may remain.</li>
 * </ul>
 */
@Test
public void testCanRemainUsesLeastAvailableSpace() {
    ClusterSettings nss = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    DiskThresholdDecider decider = new DiskThresholdDecider(Settings.EMPTY, nss);
    // Maps each shard to the data path it is stored on.
    ImmutableOpenMap.Builder<ShardRouting, String> shardRoutingMap = ImmutableOpenMap.builder();

    DiscoveryNode node_0 = new DiscoveryNode("node_0", buildNewFakeTransportAddress(), Collections.emptyMap(),
        new HashSet<>(DiscoveryNodeRole.BUILT_IN_ROLES), Version.CURRENT);
    DiscoveryNode node_1 = new DiscoveryNode("node_1", buildNewFakeTransportAddress(), Collections.emptyMap(),
        new HashSet<>(DiscoveryNodeRole.BUILT_IN_ROLES), Version.CURRENT);

    Metadata metadata = Metadata.builder()
        .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
        .build();
    final IndexMetadata indexMetadata = metadata.index("test");

    // test_0: started on node_0, stored on node_0's least-available path.
    ShardRouting test_0 = ShardRouting.newUnassigned(new ShardId(indexMetadata.getIndex(), 0), true,
        EmptyStoreRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
    test_0 = ShardRoutingHelper.initialize(test_0, node_0.getId());
    test_0 = ShardRoutingHelper.moveToStarted(test_0);
    shardRoutingMap.put(test_0, "/node0/least");

    // test_1: started on node_1, stored on node_1's least-available path.
    ShardRouting test_1 = ShardRouting.newUnassigned(new ShardId(indexMetadata.getIndex(), 1), true,
        EmptyStoreRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
    test_1 = ShardRoutingHelper.initialize(test_1, node_1.getId());
    test_1 = ShardRoutingHelper.moveToStarted(test_1);
    shardRoutingMap.put(test_1, "/node1/least");

    // test_2: started on node_1, stored on node_1's most-available path.
    ShardRouting test_2 = ShardRouting.newUnassigned(new ShardId(indexMetadata.getIndex(), 2), true,
        EmptyStoreRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
    test_2 = ShardRoutingHelper.initialize(test_2, node_1.getId());
    test_2 = ShardRoutingHelper.moveToStarted(test_2);
    shardRoutingMap.put(test_2, "/node1/most");

    // test_3: started on node_1.
    ShardRouting test_3 = ShardRouting.newUnassigned(new ShardId(indexMetadata.getIndex(), 3), true,
        EmptyStoreRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
    test_3 = ShardRoutingHelper.initialize(test_3, node_1.getId());
    test_3 = ShardRoutingHelper.moveToStarted(test_3);
    // Intentionally not in the shardRoutingMap. We want to test what happens when we don't know where it is.

    RoutingTable routingTable = RoutingTable.builder().addAsNew(indexMetadata).build();
    ClusterState clusterState = ClusterState.builder(
            org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(metadata)
        .routingTable(routingTable)
        .build();
    logger.info("--> adding two nodes");
    clusterState = ClusterState.builder(clusterState)
        .nodes(DiscoveryNodes.builder().add(node_0).add(node_1))
        .build();

    // actual test -- after all that bloat :)
    ImmutableOpenMap.Builder<String, DiskUsage> leastAvailableUsages = ImmutableOpenMap.builder();
    // 90% used
    leastAvailableUsages.put("node_0", new DiskUsage("node_0", "node_0", "/node0/least", 100, 10));
    // 91% used
    leastAvailableUsages.put("node_1", new DiskUsage("node_1", "node_1", "/node1/least", 100, 9));

    ImmutableOpenMap.Builder<String, DiskUsage> mostAvailableUsage = ImmutableOpenMap.builder();
    // 10% used
    mostAvailableUsage.put("node_0", new DiskUsage("node_0", "node_0", "/node0/most", 100, 90));
    // 10% used
    mostAvailableUsage.put("node_1", new DiskUsage("node_1", "node_1", "/node1/most", 100, 90));

    ImmutableOpenMap.Builder<String, Long> shardSizes = ImmutableOpenMap.builder();
    // 10 bytes
    shardSizes.put("[test][0][p]", 10L);
    shardSizes.put("[test][1][p]", 10L);
    shardSizes.put("[test][2][p]", 10L);

    final ClusterInfo clusterInfo = new ClusterInfo(
        leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), shardRoutingMap.build());
    RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Collections.singleton(decider)),
        clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime());
    allocation.debugDecision(true);

    Decision decision = decider.canRemain(test_0, new RoutingNode("node_0", node_0), allocation);
    assertEquals(Decision.Type.YES, decision.type());
    assertThat(decision.getExplanation(),
        containsString("there is enough disk on this node for the shard to remain, free: [10b]"));

    decision = decider.canRemain(test_1, new RoutingNode("node_1", node_1), allocation);
    assertEquals(Decision.Type.NO, decision.type());
    assertThat(decision.getExplanation(), containsString("the shard cannot remain on this node because it is "
        + "above the high watermark cluster setting [cluster.routing.allocation.disk.watermark.high=90%] and there is less than "
        + "the required [10.0%] free disk on node, actual free: [9.0%]"));

    try {
        decider.canRemain(test_0, new RoutingNode("node_1", node_1), allocation);
        fail("not allocated on this node");
    } catch (IllegalArgumentException ex) {
        // expected: test_0 is not allocated on node_1
    }
    try {
        decider.canRemain(test_1, new RoutingNode("node_0", node_0), allocation);
        fail("not allocated on this node");
    } catch (IllegalArgumentException ex) {
        // expected: test_1 is not allocated on node_0
    }

    decision = decider.canRemain(test_2, new RoutingNode("node_1", node_1), allocation);
    assertEquals("can stay since allocated on a different path with enough space", Decision.Type.YES, decision.type());
    assertThat(decision.getExplanation(),
        containsString("this shard is not allocated on the most utilized disk and can remain"));

    // Use test_3 here (not test_2 again): it is the shard with no recorded data path, which is
    // the case the assertion message below describes.
    decision = decider.canRemain(test_3, new RoutingNode("node_1", node_1), allocation);
    assertEquals("can stay since we don't have information about this shard", Decision.Type.YES, decision.type());
    assertThat(decision.getExplanation(),
        containsString("this shard is not allocated on the most utilized disk and can remain"));
}
Aggregations