use of org.opensearch.cluster.DiskUsage in project OpenSearch by opensearch-project.
the class DiskThresholdMonitor method onNewInfo.
/**
 * Evaluates a fresh {@link ClusterInfo} against the configured disk thresholds.
 *
 * <p>For every node that has a disk-usage entry this method:
 * <ul>
 *   <li>checks the raw usage against the flood-stage threshold and, if exceeded, collects the indices with
 *       shards on that node so they can be marked read-only;</li>
 *   <li>checks the raw usage against the high threshold to decide which indices must not be auto-released;</li>
 *   <li>checks the usage minus reserved space against the high and low thresholds, updating the
 *       {@code nodesOverLowThreshold}, {@code nodesOverHighThreshold} and
 *       {@code nodesOverHighThresholdAndRelocating} tracking sets and deciding whether a reroute is needed.</li>
 * </ul>
 * It then performs up to three follow-up steps (reroute, releasing the read-only-allow-delete block, marking
 * indices read-only). Each step hands exactly one completion to a shared grouped listener created for three
 * completions and wrapping {@code checkFinished}.
 *
 * <p>If another check is already in progress the update is skipped. If no usage information is available the
 * check is finished immediately.
 *
 * <p>NOTE(review): if an exception escapes between the {@code compareAndSet} guard and the creation of the
 * grouped listener, nothing in this method calls {@code checkFinished()} — confirm whether that can leave the
 * in-progress flag set.
 *
 * @param info the latest cluster info; its per-node least-available disk usages and reserved space are read
 */
public void onNewInfo(ClusterInfo info) {
// Only one check may run at a time: an update that arrives while a check is in progress is logged and skipped.
// TODO find a better way to limit concurrent updates while still letting tests ensure that
// all ClusterInfo updates are processed and never ignored
if (checkInProgress.compareAndSet(false, true) == false) {
logger.info("skipping monitor as a check is already in progress");
return;
}
final ImmutableOpenMap<String, DiskUsage> usages = info.getNodeLeastAvailableDiskUsages();
if (usages == null) {
// Nothing to evaluate; finish the check so that the next update is not skipped.
logger.trace("skipping monitor as no disk usage information is available");
checkFinished();
return;
}
logger.trace("processing new cluster info");
boolean reroute = false;
String explanation = "";
final long currentTimeMillis = currentTimeMillisSupplier.getAsLong();
// Clean up nodes that have been removed from the cluster
final ObjectLookupContainer<String> nodes = usages.keys();
cleanUpRemovedNodes(nodes, nodesOverLowThreshold);
cleanUpRemovedNodes(nodes, nodesOverHighThreshold);
cleanUpRemovedNodes(nodes, nodesOverHighThresholdAndRelocating);
final ClusterState state = clusterStateSupplier.get();
final Set<String> indicesToMarkReadOnly = new HashSet<>();
RoutingNodes routingNodes = state.getRoutingNodes();
Set<String> indicesNotToAutoRelease = new HashSet<>();
// NOTE(review): presumably adds the indices hosted on nodes without a usage entry to indicesNotToAutoRelease;
// the helper is not visible here — confirm.
markNodesMissingUsageIneligibleForRelease(routingNodes, usages, indicesNotToAutoRelease);
// Usages that triggered a reroute in this pass; they are logged once the reroute has completed.
final List<DiskUsage> usagesOverHighThreshold = new ArrayList<>();
for (final ObjectObjectCursor<String, DiskUsage> entry : usages) {
final String node = entry.key;
final DiskUsage usage = entry.value;
final RoutingNode routingNode = routingNodes.node(node);
// Flood stage is evaluated on the raw usage (reserved space is not subtracted here).
if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdFloodStage().getBytes() || usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdFloodStage()) {
nodesOverLowThreshold.add(node);
nodesOverHighThreshold.add(node);
nodesOverHighThresholdAndRelocating.remove(node);
if (routingNode != null) {
// might be temporarily null if the ClusterInfoService and the ClusterService are out of step
for (ShardRouting routing : routingNode) {
String indexName = routing.index().getName();
indicesToMarkReadOnly.add(indexName);
indicesNotToAutoRelease.add(indexName);
}
}
logger.warn("flood stage disk watermark [{}] exceeded on {}, all indices on this node will be marked read-only", diskThresholdSettings.describeFloodStageThreshold(), usage);
// A node over the flood stage needs no further classification in this pass.
continue;
}
// Raw usage over the high threshold: indices with shards on this node keep any existing block.
if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes() || usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdHigh()) {
if (routingNode != null) {
// might be temporarily null if the ClusterInfoService and the ClusterService are out of step
for (ShardRouting routing : routingNode) {
String indexName = routing.index().getName();
indicesNotToAutoRelease.add(indexName);
}
}
}
// The high/low watermark decisions below use free bytes minus the space reserved on this node and path,
// clamped so that it never goes below zero.
final long reservedSpace = info.getReservedSpace(usage.getNodeId(), usage.getPath()).getTotal();
final DiskUsage usageWithReservedSpace = new DiskUsage(usage.getNodeId(), usage.getNodeName(), usage.getPath(), usage.getTotalBytes(), Math.max(0L, usage.getFreeBytes() - reservedSpace));
if (usageWithReservedSpace.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes() || usageWithReservedSpace.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdHigh()) {
// Over the high watermark: track the node and reroute unless a reroute ran within the reroute interval.
nodesOverLowThreshold.add(node);
nodesOverHighThreshold.add(node);
if (lastRunTimeMillis.get() <= currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) {
reroute = true;
explanation = "high disk watermark exceeded on one or more nodes";
usagesOverHighThreshold.add(usage);
// will log about this node when the reroute completes
} else {
logger.debug("high disk watermark exceeded on {} but an automatic reroute has occurred " + "in the last [{}], skipping reroute", node, diskThresholdSettings.getRerouteInterval());
}
} else if (usageWithReservedSpace.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdLow().getBytes() || usageWithReservedSpace.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdLow()) {
// Between the low and high watermarks: the node is tracked as over-low only.
nodesOverHighThresholdAndRelocating.remove(node);
// add() returns true when the node was not yet tracked as over the low watermark;
// remove() returns true when it was tracked as over the high watermark.
final boolean wasUnderLowThreshold = nodesOverLowThreshold.add(node);
final boolean wasOverHighThreshold = nodesOverHighThreshold.remove(node);
// The flood-stage and high branches above add a node to both sets, so both flags being true is unexpected.
assert (wasUnderLowThreshold && wasOverHighThreshold) == false;
if (wasUnderLowThreshold) {
logger.info("low disk watermark [{}] exceeded on {}, replicas will not be assigned to this node", diskThresholdSettings.describeLowThreshold(), usage);
} else if (wasOverHighThreshold) {
logger.info("high disk watermark [{}] no longer exceeded on {}, but low disk watermark [{}] is still exceeded", diskThresholdSettings.describeHighThreshold(), usage, diskThresholdSettings.describeLowThreshold());
}
} else {
// Under both watermarks.
nodesOverHighThresholdAndRelocating.remove(node);
if (nodesOverLowThreshold.contains(node)) {
// The node was previously tracked as over the low watermark but no longer is, so more shards may be allocatable
// if we reroute now.
if (lastRunTimeMillis.get() <= currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) {
reroute = true;
explanation = "one or more nodes has gone under the high or low watermark";
// The node is only untracked (and the recovery logged) when a reroute is actually triggered.
nodesOverLowThreshold.remove(node);
nodesOverHighThreshold.remove(node);
logger.info("low disk watermark [{}] no longer exceeded on {}", diskThresholdSettings.describeLowThreshold(), usage);
} else {
logger.debug("{} has gone below a disk threshold, but an automatic reroute has occurred " + "in the last [{}], skipping reroute", node, diskThresholdSettings.getRerouteInterval());
}
}
}
}
// Three steps follow (reroute, auto-release, mark read-only). Every path through each step hands this listener
// exactly one completion, either directly or by passing it to updateIndicesReadOnly.
final ActionListener<Void> listener = new GroupedActionListener<>(ActionListener.wrap(this::checkFinished), 3);
if (reroute) {
logger.debug("rerouting shards: [{}]", explanation);
rerouteService.reroute("disk threshold monitor", Priority.HIGH, ActionListener.wrap(reroutedClusterState -> {
// After the reroute, report for each node over the high watermark whether the relocations now under way
// are expected to bring it back under that watermark.
for (DiskUsage diskUsage : usagesOverHighThreshold) {
final RoutingNode routingNode = reroutedClusterState.getRoutingNodes().node(diskUsage.getNodeId());
final DiskUsage usageIncludingRelocations;
final long relocatingShardsSize;
if (routingNode != null) {
// might be temporarily null if the ClusterInfoService and the ClusterService are out of step
relocatingShardsSize = sizeOfRelocatingShards(routingNode, diskUsage, info, reroutedClusterState);
usageIncludingRelocations = new DiskUsage(diskUsage.getNodeId(), diskUsage.getNodeName(), diskUsage.getPath(), diskUsage.getTotalBytes(), diskUsage.getFreeBytes() - relocatingShardsSize);
} else {
usageIncludingRelocations = diskUsage;
relocatingShardsSize = 0L;
}
// NOTE(review): the size is logged negated as "relocating away", so it appears to be negative when shards
// are leaving the node — confirm against sizeOfRelocatingShards.
if (usageIncludingRelocations.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes() || usageIncludingRelocations.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdHigh()) {
nodesOverHighThresholdAndRelocating.remove(diskUsage.getNodeId());
logger.warn("high disk watermark [{}] exceeded on {}, shards will be relocated away from this node; " + "currently relocating away shards totalling [{}] bytes; the node is expected to continue to exceed " + "the high disk watermark when these relocations are complete", diskThresholdSettings.describeHighThreshold(), diskUsage, -relocatingShardsSize);
} else if (nodesOverHighThresholdAndRelocating.add(diskUsage.getNodeId())) {
// First time this node is seen as "over high but relocating enough": log at INFO once.
logger.info("high disk watermark [{}] exceeded on {}, shards will be relocated away from this node; " + "currently relocating away shards totalling [{}] bytes; the node is expected to be below the high " + "disk watermark when these relocations are complete", diskThresholdSettings.describeHighThreshold(), diskUsage, -relocatingShardsSize);
} else {
// Already reported for this node; repeat only at DEBUG.
logger.debug("high disk watermark [{}] exceeded on {}, shards will be relocated away from this node; " + "currently relocating away shards totalling [{}] bytes", diskThresholdSettings.describeHighThreshold(), diskUsage, -relocatingShardsSize);
}
}
setLastRunTimeMillis();
listener.onResponse(null);
}, e -> {
// The last-run time is updated on failure too, so the reroute interval also applies after a failed reroute.
logger.debug("reroute failed", e);
setLastRunTimeMillis();
listener.onFailure(e);
}));
} else {
logger.trace("no reroute required");
listener.onResponse(null);
}
// Release candidates: indices in the routing table that carry the read-only-allow-delete block and were not
// recorded in indicesNotToAutoRelease above.
final Set<String> indicesToAutoRelease = StreamSupport.stream(state.routingTable().indicesRouting().spliterator(), false).map(c -> c.key).filter(index -> indicesNotToAutoRelease.contains(index) == false).filter(index -> state.getBlocks().hasIndexBlock(index, IndexMetadata.INDEX_READ_ONLY_ALLOW_DELETE_BLOCK)).collect(Collectors.toSet());
if (indicesToAutoRelease.isEmpty() == false) {
if (diskThresholdSettings.isAutoReleaseIndexEnabled()) {
logger.info("releasing read-only-allow-delete block on indices: [{}]", indicesToAutoRelease);
updateIndicesReadOnly(indicesToAutoRelease, listener, false);
} else {
// Auto-release is disabled: emit the deprecation warning for the setting and leave the blocks in place.
deprecationLogger.deprecate(DiskThresholdSettings.AUTO_RELEASE_INDEX_ENABLED_KEY.replace(".", "_"), "[{}] will be removed in version {}", DiskThresholdSettings.AUTO_RELEASE_INDEX_ENABLED_KEY, LegacyESVersion.V_7_4_0.major + 1);
logger.debug("[{}] disabled, not releasing read-only-allow-delete block on indices: [{}]", DiskThresholdSettings.AUTO_RELEASE_INDEX_ENABLED_KEY, indicesToAutoRelease);
listener.onResponse(null);
}
} else {
logger.trace("no auto-release required");
listener.onResponse(null);
}
// Indices that already have a write-level block do not need to be marked again.
indicesToMarkReadOnly.removeIf(index -> state.getBlocks().indexBlocked(ClusterBlockLevel.WRITE, index));
logger.trace("marking indices as read-only: [{}]", indicesToMarkReadOnly);
if (indicesToMarkReadOnly.isEmpty() == false) {
updateIndicesReadOnly(indicesToMarkReadOnly, listener, true);
} else {
listener.onResponse(null);
}
}
use of org.opensearch.cluster.DiskUsage in project OpenSearch by opensearch-project.
the class DiskThresholdDecider method freeDiskPercentageAfterShardAssigned.
/**
 * Computes the percentage of disk that would remain free on a node if a shard of the
 * given size were allocated to it.
 *
 * @param usage disk usage of the node being considered
 * @param shardSize size of the shard in bytes; {@code null} is treated as zero
 * @return the free-disk percentage after subtracting the shard size from the node's free bytes
 */
double freeDiskPercentageAfterShardAssigned(DiskUsageWithRelocations usage, Long shardSize) {
    // A missing shard size contributes nothing to the projected usage.
    final long shardBytes = shardSize == null ? 0L : shardSize;
    final DiskUsage projectedUsage = new DiskUsage(
        usage.getNodeId(),
        usage.getNodeName(),
        usage.getPath(),
        usage.getTotalBytes(),
        usage.getFreeBytes() - shardBytes
    );
    return projectedUsage.getFreeDiskAsPercentage();
}
use of org.opensearch.cluster.DiskUsage in project OpenSearch by opensearch-project.
the class DiskThresholdDecider method canRemain.
/**
 * Decides whether a shard may stay on the node it is currently allocated to, based on that node's disk usage.
 *
 * <p>The shard may remain when it does not live on the node's most utilized data path, or when the node's
 * free space (with shards leaving the node subtracted) is at or above the high watermark, both in bytes and
 * as a percentage.
 *
 * @param shardRouting the shard being evaluated; it must currently be allocated on {@code node}
 * @param node the node the shard lives on
 * @param allocation the allocation context supplying cluster info, metadata and the routing table
 * @return the decision produced by {@code earlyTerminate} when that is non-null, otherwise a YES/NO decision
 * @throws IllegalArgumentException if the shard is not allocated on {@code node}
 */
@Override
public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
    final boolean allocatedOnThisNode = shardRouting.currentNodeId().equals(node.nodeId());
    if (allocatedOnThisNode == false) {
        throw new IllegalArgumentException("Shard [" + shardRouting + "] is not allocated on node: [" + node.nodeId() + "]");
    }

    final ClusterInfo info = allocation.clusterInfo();
    final ImmutableOpenMap<String, DiskUsage> leastAvailableUsages = info.getNodeLeastAvailableDiskUsages();
    final Decision shortCircuit = earlyTerminate(allocation, leastAvailableUsages);
    if (shortCircuit != null) {
        return shortCircuit;
    }

    // subtractLeavingShards is true: this check only concerns shards that remain, and disk will *eventually*
    // free up as shards move away. Incoming shards are covered by canAllocate, which passes false instead.
    final DiskUsageWithRelocations nodeUsage = getDiskUsage(node, allocation, leastAvailableUsages, true);
    final String shardDataPath = info.getDataPath(shardRouting);
    final double freePercent = nodeUsage.getFreeDiskAsPercentage();
    final long freeBytes = nodeUsage.getFreeBytes();
    if (logger.isTraceEnabled()) {
        logger.trace("node [{}] has {}% free disk ({} bytes)", node.nodeId(), freePercent, freeBytes);
    }

    // Only shards on the path this usage describes are affected by it.
    final boolean onMostUtilizedPath = shardDataPath != null && nodeUsage.getPath().equals(shardDataPath);
    if (onMostUtilizedPath == false) {
        return allocation.decision(Decision.YES, NAME, "this shard is not allocated on the most utilized disk and can remain");
    }

    // Negative free space: the relocations already accounted for exceed what the disk can hold.
    if (freeBytes < 0L) {
        final long relocatingBytes = sizeOfRelocatingShards(
            node,
            true,
            nodeUsage.getPath(),
            allocation.clusterInfo(),
            allocation.metadata(),
            allocation.routingTable()
        );
        logger.debug("fewer free bytes remaining than the size of all incoming shards: " + "usage {} on node {} including {} bytes of relocations, shard cannot remain", nodeUsage, node.nodeId(), relocatingBytes);
        return allocation.decision(Decision.NO, NAME, "the shard cannot remain on this node because the node has fewer free bytes remaining than the total size of all " + "incoming shards: free space [%s], relocating shards [%s]", freeBytes + relocatingBytes, relocatingBytes);
    }

    // Node already above the high watermark in absolute bytes: the shard has to move off.
    if (freeBytes < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
        if (logger.isDebugEnabled()) {
            logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, shard cannot remain", diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytes, node.nodeId());
        }
        return allocation.decision(Decision.NO, NAME, "the shard cannot remain on this node because it is above the high watermark cluster setting [%s=%s] " + "and there is less than the required [%s] free space on node, actual free: [%s]", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytes));
    }

    // Same check, expressed as a percentage of the disk.
    if (freePercent < diskThresholdSettings.getFreeDiskThresholdHigh()) {
        if (logger.isDebugEnabled()) {
            logger.debug("less than the required {}% free disk threshold ({}% free) on node {}, shard cannot remain", diskThresholdSettings.getFreeDiskThresholdHigh(), freePercent, node.nodeId());
        }
        return allocation.decision(Decision.NO, NAME, "the shard cannot remain on this node because it is above the high watermark cluster setting [%s=%s] " + "and there is less than the required [%s%%] free disk on node, actual free: [%s%%]", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeDiskThresholdHigh(), freePercent);
    }

    return allocation.decision(Decision.YES, NAME, "there is enough disk on this node for the shard to remain, free: [%s]", new ByteSizeValue(freeBytes));
}
use of org.opensearch.cluster.DiskUsage in project OpenSearch by opensearch-project.
the class DiskThresholdMonitorTests method testAutoReleaseIndices.
/**
 * Drives a {@link DiskThresholdMonitor} through a series of disk-usage scenarios and checks, for each one,
 * which indices it asks to mark read-only and which it asks to release.
 */
public void testAutoReleaseIndices() {
    final AtomicReference<Set<String>> markedReadOnly = new AtomicReference<>();
    final AtomicReference<Set<String>> released = new AtomicReference<>();
    // Clears both captures so that every scenario starts from a clean slate.
    final Runnable resetCaptures = () -> {
        markedReadOnly.set(null);
        released.set(null);
    };

    final AllocationService allocation = createAllocationService(
        Settings.builder().put("cluster.routing.allocation.node_concurrent_recoveries", 10).build()
    );
    final Metadata metadata = Metadata.builder()
        .put(IndexMetadata.builder("test_1").settings(settings(Version.CURRENT)).numberOfShards(2).numberOfReplicas(1))
        .put(IndexMetadata.builder("test_2").settings(settings(Version.CURRENT)).numberOfShards(2).numberOfReplicas(1))
        .build();
    final RoutingTable routingTable = RoutingTable.builder()
        .addAsNew(metadata.index("test_1"))
        .addAsNew(metadata.index("test_2"))
        .build();
    final ClusterState clusterState = applyStartedShardsUntilNoChange(
        ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
            .metadata(metadata)
            .routingTable(routingTable)
            .nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")))
            .build(),
        allocation
    );
    assertThat(clusterState.getRoutingTable().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(8));

    // A small random amount of reserved space on each node's data path.
    final ImmutableOpenMap.Builder<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> reservedSpacesBuilder = ImmutableOpenMap.builder();
    final int reservedOnNode1 = between(0, 10);
    reservedSpacesBuilder.put(
        new ClusterInfo.NodeAndPath("node1", "/foo/bar"),
        new ClusterInfo.ReservedSpace.Builder().add(new ShardId("", "", 0), reservedOnNode1).build()
    );
    final int reservedOnNode2 = between(0, 10);
    reservedSpacesBuilder.put(
        new ClusterInfo.NodeAndPath("node2", "/foo/bar"),
        new ClusterInfo.ReservedSpace.Builder().add(new ShardId("", "", 0), reservedOnNode2).build()
    );
    final ImmutableOpenMap<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> reservedSpaces = reservedSpacesBuilder.build();

    final DiskThresholdMonitor monitor = new DiskThresholdMonitor(Settings.EMPTY, () -> clusterState, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null, () -> 0L, (reason, priority, listener) -> {
        assertNotNull(listener);
        assertThat(priority, equalTo(Priority.HIGH));
        listener.onResponse(clusterState);
    }) {
        @Override
        protected void updateIndicesReadOnly(Set<String> indicesToUpdate, ActionListener<Void> listener, boolean readOnly) {
            // Each kind of update may be requested at most once per scenario.
            final AtomicReference<Set<String>> capture = readOnly ? markedReadOnly : released;
            assertTrue(capture.compareAndSet(null, indicesToUpdate));
            listener.onResponse(null);
        }
    };

    // Both nodes below the flood-stage watermark: both indices are marked read-only
    resetCaptures.run();
    final ImmutableOpenMap.Builder<String, DiskUsage> bothFlooded = ImmutableOpenMap.builder();
    bothFlooded.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(0, 4)));
    bothFlooded.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, between(0, 4)));
    monitor.onNewInfo(clusterInfo(bothFlooded.build(), reservedSpaces));
    assertEquals(new HashSet<>(Arrays.asList("test_1", "test_2")), markedReadOnly.get());
    assertNull(released.get());

    // Reserved space is ignored when applying block
    resetCaptures.run();
    final ImmutableOpenMap.Builder<String, DiskUsage> bothAboveFlood = ImmutableOpenMap.builder();
    bothAboveFlood.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(5, 90)));
    bothAboveFlood.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, between(5, 90)));
    monitor.onNewInfo(clusterInfo(bothAboveFlood.build(), reservedSpaces));
    assertNull(markedReadOnly.get());
    assertNull(released.get());

    // Change cluster state so that "test_2" index is blocked (read only)
    final IndexMetadata blockedTest2 = IndexMetadata.builder(clusterState.metadata().index("test_2"))
        .settings(
            Settings.builder()
                .put(clusterState.metadata().index("test_2").getSettings())
                .put(IndexMetadata.INDEX_BLOCKS_READ_ONLY_ALLOW_DELETE_SETTING.getKey(), true)
        )
        .build();
    final ClusterState clusterStateWithBlocks = ClusterState.builder(clusterState)
        .metadata(Metadata.builder(clusterState.metadata()).put(blockedTest2, true).build())
        .blocks(ClusterBlocks.builder().addBlocks(blockedTest2).build())
        .build();
    assertTrue(clusterStateWithBlocks.blocks().indexBlocked(ClusterBlockLevel.WRITE, "test_2"));

    final DiskThresholdMonitor monitorWithBlocks = new DiskThresholdMonitor(Settings.EMPTY, () -> clusterStateWithBlocks, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null, () -> 0L, (reason, priority, listener) -> {
        assertNotNull(listener);
        assertThat(priority, equalTo(Priority.HIGH));
        listener.onResponse(clusterStateWithBlocks);
    }) {
        @Override
        protected void updateIndicesReadOnly(Set<String> indicesToUpdate, ActionListener<Void> listener, boolean readOnly) {
            // Each kind of update may be requested at most once per scenario.
            final AtomicReference<Set<String>> capture = readOnly ? markedReadOnly : released;
            assertTrue(capture.compareAndSet(null, indicesToUpdate));
            listener.onResponse(null);
        }
    };

    // When free disk on any of node1 or node2 goes below 5% flood watermark, then apply index block on indices not having the block
    resetCaptures.run();
    final ImmutableOpenMap.Builder<String, DiskUsage> node2Flooded = ImmutableOpenMap.builder();
    node2Flooded.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(0, 100)));
    node2Flooded.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, between(0, 4)));
    monitorWithBlocks.onNewInfo(clusterInfo(node2Flooded.build(), reservedSpaces));
    assertThat(markedReadOnly.get(), contains("test_1"));
    assertNull(released.get());

    // When free disk on node1 and node2 goes above 10% high watermark then release index block, ignoring reserved space
    resetCaptures.run();
    final ImmutableOpenMap.Builder<String, DiskUsage> bothBelowHigh = ImmutableOpenMap.builder();
    bothBelowHigh.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(10, 100)));
    bothBelowHigh.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, between(10, 100)));
    monitorWithBlocks.onNewInfo(clusterInfo(bothBelowHigh.build(), reservedSpaces));
    assertNull(markedReadOnly.get());
    assertThat(released.get(), contains("test_2"));

    // When no usage information is present for node2, we don't release the block
    resetCaptures.run();
    final ImmutableOpenMap.Builder<String, DiskUsage> onlyNode1Flooded = ImmutableOpenMap.builder();
    onlyNode1Flooded.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(0, 4)));
    monitorWithBlocks.onNewInfo(clusterInfo(onlyNode1Flooded.build()));
    assertThat(markedReadOnly.get(), contains("test_1"));
    assertNull(released.get());

    // When disk usage on one node is between the high and flood-stage watermarks, nothing changes
    resetCaptures.run();
    final ImmutableOpenMap.Builder<String, DiskUsage> node1BetweenHighAndFlood = ImmutableOpenMap.builder();
    node1BetweenHighAndFlood.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(5, 9)));
    node1BetweenHighAndFlood.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, between(5, 100)));
    if (randomBoolean()) {
        node1BetweenHighAndFlood.put("node3", new DiskUsage("node3", "node3", "/foo/bar", 100, between(0, 100)));
    }
    monitorWithBlocks.onNewInfo(clusterInfo(node1BetweenHighAndFlood.build()));
    assertNull(markedReadOnly.get());
    assertNull(released.get());

    // When disk usage on one node is missing and the other is below the high watermark, nothing changes
    resetCaptures.run();
    final ImmutableOpenMap.Builder<String, DiskUsage> node2MissingNode1Ok = ImmutableOpenMap.builder();
    node2MissingNode1Ok.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(5, 100)));
    if (randomBoolean()) {
        node2MissingNode1Ok.put("node3", new DiskUsage("node3", "node3", "/foo/bar", 100, between(0, 100)));
    }
    monitorWithBlocks.onNewInfo(clusterInfo(node2MissingNode1Ok.build()));
    assertNull(markedReadOnly.get());
    assertNull(released.get());

    // When disk usage on one node is missing and the other is above the flood-stage watermark, affected indices are blocked
    resetCaptures.run();
    final ImmutableOpenMap.Builder<String, DiskUsage> node2MissingNode1Flooded = ImmutableOpenMap.builder();
    node2MissingNode1Flooded.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(0, 4)));
    if (randomBoolean()) {
        node2MissingNode1Flooded.put("node3", new DiskUsage("node3", "node3", "/foo/bar", 100, between(0, 100)));
    }
    monitorWithBlocks.onNewInfo(clusterInfo(node2MissingNode1Flooded.build()));
    assertThat(markedReadOnly.get(), contains("test_1"));
    assertNull(released.get());
}
use of org.opensearch.cluster.DiskUsage in project OpenSearch by opensearch-project.
the class DiskThresholdMonitorTests method testDiskMonitorLogging.
/**
 * Walks a single-node cluster through the low, high and flood-stage watermarks and checks the INFO/WARN
 * messages the monitor emits at each transition, with and without the clock advancing past the reroute interval.
 */
@TestLogging(value = "org.opensearch.cluster.routing.allocation.DiskThresholdMonitor:INFO", reason = "testing INFO/WARN logging")
public void testDiskMonitorLogging() throws IllegalAccessException {
    // Expected log patterns, named once because most of them are asserted several times below.
    final String lowExceeded = "low disk watermark [85%] exceeded on * replicas will not be assigned to this node";
    final String lowNoLongerExceeded = "low disk watermark [85%] no longer exceeded on *";
    final String highNoLongerExceeded = "high disk watermark [90%] no longer exceeded on * but low disk watermark [85%] is still exceeded";
    final String highStillExceededAfterRelocation = "high disk watermark [90%] exceeded on * shards will be relocated away from this node* " + "the node is expected to continue to exceed the high disk watermark when these relocations are complete";
    final String highResolvedAfterRelocation = "high disk watermark [90%] exceeded on * shards will be relocated away from this node* " + "the node is expected to be below the high disk watermark when these relocations are complete";
    final String floodStageExceeded = "flood stage disk watermark [95%] exceeded on * all indices on this node will be marked read-only";

    final ClusterState initialState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .nodes(DiscoveryNodes.builder().add(newNode("node1")))
        .build();
    final AtomicReference<ClusterState> clusterStateRef = new AtomicReference<>(initialState);
    final AtomicBoolean advanceTime = new AtomicBoolean(randomBoolean());

    // A manual clock: while advanceTime is set, every read jumps just past the reroute interval.
    final LongSupplier timeSupplier = new LongSupplier() {
        long time;

        @Override
        public long getAsLong() {
            if (advanceTime.get()) {
                time += DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.get(Settings.EMPTY).getMillis() + 1;
            }
            logger.info("time: [{}]", time);
            return time;
        }
    };

    final AtomicLong relocatingShardSizeRef = new AtomicLong();
    final DiskThresholdMonitor monitor = new DiskThresholdMonitor(Settings.EMPTY, clusterStateRef::get, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null, timeSupplier, (reason, priority, listener) -> listener.onResponse(clusterStateRef.get())) {
        @Override
        protected void updateIndicesReadOnly(Set<String> indices, ActionListener<Void> listener, boolean readOnly) {
            listener.onResponse(null);
        }

        @Override
        long sizeOfRelocatingShards(RoutingNode routingNode, DiskUsage diskUsage, ClusterInfo info, ClusterState reroutedClusterState) {
            // Controlled by the test rather than derived from routing state.
            return relocatingShardSizeRef.get();
        }
    };

    // One usage map per watermark band for the single node (total 100 bytes).
    final ImmutableOpenMap.Builder<String, DiskUsage> okBuilder = ImmutableOpenMap.builder();
    okBuilder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(15, 100)));
    final ImmutableOpenMap<String, DiskUsage> allDisksOk = okBuilder.build();

    final ImmutableOpenMap.Builder<String, DiskUsage> lowBuilder = ImmutableOpenMap.builder();
    lowBuilder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(10, 14)));
    final ImmutableOpenMap<String, DiskUsage> aboveLowWatermark = lowBuilder.build();

    final ImmutableOpenMap.Builder<String, DiskUsage> highBuilder = ImmutableOpenMap.builder();
    highBuilder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(5, 9)));
    final ImmutableOpenMap<String, DiskUsage> aboveHighWatermark = highBuilder.build();

    final ImmutableOpenMap.Builder<String, DiskUsage> floodBuilder = ImmutableOpenMap.builder();
    floodBuilder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(0, 4)));
    final ImmutableOpenMap<String, DiskUsage> aboveFloodStageWatermark = floodBuilder.build();

    assertNoLogging(monitor, allDisksOk);
    assertSingleInfoMessage(monitor, aboveLowWatermark, lowExceeded);

    // will do one reroute and emit warnings, but subsequent reroutes and associated messages are delayed
    advanceTime.set(false);
    assertSingleWarningMessage(monitor, aboveHighWatermark, highStillExceededAfterRelocation);

    advanceTime.set(true);
    assertRepeatedWarningMessages(monitor, aboveHighWatermark, highStillExceededAfterRelocation);

    advanceTime.set(randomBoolean());
    assertRepeatedWarningMessages(monitor, aboveFloodStageWatermark, floodStageExceeded);

    relocatingShardSizeRef.set(-5L);
    advanceTime.set(true);
    assertSingleInfoMessage(monitor, aboveHighWatermark, highResolvedAfterRelocation);

    relocatingShardSizeRef.set(0L);
    // advance time long enough to do another reroute
    timeSupplier.getAsLong();
    // will do one reroute and emit warnings, but subsequent reroutes and associated messages are delayed
    advanceTime.set(false);
    assertSingleWarningMessage(monitor, aboveHighWatermark, highStillExceededAfterRelocation);

    advanceTime.set(true);
    assertRepeatedWarningMessages(monitor, aboveHighWatermark, highStillExceededAfterRelocation);

    advanceTime.set(randomBoolean());
    assertSingleInfoMessage(monitor, aboveLowWatermark, highNoLongerExceeded);

    // only log about dropping below the low disk watermark on a reroute
    advanceTime.set(true);
    assertSingleInfoMessage(monitor, allDisksOk, lowNoLongerExceeded);

    advanceTime.set(randomBoolean());
    assertRepeatedWarningMessages(monitor, aboveFloodStageWatermark, floodStageExceeded);
    assertSingleInfoMessage(monitor, allDisksOk, lowNoLongerExceeded);

    advanceTime.set(true);
    assertRepeatedWarningMessages(monitor, aboveHighWatermark, highStillExceededAfterRelocation);
    assertSingleInfoMessage(monitor, allDisksOk, lowNoLongerExceeded);
    assertRepeatedWarningMessages(monitor, aboveFloodStageWatermark, floodStageExceeded);
    assertSingleInfoMessage(monitor, aboveLowWatermark, highNoLongerExceeded);
}
Aggregations