
Example 1 with DiskUsage

Use of org.opensearch.cluster.DiskUsage in project OpenSearch by opensearch-project.

From the class DiskThresholdMonitor, method onNewInfo:

public void onNewInfo(ClusterInfo info) {
    // TODO find a better way to limit concurrent updates (and potential associated reroutes) while allowing tests to ensure that
    // all ClusterInfo updates are processed and never ignored
    if (checkInProgress.compareAndSet(false, true) == false) {
        logger.info("skipping monitor as a check is already in progress");
        return;
    }
    final ImmutableOpenMap<String, DiskUsage> usages = info.getNodeLeastAvailableDiskUsages();
    if (usages == null) {
        logger.trace("skipping monitor as no disk usage information is available");
        checkFinished();
        return;
    }
    logger.trace("processing new cluster info");
    boolean reroute = false;
    String explanation = "";
    final long currentTimeMillis = currentTimeMillisSupplier.getAsLong();
    // Clean up nodes that have been removed from the cluster
    final ObjectLookupContainer<String> nodes = usages.keys();
    cleanUpRemovedNodes(nodes, nodesOverLowThreshold);
    cleanUpRemovedNodes(nodes, nodesOverHighThreshold);
    cleanUpRemovedNodes(nodes, nodesOverHighThresholdAndRelocating);
    final ClusterState state = clusterStateSupplier.get();
    final Set<String> indicesToMarkReadOnly = new HashSet<>();
    RoutingNodes routingNodes = state.getRoutingNodes();
    Set<String> indicesNotToAutoRelease = new HashSet<>();
    markNodesMissingUsageIneligibleForRelease(routingNodes, usages, indicesNotToAutoRelease);
    final List<DiskUsage> usagesOverHighThreshold = new ArrayList<>();
    for (final ObjectObjectCursor<String, DiskUsage> entry : usages) {
        final String node = entry.key;
        final DiskUsage usage = entry.value;
        final RoutingNode routingNode = routingNodes.node(node);
        if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdFloodStage().getBytes() || usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdFloodStage()) {
            nodesOverLowThreshold.add(node);
            nodesOverHighThreshold.add(node);
            nodesOverHighThresholdAndRelocating.remove(node);
            if (routingNode != null) {
                // might be temporarily null if the ClusterInfoService and the ClusterService are out of step
                for (ShardRouting routing : routingNode) {
                    String indexName = routing.index().getName();
                    indicesToMarkReadOnly.add(indexName);
                    indicesNotToAutoRelease.add(indexName);
                }
            }
            logger.warn("flood stage disk watermark [{}] exceeded on {}, all indices on this node will be marked read-only", diskThresholdSettings.describeFloodStageThreshold(), usage);
            continue;
        }
        if (usage.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes() || usage.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdHigh()) {
            if (routingNode != null) {
                // might be temporarily null if the ClusterInfoService and the ClusterService are out of step
                for (ShardRouting routing : routingNode) {
                    String indexName = routing.index().getName();
                    indicesNotToAutoRelease.add(indexName);
                }
            }
        }
        final long reservedSpace = info.getReservedSpace(usage.getNodeId(), usage.getPath()).getTotal();
        final DiskUsage usageWithReservedSpace = new DiskUsage(usage.getNodeId(), usage.getNodeName(), usage.getPath(), usage.getTotalBytes(), Math.max(0L, usage.getFreeBytes() - reservedSpace));
        if (usageWithReservedSpace.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes() || usageWithReservedSpace.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdHigh()) {
            nodesOverLowThreshold.add(node);
            nodesOverHighThreshold.add(node);
            if (lastRunTimeMillis.get() <= currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) {
                reroute = true;
                explanation = "high disk watermark exceeded on one or more nodes";
                usagesOverHighThreshold.add(usage);
                // will log about this node when the reroute completes
            } else {
                logger.debug("high disk watermark exceeded on {} but an automatic reroute has occurred " + "in the last [{}], skipping reroute", node, diskThresholdSettings.getRerouteInterval());
            }
        } else if (usageWithReservedSpace.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdLow().getBytes() || usageWithReservedSpace.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdLow()) {
            nodesOverHighThresholdAndRelocating.remove(node);
            final boolean wasUnderLowThreshold = nodesOverLowThreshold.add(node);
            final boolean wasOverHighThreshold = nodesOverHighThreshold.remove(node);
            assert (wasUnderLowThreshold && wasOverHighThreshold) == false;
            if (wasUnderLowThreshold) {
                logger.info("low disk watermark [{}] exceeded on {}, replicas will not be assigned to this node", diskThresholdSettings.describeLowThreshold(), usage);
            } else if (wasOverHighThreshold) {
                logger.info("high disk watermark [{}] no longer exceeded on {}, but low disk watermark [{}] is still exceeded", diskThresholdSettings.describeHighThreshold(), usage, diskThresholdSettings.describeLowThreshold());
            }
        } else {
            nodesOverHighThresholdAndRelocating.remove(node);
            if (nodesOverLowThreshold.contains(node)) {
                // this node was previously over the low watermark but is no longer,
                // so shards may be allocated to it again if we reroute now
                if (lastRunTimeMillis.get() <= currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) {
                    reroute = true;
                    explanation = "one or more nodes has gone under the high or low watermark";
                    nodesOverLowThreshold.remove(node);
                    nodesOverHighThreshold.remove(node);
                    logger.info("low disk watermark [{}] no longer exceeded on {}", diskThresholdSettings.describeLowThreshold(), usage);
                } else {
                    logger.debug("{} has gone below a disk threshold, but an automatic reroute has occurred " + "in the last [{}], skipping reroute", node, diskThresholdSettings.getRerouteInterval());
                }
            }
        }
    }
    final ActionListener<Void> listener = new GroupedActionListener<>(ActionListener.wrap(this::checkFinished), 3);
    if (reroute) {
        logger.debug("rerouting shards: [{}]", explanation);
        rerouteService.reroute("disk threshold monitor", Priority.HIGH, ActionListener.wrap(reroutedClusterState -> {
            for (DiskUsage diskUsage : usagesOverHighThreshold) {
                final RoutingNode routingNode = reroutedClusterState.getRoutingNodes().node(diskUsage.getNodeId());
                final DiskUsage usageIncludingRelocations;
                final long relocatingShardsSize;
                if (routingNode != null) {
                    // might be temporarily null if the ClusterInfoService and the ClusterService are out of step
                    relocatingShardsSize = sizeOfRelocatingShards(routingNode, diskUsage, info, reroutedClusterState);
                    usageIncludingRelocations = new DiskUsage(diskUsage.getNodeId(), diskUsage.getNodeName(), diskUsage.getPath(), diskUsage.getTotalBytes(), diskUsage.getFreeBytes() - relocatingShardsSize);
                } else {
                    usageIncludingRelocations = diskUsage;
                    relocatingShardsSize = 0L;
                }
                if (usageIncludingRelocations.getFreeBytes() < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes() || usageIncludingRelocations.getFreeDiskAsPercentage() < diskThresholdSettings.getFreeDiskThresholdHigh()) {
                    nodesOverHighThresholdAndRelocating.remove(diskUsage.getNodeId());
                    logger.warn("high disk watermark [{}] exceeded on {}, shards will be relocated away from this node; " + "currently relocating away shards totalling [{}] bytes; the node is expected to continue to exceed " + "the high disk watermark when these relocations are complete", diskThresholdSettings.describeHighThreshold(), diskUsage, -relocatingShardsSize);
                } else if (nodesOverHighThresholdAndRelocating.add(diskUsage.getNodeId())) {
                    logger.info("high disk watermark [{}] exceeded on {}, shards will be relocated away from this node; " + "currently relocating away shards totalling [{}] bytes; the node is expected to be below the high " + "disk watermark when these relocations are complete", diskThresholdSettings.describeHighThreshold(), diskUsage, -relocatingShardsSize);
                } else {
                    logger.debug("high disk watermark [{}] exceeded on {}, shards will be relocated away from this node; " + "currently relocating away shards totalling [{}] bytes", diskThresholdSettings.describeHighThreshold(), diskUsage, -relocatingShardsSize);
                }
            }
            setLastRunTimeMillis();
            listener.onResponse(null);
        }, e -> {
            logger.debug("reroute failed", e);
            setLastRunTimeMillis();
            listener.onFailure(e);
        }));
    } else {
        logger.trace("no reroute required");
        listener.onResponse(null);
    }
    final Set<String> indicesToAutoRelease = StreamSupport.stream(state.routingTable().indicesRouting().spliterator(), false)
        .map(c -> c.key)
        .filter(index -> indicesNotToAutoRelease.contains(index) == false)
        .filter(index -> state.getBlocks().hasIndexBlock(index, IndexMetadata.INDEX_READ_ONLY_ALLOW_DELETE_BLOCK))
        .collect(Collectors.toSet());
    if (indicesToAutoRelease.isEmpty() == false) {
        if (diskThresholdSettings.isAutoReleaseIndexEnabled()) {
            logger.info("releasing read-only-allow-delete block on indices: [{}]", indicesToAutoRelease);
            updateIndicesReadOnly(indicesToAutoRelease, listener, false);
        } else {
            deprecationLogger.deprecate(DiskThresholdSettings.AUTO_RELEASE_INDEX_ENABLED_KEY.replace(".", "_"), "[{}] will be removed in version {}", DiskThresholdSettings.AUTO_RELEASE_INDEX_ENABLED_KEY, LegacyESVersion.V_7_4_0.major + 1);
            logger.debug("[{}] disabled, not releasing read-only-allow-delete block on indices: [{}]", DiskThresholdSettings.AUTO_RELEASE_INDEX_ENABLED_KEY, indicesToAutoRelease);
            listener.onResponse(null);
        }
    } else {
        logger.trace("no auto-release required");
        listener.onResponse(null);
    }
    indicesToMarkReadOnly.removeIf(index -> state.getBlocks().indexBlocked(ClusterBlockLevel.WRITE, index));
    logger.trace("marking indices as read-only: [{}]", indicesToMarkReadOnly);
    if (indicesToMarkReadOnly.isEmpty() == false) {
        updateIndicesReadOnly(indicesToMarkReadOnly, listener, true);
    } else {
        listener.onResponse(null);
    }
}
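The method checks each node against three watermarks in descending severity: flood stage first (mark all indices on the node read-only), then high (relocate shards away, rate-limited by the reroute interval), then low (stop assigning new replicas). As a stripped-down illustration of that tiering, here is a sketch using the default watermarks that the logging test below asserts (85% low, 90% high, 95% flood stage); the enum and method are illustrative, not part of OpenSearch:

// Illustrative sketch only: classifies a node's disk fullness the same way
// onNewInfo tiers its checks, using the default watermark percentages.
enum Watermark { NONE, LOW, HIGH, FLOOD_STAGE }

static Watermark classify(double usedPercent) {
    if (usedPercent >= 95.0) return Watermark.FLOOD_STAGE; // indices marked read-only
    if (usedPercent >= 90.0) return Watermark.HIGH;        // shards relocated away
    if (usedPercent >= 85.0) return Watermark.LOW;         // no new replicas assigned
    return Watermark.NONE;
}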

Example 2 with DiskUsage

Use of org.opensearch.cluster.DiskUsage in project OpenSearch by opensearch-project.

From the class DiskThresholdDecider, method freeDiskPercentageAfterShardAssigned:

/**
 * Given the DiskUsage for a node and the size of the shard, return the
 * percentage of free disk if the shard were to be allocated to the node.
 * @param usage A DiskUsage for the node to have space computed for
 * @param shardSize Size in bytes of the shard
 * @return Percentage of free space after the shard is assigned to the node
 */
double freeDiskPercentageAfterShardAssigned(DiskUsageWithRelocations usage, Long shardSize) {
    shardSize = (shardSize == null) ? 0 : shardSize;
    DiskUsage newUsage = new DiskUsage(usage.getNodeId(), usage.getNodeName(), usage.getPath(), usage.getTotalBytes(), usage.getFreeBytes() - shardSize);
    return newUsage.getFreeDiskAsPercentage();
}
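To make the computation concrete (values are illustrative, not taken from the OpenSearch test suite): a node with 100 bytes total and 30 bytes free that is assigned a 10-byte shard is left with 20 bytes free, so the method returns 20.0. A minimal standalone sketch of the same arithmetic, assuming getFreeDiskAsPercentage() is free space over total space scaled to a percentage:

// Illustrative arithmetic mirroring freeDiskPercentageAfterShardAssigned.
long totalBytes = 100;
long freeBytes = 30;
long shardSize = 10;
// Subtract the shard's size from free space, then recompute the percentage.
double freeDiskAfterAssignment = 100.0 * (freeBytes - shardSize) / totalBytes; // 20.0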

Example 3 with DiskUsage

Use of org.opensearch.cluster.DiskUsage in project OpenSearch by opensearch-project.

From the class DiskThresholdDecider, method canRemain:

@Override
public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
    if (shardRouting.currentNodeId().equals(node.nodeId()) == false) {
        throw new IllegalArgumentException("Shard [" + shardRouting + "] is not allocated on node: [" + node.nodeId() + "]");
    }
    final ClusterInfo clusterInfo = allocation.clusterInfo();
    final ImmutableOpenMap<String, DiskUsage> usages = clusterInfo.getNodeLeastAvailableDiskUsages();
    final Decision decision = earlyTerminate(allocation, usages);
    if (decision != null) {
        return decision;
    }
    // subtractLeavingShards is passed as true here, since this is only for shards remaining, we will *eventually* have enough disk
    // since shards are moving away. No new shards will be incoming since in canAllocate we pass false for this check.
    final DiskUsageWithRelocations usage = getDiskUsage(node, allocation, usages, true);
    final String dataPath = clusterInfo.getDataPath(shardRouting);
    // If this node is already above the high threshold, the shard cannot remain (get it off!)
    final double freeDiskPercentage = usage.getFreeDiskAsPercentage();
    final long freeBytes = usage.getFreeBytes();
    if (logger.isTraceEnabled()) {
        logger.trace("node [{}] has {}% free disk ({} bytes)", node.nodeId(), freeDiskPercentage, freeBytes);
    }
    if (dataPath == null || usage.getPath().equals(dataPath) == false) {
        return allocation.decision(Decision.YES, NAME, "this shard is not allocated on the most utilized disk and can remain");
    }
    if (freeBytes < 0L) {
        final long sizeOfRelocatingShards = sizeOfRelocatingShards(node, true, usage.getPath(), allocation.clusterInfo(), allocation.metadata(), allocation.routingTable());
        logger.debug("fewer free bytes remaining than the size of all incoming shards: " + "usage {} on node {} including {} bytes of relocations, shard cannot remain", usage, node.nodeId(), sizeOfRelocatingShards);
        return allocation.decision(Decision.NO, NAME, "the shard cannot remain on this node because the node has fewer free bytes remaining than the total size of all " + "incoming shards: free space [%s], relocating shards [%s]", freeBytes + sizeOfRelocatingShards, sizeOfRelocatingShards);
    }
    if (freeBytes < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
        if (logger.isDebugEnabled()) {
            logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, shard cannot remain", diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytes, node.nodeId());
        }
        return allocation.decision(Decision.NO, NAME, "the shard cannot remain on this node because it is above the high watermark cluster setting [%s=%s] " + "and there is less than the required [%s] free space on node, actual free: [%s]", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytes));
    }
    if (freeDiskPercentage < diskThresholdSettings.getFreeDiskThresholdHigh()) {
        if (logger.isDebugEnabled()) {
            logger.debug("less than the required {}% free disk threshold ({}% free) on node {}, shard cannot remain", diskThresholdSettings.getFreeDiskThresholdHigh(), freeDiskPercentage, node.nodeId());
        }
        return allocation.decision(Decision.NO, NAME, "the shard cannot remain on this node because it is above the high watermark cluster setting [%s=%s] " + "and there is less than the required [%s%%] free disk on node, actual free: [%s%%]", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeDiskThresholdHigh(), freeDiskPercentage);
    }
    return allocation.decision(Decision.YES, NAME, "there is enough disk on this node for the shard to remain, free: [%s]", new ByteSizeValue(freeBytes));
}
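The thresholds consulted by canRemain come from dynamic cluster settings. As a usage note, a sketch of adjusting them at runtime, assuming a Client instance is available (the setting keys are the real ones behind diskThresholdSettings, as the decision messages above show; the values here merely restate the defaults):

// Sketch: adjust the watermarks that DiskThresholdDecider reads via DiskThresholdSettings.
client.admin().cluster().prepareUpdateSettings()
    .setTransientSettings(Settings.builder()
        .put("cluster.routing.allocation.disk.watermark.low", "85%")
        .put("cluster.routing.allocation.disk.watermark.high", "90%")
        .put("cluster.routing.allocation.disk.watermark.flood_stage", "95%"))
    .get();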

Example 4 with DiskUsage

Use of org.opensearch.cluster.DiskUsage in project OpenSearch by opensearch-project.

From the class DiskThresholdMonitorTests, method testAutoReleaseIndices:

public void testAutoReleaseIndices() {
    AtomicReference<Set<String>> indicesToMarkReadOnly = new AtomicReference<>();
    AtomicReference<Set<String>> indicesToRelease = new AtomicReference<>();
    AllocationService allocation = createAllocationService(Settings.builder().put("cluster.routing.allocation.node_concurrent_recoveries", 10).build());
    Metadata metadata = Metadata.builder().put(IndexMetadata.builder("test_1").settings(settings(Version.CURRENT)).numberOfShards(2).numberOfReplicas(1)).put(IndexMetadata.builder("test_2").settings(settings(Version.CURRENT)).numberOfShards(2).numberOfReplicas(1)).build();
    RoutingTable routingTable = RoutingTable.builder().addAsNew(metadata.index("test_1")).addAsNew(metadata.index("test_2")).build();
    final ClusterState clusterState = applyStartedShardsUntilNoChange(ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metadata(metadata).routingTable(routingTable).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build(), allocation);
    assertThat(clusterState.getRoutingTable().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(8));
    final ImmutableOpenMap.Builder<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> reservedSpacesBuilder = ImmutableOpenMap.builder();
    final int reservedSpaceNode1 = between(0, 10);
    reservedSpacesBuilder.put(new ClusterInfo.NodeAndPath("node1", "/foo/bar"), new ClusterInfo.ReservedSpace.Builder().add(new ShardId("", "", 0), reservedSpaceNode1).build());
    final int reservedSpaceNode2 = between(0, 10);
    reservedSpacesBuilder.put(new ClusterInfo.NodeAndPath("node2", "/foo/bar"), new ClusterInfo.ReservedSpace.Builder().add(new ShardId("", "", 0), reservedSpaceNode2).build());
    ImmutableOpenMap<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> reservedSpaces = reservedSpacesBuilder.build();
    DiskThresholdMonitor monitor = new DiskThresholdMonitor(Settings.EMPTY, () -> clusterState, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null, () -> 0L, (reason, priority, listener) -> {
        assertNotNull(listener);
        assertThat(priority, equalTo(Priority.HIGH));
        listener.onResponse(clusterState);
    }) {

        @Override
        protected void updateIndicesReadOnly(Set<String> indicesToUpdate, ActionListener<Void> listener, boolean readOnly) {
            if (readOnly) {
                assertTrue(indicesToMarkReadOnly.compareAndSet(null, indicesToUpdate));
            } else {
                assertTrue(indicesToRelease.compareAndSet(null, indicesToUpdate));
            }
            listener.onResponse(null);
        }
    };
    indicesToMarkReadOnly.set(null);
    indicesToRelease.set(null);
    ImmutableOpenMap.Builder<String, DiskUsage> builder = ImmutableOpenMap.builder();
    builder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(0, 4)));
    builder.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, between(0, 4)));
    monitor.onNewInfo(clusterInfo(builder.build(), reservedSpaces));
    assertEquals(new HashSet<>(Arrays.asList("test_1", "test_2")), indicesToMarkReadOnly.get());
    assertNull(indicesToRelease.get());
    // Reserved space is ignored when applying block
    indicesToMarkReadOnly.set(null);
    indicesToRelease.set(null);
    builder = ImmutableOpenMap.builder();
    builder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(5, 90)));
    builder.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, between(5, 90)));
    monitor.onNewInfo(clusterInfo(builder.build(), reservedSpaces));
    assertNull(indicesToMarkReadOnly.get());
    assertNull(indicesToRelease.get());
    // Change cluster state so that "test_2" index is blocked (read only)
    IndexMetadata indexMetadata = IndexMetadata.builder(clusterState.metadata().index("test_2")).settings(Settings.builder().put(clusterState.metadata().index("test_2").getSettings()).put(IndexMetadata.INDEX_BLOCKS_READ_ONLY_ALLOW_DELETE_SETTING.getKey(), true)).build();
    ClusterState clusterStateWithBlocks = ClusterState.builder(clusterState).metadata(Metadata.builder(clusterState.metadata()).put(indexMetadata, true).build()).blocks(ClusterBlocks.builder().addBlocks(indexMetadata).build()).build();
    assertTrue(clusterStateWithBlocks.blocks().indexBlocked(ClusterBlockLevel.WRITE, "test_2"));
    monitor = new DiskThresholdMonitor(Settings.EMPTY, () -> clusterStateWithBlocks, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null, () -> 0L, (reason, priority, listener) -> {
        assertNotNull(listener);
        assertThat(priority, equalTo(Priority.HIGH));
        listener.onResponse(clusterStateWithBlocks);
    }) {

        @Override
        protected void updateIndicesReadOnly(Set<String> indicesToUpdate, ActionListener<Void> listener, boolean readOnly) {
            if (readOnly) {
                assertTrue(indicesToMarkReadOnly.compareAndSet(null, indicesToUpdate));
            } else {
                assertTrue(indicesToRelease.compareAndSet(null, indicesToUpdate));
            }
            listener.onResponse(null);
        }
    };
    // When free disk on any of node1 or node2 goes below 5% flood watermark, then apply index block on indices not having the block
    indicesToMarkReadOnly.set(null);
    indicesToRelease.set(null);
    builder = ImmutableOpenMap.builder();
    builder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(0, 100)));
    builder.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, between(0, 4)));
    monitor.onNewInfo(clusterInfo(builder.build(), reservedSpaces));
    assertThat(indicesToMarkReadOnly.get(), contains("test_1"));
    assertNull(indicesToRelease.get());
    // When free disk on node1 and node2 goes above 10% high watermark then release index block, ignoring reserved space
    indicesToMarkReadOnly.set(null);
    indicesToRelease.set(null);
    builder = ImmutableOpenMap.builder();
    builder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(10, 100)));
    builder.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, between(10, 100)));
    monitor.onNewInfo(clusterInfo(builder.build(), reservedSpaces));
    assertNull(indicesToMarkReadOnly.get());
    assertThat(indicesToRelease.get(), contains("test_2"));
    // When no usage information is present for node2, we don't release the block
    indicesToMarkReadOnly.set(null);
    indicesToRelease.set(null);
    builder = ImmutableOpenMap.builder();
    builder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(0, 4)));
    monitor.onNewInfo(clusterInfo(builder.build()));
    assertThat(indicesToMarkReadOnly.get(), contains("test_1"));
    assertNull(indicesToRelease.get());
    // When disk usage on one node is between the high and flood-stage watermarks, nothing changes
    indicesToMarkReadOnly.set(null);
    indicesToRelease.set(null);
    builder = ImmutableOpenMap.builder();
    builder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(5, 9)));
    builder.put("node2", new DiskUsage("node2", "node2", "/foo/bar", 100, between(5, 100)));
    if (randomBoolean()) {
        builder.put("node3", new DiskUsage("node3", "node3", "/foo/bar", 100, between(0, 100)));
    }
    monitor.onNewInfo(clusterInfo(builder.build()));
    assertNull(indicesToMarkReadOnly.get());
    assertNull(indicesToRelease.get());
    // When disk usage on one node is missing and the other is below the high watermark, nothing changes
    indicesToMarkReadOnly.set(null);
    indicesToRelease.set(null);
    builder = ImmutableOpenMap.builder();
    builder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(5, 100)));
    if (randomBoolean()) {
        builder.put("node3", new DiskUsage("node3", "node3", "/foo/bar", 100, between(0, 100)));
    }
    monitor.onNewInfo(clusterInfo(builder.build()));
    assertNull(indicesToMarkReadOnly.get());
    assertNull(indicesToRelease.get());
    // When disk usage on one node is missing and the other is above the flood-stage watermark, affected indices are blocked
    indicesToMarkReadOnly.set(null);
    indicesToRelease.set(null);
    builder = ImmutableOpenMap.builder();
    builder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(0, 4)));
    if (randomBoolean()) {
        builder.put("node3", new DiskUsage("node3", "node3", "/foo/bar", 100, between(0, 100)));
    }
    monitor.onNewInfo(clusterInfo(builder.build()));
    assertThat(indicesToMarkReadOnly.get(), contains("test_1"));
    assertNull(indicesToRelease.get());
}
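The test calls a clusterInfo(...) helper defined elsewhere in DiskThresholdMonitorTests. A plausible reconstruction is sketched below; the exact ClusterInfo constructor arity is an assumption, so treat this as a sketch rather than the verbatim helper:

// Sketch of the omitted helper: builds a ClusterInfo carrying only the
// least-available disk usages plus reserved space; the remaining maps
// (most-available usages, shard sizes, routing-to-data-path) are left null.
private static ClusterInfo clusterInfo(ImmutableOpenMap<String, DiskUsage> diskUsages) {
    return clusterInfo(diskUsages, ImmutableOpenMap.of());
}

private static ClusterInfo clusterInfo(
    ImmutableOpenMap<String, DiskUsage> diskUsages,
    ImmutableOpenMap<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> reservedSpace
) {
    return new ClusterInfo(diskUsages, null, null, null, reservedSpace);
}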

Example 5 with DiskUsage

Use of org.opensearch.cluster.DiskUsage in project OpenSearch by opensearch-project.

From the class DiskThresholdMonitorTests, method testDiskMonitorLogging:

@TestLogging(value = "org.opensearch.cluster.routing.allocation.DiskThresholdMonitor:INFO", reason = "testing INFO/WARN logging")
public void testDiskMonitorLogging() throws IllegalAccessException {
    final ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).nodes(DiscoveryNodes.builder().add(newNode("node1"))).build();
    final AtomicReference<ClusterState> clusterStateRef = new AtomicReference<>(clusterState);
    final AtomicBoolean advanceTime = new AtomicBoolean(randomBoolean());
    final LongSupplier timeSupplier = new LongSupplier() {

        long time;

        @Override
        public long getAsLong() {
            if (advanceTime.get()) {
                time += DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.get(Settings.EMPTY).getMillis() + 1;
            }
            logger.info("time: [{}]", time);
            return time;
        }
    };
    final AtomicLong relocatingShardSizeRef = new AtomicLong();
    DiskThresholdMonitor monitor = new DiskThresholdMonitor(Settings.EMPTY, clusterStateRef::get, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null, timeSupplier, (reason, priority, listener) -> listener.onResponse(clusterStateRef.get())) {

        @Override
        protected void updateIndicesReadOnly(Set<String> indicesToMarkReadOnly, ActionListener<Void> listener, boolean readOnly) {
            listener.onResponse(null);
        }

        @Override
        long sizeOfRelocatingShards(RoutingNode routingNode, DiskUsage diskUsage, ClusterInfo info, ClusterState reroutedClusterState) {
            return relocatingShardSizeRef.get();
        }
    };
    final ImmutableOpenMap.Builder<String, DiskUsage> allDisksOkBuilder;
    allDisksOkBuilder = ImmutableOpenMap.builder();
    allDisksOkBuilder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(15, 100)));
    final ImmutableOpenMap<String, DiskUsage> allDisksOk = allDisksOkBuilder.build();
    final ImmutableOpenMap.Builder<String, DiskUsage> aboveLowWatermarkBuilder = ImmutableOpenMap.builder();
    aboveLowWatermarkBuilder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(10, 14)));
    final ImmutableOpenMap<String, DiskUsage> aboveLowWatermark = aboveLowWatermarkBuilder.build();
    final ImmutableOpenMap.Builder<String, DiskUsage> aboveHighWatermarkBuilder = ImmutableOpenMap.builder();
    aboveHighWatermarkBuilder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(5, 9)));
    final ImmutableOpenMap<String, DiskUsage> aboveHighWatermark = aboveHighWatermarkBuilder.build();
    final ImmutableOpenMap.Builder<String, DiskUsage> aboveFloodStageWatermarkBuilder = ImmutableOpenMap.builder();
    aboveFloodStageWatermarkBuilder.put("node1", new DiskUsage("node1", "node1", "/foo/bar", 100, between(0, 4)));
    final ImmutableOpenMap<String, DiskUsage> aboveFloodStageWatermark = aboveFloodStageWatermarkBuilder.build();
    assertNoLogging(monitor, allDisksOk);
    assertSingleInfoMessage(monitor, aboveLowWatermark, "low disk watermark [85%] exceeded on * replicas will not be assigned to this node");
    // will do one reroute and emit warnings, but subsequent reroutes and associated messages are delayed
    advanceTime.set(false);
    assertSingleWarningMessage(monitor, aboveHighWatermark, "high disk watermark [90%] exceeded on * shards will be relocated away from this node* " + "the node is expected to continue to exceed the high disk watermark when these relocations are complete");
    advanceTime.set(true);
    assertRepeatedWarningMessages(monitor, aboveHighWatermark, "high disk watermark [90%] exceeded on * shards will be relocated away from this node* " + "the node is expected to continue to exceed the high disk watermark when these relocations are complete");
    advanceTime.set(randomBoolean());
    assertRepeatedWarningMessages(monitor, aboveFloodStageWatermark, "flood stage disk watermark [95%] exceeded on * all indices on this node will be marked read-only");
    relocatingShardSizeRef.set(-5L);
    advanceTime.set(true);
    assertSingleInfoMessage(monitor, aboveHighWatermark, "high disk watermark [90%] exceeded on * shards will be relocated away from this node* " + "the node is expected to be below the high disk watermark when these relocations are complete");
    relocatingShardSizeRef.set(0L);
    // advance time long enough to do another reroute
    timeSupplier.getAsLong();
    // will do one reroute and emit warnings, but subsequent reroutes and associated messages are delayed
    advanceTime.set(false);
    assertSingleWarningMessage(monitor, aboveHighWatermark, "high disk watermark [90%] exceeded on * shards will be relocated away from this node* " + "the node is expected to continue to exceed the high disk watermark when these relocations are complete");
    advanceTime.set(true);
    assertRepeatedWarningMessages(monitor, aboveHighWatermark, "high disk watermark [90%] exceeded on * shards will be relocated away from this node* " + "the node is expected to continue to exceed the high disk watermark when these relocations are complete");
    advanceTime.set(randomBoolean());
    assertSingleInfoMessage(monitor, aboveLowWatermark, "high disk watermark [90%] no longer exceeded on * but low disk watermark [85%] is still exceeded");
    // only log about dropping below the low disk watermark on a reroute
    advanceTime.set(true);
    assertSingleInfoMessage(monitor, allDisksOk, "low disk watermark [85%] no longer exceeded on *");
    advanceTime.set(randomBoolean());
    assertRepeatedWarningMessages(monitor, aboveFloodStageWatermark, "flood stage disk watermark [95%] exceeded on * all indices on this node will be marked read-only");
    assertSingleInfoMessage(monitor, allDisksOk, "low disk watermark [85%] no longer exceeded on *");
    advanceTime.set(true);
    assertRepeatedWarningMessages(monitor, aboveHighWatermark, "high disk watermark [90%] exceeded on * shards will be relocated away from this node* " + "the node is expected to continue to exceed the high disk watermark when these relocations are complete");
    assertSingleInfoMessage(monitor, allDisksOk, "low disk watermark [85%] no longer exceeded on *");
    assertRepeatedWarningMessages(monitor, aboveFloodStageWatermark, "flood stage disk watermark [95%] exceeded on * all indices on this node will be marked read-only");
    assertSingleInfoMessage(monitor, aboveLowWatermark, "high disk watermark [90%] no longer exceeded on * but low disk watermark [85%] is still exceeded");
}
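The assertNoLogging / assertSingleInfoMessage / assertSingleWarningMessage / assertRepeatedWarningMessages helpers are also defined outside this listing. A rough sketch of the pattern they follow, using MockLogAppender from the OpenSearch test framework (the real helpers also cover the no-logging and repeated-message cases, so details will differ):

// Sketch: attach a MockLogAppender, run the monitor once, and assert the expected log line.
private void assertSingleInfoMessage(DiskThresholdMonitor monitor,
        ImmutableOpenMap<String, DiskUsage> diskUsages, String message) throws IllegalAccessException {
    MockLogAppender mockAppender = new MockLogAppender();
    mockAppender.start();
    mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation(
        "expected message", DiskThresholdMonitor.class.getCanonicalName(), Level.INFO, message));
    Logger diskThresholdMonitorLogger = LogManager.getLogger(DiskThresholdMonitor.class);
    Loggers.addAppender(diskThresholdMonitorLogger, mockAppender);
    try {
        monitor.onNewInfo(clusterInfo(diskUsages));
        mockAppender.assertAllExpectationsMatched();
    } finally {
        Loggers.removeAppender(diskThresholdMonitorLogger, mockAppender);
        mockAppender.stop();
    }
}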

Aggregations

DiskUsage (org.opensearch.cluster.DiskUsage): 25 usages
ClusterInfo (org.opensearch.cluster.ClusterInfo): 20 usages
ImmutableOpenMap (org.opensearch.common.collect.ImmutableOpenMap): 19 usages
ClusterState (org.opensearch.cluster.ClusterState): 17 usages
ClusterSettings (org.opensearch.common.settings.ClusterSettings): 17 usages
IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata): 15 usages
Matchers.containsString (org.hamcrest.Matchers.containsString): 14 usages
Metadata (org.opensearch.cluster.metadata.Metadata): 14 usages
RoutingTable (org.opensearch.cluster.routing.RoutingTable): 14 usages
Settings (org.opensearch.common.settings.Settings): 13 usages
RoutingNode (org.opensearch.cluster.routing.RoutingNode): 11 usages
ClusterInfoService (org.opensearch.cluster.ClusterInfoService): 10 usages
ShardRouting (org.opensearch.cluster.routing.ShardRouting): 10 usages
IndexRoutingTable (org.opensearch.cluster.routing.IndexRoutingTable): 9 usages
IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable): 9 usages
AllocationService (org.opensearch.cluster.routing.allocation.AllocationService): 9 usages
DiskThresholdSettings (org.opensearch.cluster.routing.allocation.DiskThresholdSettings): 9 usages
BalancedShardsAllocator (org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator): 9 usages
ShardId (org.opensearch.index.shard.ShardId): 9 usages
TestGatewayAllocator (org.opensearch.test.gateway.TestGatewayAllocator): 9 usages