Example 26 with ClusterInfo

Use of org.elasticsearch.cluster.ClusterInfo in project elasticsearch by elastic.

From the class MockDiskUsagesIT, the method testRerouteOccursOnDiskPassingHighWatermark:

public void testRerouteOccursOnDiskPassingHighWatermark() throws Exception {
    List<String> nodes = internalCluster().startNodes(3);
    // Wait for all 3 nodes to be up
    assertBusy(new Runnable() {

        @Override
        public void run() {
            NodesStatsResponse resp = client().admin().cluster().prepareNodesStats().get();
            assertThat(resp.getNodes().size(), equalTo(3));
        }
    });
    // Start with all nodes at 50% usage
    final MockInternalClusterInfoService cis = (MockInternalClusterInfoService) internalCluster().getInstance(ClusterInfoService.class, internalCluster().getMasterName());
    cis.setUpdateFrequency(TimeValue.timeValueMillis(200));
    cis.onMaster();
    cis.setN1Usage(nodes.get(0), new DiskUsage(nodes.get(0), "n1", "/dev/null", 100, 50));
    cis.setN2Usage(nodes.get(1), new DiskUsage(nodes.get(1), "n2", "/dev/null", 100, 50));
    cis.setN3Usage(nodes.get(2), new DiskUsage(nodes.get(2), "n3", "/dev/null", 100, 50));
    client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder().put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), randomFrom("20b", "80%")).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), randomFrom("10b", "90%")).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.getKey(), "1ms")).get();
    // Create an index with 10 shards so we can check allocation for it
    prepareCreate("test").setSettings(Settings.builder().put("number_of_shards", 10).put("number_of_replicas", 0).put("index.routing.allocation.exclude._name", "")).get();
    ensureGreen("test");
    // Block until the "fake" cluster info is retrieved at least once
    assertBusy(new Runnable() {

        @Override
        public void run() {
            ClusterInfo info = cis.getClusterInfo();
            logger.info("--> got: {} nodes", info.getNodeLeastAvailableDiskUsages().size());
            assertThat(info.getNodeLeastAvailableDiskUsages().size(), greaterThan(0));
        }
    });
    final List<String> realNodeNames = new ArrayList<>();
    ClusterStateResponse resp = client().admin().cluster().prepareState().get();
    Iterator<RoutingNode> iter = resp.getState().getRoutingNodes().iterator();
    while (iter.hasNext()) {
        RoutingNode node = iter.next();
        realNodeNames.add(node.nodeId());
        logger.info("--> node {} has {} shards", node.nodeId(), resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
    }
    // Update the disk usages so one node has now passed the high watermark
    cis.setN1Usage(realNodeNames.get(0), new DiskUsage(nodes.get(0), "n1", "_na_", 100, 50));
    cis.setN2Usage(realNodeNames.get(1), new DiskUsage(nodes.get(1), "n2", "_na_", 100, 50));
    // nothing free on node3
    cis.setN3Usage(realNodeNames.get(2), new DiskUsage(nodes.get(2), "n3", "_na_", 100, 0));
    // Retrieve the count of shards on each node
    final Map<String, Integer> nodesToShardCount = new HashMap<>();
    assertBusy(new Runnable() {

        @Override
        public void run() {
            ClusterStateResponse resp = client().admin().cluster().prepareState().get();
            Iterator<RoutingNode> iter = resp.getState().getRoutingNodes().iterator();
            while (iter.hasNext()) {
                RoutingNode node = iter.next();
                logger.info("--> node {} has {} shards", node.nodeId(), resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
                nodesToShardCount.put(node.nodeId(), resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
            }
            assertThat("node1 has 5 shards", nodesToShardCount.get(realNodeNames.get(0)), equalTo(5));
            assertThat("node2 has 5 shards", nodesToShardCount.get(realNodeNames.get(1)), equalTo(5));
            assertThat("node3 has 0 shards", nodesToShardCount.get(realNodeNames.get(2)), equalTo(0));
        }
    });
    // Update the disk usages so one node is now back under the high watermark
    cis.setN1Usage(realNodeNames.get(0), new DiskUsage(nodes.get(0), "n1", "_na_", 100, 50));
    cis.setN2Usage(realNodeNames.get(1), new DiskUsage(nodes.get(1), "n2", "_na_", 100, 50));
    // node3 has free space now
    cis.setN3Usage(realNodeNames.get(2), new DiskUsage(nodes.get(2), "n3", "_na_", 100, 50));
    // Retrieve the count of shards on each node
    nodesToShardCount.clear();
    assertBusy(new Runnable() {

        @Override
        public void run() {
            ClusterStateResponse resp = client().admin().cluster().prepareState().get();
            Iterator<RoutingNode> iter = resp.getState().getRoutingNodes().iterator();
            while (iter.hasNext()) {
                RoutingNode node = iter.next();
                logger.info("--> node {} has {} shards", node.nodeId(), resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
                nodesToShardCount.put(node.nodeId(), resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
            }
            assertThat("node1 has at least 3 shards", nodesToShardCount.get(realNodeNames.get(0)), greaterThanOrEqualTo(3));
            assertThat("node2 has at least 3 shards", nodesToShardCount.get(realNodeNames.get(1)), greaterThanOrEqualTo(3));
            assertThat("node3 has at least 3 shards", nodesToShardCount.get(realNodeNames.get(2)), greaterThanOrEqualTo(3));
        }
    });
}
Also used: MockInternalClusterInfoService(org.elasticsearch.cluster.MockInternalClusterInfoService) ClusterInfoService(org.elasticsearch.cluster.ClusterInfoService) HashMap(java.util.HashMap) ClusterStateResponse(org.elasticsearch.action.admin.cluster.state.ClusterStateResponse) ArrayList(java.util.ArrayList) DiskUsage(org.elasticsearch.cluster.DiskUsage) NodesStatsResponse(org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse) ClusterInfo(org.elasticsearch.cluster.ClusterInfo) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) Iterator(java.util.Iterator)
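
As a side note, the anonymous Runnable blocks in this test predate Java 8 conventions; a minimal sketch of the first busy-wait assertion written with a lambda instead (assuming the same assertBusy(Runnable) helper from ESIntegTestCase) would be:

// Lambda form of the first assertBusy block above; behavior is identical.
assertBusy(() -> {
    NodesStatsResponse resp = client().admin().cluster().prepareNodesStats().get();
    assertThat(resp.getNodes().size(), equalTo(3));
});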

Example 27 with ClusterInfo

Use of org.elasticsearch.cluster.ClusterInfo in project elasticsearch by elastic.

From the class DiskThresholdDecider, the method canAllocate:

@Override
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
    ClusterInfo clusterInfo = allocation.clusterInfo();
    ImmutableOpenMap<String, DiskUsage> usages = clusterInfo.getNodeMostAvailableDiskUsages();
    final Decision decision = earlyTerminate(allocation, usages);
    if (decision != null) {
        return decision;
    }
    final double usedDiskThresholdLow = 100.0 - diskThresholdSettings.getFreeDiskThresholdLow();
    final double usedDiskThresholdHigh = 100.0 - diskThresholdSettings.getFreeDiskThresholdHigh();
    // subtractLeavingShards is passed as false here because shards that are leaving the node still use disk space,
    // so we should be extra careful and take their size into account
    DiskUsage usage = getDiskUsage(node, allocation, usages, false);
    // First, check whether the node is currently over the low watermark
    double freeDiskPercentage = usage.getFreeDiskAsPercentage();
    // Cache the used disk percentage so the reported percentages stay consistent with the documentation
    double usedDiskPercentage = usage.getUsedDiskAsPercentage();
    long freeBytes = usage.getFreeBytes();
    if (logger.isTraceEnabled()) {
        logger.trace("node [{}] has {}% used disk", node.nodeId(), usedDiskPercentage);
    }
    // flag that determines whether the low threshold checks below can be skipped. We use this for a primary shard that is freshly
    // allocated and empty.
    boolean skipLowThresholdChecks = shardRouting.primary() && shardRouting.active() == false && shardRouting.recoverySource().getType() == RecoverySource.Type.EMPTY_STORE;
    // checks for exact byte comparisons
    if (freeBytes < diskThresholdSettings.getFreeBytesThresholdLow().getBytes()) {
        if (skipLowThresholdChecks == false) {
            if (logger.isDebugEnabled()) {
                logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, preventing allocation", diskThresholdSettings.getFreeBytesThresholdLow(), freeBytes, node.nodeId());
            }
            return allocation.decision(Decision.NO, NAME, "the node is above the low watermark cluster setting [%s=%s], having less than the minimum required [%s] free " + "space, actual free: [%s]", CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getLowWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdLow(), new ByteSizeValue(freeBytes));
        } else if (freeBytes > diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
            // allow the allocation: this is a primary shard that has never been allocated, and the node is still under the high watermark
            if (logger.isDebugEnabled()) {
                logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, " + "but allowing allocation because primary has never been allocated", diskThresholdSettings.getFreeBytesThresholdLow(), freeBytes, node.nodeId());
            }
            return allocation.decision(Decision.YES, NAME, "the node is above the low watermark, but less than the high watermark, and this primary shard has " + "never been allocated before");
        } else {
            // above the high watermark, so don't allow allocating the shard
            if (logger.isDebugEnabled()) {
                logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, " + "preventing allocation even though primary has never been allocated", diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytes, node.nodeId());
            }
            return allocation.decision(Decision.NO, NAME, "the node is above the high watermark cluster setting [%s=%s], having less than the minimum required [%s] free " + "space, actual free: [%s]", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytes));
        }
    }
    // checks for percentage comparisons
    if (freeDiskPercentage < diskThresholdSettings.getFreeDiskThresholdLow()) {
        // If the shard is a replica or is a non-empty primary, check the low threshold
        if (skipLowThresholdChecks == false) {
            if (logger.isDebugEnabled()) {
                logger.debug("more than the allowed {} used disk threshold ({} used) on node [{}], preventing allocation", Strings.format1Decimals(usedDiskThresholdLow, "%"), Strings.format1Decimals(usedDiskPercentage, "%"), node.nodeId());
            }
            return allocation.decision(Decision.NO, NAME, "the node is above the low watermark cluster setting [%s=%s], using more disk space than the maximum allowed " + "[%s%%], actual free: [%s%%]", CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getLowWatermarkRaw(), usedDiskThresholdLow, freeDiskPercentage);
        } else if (freeDiskPercentage > diskThresholdSettings.getFreeDiskThresholdHigh()) {
            // allow the allocation: this is a primary shard that has never been allocated, and the node is still under the high watermark
            if (logger.isDebugEnabled()) {
                logger.debug("more than the allowed {} used disk threshold ({} used) on node [{}], " + "but allowing allocation because primary has never been allocated", Strings.format1Decimals(usedDiskThresholdLow, "%"), Strings.format1Decimals(usedDiskPercentage, "%"), node.nodeId());
            }
            return allocation.decision(Decision.YES, NAME, "the node is above the low watermark, but less than the high watermark, and this primary shard has " + "never been allocated before");
        } else {
            // above the high watermark, so don't allow allocating the shard
            if (logger.isDebugEnabled()) {
                logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, " + "preventing allocation even though primary has never been allocated", Strings.format1Decimals(diskThresholdSettings.getFreeDiskThresholdHigh(), "%"), Strings.format1Decimals(freeDiskPercentage, "%"), node.nodeId());
            }
            return allocation.decision(Decision.NO, NAME, "the node is above the high watermark cluster setting [%s=%s], using more disk space than the maximum allowed " + "[%s%%], actual free: [%s%%]", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), usedDiskThresholdHigh, freeDiskPercentage);
        }
    }
    // Secondly, check that allocating the shard to this node doesn't put it above the high watermark
    final long shardSize = getExpectedShardSize(shardRouting, allocation, 0);
    double freeSpaceAfterShard = freeDiskPercentageAfterShardAssigned(usage, shardSize);
    long freeBytesAfterShard = freeBytes - shardSize;
    if (freeBytesAfterShard < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
        logger.warn("after allocating, node [{}] would have less than the required " + "{} free bytes threshold ({} bytes free), preventing allocation", node.nodeId(), diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesAfterShard);
        return allocation.decision(Decision.NO, NAME, "allocating the shard to this node will bring the node above the high watermark cluster setting [%s=%s] " + "and cause it to have less than the minimum required [%s] of free space (free bytes after shard added: [%s])", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytesAfterShard));
    }
    if (freeSpaceAfterShard < diskThresholdSettings.getFreeDiskThresholdHigh()) {
        logger.warn("after allocating, node [{}] would have more than the allowed " + "{} free disk threshold ({} free), preventing allocation", node.nodeId(), Strings.format1Decimals(diskThresholdSettings.getFreeDiskThresholdHigh(), "%"), Strings.format1Decimals(freeSpaceAfterShard, "%"));
        return allocation.decision(Decision.NO, NAME, "allocating the shard to this node will bring the node above the high watermark cluster setting [%s=%s] " + "and cause it to use more disk space than the maximum allowed [%s%%] (free space after shard added: [%s%%])", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), usedDiskThresholdHigh, freeSpaceAfterShard);
    }
    return allocation.decision(Decision.YES, NAME, "enough disk for shard on node, free: [%s], shard size: [%s], free after allocating shard: [%s]", new ByteSizeValue(freeBytes), new ByteSizeValue(shardSize), new ByteSizeValue(freeBytesAfterShard));
}
Also used: ClusterInfo(org.elasticsearch.cluster.ClusterInfo) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) DiskUsage(org.elasticsearch.cluster.DiskUsage)
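
A note on the threshold arithmetic at the top of canAllocate: the watermark settings express used-disk limits, while the decider compares free-disk percentages, so each used threshold is derived as 100.0 minus the corresponding free threshold. A minimal, self-contained sketch of that relationship (the 85%/90% values below are just the documented default watermarks, used here for illustration):

// Illustration only: mirrors usedDiskThresholdLow = 100.0 - getFreeDiskThresholdLow()
// from canAllocate above; values are the documented default watermarks.
public final class WatermarkArithmetic {
    public static void main(String[] args) {
        double lowWatermarkUsed = 85.0;   // cluster.routing.allocation.disk.watermark.low
        double highWatermarkUsed = 90.0;  // cluster.routing.allocation.disk.watermark.high
        double freeDiskThresholdLow = 100.0 - lowWatermarkUsed;   // 15% free required
        double freeDiskThresholdHigh = 100.0 - highWatermarkUsed; // 10% free required
        double freeDiskPercentage = 12.0; // sample node: 12% free, i.e. 88% used
        // Mirrors the percentage checks above: dropping under the low threshold blocks
        // normal allocation; only dropping under the high threshold also blocks
        // never-allocated empty primaries.
        System.out.println("blocks normal allocation: " + (freeDiskPercentage < freeDiskThresholdLow));  // true
        System.out.println("blocks empty primary:     " + (freeDiskPercentage < freeDiskThresholdHigh)); // false
    }
}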

Example 28 with ClusterInfo

Use of org.elasticsearch.cluster.ClusterInfo in project elasticsearch by elastic.

From the class DiskThresholdDecider, the method getExpectedShardSize:

/**
     * Returns the expected shard size for the given shard, or the provided default value if not enough information is available
     * to estimate the shard's size.
     */
public static long getExpectedShardSize(ShardRouting shard, RoutingAllocation allocation, long defaultValue) {
    final IndexMetaData metaData = allocation.metaData().getIndexSafe(shard.index());
    final ClusterInfo info = allocation.clusterInfo();
    if (metaData.getMergeSourceIndex() != null && shard.active() == false && shard.recoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS) {
        // in the shrink index case we sum up the source index shards since we basically make a copy of the shard in
        // the worst case
        long targetShardSize = 0;
        final Index mergeSourceIndex = metaData.getMergeSourceIndex();
        final IndexMetaData sourceIndexMeta = allocation.metaData().getIndexSafe(mergeSourceIndex);
        final Set<ShardId> shardIds = IndexMetaData.selectShrinkShards(shard.id(), sourceIndexMeta, metaData.getNumberOfShards());
        for (IndexShardRoutingTable shardRoutingTable : allocation.routingTable().index(mergeSourceIndex.getName())) {
            if (shardIds.contains(shardRoutingTable.shardId())) {
                targetShardSize += info.getShardSize(shardRoutingTable.primaryShard(), 0);
            }
        }
        return targetShardSize == 0 ? defaultValue : targetShardSize;
    } else {
        return info.getShardSize(shard, defaultValue);
    }
}
Also used: ShardId(org.elasticsearch.index.shard.ShardId) IndexShardRoutingTable(org.elasticsearch.cluster.routing.IndexShardRoutingTable) ClusterInfo(org.elasticsearch.cluster.ClusterInfo) Index(org.elasticsearch.index.Index) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData)
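
To make the shrink branch concrete: when shrinking, e.g., a four-shard index down to two shards, target shard 0 is fed by source shards 0 and 1, and the estimate is the sum of their primary sizes. A simplified sketch of just that summation, using plain Java collections rather than the ClusterInfo/IndexMetaData API (all names below are hypothetical stand-ins):

import java.util.Map;
import java.util.Set;

// Hypothetical stand-in for the shrink branch of getExpectedShardSize above:
// sum the known sizes of the source shards merged into the target shard,
// falling back to the default when nothing is known.
final class ShrinkSizeEstimate {
    static long expectedShrinkShardSize(Set<Integer> sourceShardIds,
                                        Map<Integer, Long> knownShardSizes,
                                        long defaultValue) {
        long targetShardSize = 0;
        for (int shardId : sourceShardIds) {
            // unknown sizes contribute 0, matching info.getShardSize(primaryShard, 0)
            targetShardSize += knownShardSizes.getOrDefault(shardId, 0L);
        }
        return targetShardSize == 0 ? defaultValue : targetShardSize;
    }

    public static void main(String[] args) {
        Set<Integer> sourcesForTarget0 = Set.of(0, 1); // 4-shard index shrunk to 2
        Map<Integer, Long> sizes = Map.of(0, 1_000L, 1, 2_500L, 2, 9_999L);
        System.out.println(expectedShrinkShardSize(sourcesForTarget0, sizes, 42L)); // 3500
    }
}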

Example 29 with ClusterInfo

Use of org.elasticsearch.cluster.ClusterInfo in project elasticsearch by elastic.

From the class DiskThresholdDecider, the method sizeOfRelocatingShards:

/**
     * Returns the size of all shards that are currently being relocated to
     * the node, but may not be finished transferring yet.
     *
     * If subtractShardsMovingAway is true, then the size of shards moving away is subtracted from the total size of all shards.
     */
static long sizeOfRelocatingShards(RoutingNode node, RoutingAllocation allocation, boolean subtractShardsMovingAway, String dataPath) {
    ClusterInfo clusterInfo = allocation.clusterInfo();
    long totalSize = 0;
    for (ShardRouting routing : node.shardsWithState(ShardRoutingState.RELOCATING, ShardRoutingState.INITIALIZING)) {
        String actualPath = clusterInfo.getDataPath(routing);
        if (dataPath.equals(actualPath)) {
            if (routing.initializing() && routing.relocatingNodeId() != null) {
                totalSize += getExpectedShardSize(routing, allocation, 0);
            } else if (subtractShardsMovingAway && routing.relocating()) {
                totalSize -= getExpectedShardSize(routing, allocation, 0);
            }
        }
    }
    return totalSize;
}
Also used: ClusterInfo(org.elasticsearch.cluster.ClusterInfo) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting)
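
The accounting here is simple enough to isolate: shards initializing on the node as relocation targets add their expected size, and, if subtractShardsMovingAway is set, shards relocating away subtract theirs. A minimal self-contained sketch with plain stand-in types (none of these are the Elasticsearch API):

import java.util.List;

// Simplified stand-in for sizeOfRelocatingShards above: incoming relocation
// targets add their expected size; outgoing relocation sources optionally
// subtract theirs. Plain record types, not the Elasticsearch routing API.
final class RelocationSizeSketch {
    enum State { INITIALIZING, RELOCATING }
    record Shard(State state, boolean hasRelocationSource, long expectedSize) {}

    static long sizeOfRelocatingShards(List<Shard> shardsOnNode, boolean subtractShardsMovingAway) {
        long totalSize = 0;
        for (Shard s : shardsOnNode) {
            if (s.state() == State.INITIALIZING && s.hasRelocationSource()) {
                totalSize += s.expectedSize(); // incoming relocation target
            } else if (subtractShardsMovingAway && s.state() == State.RELOCATING) {
                totalSize -= s.expectedSize(); // outgoing relocation source
            }
        }
        return totalSize;
    }

    public static void main(String[] args) {
        List<Shard> shards = List.of(
            new Shard(State.INITIALIZING, true, 100),  // relocating in
            new Shard(State.INITIALIZING, false, 50),  // fresh allocation: ignored
            new Shard(State.RELOCATING, false, 30));   // relocating away
        System.out.println(sizeOfRelocatingShards(shards, true));  // 70
        System.out.println(sizeOfRelocatingShards(shards, false)); // 100
    }
}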

Aggregations

ClusterInfo (org.elasticsearch.cluster.ClusterInfo): 29 uses
Matchers.containsString (org.hamcrest.Matchers.containsString): 16 uses
IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData): 15 uses
ClusterState (org.elasticsearch.cluster.ClusterState): 14 uses
MetaData (org.elasticsearch.cluster.metadata.MetaData): 14 uses
DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode): 14 uses
RoutingTable (org.elasticsearch.cluster.routing.RoutingTable): 14 uses
ShardId (org.elasticsearch.index.shard.ShardId): 14 uses
UnassignedInfo (org.elasticsearch.cluster.routing.UnassignedInfo): 13 uses
DiskUsage (org.elasticsearch.cluster.DiskUsage): 12 uses
ClusterInfoService (org.elasticsearch.cluster.ClusterInfoService): 11 uses
DevNullClusterInfo (org.elasticsearch.cluster.MockInternalClusterInfoService.DevNullClusterInfo): 11 uses
ShardRouting (org.elasticsearch.cluster.routing.ShardRouting): 11 uses
ImmutableOpenMap (org.elasticsearch.common.collect.ImmutableOpenMap): 11 uses
ShardRoutingState (org.elasticsearch.cluster.routing.ShardRoutingState): 9 uses
AllocateUnassignedDecision (org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision): 9 uses
MoveDecision (org.elasticsearch.cluster.routing.allocation.MoveDecision): 9 uses
ClusterSettings (org.elasticsearch.common.settings.ClusterSettings): 9 uses
XContentParser (org.elasticsearch.common.xcontent.XContentParser): 9 uses
IndexShardRoutingTable (org.elasticsearch.cluster.routing.IndexShardRoutingTable): 8 uses