Use of org.elasticsearch.cluster.ClusterInfo in project elasticsearch by elastic.
In the class MockDiskUsagesIT, the method testRerouteOccursOnDiskPassingHighWatermark:
public void testRerouteOccursOnDiskPassingHighWatermark() throws Exception {
    List<String> nodes = internalCluster().startNodes(3);
    // Wait for all 3 nodes to be up
    assertBusy(new Runnable() {
        @Override
        public void run() {
            NodesStatsResponse resp = client().admin().cluster().prepareNodesStats().get();
            assertThat(resp.getNodes().size(), equalTo(3));
        }
    });
    // Start with all nodes at 50% usage
    final MockInternalClusterInfoService cis = (MockInternalClusterInfoService)
            internalCluster().getInstance(ClusterInfoService.class, internalCluster().getMasterName());
    cis.setUpdateFrequency(TimeValue.timeValueMillis(200));
    cis.onMaster();
    cis.setN1Usage(nodes.get(0), new DiskUsage(nodes.get(0), "n1", "/dev/null", 100, 50));
    cis.setN2Usage(nodes.get(1), new DiskUsage(nodes.get(1), "n2", "/dev/null", 100, 50));
    cis.setN3Usage(nodes.get(2), new DiskUsage(nodes.get(2), "n3", "/dev/null", 100, 50));
    client().admin().cluster().prepareUpdateSettings()
        .setTransientSettings(Settings.builder()
            .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), randomFrom("20b", "80%"))
            .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), randomFrom("10b", "90%"))
            .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.getKey(), "1ms"))
        .get();
    // Create an index with 10 shards so we can check allocation for it
    prepareCreate("test").setSettings(Settings.builder()
        .put("number_of_shards", 10)
        .put("number_of_replicas", 0)
        .put("index.routing.allocation.exclude._name", "")).get();
    ensureGreen("test");
    // Block until the "fake" cluster info is retrieved at least once
    assertBusy(new Runnable() {
        @Override
        public void run() {
            ClusterInfo info = cis.getClusterInfo();
            logger.info("--> got: {} nodes", info.getNodeLeastAvailableDiskUsages().size());
            assertThat(info.getNodeLeastAvailableDiskUsages().size(), greaterThan(0));
        }
    });
    final List<String> realNodeNames = new ArrayList<>();
    ClusterStateResponse resp = client().admin().cluster().prepareState().get();
    Iterator<RoutingNode> iter = resp.getState().getRoutingNodes().iterator();
    while (iter.hasNext()) {
        RoutingNode node = iter.next();
        realNodeNames.add(node.nodeId());
        logger.info("--> node {} has {} shards", node.nodeId(),
                resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
    }
    // Update the disk usages so one node has now passed the high watermark
    cis.setN1Usage(realNodeNames.get(0), new DiskUsage(nodes.get(0), "n1", "_na_", 100, 50));
    cis.setN2Usage(realNodeNames.get(1), new DiskUsage(nodes.get(1), "n2", "_na_", 100, 50));
    // nothing free on node3
    cis.setN3Usage(realNodeNames.get(2), new DiskUsage(nodes.get(2), "n3", "_na_", 100, 0));
    // Retrieve the count of shards on each node
    final Map<String, Integer> nodesToShardCount = new HashMap<>();
    assertBusy(new Runnable() {
        @Override
        public void run() {
            ClusterStateResponse resp = client().admin().cluster().prepareState().get();
            Iterator<RoutingNode> iter = resp.getState().getRoutingNodes().iterator();
            while (iter.hasNext()) {
                RoutingNode node = iter.next();
                logger.info("--> node {} has {} shards", node.nodeId(),
                        resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
                nodesToShardCount.put(node.nodeId(),
                        resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
            }
            assertThat("node1 has 5 shards", nodesToShardCount.get(realNodeNames.get(0)), equalTo(5));
            assertThat("node2 has 5 shards", nodesToShardCount.get(realNodeNames.get(1)), equalTo(5));
            assertThat("node3 has 0 shards", nodesToShardCount.get(realNodeNames.get(2)), equalTo(0));
        }
    });
    // Update the disk usages so one node is now back under the high watermark
    cis.setN1Usage(realNodeNames.get(0), new DiskUsage(nodes.get(0), "n1", "_na_", 100, 50));
    cis.setN2Usage(realNodeNames.get(1), new DiskUsage(nodes.get(1), "n2", "_na_", 100, 50));
    // node3 has free space now
    cis.setN3Usage(realNodeNames.get(2), new DiskUsage(nodes.get(2), "n3", "_na_", 100, 50));
    // Retrieve the count of shards on each node
    nodesToShardCount.clear();
    assertBusy(new Runnable() {
        @Override
        public void run() {
            ClusterStateResponse resp = client().admin().cluster().prepareState().get();
            Iterator<RoutingNode> iter = resp.getState().getRoutingNodes().iterator();
            while (iter.hasNext()) {
                RoutingNode node = iter.next();
                logger.info("--> node {} has {} shards", node.nodeId(),
                        resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
                nodesToShardCount.put(node.nodeId(),
                        resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
            }
            assertThat("node1 has at least 3 shards", nodesToShardCount.get(realNodeNames.get(0)), greaterThanOrEqualTo(3));
            assertThat("node2 has at least 3 shards", nodesToShardCount.get(realNodeNames.get(1)), greaterThanOrEqualTo(3));
            assertThat("node3 has at least 3 shards", nodesToShardCount.get(realNodeNames.get(2)), greaterThanOrEqualTo(3));
        }
    });
}
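The DiskUsage objects above encode (total, free) in raw bytes, so a 100/50 node sits at 50% usage and a 100/0 node has breached any high watermark, which is what drives the shard counts the test asserts. Below is a minimal standalone sketch of that percentage math, assuming the same (totalBytes, freeBytes) semantics as the constructor calls above; the zero-total edge case is an assumption of the sketch, not quoted from the DiskUsage class.

    // Minimal standalone sketch (not the DiskUsage class itself) of the percentage
    // math the mock values above exercise.
    final class DiskUsageMathSketch {
        static double freeDiskAsPercentage(long totalBytes, long freeBytes) {
            if (totalBytes == 0) {
                // Treat an unreported total as fully free; an assumption for the sketch.
                return 100.0;
            }
            return 100.0 * ((double) freeBytes / totalBytes);
        }

        public static void main(String[] args) {
            System.out.println(freeDiskAsPercentage(100, 50)); // 50.0 -> healthy
            System.out.println(freeDiskAsPercentage(100, 0));  // 0.0  -> over any high watermark; shards move off
        }
    }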
Use of org.elasticsearch.cluster.ClusterInfo in project elasticsearch by elastic.
In the class DiskThresholdDecider, the method canAllocate:
@Override
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
    ClusterInfo clusterInfo = allocation.clusterInfo();
    ImmutableOpenMap<String, DiskUsage> usages = clusterInfo.getNodeMostAvailableDiskUsages();
    final Decision decision = earlyTerminate(allocation, usages);
    if (decision != null) {
        return decision;
    }
    final double usedDiskThresholdLow = 100.0 - diskThresholdSettings.getFreeDiskThresholdLow();
    final double usedDiskThresholdHigh = 100.0 - diskThresholdSettings.getFreeDiskThresholdHigh();
    // subtractLeavingShards is passed as false here, because the leaving shards still use disk space,
    // and therefore we should be extra careful and take their size into account
    DiskUsage usage = getDiskUsage(node, allocation, usages, false);
    // First, check whether the node is currently over the low watermark
    double freeDiskPercentage = usage.getFreeDiskAsPercentage();
    // Cache the used disk percentage for displaying disk percentages consistent with documentation
    double usedDiskPercentage = usage.getUsedDiskAsPercentage();
    long freeBytes = usage.getFreeBytes();
    if (logger.isTraceEnabled()) {
        logger.trace("node [{}] has {}% used disk", node.nodeId(), usedDiskPercentage);
    }
    // Flag that determines whether the low-threshold checks below can be skipped. We use this for a
    // primary shard that is freshly allocated and empty.
    boolean skipLowThresholdChecks = shardRouting.primary()
            && shardRouting.active() == false
            && shardRouting.recoverySource().getType() == RecoverySource.Type.EMPTY_STORE;
    // Checks for exact byte comparisons
    if (freeBytes < diskThresholdSettings.getFreeBytesThresholdLow().getBytes()) {
        if (skipLowThresholdChecks == false) {
            if (logger.isDebugEnabled()) {
                logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, preventing allocation",
                        diskThresholdSettings.getFreeBytesThresholdLow(), freeBytes, node.nodeId());
            }
            return allocation.decision(Decision.NO, NAME,
                    "the node is above the low watermark cluster setting [%s=%s], having less than the minimum required [%s] free "
                            + "space, actual free: [%s]",
                    CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(),
                    diskThresholdSettings.getLowWatermarkRaw(),
                    diskThresholdSettings.getFreeBytesThresholdLow(), new ByteSizeValue(freeBytes));
        } else if (freeBytes > diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
            // Allow allocation: this is an empty primary that has never been allocated, and the node
            // is still under the high watermark
            if (logger.isDebugEnabled()) {
                logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, "
                        + "but allowing allocation because primary has never been allocated",
                        diskThresholdSettings.getFreeBytesThresholdLow(), freeBytes, node.nodeId());
            }
            return allocation.decision(Decision.YES, NAME,
                    "the node is above the low watermark, but less than the high watermark, and this primary shard has "
                            + "never been allocated before");
        } else {
            // Above the high watermark, so don't allow allocating the shard
            if (logger.isDebugEnabled()) {
                logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, "
                        + "preventing allocation even though primary has never been allocated",
                        diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytes, node.nodeId());
            }
            return allocation.decision(Decision.NO, NAME,
                    "the node is above the high watermark cluster setting [%s=%s], having less than the minimum required [%s] free "
                            + "space, actual free: [%s]",
                    CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(),
                    diskThresholdSettings.getHighWatermarkRaw(),
                    diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytes));
        }
    }
    // Checks for percentage comparisons
    if (freeDiskPercentage < diskThresholdSettings.getFreeDiskThresholdLow()) {
        // If the shard is a replica or is a non-empty primary, check the low threshold
        if (skipLowThresholdChecks == false) {
            if (logger.isDebugEnabled()) {
                logger.debug("more than the allowed {} used disk threshold ({} used) on node [{}], preventing allocation",
                        Strings.format1Decimals(usedDiskThresholdLow, "%"),
                        Strings.format1Decimals(usedDiskPercentage, "%"), node.nodeId());
            }
            return allocation.decision(Decision.NO, NAME,
                    "the node is above the low watermark cluster setting [%s=%s], using more disk space than the maximum allowed "
                            + "[%s%%], actual free: [%s%%]",
                    CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(),
                    diskThresholdSettings.getLowWatermarkRaw(), usedDiskThresholdLow, freeDiskPercentage);
        } else if (freeDiskPercentage > diskThresholdSettings.getFreeDiskThresholdHigh()) {
            // Allow allocation: this is an empty primary that has never been allocated, and the node
            // is still under the high watermark
            if (logger.isDebugEnabled()) {
                logger.debug("more than the allowed {} used disk threshold ({} used) on node [{}], "
                        + "but allowing allocation because primary has never been allocated",
                        Strings.format1Decimals(usedDiskThresholdLow, "%"),
                        Strings.format1Decimals(usedDiskPercentage, "%"), node.nodeId());
            }
            return allocation.decision(Decision.YES, NAME,
                    "the node is above the low watermark, but less than the high watermark, and this primary shard has "
                            + "never been allocated before");
        } else {
            // Above the high watermark, so don't allow allocating the shard
            if (logger.isDebugEnabled()) {
                logger.debug("less than the required {} free disk threshold ({} free) on node {}, "
                        + "preventing allocation even though primary has never been allocated",
                        Strings.format1Decimals(diskThresholdSettings.getFreeDiskThresholdHigh(), "%"),
                        Strings.format1Decimals(freeDiskPercentage, "%"), node.nodeId());
            }
            return allocation.decision(Decision.NO, NAME,
                    "the node is above the high watermark cluster setting [%s=%s], using more disk space than the maximum allowed "
                            + "[%s%%], actual free: [%s%%]",
                    CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(),
                    diskThresholdSettings.getHighWatermarkRaw(), usedDiskThresholdHigh, freeDiskPercentage);
        }
    }
    // Secondly, check that allocating the shard to this node doesn't put the node above the high watermark
    final long shardSize = getExpectedShardSize(shardRouting, allocation, 0);
    double freeSpaceAfterShard = freeDiskPercentageAfterShardAssigned(usage, shardSize);
    long freeBytesAfterShard = freeBytes - shardSize;
    if (freeBytesAfterShard < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
        logger.warn("after allocating, node [{}] would have less than the required "
                + "{} free bytes threshold ({} bytes free), preventing allocation",
                node.nodeId(), diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesAfterShard);
        return allocation.decision(Decision.NO, NAME,
                "allocating the shard to this node will bring the node above the high watermark cluster setting [%s=%s] "
                        + "and cause it to have less than the minimum required [%s] of free space (free bytes after shard added: [%s])",
                CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(),
                diskThresholdSettings.getHighWatermarkRaw(),
                diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytesAfterShard));
    }
    if (freeSpaceAfterShard < diskThresholdSettings.getFreeDiskThresholdHigh()) {
        logger.warn("after allocating, node [{}] would have less than the required "
                + "{} free disk threshold ({} free), preventing allocation",
                node.nodeId(), Strings.format1Decimals(diskThresholdSettings.getFreeDiskThresholdHigh(), "%"),
                Strings.format1Decimals(freeSpaceAfterShard, "%"));
        return allocation.decision(Decision.NO, NAME,
                "allocating the shard to this node will bring the node above the high watermark cluster setting [%s=%s] "
                        + "and cause it to use more disk space than the maximum allowed [%s%%] (free space after shard added: [%s%%])",
                CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(),
                diskThresholdSettings.getHighWatermarkRaw(), usedDiskThresholdHigh, freeSpaceAfterShard);
    }
    return allocation.decision(Decision.YES, NAME,
            "enough disk for shard on node, free: [%s], shard size: [%s], free after allocating shard: [%s]",
            new ByteSizeValue(freeBytes), new ByteSizeValue(shardSize), new ByteSizeValue(freeBytesAfterShard));
}
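Stripped of logging and message formatting, the decision above reduces to a two-tier check: absolute byte thresholds first, then percentage thresholds, with the low watermark waived for an empty primary that has never been allocated. The following self-contained sketch uses hypothetical names (Verdict, WatermarkSketch, skipLowChecks); it illustrates the branching and is not the decider's API.

    // Hypothetical standalone sketch of the watermark branching above.
    enum Verdict { YES, NO }

    final class WatermarkSketch {
        static Verdict canAllocate(long freeBytes, long lowWatermarkBytes, long highWatermarkBytes,
                                   double freePct, double lowWatermarkFreePct, double highWatermarkFreePct,
                                   boolean skipLowChecks) {
            // Tier 1: absolute byte thresholds.
            if (freeBytes < lowWatermarkBytes) {
                if (!skipLowChecks) return Verdict.NO;                  // ordinary shard: low watermark applies
                if (freeBytes > highWatermarkBytes) return Verdict.YES; // empty primary, still under high
                return Verdict.NO;                                      // even the high watermark is breached
            }
            // Tier 2: percentage thresholds, same skip rule for a never-allocated empty primary.
            if (freePct < lowWatermarkFreePct) {
                if (!skipLowChecks) return Verdict.NO;
                if (freePct > highWatermarkFreePct) return Verdict.YES;
                return Verdict.NO;
            }
            // The real decider additionally simulates adding the shard's expected size and
            // re-checks the high watermark; that step is omitted from this sketch.
            return Verdict.YES;
        }
    }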
Use of org.elasticsearch.cluster.ClusterInfo in project elasticsearch by elastic.
In the class DiskThresholdDecider, the method getExpectedShardSize:
/**
 * Returns the expected shard size for the given shard, or the provided default value if not enough
 * information is available to estimate the shard's size.
 */
public static long getExpectedShardSize(ShardRouting shard, RoutingAllocation allocation, long defaultValue) {
    final IndexMetaData metaData = allocation.metaData().getIndexSafe(shard.index());
    final ClusterInfo info = allocation.clusterInfo();
    if (metaData.getMergeSourceIndex() != null && shard.active() == false
            && shard.recoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS) {
        // In the shrink-index case we sum up the source index shards, since in the worst case we
        // essentially make a copy of each source shard
        long targetShardSize = 0;
        final Index mergeSourceIndex = metaData.getMergeSourceIndex();
        final IndexMetaData sourceIndexMeta = allocation.metaData().getIndexSafe(mergeSourceIndex);
        final Set<ShardId> shardIds = IndexMetaData.selectShrinkShards(shard.id(), sourceIndexMeta, metaData.getNumberOfShards());
        for (IndexShardRoutingTable shardRoutingTable : allocation.routingTable().index(mergeSourceIndex.getName())) {
            if (shardIds.contains(shardRoutingTable.shardId())) {
                targetShardSize += info.getShardSize(shardRoutingTable.primaryShard(), 0);
            }
        }
        return targetShardSize == 0 ? defaultValue : targetShardSize;
    } else {
        return info.getShardSize(shard, defaultValue);
    }
}
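In the shrink case, IndexMetaData.selectShrinkShards picks the group of source shards that collapse into the given target shard, and the loop above sums those primaries' sizes. A rough standalone sketch of that grouping arithmetic follows, under the assumption that each target shard absorbs a contiguous block of source shards; the authoritative logic lives in IndexMetaData.selectShrinkShards.

    import java.util.Set;
    import java.util.TreeSet;

    // Rough sketch of the shrink-shard selection arithmetic; an assumption for
    // illustration, not the IndexMetaData implementation.
    final class ShrinkMappingSketch {
        static Set<Integer> sourceShardsFor(int targetShardId, int numSourceShards, int numTargetShards) {
            // Shrinking requires numSourceShards to be a multiple of numTargetShards;
            // each target shard then absorbs a contiguous group of source shards.
            int ratio = numSourceShards / numTargetShards;
            Set<Integer> sources = new TreeSet<>();
            for (int i = targetShardId * ratio; i < (targetShardId + 1) * ratio; i++) {
                sources.add(i);
            }
            return sources;
        }

        public static void main(String[] args) {
            // Shrinking an 8-shard index to 2 shards: target shard 1 copies source shards 4..7,
            // so its expected size is the sum of those primaries' sizes.
            System.out.println(sourceShardsFor(1, 8, 2)); // [4, 5, 6, 7]
        }
    }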
Use of org.elasticsearch.cluster.ClusterInfo in project elasticsearch by elastic.
In the class DiskThresholdDecider, the method sizeOfRelocatingShards:
/**
 * Returns the size of all shards that are currently being relocated to the node,
 * but may not be finished transferring yet.
 *
 * If subtractShardsMovingAway is true, then the size of shards moving away is
 * subtracted from the total size of all shards.
 */
static long sizeOfRelocatingShards(RoutingNode node, RoutingAllocation allocation,
                                   boolean subtractShardsMovingAway, String dataPath) {
    ClusterInfo clusterInfo = allocation.clusterInfo();
    long totalSize = 0;
    for (ShardRouting routing : node.shardsWithState(ShardRoutingState.RELOCATING, ShardRoutingState.INITIALIZING)) {
        String actualPath = clusterInfo.getDataPath(routing);
        if (dataPath.equals(actualPath)) {
            if (routing.initializing() && routing.relocatingNodeId() != null) {
                // An INITIALIZING shard with a relocation source is inbound: count its expected size
                totalSize += getExpectedShardSize(routing, allocation, 0);
            } else if (subtractShardsMovingAway && routing.relocating()) {
                // A RELOCATING shard is outbound: optionally credit the space it will free up
                totalSize -= getExpectedShardSize(routing, allocation, 0);
            }
        }
    }
    return totalSize;
}
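A small worked example of this accounting, with hypothetical shard sizes: an inbound INITIALIZING shard adds its expected size, while an outbound RELOCATING shard is subtracted only when subtractShardsMovingAway is true.

    // Hypothetical worked example of sizeOfRelocatingShards' accounting; the
    // sizes and variable names are illustrative assumptions.
    final class RelocationAccountingExample {
        public static void main(String[] args) {
            long incomingInitializing = 5L * 1024 * 1024 * 1024; // 5 GiB relocating onto the node
            long outgoingRelocating   = 3L * 1024 * 1024 * 1024; // 3 GiB relocating away

            long withoutCredit = incomingInitializing;                      // subtractShardsMovingAway == false
            long withCredit    = incomingInitializing - outgoingRelocating; // subtractShardsMovingAway == true

            System.out.println(withoutCredit); // 5 GiB in bytes: only inbound traffic counted
            System.out.println(withCredit);    // 2 GiB in bytes: departing shard's space credited back
        }
    }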