use of org.elasticsearch.cluster.DiskUsage in project elasticsearch by elastic.
the class MockDiskUsagesIT method testRerouteOccursOnDiskPassingHighWatermark.
public void testRerouteOccursOnDiskPassingHighWatermark() throws Exception {
List<String> nodes = internalCluster().startNodes(3);
// Wait for all 3 nodes to be up
assertBusy(new Runnable() {
@Override
public void run() {
NodesStatsResponse resp = client().admin().cluster().prepareNodesStats().get();
assertThat(resp.getNodes().size(), equalTo(3));
}
});
// Start with all nodes at 50% usage
final MockInternalClusterInfoService cis = (MockInternalClusterInfoService) internalCluster().getInstance(ClusterInfoService.class, internalCluster().getMasterName());
cis.setUpdateFrequency(TimeValue.timeValueMillis(200));
cis.onMaster();
cis.setN1Usage(nodes.get(0), new DiskUsage(nodes.get(0), "n1", "/dev/null", 100, 50));
cis.setN2Usage(nodes.get(1), new DiskUsage(nodes.get(1), "n2", "/dev/null", 100, 50));
cis.setN3Usage(nodes.get(2), new DiskUsage(nodes.get(2), "n3", "/dev/null", 100, 50));
client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder().put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), randomFrom("20b", "80%")).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), randomFrom("10b", "90%")).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.getKey(), "1ms")).get();
// Create an index with 10 shards so we can check allocation for it
prepareCreate("test").setSettings(Settings.builder().put("number_of_shards", 10).put("number_of_replicas", 0).put("index.routing.allocation.exclude._name", "")).get();
ensureGreen("test");
// Block until the "fake" cluster info is retrieved at least once
assertBusy(new Runnable() {
@Override
public void run() {
ClusterInfo info = cis.getClusterInfo();
logger.info("--> got: {} nodes", info.getNodeLeastAvailableDiskUsages().size());
assertThat(info.getNodeLeastAvailableDiskUsages().size(), greaterThan(0));
}
});
final List<String> realNodeNames = new ArrayList<>();
ClusterStateResponse resp = client().admin().cluster().prepareState().get();
Iterator<RoutingNode> iter = resp.getState().getRoutingNodes().iterator();
while (iter.hasNext()) {
RoutingNode node = iter.next();
realNodeNames.add(node.nodeId());
logger.info("--> node {} has {} shards", node.nodeId(), resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
}
// Update the disk usages so one node has now passed the high watermark
cis.setN1Usage(realNodeNames.get(0), new DiskUsage(nodes.get(0), "n1", "_na_", 100, 50));
cis.setN2Usage(realNodeNames.get(1), new DiskUsage(nodes.get(1), "n2", "_na_", 100, 50));
// nothing free on node3
cis.setN3Usage(realNodeNames.get(2), new DiskUsage(nodes.get(2), "n3", "_na_", 100, 0));
// Retrieve the count of shards on each node
final Map<String, Integer> nodesToShardCount = new HashMap<>();
assertBusy(new Runnable() {
@Override
public void run() {
ClusterStateResponse resp = client().admin().cluster().prepareState().get();
Iterator<RoutingNode> iter = resp.getState().getRoutingNodes().iterator();
while (iter.hasNext()) {
RoutingNode node = iter.next();
logger.info("--> node {} has {} shards", node.nodeId(), resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
nodesToShardCount.put(node.nodeId(), resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
}
assertThat("node1 has 5 shards", nodesToShardCount.get(realNodeNames.get(0)), equalTo(5));
assertThat("node2 has 5 shards", nodesToShardCount.get(realNodeNames.get(1)), equalTo(5));
assertThat("node3 has 0 shards", nodesToShardCount.get(realNodeNames.get(2)), equalTo(0));
}
});
// Update the disk usages so one node is now back under the high watermark
cis.setN1Usage(realNodeNames.get(0), new DiskUsage(nodes.get(0), "n1", "_na_", 100, 50));
cis.setN2Usage(realNodeNames.get(1), new DiskUsage(nodes.get(1), "n2", "_na_", 100, 50));
// node3 has free space now
cis.setN3Usage(realNodeNames.get(2), new DiskUsage(nodes.get(2), "n3", "_na_", 100, 50));
// Retrieve the count of shards on each node
nodesToShardCount.clear();
assertBusy(new Runnable() {
@Override
public void run() {
ClusterStateResponse resp = client().admin().cluster().prepareState().get();
Iterator<RoutingNode> iter = resp.getState().getRoutingNodes().iterator();
while (iter.hasNext()) {
RoutingNode node = iter.next();
logger.info("--> node {} has {} shards", node.nodeId(), resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
nodesToShardCount.put(node.nodeId(), resp.getState().getRoutingNodes().node(node.nodeId()).numberOfOwningShards());
}
assertThat("node1 has at least 3 shards", nodesToShardCount.get(realNodeNames.get(0)), greaterThanOrEqualTo(3));
assertThat("node2 has at least 3 shards", nodesToShardCount.get(realNodeNames.get(1)), greaterThanOrEqualTo(3));
assertThat("node3 has at least 3 shards", nodesToShardCount.get(realNodeNames.get(2)), greaterThanOrEqualTo(3));
}
});
}
use of org.elasticsearch.cluster.DiskUsage in project crate by crate.
the class DiskThresholdDecider method canRemain.
@Override
public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
if (shardRouting.currentNodeId().equals(node.nodeId()) == false) {
throw new IllegalArgumentException("Shard [" + shardRouting + "] is not allocated on node: [" + node.nodeId() + "]");
}
final ClusterInfo clusterInfo = allocation.clusterInfo();
final ImmutableOpenMap<String, DiskUsage> usages = clusterInfo.getNodeLeastAvailableDiskUsages();
final Decision decision = earlyTerminate(allocation, usages);
if (decision != null) {
return decision;
}
// subtractLeavingShards is passed as true here, since this is only for shards remaining, we will *eventually* have enough disk
// since shards are moving away. No new shards will be incoming since in canAllocate we pass false for this check.
final DiskUsageWithRelocations usage = getDiskUsage(node, allocation, usages, true);
final String dataPath = clusterInfo.getDataPath(shardRouting);
// If this node is already above the high threshold, the shard cannot remain (get it off!)
final double freeDiskPercentage = usage.getFreeDiskAsPercentage();
final long freeBytes = usage.getFreeBytes();
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("node [{}] has {}% free disk ({} bytes)", node.nodeId(), freeDiskPercentage, freeBytes);
}
if (dataPath == null || usage.getPath().equals(dataPath) == false) {
return allocation.decision(Decision.YES, NAME, "this shard is not allocated on the most utilized disk and can remain");
}
if (freeBytes < 0L) {
final long sizeOfRelocatingShards = sizeOfRelocatingShards(node, true, usage.getPath(), allocation.clusterInfo(), allocation.metadata(), allocation.routingTable());
LOGGER.debug("fewer free bytes remaining than the size of all incoming shards: " + "usage {} on node {} including {} bytes of relocations, shard cannot remain", usage, node.nodeId(), sizeOfRelocatingShards);
return allocation.decision(Decision.NO, NAME, "the shard cannot remain on this node because the node has fewer free bytes remaining than the total size of all " + "incoming shards: free space [%s], relocating shards [%s]", freeBytes + sizeOfRelocatingShards, sizeOfRelocatingShards);
}
if (freeBytes < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, shard cannot remain", diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytes, node.nodeId());
}
return allocation.decision(Decision.NO, NAME, "the shard cannot remain on this node because it is above the high watermark cluster setting [%s=%s] " + "and there is less than the required [%s] free space on node, actual free: [%s]", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytes));
}
if (freeDiskPercentage < diskThresholdSettings.getFreeDiskThresholdHigh()) {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("less than the required {}% free disk threshold ({}% free) on node {}, shard cannot remain", diskThresholdSettings.getFreeDiskThresholdHigh(), freeDiskPercentage, node.nodeId());
}
return allocation.decision(Decision.NO, NAME, "the shard cannot remain on this node because it is above the high watermark cluster setting [%s=%s] " + "and there is less than the required [%s%%] free disk on node, actual free: [%s%%]", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeDiskThresholdHigh(), freeDiskPercentage);
}
return allocation.decision(Decision.YES, NAME, "there is enough disk on this node for the shard to remain, free: [%s]", new ByteSizeValue(freeBytes));
}
use of org.elasticsearch.cluster.DiskUsage in project crate by crate.
the class DiskThresholdDecider method canAllocate.
@Override
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
ClusterInfo clusterInfo = allocation.clusterInfo();
ImmutableOpenMap<String, DiskUsage> usages = clusterInfo.getNodeMostAvailableDiskUsages();
final Decision decision = earlyTerminate(allocation, usages);
if (decision != null) {
return decision;
}
final double usedDiskThresholdLow = 100.0 - diskThresholdSettings.getFreeDiskThresholdLow();
final double usedDiskThresholdHigh = 100.0 - diskThresholdSettings.getFreeDiskThresholdHigh();
// subtractLeavingShards is passed as false here, because they still use disk space, and therefore should we should be extra careful
// and take the size into account
final DiskUsageWithRelocations usage = getDiskUsage(node, allocation, usages, false);
// First, check that the node currently over the low watermark
// Cache the used disk percentage for displaying disk percentages consistent with documentation
double usedDiskPercentage = usage.getUsedDiskAsPercentage();
long freeBytes = usage.getFreeBytes();
if (freeBytes < 0L) {
final long sizeOfRelocatingShards = sizeOfRelocatingShards(node, false, usage.getPath(), allocation.clusterInfo(), allocation.metadata(), allocation.routingTable());
LOGGER.debug("fewer free bytes remaining than the size of all incoming shards: " + "usage {} on node {} including {} bytes of relocations, preventing allocation", usage, node.nodeId(), sizeOfRelocatingShards);
return allocation.decision(Decision.NO, NAME, "the node has fewer free bytes remaining than the total size of all incoming shards: " + "free space [%sB], relocating shards [%sB]", freeBytes + sizeOfRelocatingShards, sizeOfRelocatingShards);
}
ByteSizeValue freeBytesValue = new ByteSizeValue(freeBytes);
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("node [{}] has {}% used disk", node.nodeId(), usedDiskPercentage);
}
// flag that determines whether the low threshold checks below can be skipped. We use this for a primary shard that is freshly
// allocated and empty.
boolean skipLowThresholdChecks = shardRouting.primary() && shardRouting.active() == false && shardRouting.recoverySource().getType() == RecoverySource.Type.EMPTY_STORE;
// checks for exact byte comparisons
if (freeBytes < diskThresholdSettings.getFreeBytesThresholdLow().getBytes()) {
if (skipLowThresholdChecks == false) {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("less than the required {} free bytes threshold ({} free) on node {}, preventing allocation", diskThresholdSettings.getFreeBytesThresholdLow(), freeBytesValue, node.nodeId());
}
return allocation.decision(Decision.NO, NAME, "the node is above the low watermark cluster setting [%s=%s], having less than the minimum required [%s] free " + "space, actual free: [%s]", CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getLowWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdLow(), freeBytesValue);
} else if (freeBytes > diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
// has never been allocated if it's under the high watermark
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("less than the required {} free bytes threshold ({} free) on node {}, " + "but allowing allocation because primary has never been allocated", diskThresholdSettings.getFreeBytesThresholdLow(), freeBytesValue, node.nodeId());
}
return allocation.decision(Decision.YES, NAME, "the node is above the low watermark, but less than the high watermark, and this primary shard has " + "never been allocated before");
} else {
// above the high watermark, so don't allow allocating the shard
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("less than the required {} free bytes threshold ({} free) on node {}, " + "preventing allocation even though primary has never been allocated", diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesValue, node.nodeId());
}
return allocation.decision(Decision.NO, NAME, "the node is above the high watermark cluster setting [%s=%s], having less than the minimum required [%s] free " + "space, actual free: [%s]", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesValue);
}
}
double freeDiskPercentage = usage.getFreeDiskAsPercentage();
// checks for percentage comparisons
if (freeDiskPercentage < diskThresholdSettings.getFreeDiskThresholdLow()) {
// If the shard is a replica or is a non-empty primary, check the low threshold
if (skipLowThresholdChecks == false) {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("more than the allowed {} used disk threshold ({} used) on node [{}], preventing allocation", Strings.format1Decimals(usedDiskThresholdLow, "%"), Strings.format1Decimals(usedDiskPercentage, "%"), node.nodeId());
}
return allocation.decision(Decision.NO, NAME, "the node is above the low watermark cluster setting [%s=%s], using more disk space than the maximum allowed " + "[%s%%], actual free: [%s%%]", CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getLowWatermarkRaw(), usedDiskThresholdLow, freeDiskPercentage);
} else if (freeDiskPercentage > diskThresholdSettings.getFreeDiskThresholdHigh()) {
// has never been allocated if it's under the high watermark
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("more than the allowed {} used disk threshold ({} used) on node [{}], " + "but allowing allocation because primary has never been allocated", Strings.format1Decimals(usedDiskThresholdLow, "%"), Strings.format1Decimals(usedDiskPercentage, "%"), node.nodeId());
}
return allocation.decision(Decision.YES, NAME, "the node is above the low watermark, but less than the high watermark, and this primary shard has " + "never been allocated before");
} else {
// above the high watermark, so don't allow allocating the shard
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, " + "preventing allocation even though primary has never been allocated", Strings.format1Decimals(diskThresholdSettings.getFreeDiskThresholdHigh(), "%"), Strings.format1Decimals(freeDiskPercentage, "%"), node.nodeId());
}
return allocation.decision(Decision.NO, NAME, "the node is above the high watermark cluster setting [%s=%s], using more disk space than the maximum allowed " + "[%s%%], actual free: [%s%%]", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), usedDiskThresholdHigh, freeDiskPercentage);
}
}
// Secondly, check that allocating the shard to this node doesn't put it above the high watermark
final long shardSize = getExpectedShardSize(shardRouting, 0L, allocation.clusterInfo(), allocation.metadata(), allocation.routingTable());
assert shardSize >= 0 : shardSize;
double freeSpaceAfterShard = freeDiskPercentageAfterShardAssigned(usage, shardSize);
long freeBytesAfterShard = freeBytes - shardSize;
if (freeBytesAfterShard < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
LOGGER.warn("after allocating, node [{}] would have less than the required threshold of " + "{} free (currently {} free, estimated shard size is {}), preventing allocation", node.nodeId(), diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesValue, new ByteSizeValue(shardSize));
return allocation.decision(Decision.NO, NAME, "allocating the shard to this node will bring the node above the high watermark cluster setting [%s=%s] " + "and cause it to have less than the minimum required [%s] of free space (free: [%s], estimated shard size: [%s])", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesValue, new ByteSizeValue(shardSize));
}
if (freeSpaceAfterShard < diskThresholdSettings.getFreeDiskThresholdHigh()) {
LOGGER.warn("after allocating, node [{}] would have more than the allowed " + "{} free disk threshold ({} free), preventing allocation", node.nodeId(), Strings.format1Decimals(diskThresholdSettings.getFreeDiskThresholdHigh(), "%"), Strings.format1Decimals(freeSpaceAfterShard, "%"));
return allocation.decision(Decision.NO, NAME, "allocating the shard to this node will bring the node above the high watermark cluster setting [%s=%s] " + "and cause it to use more disk space than the maximum allowed [%s%%] (free space after shard added: [%s%%])", CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), diskThresholdSettings.getHighWatermarkRaw(), usedDiskThresholdHigh, freeSpaceAfterShard);
}
assert freeBytesAfterShard >= 0 : freeBytesAfterShard;
return allocation.decision(Decision.YES, NAME, "enough disk for shard on node, free: [%s], shard size: [%s], free after allocating shard: [%s]", freeBytesValue, new ByteSizeValue(shardSize), new ByteSizeValue(freeBytesAfterShard));
}
use of org.elasticsearch.cluster.DiskUsage in project crate by crate.
the class DiskThresholdDecider method freeDiskPercentageAfterShardAssigned.
/**
* Given the DiskUsage for a node and the size of the shard, return the
* percentage of free disk if the shard were to be allocated to the node.
* @param usage A DiskUsage for the node to have space computed for
* @param shardSize Size in bytes of the shard
* @return Percentage of free space after the shard is assigned to the node
*/
double freeDiskPercentageAfterShardAssigned(DiskUsageWithRelocations usage, Long shardSize) {
shardSize = (shardSize == null) ? 0 : shardSize;
DiskUsage newUsage = new DiskUsage(usage.getNodeId(), usage.getNodeName(), usage.getPath(), usage.getTotalBytes(), usage.getFreeBytes() - shardSize);
return newUsage.getFreeDiskAsPercentage();
}
use of org.elasticsearch.cluster.DiskUsage in project crate by crate.
the class FsProbe method stats.
public FsInfo stats(FsInfo previous, @Nullable ClusterInfo clusterInfo) throws IOException {
if (!nodeEnv.hasNodeFile()) {
return new FsInfo(System.currentTimeMillis(), null, new FsInfo.Path[0]);
}
NodePath[] dataLocations = nodeEnv.nodePaths();
FsInfo.Path[] paths = new FsInfo.Path[dataLocations.length];
for (int i = 0; i < dataLocations.length; i++) {
paths[i] = getFSInfo(dataLocations[i]);
}
FsInfo.IoStats ioStats = null;
if (Constants.LINUX) {
Set<Tuple<Integer, Integer>> devicesNumbers = new HashSet<>();
for (int i = 0; i < dataLocations.length; i++) {
if (dataLocations[i].majorDeviceNumber != -1 && dataLocations[i].minorDeviceNumber != -1) {
devicesNumbers.add(Tuple.tuple(dataLocations[i].majorDeviceNumber, dataLocations[i].minorDeviceNumber));
}
}
ioStats = ioStats(devicesNumbers, previous);
}
DiskUsage leastDiskEstimate = null;
DiskUsage mostDiskEstimate = null;
if (clusterInfo != null) {
leastDiskEstimate = clusterInfo.getNodeLeastAvailableDiskUsages().get(nodeEnv.nodeId());
mostDiskEstimate = clusterInfo.getNodeMostAvailableDiskUsages().get(nodeEnv.nodeId());
}
return new FsInfo(System.currentTimeMillis(), ioStats, paths, leastDiskEstimate, mostDiskEstimate);
}
Aggregations