Use of org.bf2.performance.TestUtils.AvailableResources in project kas-fleetshard by bf2fc6cc711aee1a0c2a.
Class InstanceProfiler, method setup.
/**
 * Prepares a profiling run.
 * <p>
 * Reads any previously stored results, creates the log directory below {@code target},
 * connects to the Kafka and OMB clusters, installs OMB, and derives the OMB worker
 * cpu/memory settings from the value reported by {@code getMinAvailableResources} for
 * the OMB worker nodes. The Kafka provisioner is installed, and the {@code SETUP} step
 * recorded, only when the loaded result has no completed step yet.
 *
 * @throws Exception if any of the underlying setup operations fails
 */
private void setup() throws Exception {
    final String instanceTypeLabel = "node.kubernetes.io/instance-type";
    final int cpuCapMillis = 6400;

    readResults();
    if (profilingResult.name == null) {
        String timestamp = Environment.DATE_FORMAT.format(LocalDateTime.now());
        profilingResult.name = "profile-" + timestamp;
    }

    logDir = new File("target", profilingResult.name);
    Files.createDirectories(logDir.toPath());

    // Kafka side: connect, record the instance type of the first worker node, set up the provisioner
    kafkaCluster = KubeClusterResource.connectToKubeCluster(PerformanceEnvironment.KAFKA_KUBECONFIG);
    profilingResult.kafkaNodeType = kafkaCluster.getWorkerNodes()
            .get(0)
            .getMetadata()
            .getLabels()
            .get(instanceTypeLabel);
    kafkaProvisioner = ManagedKafkaProvisioner.create(kafkaCluster);
    kafkaProvisioner.setup();

    // OMB side: connect and install using the provisioner's TLS configuration
    omb = new OMB(KubeClusterResource.connectToKubeCluster(PerformanceEnvironment.OMB_KUBECONFIG));
    omb.install(kafkaProvisioner.getTlsConfig());

    // TODO: if there is an existing result, make sure it's the same test setup
    profilingResult.ombNodeType = omb.getOmbCluster().getWorkerNodes()
            .get(0)
            .getMetadata()
            .getLabels()
            .get(instanceTypeLabel);
    profilingResult.ombWorkerNodes = omb.getOmbCluster().getWorkerNodes().size();

    AvailableResources resources = getMinAvailableResources(omb.getOmbCluster().getWorkerNodes().stream());

    // A strict check that client instance types have between 8 and 16 GB is intentionally
    // disabled; instead the resources are capped to what will fit on 2xlarge or xlarge.
    resources.cpuMillis = Math.min(cpuCapMillis, resources.cpuMillis);
    resources.memoryBytes = Math.min(resources.memoryBytes, 12 * ONE_GB);

    // use all (capped) resources on the worker nodes with 2 workers per node
    String cpuPerWorker = (resources.cpuMillis / 2) + "m";
    omb.setWorkerCpu(Quantity.parse(cpuPerWorker));
    String memoryPerWorker = String.valueOf(resources.memoryBytes / 2);
    omb.setWorkerContainerMemory(Quantity.parse(memoryPerWorker));

    profilingResult.ombWorkerCpu = omb.getWorkerCpu();
    profilingResult.ombWorkerMemory = omb.getWorkerContainerMemory();
    LOGGER.info("OMB Workers will use {} cpu and {} memory requests", omb.getWorkerCpu(), omb.getWorkerContainerMemory());

    if (profilingResult.completedStep != null) {
        // a previous run already progressed past setup; nothing to install
        return;
    }

    // flag is raised before install() is invoked, matching the original ordering
    installedProvisioner = true;
    kafkaProvisioner.install();
    writeResults(Step.SETUP);
}
Use of org.bf2.performance.TestUtils.AvailableResources in project kas-fleetshard by bf2fc6cc711aee1a0c2a.
Class InstanceProfiler, method sizeInstance.
/**
 * Determines the Kafka broker sizing (container cpu, container memory and JVM memory)
 * for the profiling run and stores the resulting configuration and capacity on
 * {@code profilingResult}.
 * <p>
 * The starting point is the value returned by {@code getMinAvailableResources} for the
 * relevant worker nodes. When brokers are collocated with ZooKeeper, the ZooKeeper and
 * additional pod (canary, admin server, exporter) requests read from the defaults in
 * {@code /application.properties} are subtracted first. A fixed head-room is then
 * removed and the remainder is divided by {@code density}. When {@code autoSize} is
 * {@code false} the calculated values are only logged and the defaults from
 * {@code /application.properties} are used instead.
 *
 * @throws IllegalStateException if {@code /application.properties} is not on the classpath
 * @throws Exception if reading the defaults or any provisioner call fails
 */
protected void sizeInstance() throws Exception {
    Stream<Node> workerNodes = kafkaCluster.getWorkerNodes().stream();
    if (!collocateBrokerWithZookeeper) {
        kafkaProvisioner.validateClusterForBrokers(numberOfBrokers, false, workerNodes);
        // the first stream was handed to the validation call above, so build a new one
        // restricted to the nodes carrying the broker taint
        workerNodes = kafkaCluster.getWorkerNodes().stream()
                .filter(n -> n.getSpec().getTaints().stream()
                        .anyMatch(t -> t.getKey().equals(ManagedKafkaProvisioner.KAFKA_BROKER_TAINT_KEY)));
    }

    // note these number seem to change per release - 4.9 reports a different allocatable, than 4.8
    AvailableResources resources = getMinAvailableResources(workerNodes);
    long cpuMillis = resources.cpuMillis;
    long memoryBytes = resources.memoryBytes;

    Properties p = new Properties();
    try (InputStream is = InstanceProfiler.class.getResourceAsStream("/application.properties")) {
        if (is == null) {
            // getResourceAsStream returns null for a missing resource; fail with a clear message
            // instead of the NullPointerException Properties.load would raise
            throw new IllegalStateException("Could not find /application.properties on the classpath");
        }
        p.load(is);
    }

    KafkaInstanceConfiguration defaults = Serialization.jsonMapper().convertValue(p, KafkaInstanceConfiguration.class);

    // when locating with ZK, then reduce the available resources accordingly
    if (collocateBrokerWithZookeeper) {
        // earlier code making a guess at the page cache size has been removed - until we can more reliably detect it's effect
        // there's no point in making a trade-off between extra container memory and JVM memory

        // TODO: could choose a memory size where we can fit even multiples of zookeepers
        long zookeeperBytes = Quantity.getAmountInBytes(Quantity.parse(defaults.getZookeeper().getContainerMemory())).longValue();
        // cpu quantities are shifted three decimal places (x1000) so they are expressed in millis
        long zookeeperCpu = Quantity.getAmountInBytes(Quantity.parse(defaults.getZookeeper().getContainerCpu())).movePointRight(3).longValue();

        List<Long> additionalPodCpu = new ArrayList<>();
        List<Long> additionalPodMemory = new ArrayList<>();
        additionalPodCpu.add(Quantity.getAmountInBytes(Quantity.parse(defaults.getCanary().getContainerCpu())).movePointRight(3).longValue());
        additionalPodMemory.add(Quantity.getAmountInBytes(Quantity.parse(defaults.getCanary().getContainerMemory())).longValue());
        additionalPodCpu.add(Quantity.getAmountInBytes(Quantity.parse(defaults.getAdminserver().getContainerCpu())).movePointRight(3).longValue());
        additionalPodMemory.add(Quantity.getAmountInBytes(Quantity.parse(defaults.getAdminserver().getContainerMemory())).longValue());
        additionalPodCpu.add(Quantity.getAmountInBytes(Quantity.parse(defaults.getExporter().getContainerCpu())).movePointRight(3).longValue());
        additionalPodMemory.add(Quantity.getAmountInBytes(Quantity.parse(defaults.getExporter().getContainerMemory())).longValue());

        LOGGER.info("Total overhead of additional pods {} memory, {} cpu",
                additionalPodMemory.stream().mapToLong(Long::longValue).sum(),
                additionalPodCpu.stream().mapToLong(Long::longValue).sum());

        // actual needs ~ 800Mi and 1075m/1575m cpu over 3 nodes, but worst case is over two. amountNeeded will
        // estimate that in a more targeted way - but still simplified
        memoryBytes = resources.memoryBytes - density * (zookeeperBytes + amountNeeded(additionalPodMemory));
        cpuMillis = resources.cpuMillis - density * (zookeeperCpu + amountNeeded(additionalPodCpu));

        // TODO account for possible ingress replica collocation
    }

    // and if there are eventually pods that need to be collocated, and we don't want to adjust the resources downward
    if (density == 1) {
        memoryBytes -= 2 * ONE_GB;
        cpuMillis -= 500;
    } else {
        // we can assume a much tighter resource utilization for density 2 - it can fluctuate between releases
        // or may require adjustments as other pods are added or pod resource adjustments are made
        memoryBytes -= 1 * ONE_GB;
        cpuMillis -= 200;
    }

    // split what is left evenly between the brokers sharing a node
    memoryBytes = memoryBytes / density;
    cpuMillis = cpuMillis / density;

    // JVM memory is the container memory minus the VM overhead, capped at MAX_KAFKA_VM_SIZE
    long maxVmBytes = Math.min(memoryBytes - getVMOverheadForContainer(memoryBytes), MAX_KAFKA_VM_SIZE);
    if (density > 1) {
        maxVmBytes -= 1 * ONE_GB;
    }

    if (!autoSize) {
        // auto sizing is off: log what was calculated, then fall back to the configured defaults
        long defaultMemory = Quantity.getAmountInBytes(Quantity.parse(defaults.getKafka().getContainerMemory())).longValue();
        long defaultCpu = Quantity.getAmountInBytes(Quantity.parse(defaults.getKafka().getContainerCpu())).movePointRight(3).longValue();
        long defaultMaxVmBytes = Quantity.getAmountInBytes(Quantity.parse(defaults.getKafka().getJvmXms())).longValue();
        LOGGER.info("Calculated kafka sizing {} container memory, {} container cpu, and {} vm memory", memoryBytes, cpuMillis, maxVmBytes);
        memoryBytes = defaultMemory;
        cpuMillis = defaultCpu;
        maxVmBytes = defaultMaxVmBytes;
    }

    KafkaInstanceConfiguration toUse = new KafkaInstanceConfiguration();
    toUse.getKafka().setEnableQuota(false);
    AdopterProfile.openListenersAndAccess(toUse);
    toUse.getKafka().setContainerCpu(cpuMillis + "m");
    toUse.getKafka().setJvmXms(String.valueOf(maxVmBytes));
    toUse.getKafka().setContainerMemory(String.valueOf(memoryBytes));

    profilingResult.config = toUse;
    profilingResult.config.getKafka().setColocateWithZookeeper(collocateBrokerWithZookeeper);
    profilingResult.config.getKafka().setMaxConnections(Integer.MAX_VALUE);
    profilingResult.config.getKafka().setConnectionAttemptsPerSec(Integer.MAX_VALUE);
    profilingResult.config.getKafka().setMessageMaxBytes(11534336);
    // Locale.ROOT keeps the lower-casing independent of the JVM default locale
    // (e.g. a Turkish locale would otherwise turn 'I' into a dotless 'ı')
    profilingResult.config.getKafka().setStorageClass(storage.name().toLowerCase(java.util.Locale.ROOT));
    profilingResult.config.getZookeeper().setVolumeSize(storage.zookeeperSize);

    // once we make the determination, create the instance

    // not used as quota is turned off
    profilingResult.capacity = kafkaProvisioner.defaultCapacity(40_000_000);
    // retention size and partition count are scaled by numberOfBrokers / 3
    profilingResult.capacity.setMaxDataRetentionSize(Quantity.parse((GIGS * numberOfBrokers / 3) + "Gi"));
    profilingResult.capacity.setMaxPartitions(defaults.getKafka().getPartitionCapacity() * numberOfBrokers / 3);

    Kafka kafka = profilingResult.config.getKafka();
    LOGGER.info("Running with kafka sizing {} container memory, {} container cpu, and {} vm memory", kafka.getContainerMemory(), kafka.getContainerCpu(), kafka.getJvmXms());

    // if running on m5.4xlarge or greater and want to constrain resources like m5.2xlarge (fully dedicated)
    // profilingResult.config.getKafka().setContainerMemory("29013426176");
    // profilingResult.config.getKafka().setContainerCpu("6500m");
    // to constrain resources like m5.xlarge (fully dedicated)
    // profilingResult.config.getKafka().setContainerMemory("12453740544");
    // profilingResult.config.getKafka().setContainerCpu("2500m");
}
Aggregations