use of org.apache.flink.kubernetes.kubeclient.resources.KubernetesPod in project flink by apache.
the class KubernetesResourceManagerDriverTest method testOnPodAdded.
/**
 * Requests a resource from the driver, then delivers an {@code onAdded} event for the pod that
 * was handed to the kube client, and checks that the pending request completes with a worker
 * node whose resource id equals the pod name.
 */
@Test
public void testOnPodAdded() throws Exception {
    new Context() {
        {
            final CompletableFuture<KubernetesPod> podCreated = new CompletableFuture<>();
            final CompletableFuture<KubernetesWorkerNode> workerAllocated = new CompletableFuture<>();

            // Capture the pod the driver asks the client to create.
            flinkKubeClientBuilder.setCreateTaskManagerPodFunction(
                    (createdPod) -> {
                        podCreated.complete(createdPod);
                        return FutureUtils.completedVoidFuture();
                    });

            runTest(
                    () -> {
                        // Issue the resource request from the main thread and forward its
                        // result into workerAllocated.
                        runInMainThread(
                                () ->
                                        getDriver()
                                                .requestResource(TASK_EXECUTOR_PROCESS_SPEC)
                                                .thenAccept(workerAllocated::complete));

                        // Build the pod for the event using the name of the pod that was
                        // actually requested.
                        final String requestedPodName =
                                podCreated.get(TIMEOUT_SEC, TimeUnit.SECONDS).getName();
                        final KubernetesPod addedPod =
                                new TestingKubernetesPod(requestedPodName, true, false);

                        // Register the checks before firing the event: the callback runs
                        // validateInMainThread() and compares the worker's resource id
                        // against the pod name.
                        final CompletableFuture<Void> checksDone =
                                workerAllocated.thenAccept(
                                        (workerNode) -> {
                                            validateInMainThread();
                                            assertThat(
                                                    workerNode.getResourceID().toString(),
                                                    is(addedPod.getName()));
                                        });

                        // Fire the onAdded event for the pod.
                        getPodCallbackHandler().onAdded(Collections.singletonList(addedPod));

                        // Block until the checks above have run (or the timeout elapses).
                        checksDone.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                    });
        }
    };
}
use of org.apache.flink.kubernetes.kubeclient.resources.KubernetesPod in project flink by apache.
the class Fabric8FlinkKubeClientTest method testStopAndCleanupCluster.
/**
 * Creates the JobManager component and one TaskManager pod, checks the resource counts seen
 * through the kube client, then calls {@code stopAndCleanupCluster} and checks that no
 * deployment is left in the namespace.
 */
@Test
public void testStopAndCleanupCluster() throws Exception {
    // Set up the cluster: JobManager component plus a single TaskManager pod.
    flinkKubeClient.createJobManagerComponent(kubernetesJobManagerSpecification);

    final KubernetesPod taskManagerPod =
            new KubernetesPod(
                    new PodBuilder()
                            .editOrNewMetadata()
                            .withName(TASKMANAGER_POD_NAME)
                            .endMetadata()
                            .editOrNewSpec()
                            .endSpec()
                            .build());
    flinkKubeClient.createTaskManagerPod(taskManagerPod).get();

    // Before cleanup: one deployment, one config map, two services and one pod are listed.
    assertEquals(
            1, kubeClient.apps().deployments().inNamespace(NAMESPACE).list().getItems().size());
    assertEquals(1, kubeClient.configMaps().inNamespace(NAMESPACE).list().getItems().size());
    assertEquals(2, kubeClient.services().inNamespace(NAMESPACE).list().getItems().size());
    assertEquals(1, kubeClient.pods().inNamespace(NAMESPACE).list().getItems().size());

    flinkKubeClient.stopAndCleanupCluster(CLUSTER_ID);

    // After cleanup: the deployment list must be empty.
    assertTrue(
            kubeClient.apps().deployments().inNamespace(NAMESPACE).list().getItems().isEmpty());
}
use of org.apache.flink.kubernetes.kubeclient.resources.KubernetesPod in project flink by apache.
the class KubernetesResourceManagerDriver method recoverWorkerNodesFromPreviousAttempts.
// ------------------------------------------------------------------------
// Internal
// ------------------------------------------------------------------------
/**
 * Looks up the pods matching this cluster's TaskManager selectors and sorts them into workers
 * to recover and pods to stop.
 *
 * <p>For every pod a {@link KubernetesWorkerNode} is built from the pod name, and
 * {@code currentMaxAttemptId} is raised to the largest attempt seen. Pods that are terminated
 * or not scheduled are stopped; all others are reported as recovered. Afterwards
 * {@code currentMaxAttemptId} is incremented by one and the resource event handler is notified
 * with the recovered workers.
 *
 * @throws ResourceManagerException declared by this method; propagated from the calls made here
 */
private void recoverWorkerNodesFromPreviousAttempts() throws ResourceManagerException {
    final List<KubernetesPod> previousPods =
            flinkKubeClient.getPodsWithLabels(KubernetesUtils.getTaskManagerSelectors(clusterId));
    final List<KubernetesWorkerNode> recoveredWorkers = new ArrayList<>();

    for (KubernetesPod pod : previousPods) {
        final String podName = pod.getName();
        final KubernetesWorkerNode worker = new KubernetesWorkerNode(new ResourceID(podName));

        // Track the highest attempt id among all pods found, including those stopped below.
        final long attempt = worker.getAttempt();
        if (attempt > currentMaxAttemptId) {
            currentMaxAttemptId = attempt;
        }

        // Only pods that are still alive and already scheduled are taken over.
        if (!pod.isTerminated() && pod.isScheduled()) {
            recoveredWorkers.add(worker);
        } else {
            stopPod(podName);
        }
    }

    // Advance to the attempt id of the current run before reporting it.
    currentMaxAttemptId++;
    log.info(
            "Recovered {} pods from previous attempts, current attempt id is {}.",
            recoveredWorkers.size(),
            currentMaxAttemptId);

    // Should not invoke resource event handler on the main thread executor.
    // We are in the initializing thread. The main thread executor is not yet ready.
    getResourceEventHandler().onPreviousAttemptWorkersRecovered(recoveredWorkers);
}
use of org.apache.flink.kubernetes.kubeclient.resources.KubernetesPod in project flink by apache.
the class KubernetesResourceManagerDriver method requestResource.
/**
 * Builds a TaskManager pod for the given process spec and asks the kube client to create it.
 *
 * <p>The returned future is registered in {@code requestResourceFutures} under the pod name
 * before the creation request is sent. If pod creation fails, the future is removed from that
 * map and completed exceptionally with the failure. On successful creation this method only
 * logs; it does not complete the returned future itself.
 *
 * @param taskExecutorProcessSpec process spec from which the TaskManager parameters are derived
 * @return future for the requested worker node, keyed by the new pod's name
 */
@Override
public CompletableFuture<KubernetesWorkerNode> requestResource(TaskExecutorProcessSpec taskExecutorProcessSpec) {
    final KubernetesTaskManagerParameters parameters =
            createKubernetesTaskManagerParameters(taskExecutorProcessSpec);
    final KubernetesPod taskManagerPod =
            KubernetesTaskManagerFactory.buildTaskManagerKubernetesPod(taskManagerPodTemplate, parameters);
    final String podName = taskManagerPod.getName();

    // Register the pending request before triggering creation so the entry already exists
    // when the creation callback below looks it up.
    final CompletableFuture<KubernetesWorkerNode> requestResourceFuture = new CompletableFuture<>();
    requestResourceFutures.put(podName, requestResourceFuture);

    log.info(
            "Creating new TaskManager pod with name {} and resource <{},{}>.",
            podName,
            parameters.getTaskManagerMemoryMB(),
            parameters.getTaskManagerCPU());

    final CompletableFuture<Void> createPodFuture = flinkKubeClient.createTaskManagerPod(taskManagerPod);

    // The callback is dispatched to the main thread executor; assertNoException guards
    // against the callback itself throwing.
    FutureUtils.assertNoException(
            createPodFuture.handleAsync(
                    (ignore, exception) -> {
                        if (exception != null) {
                            // The throwable is passed as the trailing argument without a
                            // placeholder so that the logging backend records it as the
                            // exception (with stack trace) instead of formatting it into
                            // the message text.
                            log.warn("Could not create pod {}.", podName, exception);
                            final CompletableFuture<KubernetesWorkerNode> future =
                                    requestResourceFutures.remove(podName);
                            // The entry may already be gone; only fail it when still present.
                            if (future != null) {
                                future.completeExceptionally(exception);
                            }
                        } else {
                            log.info("Pod {} is created.", podName);
                        }
                        return null;
                    },
                    getMainThreadExecutor()));

    return requestResourceFuture;
}
use of org.apache.flink.kubernetes.kubeclient.resources.KubernetesPod in project flink by apache.
the class KubernetesResourceManagerDriverTest method testRecoverPreviousAttemptWorkersPodTerminated.
/**
 * Has the kube client return a single pod from a previous attempt and checks that the driver
 * reports no recovered workers and issues a stop request for that pod.
 */
@Test
public void testRecoverPreviousAttemptWorkersPodTerminated() throws Exception {
    new Context() {
        {
            final KubernetesPod leftoverPod =
                    new TestingKubernetesPod(CLUSTER_ID + "-taskmanager-1-1", true, true);
            final CompletableFuture<String> stoppedPodName = new CompletableFuture<>();
            final CompletableFuture<Collection<KubernetesWorkerNode>> recoveredWorkers =
                    new CompletableFuture<>();

            // The client lists only the leftover pod and records which pod is stopped.
            flinkKubeClientBuilder
                    .setGetPodsWithLabelsFunction((ignore) -> Collections.singletonList(leftoverPod))
                    .setStopPodFunction(
                            (podName) -> {
                                stoppedPodName.complete(podName);
                                return FutureUtils.completedVoidFuture();
                            });

            // Capture whatever the driver reports as recovered workers.
            resourceEventHandlerBuilder.setOnPreviousAttemptWorkersRecoveredConsumer(
                    recoveredWorkers::complete);

            runTest(
                    () -> {
                        // The pod from the previous attempt must not be recovered ...
                        assertThat(recoveredWorkers.get(TIMEOUT_SEC, TimeUnit.SECONDS), empty());
                        // ... and a stop request must have been issued for it.
                        assertThat(
                                stoppedPodName.get(TIMEOUT_SEC, TimeUnit.SECONDS),
                                is(leftoverPod.getName()));
                    });
        }
    };
}
Aggregations