Use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.
From the class TaskExecutorProcessSpecContainerResourcePriorityAdapter, method getTaskExecutorProcessSpecAndResource.
Optional<TaskExecutorProcessSpecAndResource> getTaskExecutorProcessSpecAndResource(Priority priority) {
    // Forward lookup: no spec registered for this priority means no result.
    final TaskExecutorProcessSpec taskExecutorProcessSpec = priorityToTaskExecutorProcessSpec.get(priority);
    if (taskExecutorProcessSpec == null) {
        return Optional.empty();
    }
    // The reverse map must contain the spec and point back to the same priority.
    final PriorityAndResource priorityAndResource =
            taskExecutorProcessSpecToPriorityAndResource.get(taskExecutorProcessSpec);
    Preconditions.checkState(priorityAndResource != null);
    Preconditions.checkState(priority.equals(priorityAndResource.getPriority()));
    return Optional.of(
            new TaskExecutorProcessSpecAndResource(
                    taskExecutorProcessSpec, priorityAndResource.getResource()));
}
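The method resolves a priority to its process spec through one map, then cross-checks the reverse map before returning the pair. A minimal, self-contained sketch of that bidirectional-map invariant (the types and names here are hypothetical simplifications, not Flink's actual adapter):

import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

public class PriorityAdapterSketch {
    // Forward and reverse maps; registerSpec keeps them consistent.
    private final Map<Integer, String> priorityToSpec = new HashMap<>();
    private final Map<String, Integer> specToPriority = new HashMap<>();

    void registerSpec(int priority, String spec) {
        priorityToSpec.put(priority, spec);
        specToPriority.put(spec, priority);
    }

    Optional<String> getSpec(int priority) {
        String spec = priorityToSpec.get(priority);
        if (spec == null) {
            return Optional.empty();
        }
        // Same consistency check as the Flink method: the reverse map must
        // point back to the priority we started from.
        Integer reversePriority = specToPriority.get(spec);
        if (reversePriority == null || reversePriority != priority) {
            throw new IllegalStateException("maps out of sync for priority " + priority);
        }
        return Optional.of(spec);
    }

    public static void main(String[] args) {
        PriorityAdapterSketch adapter = new PriorityAdapterSketch();
        adapter.registerSpec(1, "spec-1g");
        System.out.println(adapter.getSpec(1)); // Optional[spec-1g]
        System.out.println(adapter.getSpec(2)); // Optional.empty
    }
}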
Use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.
From the class UtilsTest, method testCreateTaskExecutorCredentials.
@Test
public void testCreateTaskExecutorCredentials() throws Exception {
    File root = temporaryFolder.getRoot();
    File home = new File(root, "home");
    boolean created = home.mkdir();
    assertTrue(created);

    Configuration flinkConf = new Configuration();
    YarnConfiguration yarnConf = new YarnConfiguration();

    // Minimal YARN environment required by Utils.createTaskExecutorContext.
    Map<String, String> env = new HashMap<>();
    env.put(YarnConfigKeys.ENV_APP_ID, "foo");
    env.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, home.getAbsolutePath());
    env.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, "");
    env.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, "");
    env.put(YarnConfigKeys.ENV_HADOOP_USER_NAME, "foo");
    env.put(
            YarnConfigKeys.FLINK_DIST_JAR,
            new YarnLocalResourceDescriptor(
                            "flink.jar",
                            new Path(root.toURI()),
                            0,
                            System.currentTimeMillis(),
                            LocalResourceVisibility.APPLICATION,
                            LocalResourceType.FILE)
                    .toString());
    env.put(YarnConfigKeys.FLINK_YARN_FILES, "");
    env.put(ApplicationConstants.Environment.PWD.key(), home.getAbsolutePath());
    env = Collections.unmodifiableMap(env);

    final YarnResourceManagerDriverConfiguration yarnResourceManagerDriverConfiguration =
            new YarnResourceManagerDriverConfiguration(env, "localhost", null);

    // Write an AM-RM token and an HDFS delegation token into a credential file.
    File credentialFile = temporaryFolder.newFile("container_tokens");
    final Text amRmTokenKind = AMRMTokenIdentifier.KIND_NAME;
    final Text hdfsDelegationTokenKind = new Text("HDFS_DELEGATION_TOKEN");
    final Text amRmTokenService = new Text("rm-ip:8030");
    final Text hdfsDelegationTokenService = new Text("ha-hdfs:hadoop-namespace");
    Credentials amCredentials = new Credentials();
    amCredentials.addToken(
            amRmTokenService,
            new Token<>(new byte[4], new byte[4], amRmTokenKind, amRmTokenService));
    amCredentials.addToken(
            hdfsDelegationTokenService,
            new Token<>(new byte[4], new byte[4], hdfsDelegationTokenKind, hdfsDelegationTokenService));
    amCredentials.writeTokenStorageFile(
            new org.apache.hadoop.fs.Path(credentialFile.getAbsolutePath()), yarnConf);

    TaskExecutorProcessSpec spec =
            TaskExecutorProcessUtils.newProcessSpecBuilder(flinkConf)
                    .withTotalProcessMemory(MemorySize.parse("1g"))
                    .build();
    ContaineredTaskManagerParameters tmParams =
            new ContaineredTaskManagerParameters(spec, new HashMap<>(1));
    Configuration taskManagerConf = new Configuration();
    String workingDirectory = root.getAbsolutePath();
    Class<?> taskManagerMainClass = YarnTaskExecutorRunner.class;
    ContainerLaunchContext ctx;

    // Point HADOOP_TOKEN_FILE_LOCATION at the credential file while the
    // container context is created, then restore the original environment.
    final Map<String, String> originalEnv = System.getenv();
    try {
        Map<String, String> systemEnv = new HashMap<>(originalEnv);
        systemEnv.put("HADOOP_TOKEN_FILE_LOCATION", credentialFile.getAbsolutePath());
        CommonTestUtils.setEnv(systemEnv);
        ctx =
                Utils.createTaskExecutorContext(
                        flinkConf,
                        yarnConf,
                        yarnResourceManagerDriverConfiguration,
                        tmParams,
                        "",
                        workingDirectory,
                        taskManagerMainClass,
                        LOG);
    } finally {
        CommonTestUtils.setEnv(originalEnv);
    }

    // Read the tokens back out of the launch context.
    Credentials credentials = new Credentials();
    try (DataInputStream dis = new DataInputStream(new ByteArrayInputStream(ctx.getTokens().array()))) {
        credentials.readTokenStorageStream(dis);
    }
    Collection<Token<? extends TokenIdentifier>> tokens = credentials.getAllTokens();
    boolean hasHdfsDelegationToken = false;
    boolean hasAmRmToken = false;
    for (Token<? extends TokenIdentifier> token : tokens) {
        if (token.getKind().equals(amRmTokenKind)) {
            hasAmRmToken = true;
        } else if (token.getKind().equals(hdfsDelegationTokenKind)) {
            hasHdfsDelegationToken = true;
        }
    }

    // The HDFS delegation token must be forwarded; the AM-RM token must not.
    assertTrue(hasHdfsDelegationToken);
    assertFalse(hasAmRmToken);
}
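The assertions pin down the contract: the HDFS delegation token is forwarded into the task executor's launch context, while the AM-RM token, which is only valid for the application master, is filtered out. A sketch of that filtering step, assuming it approximates what Utils.createTaskExecutorContext does with the credentials it reads from HADOOP_TOKEN_FILE_LOCATION (the actual Flink implementation may differ):

import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;

class TokenFilterSketch {
    // Copy every token except the AM-RM token, which is only meaningful to
    // the application master and must not reach the task executor container.
    static Credentials filterAmRmToken(Credentials source) {
        Credentials filtered = new Credentials();
        for (Token<? extends TokenIdentifier> token : source.getAllTokens()) {
            if (!token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
                filtered.addToken(token.getService(), token);
            }
        }
        return filtered;
    }
}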
Use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.
From the class ActiveResourceManagerTest, method testWorkerRegistrationTimeoutNotCountingAllocationTime.
@Test
public void testWorkerRegistrationTimeoutNotCountingAllocationTime() throws Exception {
    new Context() {
        {
            final ResourceID tmResourceId = ResourceID.generate();
            final CompletableFuture<ResourceID> requestResourceFuture = new CompletableFuture<>();
            final CompletableFuture<ResourceID> releaseResourceFuture = new CompletableFuture<>();

            flinkConfig.set(
                    ResourceManagerOptions.TASK_MANAGER_REGISTRATION_TIMEOUT,
                    Duration.ofMillis(TESTING_START_WORKER_TIMEOUT_MS));
            driverBuilder
                    .setRequestResourceFunction(taskExecutorProcessSpec -> requestResourceFuture)
                    .setReleaseResourceConsumer(releaseResourceFuture::complete);

            runTest(() -> {
                // request new worker
                runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC));

                // resource allocation takes longer than the worker registration timeout
                try {
                    Thread.sleep(TESTING_START_WORKER_TIMEOUT_MS * 2);
                } catch (InterruptedException e) {
                    fail();
                }

                final long start = System.nanoTime();
                runInMainThread(() -> requestResourceFuture.complete(tmResourceId));

                // worker registered, verify not released due to timeout
                RegistrationResponse registrationResponse = registerTaskExecutor(tmResourceId).join();
                final long registrationTime = (System.nanoTime() - start) / 1_000_000;
                assumeTrue(
                        "The registration must not take longer than the start worker timeout. "
                                + "If it does, then this indicates a very slow machine.",
                        registrationTime < TESTING_START_WORKER_TIMEOUT_MS);
                assertThat(registrationResponse, instanceOf(RegistrationResponse.Success.class));
                assertFalse(releaseResourceFuture.isDone());
            });
        }
    };
}
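The point of the test is that the registration timeout is armed only once the driver's allocation future completes, so slow allocation does not eat into the registration window. A minimal, self-contained sketch of that timing pattern (all names here are hypothetical stand-ins, not Flink internals):

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class RegistrationTimeoutSketch {
    public static void main(String[] args) throws Exception {
        ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
        long timeoutMs = 100;
        CompletableFuture<String> allocation = new CompletableFuture<>();
        CompletableFuture<Void> registered = new CompletableFuture<>();

        // Arm the timeout only when allocation completes, mirroring the test.
        allocation.thenRun(() -> scheduler.schedule(() -> {
            if (!registered.isDone()) {
                System.out.println("worker released: registration timed out");
            }
        }, timeoutMs, TimeUnit.MILLISECONDS));

        Thread.sleep(timeoutMs * 2);      // allocation takes longer than the timeout
        allocation.complete("worker-1");  // allocation finishes
        registered.complete(null);        // worker registers promptly afterwards

        Thread.sleep(timeoutMs * 2);      // give the timeout a chance to fire
        scheduler.shutdownNow();          // no release message is printed
    }
}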
Use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.
From the class ActiveResourceManagerTest, method testCloseTaskManagerConnectionOnWorkerTerminated.
@Test
public void testCloseTaskManagerConnectionOnWorkerTerminated() throws Exception {
    new Context() {
        {
            final ResourceID tmResourceId = ResourceID.generate();
            final CompletableFuture<TaskExecutorProcessSpec> requestWorkerFromDriverFuture =
                    new CompletableFuture<>();
            final CompletableFuture<Void> disconnectResourceManagerFuture = new CompletableFuture<>();

            final TestingTaskExecutorGateway taskExecutorGateway =
                    new TestingTaskExecutorGatewayBuilder()
                            .setDisconnectResourceManagerConsumer(
                                    (ignore) -> disconnectResourceManagerFuture.complete(null))
                            .createTestingTaskExecutorGateway();
            driverBuilder.setRequestResourceFunction(taskExecutorProcessSpec -> {
                requestWorkerFromDriverFuture.complete(taskExecutorProcessSpec);
                return CompletableFuture.completedFuture(tmResourceId);
            });

            runTest(() -> {
                // request a new worker, terminate it after it has registered
                runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC))
                        .thenCompose((ignore) -> registerTaskExecutor(tmResourceId, taskExecutorGateway))
                        .thenRun(() -> runInMainThread(() ->
                                getResourceManager()
                                        .onWorkerTerminated(tmResourceId, "terminate for testing")));

                // verify the task manager connection is closed
                disconnectResourceManagerFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS);
            });
        }
    };
}
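What the test checks is the resource manager's reaction path: a worker termination reported by the driver closes the corresponding TaskManager connection, which in turn invokes the gateway's disconnect consumer. A self-contained sketch of that reaction (hypothetical simplified types, not Flink's actual classes):

import java.util.HashMap;
import java.util.Map;

public class WorkerTerminationSketch {
    interface Gateway { void disconnectResourceManager(Exception cause); }

    private final Map<String, Gateway> registeredWorkers = new HashMap<>();

    void registerWorker(String resourceId, Gateway gateway) {
        registeredWorkers.put(resourceId, gateway);
    }

    // Terminating a registered worker closes its connection by notifying the
    // task executor gateway, mirroring what the test observes.
    void onWorkerTerminated(String resourceId, String diagnostics) {
        Gateway gateway = registeredWorkers.remove(resourceId);
        if (gateway != null) {
            gateway.disconnectResourceManager(new Exception(diagnostics));
        }
    }

    public static void main(String[] args) {
        WorkerTerminationSketch rm = new WorkerTerminationSketch();
        rm.registerWorker("worker-1",
                cause -> System.out.println("disconnected: " + cause.getMessage()));
        rm.onWorkerTerminated("worker-1", "terminate for testing");
    }
}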
Use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.
From the class ActiveResourceManagerTest, method testWorkerTerminatedBeforeRegister.
/**
 * Tests that a worker terminating after it was requested, but before it registered,
 * triggers a replacement request with the same spec.
 */
@Test
public void testWorkerTerminatedBeforeRegister() throws Exception {
    new Context() {
        {
            final AtomicInteger requestCount = new AtomicInteger(0);
            final List<ResourceID> tmResourceIds = new ArrayList<>();
            tmResourceIds.add(ResourceID.generate());
            tmResourceIds.add(ResourceID.generate());

            final List<CompletableFuture<TaskExecutorProcessSpec>> requestWorkerFromDriverFutures =
                    new ArrayList<>();
            requestWorkerFromDriverFutures.add(new CompletableFuture<>());
            requestWorkerFromDriverFutures.add(new CompletableFuture<>());

            driverBuilder.setRequestResourceFunction(taskExecutorProcessSpec -> {
                int idx = requestCount.getAndIncrement();
                assertThat(idx, lessThan(2));
                requestWorkerFromDriverFutures.get(idx).complete(taskExecutorProcessSpec);
                return CompletableFuture.completedFuture(tmResourceIds.get(idx));
            });
            slotManagerBuilder.setGetRequiredResourcesSupplier(
                    () -> Collections.singletonMap(WORKER_RESOURCE_SPEC, 1));

            runTest(() -> {
                // received worker request, verify requesting from driver
                CompletableFuture<Boolean> startNewWorkerFuture =
                        runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC));
                TaskExecutorProcessSpec taskExecutorProcessSpec1 =
                        requestWorkerFromDriverFutures.get(0).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                assertThat(startNewWorkerFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS), is(true));
                assertThat(
                        taskExecutorProcessSpec1,
                        is(TaskExecutorProcessUtils.processSpecFromWorkerResourceSpec(
                                flinkConfig, WORKER_RESOURCE_SPEC)));

                // first worker failed before register, verify requesting another worker
                // from driver
                runInMainThread(() ->
                        getResourceManager().onWorkerTerminated(tmResourceIds.get(0), "terminate for testing"));
                TaskExecutorProcessSpec taskExecutorProcessSpec2 =
                        requestWorkerFromDriverFutures.get(1).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                assertThat(taskExecutorProcessSpec2, is(taskExecutorProcessSpec1));

                // second worker registered, verify registration succeed
                CompletableFuture<RegistrationResponse> registerTaskExecutorFuture =
                        registerTaskExecutor(tmResourceIds.get(1));
                assertThat(
                        registerTaskExecutorFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS),
                        instanceOf(RegistrationResponse.Success.class));
            });
        }
    };
}
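The recovery behavior asserted here is that a pending (not yet registered) worker that terminates is replaced by a fresh request carrying the same TaskExecutorProcessSpec. A self-contained sketch of that loop (hypothetical simplified types, not Flink's resource manager):

import java.util.HashMap;
import java.util.Map;

public class PendingWorkerRecoverySketch {
    private final Map<String, String> pendingWorkers = new HashMap<>(); // resource id -> spec
    private int nextId = 0;

    String requestWorker(String processSpec) {
        String resourceId = "worker-" + (nextId++);
        pendingWorkers.put(resourceId, processSpec);
        System.out.println("requested " + resourceId + " with " + processSpec);
        return resourceId;
    }

    // A worker that dies before registering is still in pendingWorkers;
    // re-request a replacement with the same spec, as the test asserts.
    void onWorkerTerminated(String resourceId) {
        String spec = pendingWorkers.remove(resourceId);
        if (spec != null) {
            requestWorker(spec);
        }
    }

    public static void main(String[] args) {
        PendingWorkerRecoverySketch rm = new PendingWorkerRecoverySketch();
        String first = rm.requestWorker("spec-1g");
        rm.onWorkerTerminated(first); // prints a second request with spec-1g
    }
}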