Use of org.apache.flink.runtime.clusterframework.types.ResourceID in the Apache Flink project.
Class TaskManagerComponentsStartupShutdownTest, method testComponentsStartupShutdown.
/**
* Makes sure that all components are shut down when the TaskManager
* actor is shut down.
*
* <p>The test starts a local actor system with JobManager and ResourceManager
* actors, builds the TaskManager's components (memory manager, I/O manager,
* network environment) by hand, starts a TaskManager actor on top of them and
* waits for it to register at the JobManager. It then kills the actors, shuts
* the actor system down and asserts that each hand-built component reports
* itself as shut down.
*/
@Test
public void testComponentsStartupShutdown() {
final String[] TMP_DIR = new String[] { ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH };
final Time timeout = Time.seconds(100);
final int BUFFER_SIZE = 32 * 1024;
// short Akka death-watch settings for this test
Configuration config = new Configuration();
config.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_INTERVAL, "200 ms");
config.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE, "1 s");
config.setInteger(ConfigConstants.AKKA_WATCH_THRESHOLD, 1);
// kept outside the try block so that the finally block can shut it down on failure
ActorSystem actorSystem = null;
try {
actorSystem = AkkaUtils.createLocalActorSystem(config);
// start the JobManager actors; only the first element of the returned tuple is used
final ActorRef jobManager = JobManager.startJobManagerActors(config, actorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1();
// start a standalone ResourceManager that retrieves its leader via the JobManager reference
FlinkResourceManager.startResourceManagerActors(config, actorSystem, LeaderRetrievalUtils.createLeaderRetrievalService(config, jobManager), StandaloneResourceManager.class);
final int numberOfSlots = 1;
// create the components for the TaskManager manually
final TaskManagerConfiguration tmConfig = new TaskManagerConfiguration(numberOfSlots, TMP_DIR, timeout, null, Time.milliseconds(500), Time.seconds(30), Time.seconds(10), // cleanup interval (NOTE(review): presumably labels the next argument, 1000000 - confirm)
1000000, config, // exit-jvm-on-fatal-error (NOTE(review): presumably labels the final argument, false - confirm)
false);
final NetworkEnvironmentConfiguration netConf = new NetworkEnvironmentConfiguration(32, BUFFER_SIZE, MemoryType.HEAP, IOManager.IOMode.SYNC, 0, 0, 2, 8, null);
ResourceID taskManagerId = ResourceID.generate();
final TaskManagerLocation connectionInfo = new TaskManagerLocation(taskManagerId, InetAddress.getLocalHost(), 10000);
final MemoryManager memManager = new MemoryManager(32 * BUFFER_SIZE, 1, BUFFER_SIZE, MemoryType.HEAP, false);
final IOManager ioManager = new IOManagerAsync(TMP_DIR);
// the network environment is assembled from the values held in netConf and started before the TaskManager is created
final NetworkEnvironment network = new NetworkEnvironment(new NetworkBufferPool(netConf.numNetworkBuffers(), netConf.networkBufferSize(), netConf.memoryType()), new LocalConnectionManager(), new ResultPartitionManager(), new TaskEventDispatcher(), new KvStateRegistry(), null, netConf.ioMode(), netConf.partitionRequestInitialBackoff(), netConf.partitionRequestMaxBackoff(), netConf.networkBuffersPerChannel(), netConf.extraNetworkBuffersPerGate());
network.start();
// the TaskManager locates the JobManager through its actor path
LeaderRetrievalService leaderRetrievalService = new StandaloneLeaderRetrievalService(jobManager.path().toString());
MetricRegistryConfiguration metricRegistryConfiguration = MetricRegistryConfiguration.fromConfiguration(config);
// create the task manager
final Props tmProps = Props.create(TaskManager.class, tmConfig, taskManagerId, connectionInfo, memManager, ioManager, network, numberOfSlots, leaderRetrievalService, new MetricRegistry(metricRegistryConfiguration));
final ActorRef taskManager = actorSystem.actorOf(tmProps);
new JavaTestKit(actorSystem) {
{
// wait for the TaskManager to be registered
// NOTE(review): the limit is 5000 SECONDS (roughly 83 minutes); possibly milliseconds were intended - confirm
new Within(new FiniteDuration(5000, TimeUnit.SECONDS)) {
@Override
protected void run() {
// ask to be notified on registration and expect the confirmation at the test actor
taskManager.tell(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), getTestActor());
expectMsgEquals(TaskManagerMessages.getRegisteredAtJobManagerMessage());
}
};
}
};
// shut down all actors and the actor system
// kill the TaskManager and the JobManager actors
taskManager.tell(Kill.getInstance(), ActorRef.noSender());
jobManager.tell(Kill.getInstance(), ActorRef.noSender());
// shut down the actor system and block until it has terminated
actorSystem.shutdown();
actorSystem.awaitTermination();
// cleared so that the finally block does not shut the system down a second time
actorSystem = null;
// now that the TaskManager is shut down, the components should be shut down as well
assertTrue(network.isShutdown());
assertTrue(ioManager.isProperlyShutDown());
assertTrue(memManager.isShutdown());
} catch (Exception e) {
// any exception is printed and reported as a test failure carrying only its message
e.printStackTrace();
fail(e.getMessage());
} finally {
// only reached with a live actor system if the try block did not complete its own shutdown
if (actorSystem != null) {
actorSystem.shutdown();
}
}
}
Use of org.apache.flink.runtime.clusterframework.types.ResourceID in the Apache Flink project.
Class TaskManagerLocationTest, method testEqualsHashAndCompareTo.
/**
 * Checks {@code equals()}, {@code hashCode()} and {@code compareTo()} of
 * {@link TaskManagerLocation} on four instances, two of which ({@code one}
 * and {@code four}) are built from identical arguments.
 *
 * <p>The {@link InetAddress} instances are Mockito mocks with stubbed
 * host name and address accessors.
 */
@Test
public void testEqualsHashAndCompareTo() {
    try {
        ResourceID resourceID1 = new ResourceID("a");
        ResourceID resourceID2 = new ResourceID("b");
        ResourceID resourceID3 = new ResourceID("c");

        // we mock the addresses to save the times of the reverse name lookups
        InetAddress address1 = mock(InetAddress.class);
        when(address1.getCanonicalHostName()).thenReturn("localhost");
        when(address1.getHostName()).thenReturn("localhost");
        when(address1.getHostAddress()).thenReturn("127.0.0.1");
        when(address1.getAddress()).thenReturn(new byte[] { 127, 0, 0, 1 });

        InetAddress address2 = mock(InetAddress.class);
        when(address2.getCanonicalHostName()).thenReturn("testhost1");
        when(address2.getHostName()).thenReturn("testhost1");
        when(address2.getHostAddress()).thenReturn("0.0.0.0");
        when(address2.getAddress()).thenReturn(new byte[] { 0, 0, 0, 0 });

        InetAddress address3 = mock(InetAddress.class);
        when(address3.getCanonicalHostName()).thenReturn("testhost2");
        when(address3.getHostName()).thenReturn("testhost2");
        when(address3.getHostAddress()).thenReturn("192.168.0.1");
        when(address3.getAddress()).thenReturn(new byte[] { (byte) 192, (byte) 168, 0, 1 });

        // one == four != two != three
        TaskManagerLocation one = new TaskManagerLocation(resourceID1, address1, 19871);
        TaskManagerLocation two = new TaskManagerLocation(resourceID2, address2, 19871);
        TaskManagerLocation three = new TaskManagerLocation(resourceID3, address3, 10871);
        TaskManagerLocation four = new TaskManagerLocation(resourceID1, address1, 19871);

        // equals
        assertTrue(one.equals(four));
        assertTrue(!one.equals(two));
        assertTrue(!one.equals(three));
        assertTrue(!two.equals(three));
        assertTrue(!three.equals(four));

        // hashCode: equal objects must report equal hash codes (Object.hashCode contract)
        assertTrue(one.hashCode() == four.hashCode());

        // compareTo
        assertTrue(one.compareTo(four) == 0);
        assertTrue(four.compareTo(one) == 0);
        assertTrue(one.compareTo(two) != 0);
        assertTrue(one.compareTo(three) != 0);
        assertTrue(two.compareTo(three) != 0);
        assertTrue(three.compareTo(four) != 0);

        {
            // swapping the operands must negate the comparison result
            int val = one.compareTo(two);
            assertTrue(two.compareTo(one) == -val);
        }
    } catch (Exception e) {
        // any exception is printed and reported as a test failure
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Use of org.apache.flink.runtime.clusterframework.types.ResourceID in the Apache Flink project.
Class YarnFlinkResourceManager, method reacceptRegisteredWorkers.
/**
 * Re-accepts those of the given TaskManagers whose container is still
 * tracked in {@code containersInLaunch}.
 *
 * <p>Each recognized entry is removed from {@code containersInLaunch} and
 * wrapped in a {@link RegisteredYarnWorkerNode}. Unrecognized IDs are only
 * logged, with a different message depending on {@code isStarted(...)}.
 *
 * @param toConsolidate resource IDs of the TaskManagers to check
 * @return the worker nodes whose containers were recognized
 */
@Override
protected Collection<RegisteredYarnWorkerNode> reacceptRegisteredWorkers(Collection<ResourceID> toConsolidate) {
    final List<RegisteredYarnWorkerNode> recognized = new ArrayList<>();

    for (ResourceID candidate : toConsolidate) {
        // a container that was in launch for this ID means we know the worker
        final YarnContainerInLaunch launching = containersInLaunch.remove(candidate);

        if (launching != null) {
            LOG.info("YARN container consolidation recognizes Resource {} ", candidate);
            recognized.add(new RegisteredYarnWorkerNode(launching.container()));
            continue;
        }

        // not in launch: only report why it was not accepted
        if (isStarted(candidate)) {
            LOG.info("TaskManager {} has already been registered at the resource manager.", candidate);
        } else {
            LOG.info("YARN container consolidation does not recognize TaskManager {}", candidate);
        }
    }

    return recognized;
}
Use of org.apache.flink.runtime.clusterframework.types.ResourceID in the Apache Flink project.
Class YarnTaskManagerRunner, method runYarnTaskManager.
/**
 * Starts a TaskManager inside a YARN container.
 *
 * <p>The method parses the command line into a Flink configuration, reads the
 * YARN-provided environment variables, fills in the temp directories if the
 * configuration does not set them, derives the {@link ResourceID} from the
 * container id, installs the security configuration and finally runs the
 * TaskManager inside the installed security context.
 *
 * <p>If the arguments cannot be parsed, or the TaskManager fails to start, the
 * JVM is terminated via {@code System.exit} with
 * {@code TaskManager.STARTUP_FAILURE_RETURN_CODE()}.
 *
 * @param args command line arguments, passed to {@code TaskManager.parseArgsAndLoadConfig}
 * @param taskManager the TaskManager class to start
 * @throws IOException declared by this method; presumably raised by
 *         {@code UserGroupInformation.getCurrentUser()}, which is called outside any try block
 * @throws NullPointerException if the container id environment variable is not set
 * @throws RuntimeException wrapping any exception raised while installing the
 *         security configuration or running the secured callable
 */
public static void runYarnTaskManager(String[] args, final Class<? extends YarnTaskManager> taskManager) throws IOException {
    EnvironmentInformation.logEnvironmentInfo(LOG, "YARN TaskManager", args);
    SignalHandler.register(LOG);
    JvmShutdownSafeguard.installAsShutdownHook(LOG);

    // try to parse the command line arguments
    final Configuration configuration;
    try {
        configuration = TaskManager.parseArgsAndLoadConfig(args);
    } catch (Throwable t) {
        LOG.error(t.getMessage(), t);
        System.exit(TaskManager.STARTUP_FAILURE_RETURN_CODE());
        // unreachable at runtime, but required so that 'configuration' is definitely assigned below
        return;
    }

    // read the environment variables for YARN
    final Map<String, String> envs = System.getenv();
    final String yarnClientUsername = envs.get(YarnConfigKeys.ENV_HADOOP_USER_NAME);
    final String localDirs = envs.get(Environment.LOCAL_DIRS.key());
    LOG.info("Current working/local Directory: {}", localDirs);
    final String currDir = envs.get(Environment.PWD.key());
    LOG.info("Current working Directory: {}", currDir);
    final String remoteKeytabPath = envs.get(YarnConfigKeys.KEYTAB_PATH);
    LOG.info("TM: remoteKeytabPath obtained {}", remoteKeytabPath);
    final String remoteKeytabPrincipal = envs.get(YarnConfigKeys.KEYTAB_PRINCIPAL);
    LOG.info("TM: remoteKeytabPrincipal obtained {}", remoteKeytabPrincipal);

    // configure local directory: an explicit Flink setting wins over YARN's local dirs
    String flinkTempDirs = configuration.getString(ConfigConstants.TASK_MANAGER_TMP_DIR_KEY, null);
    if (flinkTempDirs == null) {
        LOG.info("Setting directories for temporary file {}", localDirs);
        configuration.setString(ConfigConstants.TASK_MANAGER_TMP_DIR_KEY, localDirs);
    } else {
        LOG.info("Overriding YARN's temporary file directories with those specified in the Flink config: {}", flinkTempDirs);
    }

    // tell akka to die in case of an error
    configuration.setBoolean(ConfigConstants.AKKA_JVM_EXIT_ON_FATAL_ERROR, true);

    // a keytab path in the environment means the keytab file is expected in the working directory
    String localKeytabPath = null;
    if (remoteKeytabPath != null) {
        File f = new File(currDir, Utils.KEYTAB_FILE_NAME);
        localKeytabPath = f.getAbsolutePath();
        LOG.info("localKeytabPath: {}", localKeytabPath);
    }

    UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
    LOG.info("YARN daemon is running as: {} Yarn client user obtainer: {}", currentUser.getShortUserName(), yarnClientUsername);

    // Infer the resource identifier from the environment variable; fail with a
    // descriptive message instead of a bare NullPointerException when it is missing
    String containerID = Preconditions.checkNotNull(
        envs.get(YarnFlinkResourceManager.ENV_FLINK_CONTAINER_ID),
        "Environment variable " + YarnFlinkResourceManager.ENV_FLINK_CONTAINER_ID + " is not set");
    final ResourceID resourceId = new ResourceID(containerID);
    LOG.info("ResourceID assigned for this container: {}", resourceId);

    try {
        org.apache.hadoop.conf.Configuration hadoopConfiguration = null;

        // To support Yarn Secure Integration Test Scenario
        File krb5Conf = new File(currDir, Utils.KRB5_FILE_NAME);
        if (krb5Conf.exists() && krb5Conf.canRead()) {
            String krb5Path = krb5Conf.getAbsolutePath();
            LOG.info("KRB5 Conf: {}", krb5Path);
            hadoopConfiguration = new org.apache.hadoop.conf.Configuration();
            hadoopConfiguration.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
            hadoopConfiguration.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, "true");
        }

        // set keytab principal and replace path with the local path of the shipped keytab file in NodeManager
        if (localKeytabPath != null && remoteKeytabPrincipal != null) {
            configuration.setString(SecurityOptions.KERBEROS_LOGIN_KEYTAB, localKeytabPath);
            configuration.setString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL, remoteKeytabPrincipal);
        }

        // the Hadoop configuration is only passed along when the krb5 file was found above
        SecurityUtils.SecurityConfiguration sc;
        if (hadoopConfiguration != null) {
            sc = new SecurityUtils.SecurityConfiguration(configuration, hadoopConfiguration);
        } else {
            sc = new SecurityUtils.SecurityConfiguration(configuration);
        }

        SecurityUtils.install(sc);

        SecurityUtils.getInstalledContext().runSecured(new Callable<Object>() {
            @Override
            public Integer call() {
                try {
                    TaskManager.selectNetworkInterfaceAndRunTaskManager(configuration, resourceId, taskManager);
                } catch (Throwable t) {
                    LOG.error("Error while starting the TaskManager", t);
                    System.exit(TaskManager.STARTUP_FAILURE_RETURN_CODE());
                }
                return null;
            }
        });
    } catch (Exception e) {
        LOG.error("Exception occurred while launching Task Manager", e);
        throw new RuntimeException(e);
    }
}
Use of org.apache.flink.runtime.clusterframework.types.ResourceID in the Apache Flink project.
Class ResourceManagerTaskExecutorTest, method mockTaskExecutor.
/**
 * Registers a mocked {@link TaskExecutorGateway} at {@code rpcService} under
 * the given address and returns a newly generated {@link ResourceID}.
 *
 * @param taskExecutorAddress address under which the mocked gateway is registered
 * @return a freshly generated resource ID
 */
private ResourceID mockTaskExecutor(String taskExecutorAddress) {
    final TaskExecutorGateway gatewayMock = mock(TaskExecutorGateway.class);
    rpcService.registerGateway(taskExecutorAddress, gatewayMock);
    return ResourceID.generate();
}
Aggregations