use of org.apache.flink.runtime.entrypoint.WorkingDirectory in project flink by apache.
the class TaskManagerRunner method startTaskManagerRunnerServices.
/**
 * Creates and starts the services this runner needs, then creates the task executor service.
 *
 * <p>Everything happens while holding {@code lock}. The statements are order-dependent: the
 * executor and RPC system feed the high-availability services, the RPC service feeds the
 * resource id, the resource id feeds the working directory, and the working directory feeds
 * the blob cache and the task executor service.
 *
 * @throws Exception if any of the invoked factory or start methods fails
 */
private void startTaskManagerRunnerServices() throws Exception {
synchronized (lock) {
rpcSystem = RpcSystem.load(configuration);
// Scheduled pool sized to the number of CPU cores reported by Hardware.
this.executor = Executors.newScheduledThreadPool(Hardware.getNumberCPUCores(), new ExecutorThreadFactory("taskmanager-future"));
// NOTE(review): `this` is passed as the last argument, presumably as the fatal error handler; confirm.
highAvailabilityServices = HighAvailabilityServicesUtils.createHighAvailabilityServices(configuration, executor, AddressResolution.NO_ADDRESS_RESOLUTION, rpcSystem, this);
JMXService.startInstance(configuration.getString(JMXServerOptions.JMX_SERVER_PORT));
rpcService = createRpcService(configuration, highAvailabilityServices, rpcSystem);
// The resource id is derived from the configuration and the RPC service's address and port.
this.resourceId = getTaskManagerResourceID(configuration, rpcService.getAddress(), rpcService.getPort());
this.workingDirectory = ClusterEntrypointUtils.createTaskManagerWorkingDirectory(configuration, resourceId);
LOG.info("Using working directory: {}", workingDirectory);
HeartbeatServices heartbeatServices = HeartbeatServices.fromConfiguration(configuration);
metricRegistry = new MetricRegistryImpl(MetricRegistryConfiguration.fromConfiguration(configuration, rpcSystem.getMaximumMessageSizeInBytes(configuration)), ReporterSetup.fromConfiguration(configuration, pluginManager));
// A separate RPC service is started for the metric query service.
final RpcService metricQueryServiceRpcService = MetricUtils.startRemoteMetricsRpcService(configuration, rpcService.getAddress(), configuration.getString(TaskManagerOptions.BIND_HOST), rpcSystem);
metricRegistry.startQueryService(metricQueryServiceRpcService, resourceId.unwrap());
// The blob storage directory is borrowed from the working directory.
// NOTE(review): the meaning of the trailing `null` argument is not visible here; confirm.
blobCacheService = BlobUtils.createBlobCacheService(configuration, Reference.borrowed(workingDirectory.unwrap().getBlobStorageDirectory()), highAvailabilityServices.createBlobStore(), null);
final ExternalResourceInfoProvider externalResourceInfoProvider = ExternalResourceUtils.createStaticExternalResourceInfoProviderFromConfig(configuration, pluginManager);
// NOTE(review): the meaning of the literal `false` argument is not visible here; confirm against the factory signature.
taskExecutorService = taskExecutorServiceFactory.createTaskExecutor(this.configuration, this.resourceId.unwrap(), rpcService, highAvailabilityServices, heartbeatServices, metricRegistry, blobCacheService, false, externalResourceInfoProvider, workingDirectory.unwrap(), this);
handleUnexpectedTaskExecutorServiceTermination();
// The memory logger receives a future derived from terminationFuture with its result discarded.
MemoryLogger.startIfConfigured(LOG, configuration, terminationFuture.thenAccept(ignored -> {
}));
}
}
use of org.apache.flink.runtime.entrypoint.WorkingDirectory in project flink by apache.
the class TaskExecutorRecoveryTest method testRecoveredTaskExecutorWillRestoreAllocationState.
/**
 * Verifies that a {@link TaskExecutor} rebuilt with the same resource id and working directory
 * reports the slot allocated by its predecessor and offers that slot to the job master.
 *
 * <p>Both task executors are closed in {@code finally} blocks so that a failing assertion does
 * not leave a running endpoint behind on the shared testing RPC service.
 *
 * @param tempDir temporary directory backing the {@link WorkingDirectory} shared by both
 *     task executors
 */
@Test
public void testRecoveredTaskExecutorWillRestoreAllocationState(@TempDir File tempDir) throws Exception {
    final ResourceID resourceId = ResourceID.generate();
    final Configuration configuration = new Configuration();
    configuration.set(TaskManagerOptions.NUM_TASK_SLOTS, 2);
    configuration.set(CheckpointingOptions.LOCAL_RECOVERY, true);

    // Capture every slot report sent to the resource manager.
    final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
    final ArrayBlockingQueue<TaskExecutorSlotReport> queue = new ArrayBlockingQueue<>(2);
    testingResourceManagerGateway.setSendSlotReportFunction(slotReportInformation -> {
        queue.offer(TaskExecutorSlotReport.create(slotReportInformation.f0, slotReportInformation.f2));
        return CompletableFuture.completedFuture(Acknowledge.get());
    });

    final TestingRpcService rpcService = rpcServiceExtension.getTestingRpcService();
    rpcService.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);

    final JobID jobId = new JobID();
    final TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
    highAvailabilityServices.setResourceManagerLeaderRetriever(new SettableLeaderRetrievalService(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()));
    final SettableLeaderRetrievalService jobMasterLeaderRetriever = new SettableLeaderRetrievalService();
    highAvailabilityServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever);

    final WorkingDirectory workingDirectory = WorkingDirectory.create(tempDir);
    final AllocationID allocationId = new AllocationID();
    final SlotID allocatedSlotID;

    final TaskExecutor taskExecutor = TaskExecutorBuilder.newBuilder(rpcService, highAvailabilityServices, workingDirectory).setConfiguration(configuration).setResourceId(resourceId).build();
    try {
        taskExecutor.start();
        final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

        final TaskExecutorSlotReport taskExecutorSlotReport = queue.take();
        final SlotReport slotReport = taskExecutorSlotReport.getSlotReport();
        assertThat(slotReport.getNumSlotStatus(), is(2));

        // Allocate the first reported slot for the job.
        final SlotStatus slotStatus = slotReport.iterator().next();
        allocatedSlotID = slotStatus.getSlotID();
        taskExecutorGateway.requestSlot(allocatedSlotID, jobId, allocationId, slotStatus.getResourceProfile(), "localhost", testingResourceManagerGateway.getFencingToken(), Time.seconds(10L)).join();
    } finally {
        // Close the first executor even if the setup above fails.
        taskExecutor.close();
    }

    final BlockingQueue<Collection<SlotOffer>> offeredSlots = new ArrayBlockingQueue<>(1);
    final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setOfferSlotsFunction((resourceID, slotOffers) -> {
        offeredSlots.offer(new HashSet<>(slotOffers));
        return CompletableFuture.completedFuture(slotOffers);
    }).build();
    rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
    jobMasterLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());

    // recover the TaskExecutor
    final TaskExecutor recoveredTaskExecutor = TaskExecutorBuilder.newBuilder(rpcService, highAvailabilityServices, workingDirectory).setConfiguration(configuration).setResourceId(resourceId).build();
    try {
        recoveredTaskExecutor.start();

        final TaskExecutorSlotReport recoveredSlotReport = queue.take();
        for (SlotStatus status : recoveredSlotReport.getSlotReport()) {
            if (status.getSlotID().equals(allocatedSlotID)) {
                assertThat(status.getJobID(), is(jobId));
                assertThat(status.getAllocationID(), is(allocationId));
            } else {
                assertThat(status.getJobID(), is(nullValue()));
            }
        }

        final Collection<SlotOffer> take = offeredSlots.take();
        assertThat(take, hasSize(1));
        final SlotOffer offeredSlot = take.iterator().next();
        assertThat(offeredSlot.getAllocationId(), is(allocationId));
    } finally {
        // Previously the recovered executor was never closed.
        recoveredTaskExecutor.close();
    }
}
use of org.apache.flink.runtime.entrypoint.WorkingDirectory in project flink by apache.
the class TaskExecutorBuilder method build.
/**
 * Assembles a {@link TaskExecutor} from the values collected by this builder.
 *
 * <p>{@code TaskExecutorResourceUtils.adjustForLocalExecution} is applied to the configuration
 * first. Any of the blob service, task manager configuration, or task manager services that was
 * not set explicitly (i.e. is {@code null}) is replaced by a default: a no-op blob service, a
 * configuration derived from {@code configuration}, and services created from
 * {@code configuration} and the working directory, respectively.
 *
 * @return the newly constructed task executor
 * @throws Exception if creating one of the default components fails
 */
public TaskExecutor build() throws Exception {
    TaskExecutorResourceUtils.adjustForLocalExecution(configuration);

    final TaskExecutorBlobService blobService =
            taskExecutorBlobService != null
                    ? taskExecutorBlobService
                    : NoOpTaskExecutorBlobService.INSTANCE;

    final TaskManagerConfiguration tmConfiguration =
            taskManagerConfiguration != null
                    ? taskManagerConfiguration
                    : TaskManagerConfiguration.fromConfiguration(
                            configuration,
                            taskExecutorResourceSpec,
                            rpcService.getAddress(),
                            workingDirectory.getTmpDirectory());

    final TaskManagerServices tmServices;
    if (taskManagerServices != null) {
        tmServices = taskManagerServices;
    } else {
        final TaskManagerServicesConfiguration servicesConfiguration =
                TaskManagerServicesConfiguration.fromConfiguration(
                        configuration,
                        resourceId,
                        rpcService.getAddress(),
                        true,
                        taskExecutorResourceSpec,
                        workingDirectory);
        // The error callback handed to the services deliberately does nothing.
        tmServices =
                TaskManagerServices.fromConfiguration(
                        servicesConfiguration,
                        VoidPermanentBlobService.INSTANCE,
                        UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
                        Executors.newDirectExecutorService(),
                        throwable -> {
                        },
                        workingDirectory);
    }

    return new TaskExecutor(
            rpcService,
            tmConfiguration,
            haServices,
            tmServices,
            externalResourceInfoProvider,
            heartbeatServices,
            taskManagerMetricGroup,
            metricQueryServiceAddress,
            blobService,
            fatalErrorHandler,
            partitionTracker);
}
use of org.apache.flink.runtime.entrypoint.WorkingDirectory in project flink by apache.
the class TaskExecutorLocalStateStoresManagerTest method testCreationFromConfigDefault.
/**
 * Checks the defaults of the {@link TaskExecutorLocalStateStoresManager} created through
 * {@link TaskManagerServices} from an empty {@link Configuration}: every local state root
 * directory must equal the working directory's local state directory, and local recovery must
 * be disabled.
 */
@Test
public void testCreationFromConfigDefault() throws Exception {
    final Configuration emptyConfig = new Configuration();
    final WorkingDirectory workingDirectory = WORKING_DIRECTORY_RESOURCE.createNewWorkingDirectory();
    final TaskManagerServices services =
            createTaskManagerServices(
                    createTaskManagerServiceConfiguration(emptyConfig, workingDirectory),
                    workingDirectory);
    try {
        final TaskExecutorLocalStateStoresManager stateStoresManager = services.getTaskManagerStateStore();
        for (File rootDirectory : stateStoresManager.getLocalStateRootDirectories()) {
            Assert.assertEquals(workingDirectory.getLocalStateDirectory(), rootDirectory);
        }
        Assert.assertFalse(stateStoresManager.isLocalRecoveryEnabled());
    } finally {
        // Always release the services, even when an assertion fails.
        services.shutDown();
    }
}
use of org.apache.flink.runtime.entrypoint.WorkingDirectory in project flink by apache.
the class TaskExecutorLocalStateStoresManagerTest method testCreationFromConfig.
/**
 * This tests that the creation of {@link TaskManagerServices} correctly creates the local state
 * root directory for the {@link TaskExecutorLocalStateStoresManager} with the configured root
 * directory.
 */
@Test
public void testCreationFromConfig() throws Exception {
    final Configuration config = new Configuration();
    File newFolder = temporaryFolder.newFolder();
    String tmpDir = newFolder.getAbsolutePath() + File.separator;
    // Use the literal String.replace: replaceAll would interpret '\' and '$' in the temp path
    // as regex replacement syntax (e.g. Windows path separators) and corrupt it or throw.
    final String rootDirString = "__localStateRoot1,__localStateRoot2,__localStateRoot3".replace("__", tmpDir);
    // test configuration of the local state directories
    config.setString(CheckpointingOptions.LOCAL_RECOVERY_TASK_MANAGER_STATE_ROOT_DIRS, rootDirString);
    // test configuration of the local state mode
    config.setBoolean(CheckpointingOptions.LOCAL_RECOVERY, true);
    final WorkingDirectory workingDirectory = WORKING_DIRECTORY_RESOURCE.createNewWorkingDirectory();
    TaskManagerServices taskManagerServices = createTaskManagerServices(createTaskManagerServiceConfiguration(config, workingDirectory), workingDirectory);
    try {
        TaskExecutorLocalStateStoresManager taskStateManager = taskManagerServices.getTaskManagerStateStore();
        // verify configured directories for local state
        String[] split = rootDirString.split(",");
        File[] rootDirectories = taskStateManager.getLocalStateRootDirectories();
        for (int i = 0; i < split.length; ++i) {
            assertThat(rootDirectories[i].toPath()).startsWith(Paths.get(split[i]));
        }
        // verify local recovery mode
        Assert.assertTrue(taskStateManager.isLocalRecoveryEnabled());
        // remove the directories that were created for the configured roots
        for (File rootDirectory : rootDirectories) {
            FileUtils.deleteFileOrDirectory(rootDirectory);
        }
    } finally {
        taskManagerServices.shutDown();
    }
}
Aggregations