Search in sources :

Example 1 with TaskExecutorStateChangelogStoragesManager

use of org.apache.flink.runtime.state.TaskExecutorStateChangelogStoragesManager in project flink by apache.

the class TaskExecutorTest method createTaskExecutorTestingContext.

private TaskExecutorTestingContext createTaskExecutorTestingContext(final TaskSlotTable<Task> taskSlotTable) throws IOException {
    final OneShotLatch offerSlotsLatch = new OneShotLatch();
    final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setOfferSlotsFunction((resourceID, slotOffers) -> {
        offerSlotsLatch.trigger();
        return CompletableFuture.completedFuture(slotOffers);
    }).build();
    rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
    final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration());
    TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();
    TaskExecutorStateChangelogStoragesManager changelogStoragesManager = new TaskExecutorStateChangelogStoragesManager();
    TaskManagerMetricGroup metricGroup = TaskManagerMetricGroup.createTaskManagerMetricGroup(NoOpMetricRegistry.INSTANCE, "", ResourceID.generate());
    final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).setJobLeaderService(jobLeaderService).setTaskStateManager(stateStoresManager).setTaskChangelogStoragesManager(changelogStoragesManager).build(), HEARTBEAT_SERVICES, metricGroup);
    jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
    return new TaskExecutorTestingContext(jobMasterGateway, taskSlotTable, taskExecutor, changelogStoragesManager, metricGroup, offerSlotsLatch);
}
Also used : Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TaskSlotUtils.createDefaultTimerService(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createDefaultTimerService) MemorySize(org.apache.flink.configuration.MemorySize) NetUtils(org.apache.flink.util.NetUtils) InetAddress(java.net.InetAddress) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) TaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTracker) FunctionUtils(org.apache.flink.util.function.FunctionUtils) Matchers.nullValue(org.hamcrest.Matchers.nullValue) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Builder(org.apache.flink.runtime.taskexecutor.TaskSubmissionTestEnvironment.Builder) Matchers.notNullValue(org.hamcrest.Matchers.notNullValue) Failure(org.apache.flink.runtime.registration.RegistrationResponse.Failure) NoOpTaskExecutorBlobService(org.apache.flink.runtime.blob.NoOpTaskExecutorBlobService) TestingTaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker) NoOpMetricRegistry(org.apache.flink.runtime.metrics.NoOpMetricRegistry) BlockingQueue(java.util.concurrent.BlockingQueue) Matchers.startsWith(org.hamcrest.Matchers.startsWith) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TaskExecutorStateChangelogStoragesManager(org.apache.flink.runtime.state.TaskExecutorStateChangelogStoragesManager) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Matchers.contains(org.hamcrest.Matchers.contains) Matchers.is(org.hamcrest.Matchers.is) Matchers.containsString(org.hamcrest.Matchers.containsString) Time(org.apache.flink.api.common.time.Time) TaskExecutorRegistration(org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration) TaskExecutorPartitionTrackerImpl(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTrackerImpl) FlinkException(org.apache.flink.util.FlinkException) TaskSlotTableImpl(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTableImpl) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) TaskSubmissionException(org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException) Callable(java.util.concurrent.Callable) DEFAULT_RESOURCE_PROFILE(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.DEFAULT_RESOURCE_PROFILE) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TestingTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TestingTaskSlotTable) ArrayList(java.util.ArrayList) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) TestingClassLoaderLease(org.apache.flink.runtime.execution.librarycache.TestingClassLoaderLease) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) TestName(org.junit.rules.TestName) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Matchers.hasSize(org.hamcrest.Matchers.hasSize) TriConsumer(org.apache.flink.util.function.TriConsumer) TestFileUtils(org.apache.flink.testutils.TestFileUtils) Before(org.junit.Before) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) TriConsumerWithException(org.apache.flink.util.function.TriConsumerWithException) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) InstanceID(org.apache.flink.runtime.instance.InstanceID) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Executors(org.apache.flink.util.concurrent.Executors) ThreadSafeTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.ThreadSafeTaskSlotTable) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) Task(org.apache.flink.runtime.taskmanager.Task) Assert.assertNull(org.junit.Assert.assertNull) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) Assert.assertEquals(org.junit.Assert.assertEquals) CPUResource(org.apache.flink.api.common.resources.CPUResource) MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) Deadline(org.apache.flink.api.common.time.Deadline) ClusterPartitionReport(org.apache.flink.runtime.taskexecutor.partition.ClusterPartitionReport) IntStream.range(java.util.stream.IntStream.range) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ShuffleEnvironment(org.apache.flink.runtime.shuffle.ShuffleEnvironment) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Lists(org.apache.flink.shaded.guava30.com.google.common.collect.Lists) Assert.assertThat(org.junit.Assert.assertThat) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) Assert.fail(org.junit.Assert.fail) TransientBlobKey(org.apache.flink.runtime.blob.TransientBlobKey) RegistrationTimeoutException(org.apache.flink.runtime.taskexecutor.exceptions.RegistrationTimeoutException) Collection(java.util.Collection) KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) CompletionException(java.util.concurrent.CompletionException) UUID(java.util.UUID) NettyShuffleEnvironment(org.apache.flink.runtime.io.network.NettyShuffleEnvironment) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) TaskManagerException(org.apache.flink.runtime.taskexecutor.exceptions.TaskManagerException) Collectors(java.util.stream.Collectors) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) ExecutorUtils(org.apache.flink.util.ExecutorUtils) TaskSlotUtils.createTotalResourceProfile(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createTotalResourceProfile) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Queue(java.util.Queue) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) AllocatedSlotInfo(org.apache.flink.runtime.jobmaster.AllocatedSlotInfo) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) LeaderRetrievalListener(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener) TaskInvokable(org.apache.flink.runtime.jobgraph.tasks.TaskInvokable) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TaskDeploymentDescriptorBuilder(org.apache.flink.runtime.deployment.TaskDeploymentDescriptorBuilder) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) NettyShuffleEnvironmentOptions(org.apache.flink.configuration.NettyShuffleEnvironmentOptions) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) ExternalResourceInfoProvider(org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider) ConfigConstants(org.apache.flink.configuration.ConfigConstants) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) TaskSlotUtils(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) Matchers.empty(org.hamcrest.Matchers.empty) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) Reference(org.apache.flink.util.Reference) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) AllocatedSlotReport(org.apache.flink.runtime.jobmaster.AllocatedSlotReport) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) Rule(org.junit.Rule) UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) TaskExecutorStateChangelogStoragesManager(org.apache.flink.runtime.state.TaskExecutorStateChangelogStoragesManager)

Example 2 with TaskExecutorStateChangelogStoragesManager

use of org.apache.flink.runtime.state.TaskExecutorStateChangelogStoragesManager in project flink by apache.

the class TaskManagerServices method fromConfiguration.

// --------------------------------------------------------------------------------------------
// Static factory methods for task manager services
// --------------------------------------------------------------------------------------------
/**
 * Creates and returns the task manager services.
 *
 * @param taskManagerServicesConfiguration task manager configuration
 * @param permanentBlobService permanentBlobService used by the services
 * @param taskManagerMetricGroup metric group of the task manager
 * @param ioExecutor executor for async IO operations
 * @param fatalErrorHandler to handle class loading OOMs
 * @param workingDirectory the working directory of the process
 * @return task manager components
 * @throws Exception
 */
public static TaskManagerServices fromConfiguration(TaskManagerServicesConfiguration taskManagerServicesConfiguration, PermanentBlobService permanentBlobService, MetricGroup taskManagerMetricGroup, ExecutorService ioExecutor, FatalErrorHandler fatalErrorHandler, WorkingDirectory workingDirectory) throws Exception {
    // pre-start checks
    checkTempDirs(taskManagerServicesConfiguration.getTmpDirPaths());
    final TaskEventDispatcher taskEventDispatcher = new TaskEventDispatcher();
    // start the I/O manager, it will create some temp directories.
    final IOManager ioManager = new IOManagerAsync(taskManagerServicesConfiguration.getTmpDirPaths());
    final ShuffleEnvironment<?, ?> shuffleEnvironment = createShuffleEnvironment(taskManagerServicesConfiguration, taskEventDispatcher, taskManagerMetricGroup, ioExecutor);
    final int listeningDataPort = shuffleEnvironment.start();
    final KvStateService kvStateService = KvStateService.fromConfiguration(taskManagerServicesConfiguration);
    kvStateService.start();
    final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new UnresolvedTaskManagerLocation(taskManagerServicesConfiguration.getResourceID(), taskManagerServicesConfiguration.getExternalAddress(), // iff the external data port is not explicitly defined
    taskManagerServicesConfiguration.getExternalDataPort() > 0 ? taskManagerServicesConfiguration.getExternalDataPort() : listeningDataPort);
    final BroadcastVariableManager broadcastVariableManager = new BroadcastVariableManager();
    final TaskSlotTable<Task> taskSlotTable = createTaskSlotTable(taskManagerServicesConfiguration.getNumberOfSlots(), taskManagerServicesConfiguration.getTaskExecutorResourceSpec(), taskManagerServicesConfiguration.getTimerServiceShutdownTimeout(), taskManagerServicesConfiguration.getPageSize(), ioExecutor);
    final JobTable jobTable = DefaultJobTable.create();
    final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, taskManagerServicesConfiguration.getRetryingRegistrationConfiguration());
    final TaskExecutorLocalStateStoresManager taskStateManager = new TaskExecutorLocalStateStoresManager(taskManagerServicesConfiguration.isLocalRecoveryEnabled(), taskManagerServicesConfiguration.getLocalRecoveryStateDirectories(), ioExecutor);
    final TaskExecutorStateChangelogStoragesManager changelogStoragesManager = new TaskExecutorStateChangelogStoragesManager();
    final boolean failOnJvmMetaspaceOomError = taskManagerServicesConfiguration.getConfiguration().getBoolean(CoreOptions.FAIL_ON_USER_CLASS_LOADING_METASPACE_OOM);
    final boolean checkClassLoaderLeak = taskManagerServicesConfiguration.getConfiguration().getBoolean(CoreOptions.CHECK_LEAKED_CLASSLOADER);
    final LibraryCacheManager libraryCacheManager = new BlobLibraryCacheManager(permanentBlobService, BlobLibraryCacheManager.defaultClassLoaderFactory(taskManagerServicesConfiguration.getClassLoaderResolveOrder(), taskManagerServicesConfiguration.getAlwaysParentFirstLoaderPatterns(), failOnJvmMetaspaceOomError ? fatalErrorHandler : null, checkClassLoaderLeak));
    final SlotAllocationSnapshotPersistenceService slotAllocationSnapshotPersistenceService;
    if (taskManagerServicesConfiguration.isLocalRecoveryEnabled()) {
        slotAllocationSnapshotPersistenceService = new FileSlotAllocationSnapshotPersistenceService(workingDirectory.getSlotAllocationSnapshotDirectory());
    } else {
        slotAllocationSnapshotPersistenceService = NoOpSlotAllocationSnapshotPersistenceService.INSTANCE;
    }
    return new TaskManagerServices(unresolvedTaskManagerLocation, taskManagerServicesConfiguration.getManagedMemorySize().getBytes(), ioManager, shuffleEnvironment, kvStateService, broadcastVariableManager, taskSlotTable, jobTable, jobLeaderService, taskStateManager, changelogStoragesManager, taskEventDispatcher, ioExecutor, libraryCacheManager, slotAllocationSnapshotPersistenceService);
}
Also used : BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) Task(org.apache.flink.runtime.taskmanager.Task) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) NoOpSlotAllocationSnapshotPersistenceService(org.apache.flink.runtime.taskexecutor.slot.NoOpSlotAllocationSnapshotPersistenceService) SlotAllocationSnapshotPersistenceService(org.apache.flink.runtime.taskexecutor.slot.SlotAllocationSnapshotPersistenceService) FileSlotAllocationSnapshotPersistenceService(org.apache.flink.runtime.taskexecutor.slot.FileSlotAllocationSnapshotPersistenceService) FileSlotAllocationSnapshotPersistenceService(org.apache.flink.runtime.taskexecutor.slot.FileSlotAllocationSnapshotPersistenceService) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) LibraryCacheManager(org.apache.flink.runtime.execution.librarycache.LibraryCacheManager) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) TaskExecutorStateChangelogStoragesManager(org.apache.flink.runtime.state.TaskExecutorStateChangelogStoragesManager)

Aggregations

File (java.io.File)1 IOException (java.io.IOException)1 InetAddress (java.net.InetAddress)1 ArrayDeque (java.util.ArrayDeque)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 List (java.util.List)1 Queue (java.util.Queue)1 UUID (java.util.UUID)1 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)1 BlockingQueue (java.util.concurrent.BlockingQueue)1 Callable (java.util.concurrent.Callable)1 CompletableFuture (java.util.concurrent.CompletableFuture)1 CompletionException (java.util.concurrent.CompletionException)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 ExecutionException (java.util.concurrent.ExecutionException)1 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)1 TimeUnit (java.util.concurrent.TimeUnit)1