Search in sources :

Example 71 with ResourceID

use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.

the class HeartbeatManagerTest method testHeartbeatTimeout.

/**
 * Tests that a heartbeat timeout is signaled if the heartbeat is not reported in time.
 *
 * <p>The test first reports {@code numHeartbeats} heartbeats, spaced {@code heartbeatInterval}
 * milliseconds apart, and asserts that no timeout has been signaled so far. It then stops
 * reporting heartbeats and expects the listener's timeout future to complete with the
 * monitored target's resource id within twice the heartbeat timeout.
 *
 * @throws Exception if waiting on the timeout future fails or the test thread is interrupted
 */
@Test
public void testHeartbeatTimeout() throws Exception {
    long heartbeatTimeout = 100L;
    int numHeartbeats = 10;
    long heartbeatInterval = 20L;
    Object payload = new Object();
    ResourceID ownResourceID = new ResourceID("foobar");
    ResourceID targetResourceID = new ResourceID("barfoo");
    TestingHeartbeatListener heartbeatListener = new TestingHeartbeatListener(payload);
    Object expectedObject = new Object();
    // A real scheduled executor is required so that the timeout actually fires. Keep a
    // handle on it so that its worker thread can be released once the test is done.
    ScheduledThreadPoolExecutor timeoutExecutor = new ScheduledThreadPoolExecutor(1);
    try {
        HeartbeatManagerImpl<Object, Object> heartbeatManager = new HeartbeatManagerImpl<>(heartbeatTimeout, ownResourceID, heartbeatListener, new DirectExecutorService(), new ScheduledExecutorServiceAdapter(timeoutExecutor), LOG);
        HeartbeatTarget<Object> heartbeatTarget = mock(HeartbeatTarget.class);
        Future<ResourceID> timeoutFuture = heartbeatListener.getTimeoutFuture();
        heartbeatManager.monitorTarget(targetResourceID, heartbeatTarget);
        // Report heartbeats more frequently than the timeout (20 ms interval vs. 100 ms timeout).
        for (int i = 0; i < numHeartbeats; i++) {
            heartbeatManager.receiveHeartbeat(targetResourceID, expectedObject);
            Thread.sleep(heartbeatInterval);
        }
        // While heartbeats were being reported, no timeout must have been signaled.
        assertFalse(timeoutFuture.isDone());
        // No further heartbeats are sent, so the timeout has to be reported for the target.
        ResourceID timeoutResourceID = timeoutFuture.get(2 * heartbeatTimeout, TimeUnit.MILLISECONDS);
        assertEquals(targetResourceID, timeoutResourceID);
    } finally {
        timeoutExecutor.shutdownNow();
    }
}
Also used : ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) DirectExecutorService(org.apache.flink.runtime.util.DirectExecutorService) ScheduledExecutorServiceAdapter(org.apache.flink.runtime.concurrent.ScheduledExecutorServiceAdapter) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TimeUnit(java.util.concurrent.TimeUnit) Test(org.junit.Test)

Example 72 with ResourceID

use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.

the class SchedulerTestUtils method getRandomInstance.

// --------------------------------------------------------------------------------------------
/**
 * Creates a test {@link Instance} with the given number of slots.
 *
 * <p>The instance gets a freshly generated {@link ResourceID}, is located at the loopback
 * address {@code 127.0.0.1} and uses the next value of the {@code port} counter as its data
 * port. It is backed by an {@link ActorTaskManagerGateway} wrapping
 * {@code DummyActorGateway.INSTANCE}.
 *
 * @param numSlots the number of slots of the instance; must be positive
 * @return the newly created instance
 * @throws IllegalArgumentException if {@code numSlots} is zero or negative
 * @throws RuntimeException if the loopback address cannot be resolved
 */
public static Instance getRandomInstance(int numSlots) {
    if (numSlots <= 0) {
        throw new IllegalArgumentException("The number of slots must be positive, but was " + numSlots + '.');
    }
    final ResourceID resourceID = ResourceID.generate();
    final InetAddress address;
    try {
        address = InetAddress.getByName("127.0.0.1");
    } catch (UnknownHostException e) {
        // Keep the original exception as the cause so that the root problem stays visible.
        throw new RuntimeException("Test could not create IP address for localhost loopback.", e);
    }
    int dataPort = port.getAndIncrement();
    TaskManagerLocation ci = new TaskManagerLocation(resourceID, address, dataPort);
    final long GB = 1024L * 1024 * 1024;
    HardwareDescription resources = new HardwareDescription(4, 4 * GB, 3 * GB, 2 * GB);
    return new Instance(new ActorTaskManagerGateway(DummyActorGateway.INSTANCE), ci, new InstanceID(), resources, numSlots);
}
Also used : HardwareDescription(org.apache.flink.runtime.instance.HardwareDescription) UnknownHostException(java.net.UnknownHostException) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Instance(org.apache.flink.runtime.instance.Instance) InstanceID(org.apache.flink.runtime.instance.InstanceID) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) InetAddress(java.net.InetAddress) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway)

Example 73 with ResourceID

use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.

the class JobMasterTest method testHeartbeatTimeoutWithTaskManager.

/**
 * Tests that the job master disconnects from a task manager once the heartbeat timeout for
 * that task manager fires.
 *
 * <p>The scheduled executor handed to the heartbeat services is a mock. The test captures the
 * heartbeat request runnable and the timeout runnable scheduled on it and runs them manually
 * instead of waiting for real time to pass.
 *
 * @throws Exception if the job master cannot be set up or a fatal error was reported to the
 *     testing fatal error handler
 */
@Test
public void testHeartbeatTimeoutWithTaskManager() throws Exception {
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final TestingLeaderRetrievalService rmLeaderRetrievalService = new TestingLeaderRetrievalService();
    haServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
    haServices.setCheckpointRecoveryFactory(mock(CheckpointRecoveryFactory.class));
    final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
    final String jobManagerAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();
    final ResourceID jmResourceId = new ResourceID(jobManagerAddress);
    final String taskManagerAddress = "tm";
    final ResourceID tmResourceId = new ResourceID(taskManagerAddress);
    final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(tmResourceId, InetAddress.getLoopbackAddress(), 1234);
    final TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class);
    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    rpc.registerGateway(taskManagerAddress, taskExecutorGateway);
    final long heartbeatInterval = 1L;
    final long heartbeatTimeout = 5L;
    final ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
    final HeartbeatServices heartbeatServices = new TestingHeartbeatServices(heartbeatInterval, heartbeatTimeout, scheduledExecutor);
    final JobGraph jobGraph = new JobGraph();
    // Keep a handle on the executor passed to the job master so that it can be shut down at
    // the end of the test instead of leaking its thread pool.
    final java.util.concurrent.ScheduledExecutorService jobMasterExecutor = Executors.newScheduledThreadPool(1);
    try {
        final JobMaster jobMaster = new JobMaster(jmResourceId, jobGraph, new Configuration(), rpc, haServices, heartbeatServices, jobMasterExecutor, mock(BlobLibraryCacheManager.class), mock(RestartStrategyFactory.class), Time.of(10, TimeUnit.SECONDS), null, mock(OnCompletionActions.class), testingFatalErrorHandler, new FlinkUserCodeClassLoader(new URL[0]));
        // also start the heartbeat manager in job manager
        jobMaster.start(jmLeaderId);
        // register task manager will trigger monitoring heartbeat target, schedule heartbeat request in interval time
        jobMaster.registerTaskManager(taskManagerAddress, taskManagerLocation, jmLeaderId);
        // the periodic heartbeat request must have been scheduled exactly once, without initial delay
        ArgumentCaptor<Runnable> heartbeatRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor, times(1)).scheduleAtFixedRate(heartbeatRunnableCaptor.capture(), eq(0L), eq(heartbeatInterval), eq(TimeUnit.MILLISECONDS));
        Runnable heartbeatRunnable = heartbeatRunnableCaptor.getValue();
        // the timeout must have been scheduled with the configured heartbeat timeout
        ArgumentCaptor<Runnable> timeoutRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor).schedule(timeoutRunnableCaptor.capture(), eq(heartbeatTimeout), eq(TimeUnit.MILLISECONDS));
        Runnable timeoutRunnable = timeoutRunnableCaptor.getValue();
        // run the first heartbeat request
        heartbeatRunnable.run();
        verify(taskExecutorGateway, times(1)).heartbeatFromJobManager(eq(jmResourceId));
        // run the timeout runnable to simulate a heartbeat timeout
        timeoutRunnable.run();
        verify(taskExecutorGateway).disconnectJobManager(eq(jobGraph.getJobID()), any(TimeoutException.class));
        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        rpc.stopService();
        jobMasterExecutor.shutdownNow();
    }
}
Also used : BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) Configuration(org.apache.flink.configuration.Configuration) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) FlinkUserCodeClassLoader(org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader) URL(java.net.URL) ScheduledExecutor(org.apache.flink.runtime.concurrent.ScheduledExecutor) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) UUID(java.util.UUID) TimeoutException(java.util.concurrent.TimeoutException) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) OnCompletionActions(org.apache.flink.runtime.jobmanager.OnCompletionActions) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) RestartStrategyFactory(org.apache.flink.runtime.executiongraph.restart.RestartStrategyFactory) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 74 with ResourceID

use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.

the class ScheduleWithCoLocationHintTest method scheduleWithIntermediateRelease.

/**
 * Tests that a co-location constraint stays bound to its task manager even after all slots
 * that were allocated under the constraint have been released in the meantime.
 *
 * @throws Exception if a slot allocation fails; the exception is propagated so that the test
 *     framework reports it with its full stack trace
 */
@Test
public void scheduleWithIntermediateRelease() throws Exception {
    JobVertexID jid1 = new JobVertexID();
    JobVertexID jid2 = new JobVertexID();
    JobVertexID jid3 = new JobVertexID();
    JobVertexID jid4 = new JobVertexID();
    Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
    Instance i1 = getRandomInstance(1);
    Instance i2 = getRandomInstance(1);
    scheduler.newInstanceAvailable(i1);
    scheduler.newInstanceAvailable(i2);
    assertEquals(2, scheduler.getNumberOfAvailableSlots());
    SlotSharingGroup sharingGroup = new SlotSharingGroup();
    CoLocationConstraint c1 = new CoLocationConstraint(new CoLocationGroup());
    // two co-located tasks in the sharing group plus one task outside of any sharing group
    SimpleSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 1), sharingGroup, c1), false).get();
    SimpleSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 1), sharingGroup, c1), false).get();
    SimpleSlot sSolo = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 0, 1)), false).get();
    // remember where the constraint was placed before everything gets released
    ResourceID taskManager = s1.getTaskManagerID();
    s1.releaseSlot();
    s2.releaseSlot();
    sSolo.releaseSlot();
    // a new task with the same constraint must end up on the same task manager again
    SimpleSlot sNew = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 1), sharingGroup, c1), false).get();
    assertEquals(taskManager, sNew.getTaskManagerID());
    assertEquals(2, scheduler.getNumberOfLocalizedAssignments());
    assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments());
    assertEquals(2, scheduler.getNumberOfUnconstrainedAssignments());
}
Also used : Instance(org.apache.flink.runtime.instance.Instance) SchedulerTestUtils.getRandomInstance(org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getRandomInstance) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) ExecutionException(java.util.concurrent.ExecutionException) Test(org.junit.Test)

Aggregations

ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID): 74, Test (org.junit.Test): 48, TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation): 25, Time (org.apache.flink.api.common.time.Time): 18, UUID (java.util.UUID): 16, JobID (org.apache.flink.api.common.JobID): 16, Configuration (org.apache.flink.configuration.Configuration): 14, AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID): 13, JavaTestKit (akka.testkit.JavaTestKit): 12, MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry): 12, InetAddress (java.net.InetAddress): 11, SlotID (org.apache.flink.runtime.clusterframework.types.SlotID): 10, HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices): 10, TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices): 10, SlotRequest (org.apache.flink.runtime.resourcemanager.SlotRequest): 10, IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager): 9, NetworkEnvironment (org.apache.flink.runtime.io.network.NetworkEnvironment): 9, ActorTaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway): 9, MemoryManager (org.apache.flink.runtime.memory.MemoryManager): 9, TestingSerialRpcService (org.apache.flink.runtime.rpc.TestingSerialRpcService): 9