Search in sources :

Example 21 with ResourceID

use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.

the class HeartbeatManagerTest method testHeartbeatCluster.

/**
 * Tests the heartbeat interplay between the {@link HeartbeatManagerImpl} and the
 * {@link HeartbeatManagerSenderImpl}. The sender should regularly trigger heartbeat requests
 * which are fulfilled by the receiver. Upon stopping the receiver, the sender should notify
 * the heartbeat listener about the heartbeat timeout.
 *
 * <p>The two scheduled thread pools backing the managers are shut down in a {@code finally}
 * block so that their worker threads do not outlive the test, even when an assertion fails.
 *
 * @throws Exception if the sleep is interrupted or waiting on the timeout future fails
 */
@Test
public void testHeartbeatCluster() throws Exception {
    long heartbeatTimeout = 100L;
    long heartbeatPeriod = 20L;
    Object object = new Object();
    Object object2 = new Object();
    ResourceID resourceID = new ResourceID("foobar");
    ResourceID resourceID2 = new ResourceID("barfoo");
    HeartbeatListener<Object, Object> heartbeatListener = mock(HeartbeatListener.class);
    when(heartbeatListener.retrievePayload()).thenReturn(FlinkCompletableFuture.completed(object));
    TestingHeartbeatListener heartbeatListener2 = new TestingHeartbeatListener(object2);
    Future<ResourceID> futureTimeout = heartbeatListener2.getTimeoutFuture();
    // Keep handles on the thread pools so they can be shut down once the test is done.
    ScheduledThreadPoolExecutor receiverScheduler = new ScheduledThreadPoolExecutor(1);
    ScheduledThreadPoolExecutor senderScheduler = new ScheduledThreadPoolExecutor(1);
    try {
        HeartbeatManagerImpl<Object, Object> heartbeatManager = new HeartbeatManagerImpl<>(heartbeatTimeout, resourceID, heartbeatListener, new DirectExecutorService(), new ScheduledExecutorServiceAdapter(receiverScheduler), LOG);
        HeartbeatManagerSenderImpl<Object, Object> heartbeatManager2 = new HeartbeatManagerSenderImpl<>(heartbeatPeriod, heartbeatTimeout, resourceID2, heartbeatListener2, new DirectExecutorService(), new ScheduledExecutorServiceAdapter(senderScheduler), LOG);
        // Each manager monitors the other one.
        heartbeatManager.monitorTarget(resourceID2, heartbeatManager2);
        heartbeatManager2.monitorTarget(resourceID, heartbeatManager);
        // While both sides are running, no timeout must be reported.
        Thread.sleep(2 * heartbeatTimeout);
        assertFalse(futureTimeout.isDone());
        // After stopping the first manager, the second one is expected to report its timeout.
        heartbeatManager.stop();
        ResourceID timeoutResourceID = futureTimeout.get(2 * heartbeatTimeout, TimeUnit.MILLISECONDS);
        assertEquals(resourceID, timeoutResourceID);
        // Number of heartbeat periods that fit into the sleep above; only half of them are
        // required by the checks below.
        int numberHeartbeats = (int) (2 * heartbeatTimeout / heartbeatPeriod);
        verify(heartbeatListener, atLeast(numberHeartbeats / 2)).reportPayload(resourceID2, object2);
        assertTrue(heartbeatListener2.getNumberHeartbeatReports() >= numberHeartbeats / 2);
    } finally {
        receiverScheduler.shutdownNow();
        senderScheduler.shutdownNow();
    }
}
Also used : ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) DirectExecutorService(org.apache.flink.runtime.util.DirectExecutorService) ScheduledExecutorServiceAdapter(org.apache.flink.runtime.concurrent.ScheduledExecutorServiceAdapter) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Test(org.junit.Test)

Example 22 with ResourceID

use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.

the class HeartbeatManagerTest method testHeartbeatMonitorUpdate.

/**
 * Checks how the heartbeat manager reacts to an incoming heartbeat from a monitored target:
 * the test expects exactly one {@code cancel(true)} on the previously returned scheduled
 * future and two {@code schedule} calls in total with the configured timeout.
 */
@Test
public void testHeartbeatMonitorUpdate() {
    final long timeoutMillis = 1000L;
    final ResourceID self = new ResourceID("foobar");
    final ResourceID target = new ResourceID("barfoo");
    final Object payload = new Object();

    // Scheduler stub: every schedule(...) call hands back the same mocked future.
    final ScheduledFuture<?> timeoutTask = mock(ScheduledFuture.class);
    final ScheduledExecutor scheduler = mock(ScheduledExecutor.class);
    doReturn(timeoutTask).when(scheduler).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // Listener stub that always offers the same payload.
    final HeartbeatListener<Object, Object> listener = mock(HeartbeatListener.class);
    when(listener.retrievePayload()).thenReturn(FlinkCompletableFuture.completed(payload));

    final HeartbeatManagerImpl<Object, Object> manager = new HeartbeatManagerImpl<>(
        timeoutMillis,
        self,
        listener,
        new DirectExecutorService(),
        scheduler,
        LOG);

    final HeartbeatTarget<Object> monitored = mock(HeartbeatTarget.class);
    manager.monitorTarget(target, monitored);
    manager.receiveHeartbeat(target, payload);

    verify(timeoutTask, times(1)).cancel(true);
    verify(scheduler, times(2)).schedule(any(Runnable.class), eq(timeoutMillis), eq(TimeUnit.MILLISECONDS));
}
Also used : ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TimeUnit(java.util.concurrent.TimeUnit) DirectExecutorService(org.apache.flink.runtime.util.DirectExecutorService) ScheduledExecutor(org.apache.flink.runtime.concurrent.ScheduledExecutor) Test(org.junit.Test)

Example 23 with ResourceID

use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.

the class ExecutionGraphTestUtils method getInstance.

/**
 * Builds an {@link Instance} for tests, using a freshly generated {@link ResourceID}, a
 * location at 127.0.0.1 with port 10001, a new {@link InstanceID} and a fixed
 * {@link HardwareDescription}.
 *
 * @param gateway the task manager gateway handed to the instance
 * @param numberOfSlots the slot count handed to the instance
 * @return the newly created instance
 * @throws Exception if the address lookup or the instance construction fails
 */
public static Instance getInstance(final TaskManagerGateway gateway, final int numberOfSlots) throws Exception {
    final TaskManagerLocation location = new TaskManagerLocation(
        ResourceID.generate(),
        InetAddress.getByName("127.0.0.1"),
        10001);
    final HardwareDescription hardware = new HardwareDescription(
        4,
        2L * 1024 * 1024 * 1024,
        1024 * 1024 * 1024,
        512 * 1024 * 1024);
    return new Instance(gateway, location, new InstanceID(), hardware, numberOfSlots);
}
Also used : HardwareDescription(org.apache.flink.runtime.instance.HardwareDescription) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Instance(org.apache.flink.runtime.instance.Instance) InstanceID(org.apache.flink.runtime.instance.InstanceID) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) InetAddress(java.net.InetAddress)

Example 24 with ResourceID

use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.

the class InstanceManagerTest method testReportHeartbeat.

/**
 * Registers three task managers at 127.0.0.1 (ports 20000 to 20002), reports heartbeats for
 * them, and checks that a heartbeat for an unknown {@link InstanceID} is rejected. After a
 * 200 ms pause it reports one more heartbeat for the first iterated instance and compares the
 * recorded heartbeat timestamps against the current time.
 *
 * <p>The {@link InstanceManager} is shut down in a {@code finally} block so that it is also
 * released when one of the assertions fails.
 */
@Test
public void testReportHeartbeat() {
    InstanceManager cm = null;
    try {
        cm = new InstanceManager();
        final int dataPort = 20000;
        ResourceID resID1 = ResourceID.generate();
        ResourceID resID2 = ResourceID.generate();
        ResourceID resID3 = ResourceID.generate();
        HardwareDescription hardwareDescription = HardwareDescription.extractFromSystem(4096);
        InetAddress address = InetAddress.getByName("127.0.0.1");
        // register three instances
        TaskManagerLocation ici1 = new TaskManagerLocation(resID1, address, dataPort);
        TaskManagerLocation ici2 = new TaskManagerLocation(resID2, address, dataPort + 1);
        TaskManagerLocation ici3 = new TaskManagerLocation(resID3, address, dataPort + 2);
        JavaTestKit probe1 = new JavaTestKit(system);
        JavaTestKit probe2 = new JavaTestKit(system);
        JavaTestKit probe3 = new JavaTestKit(system);
        InstanceID instanceID1 = cm.registerTaskManager(new ActorTaskManagerGateway(new AkkaActorGateway(probe1.getRef(), leaderSessionID)), ici1, hardwareDescription, 1);
        InstanceID instanceID2 = cm.registerTaskManager(new ActorTaskManagerGateway(new AkkaActorGateway(probe2.getRef(), leaderSessionID)), ici2, hardwareDescription, 1);
        InstanceID instanceID3 = cm.registerTaskManager(new ActorTaskManagerGateway(new AkkaActorGateway(probe3.getRef(), leaderSessionID)), ici3, hardwareDescription, 1);
        // report some immediate heart beats
        assertTrue(cm.reportHeartBeat(instanceID1));
        assertTrue(cm.reportHeartBeat(instanceID2));
        assertTrue(cm.reportHeartBeat(instanceID3));
        // report heart beat for non-existing instance
        assertFalse(cm.reportHeartBeat(new InstanceID()));
        final long WAIT = 200;
        CommonTestUtils.sleepUninterruptibly(WAIT);
        Iterator<Instance> it = cm.getAllRegisteredInstances().iterator();
        Instance instance1 = it.next();
        long h1 = instance1.getLastHeartBeat();
        long h2 = it.next().getLastHeartBeat();
        long h3 = it.next().getLastHeartBeat();
        // send one heart beat again and verify that the heartbeat timestamp of that
        // instance is newer than the timestamps captured before
        assertTrue(cm.reportHeartBeat(instance1.getId()));
        long newH1 = instance1.getLastHeartBeat();
        long now = System.currentTimeMillis();
        // the timestamps captured before the re-report are at least WAIT old ...
        assertTrue(now - h1 >= WAIT);
        assertTrue(now - h2 >= WAIT);
        assertTrue(now - h3 >= WAIT);
        // ... while the refreshed one is at most WAIT old
        assertTrue(now - newH1 <= WAIT);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail("Test erroneous: " + e.getMessage());
    } finally {
        // Runs on success, on exceptions and on assertion failures alike.
        if (cm != null) {
            cm.shutdown();
        }
    }
}
Also used : TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) InetAddress(java.net.InetAddress) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Example 25 with ResourceID

use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.

the class InstanceTest method testAllocatingAndCancellingSlots.

/**
 * Walks an {@link Instance} with four slots through a full allocate/release cycle: allocates
 * every slot, checks that a fifth allocation yields {@code null}, checks that returning a
 * still-allocated slot raises {@link IllegalArgumentException}, then releases all slots and
 * checks that returning them again is reported as {@code false}.
 */
@Test
public void testAllocatingAndCancellingSlots() {
    try {
        final TaskManagerLocation location = new TaskManagerLocation(
            ResourceID.generate(),
            InetAddress.getByName("127.0.0.1"),
            10001);
        final HardwareDescription hardware = new HardwareDescription(
            4,
            2L * 1024 * 1024 * 1024,
            1024 * 1024 * 1024,
            512 * 1024 * 1024);
        final Instance instance = new Instance(
            new ActorTaskManagerGateway(DummyActorGateway.INSTANCE),
            location,
            new InstanceID(),
            hardware,
            4);

        // freshly created: everything available, nothing allocated
        assertEquals(4, instance.getTotalNumberOfSlots());
        assertEquals(4, instance.getNumberOfAvailableSlots());
        assertEquals(0, instance.getNumberOfAllocatedSlots());

        // grab all four slots
        final SimpleSlot[] slots = new SimpleSlot[4];
        for (int i = 0; i < slots.length; i++) {
            slots[i] = instance.allocateSimpleSlot(new JobID());
        }
        for (SimpleSlot slot : slots) {
            assertNotNull(slot);
        }

        assertEquals(0, instance.getNumberOfAvailableSlots());
        assertEquals(4, instance.getNumberOfAllocatedSlots());

        // the slot numbers of the four slots must add up to 6
        int slotNumberSum = 0;
        for (SimpleSlot slot : slots) {
            slotNumberSum += slot.getSlotNumber();
        }
        assertEquals(6, slotNumberSum);

        // the instance is exhausted now
        assertNull(instance.allocateSimpleSlot(new JobID()));

        // handing back a slot that was not released first must be rejected
        try {
            instance.returnAllocatedSlot(slots[1]);
            fail("instance accepted a non-cancelled slot.");
        } catch (IllegalArgumentException expected) {
            // this is the expected outcome
        }

        // releasing the slots gives them back to the instance
        for (SimpleSlot slot : slots) {
            slot.releaseSlot();
        }
        assertEquals(4, instance.getNumberOfAvailableSlots());
        assertEquals(0, instance.getNumberOfAllocatedSlots());

        // returning the already released slots again must report false and change nothing
        for (SimpleSlot slot : slots) {
            assertFalse(instance.returnAllocatedSlot(slot));
        }
        assertEquals(4, instance.getNumberOfAvailableSlots());
        assertEquals(0, instance.getNumberOfAllocatedSlots());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) InetAddress(java.net.InetAddress) JobID(org.apache.flink.api.common.JobID) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) Test(org.junit.Test)

Aggregations

ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)74 Test (org.junit.Test)48 TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation)25 Time (org.apache.flink.api.common.time.Time)18 UUID (java.util.UUID)16 JobID (org.apache.flink.api.common.JobID)16 Configuration (org.apache.flink.configuration.Configuration)14 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)13 JavaTestKit (akka.testkit.JavaTestKit)12 MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry)12 InetAddress (java.net.InetAddress)11 SlotID (org.apache.flink.runtime.clusterframework.types.SlotID)10 HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices)10 TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices)10 SlotRequest (org.apache.flink.runtime.resourcemanager.SlotRequest)10 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)9 NetworkEnvironment (org.apache.flink.runtime.io.network.NetworkEnvironment)9 ActorTaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway)9 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)9 TestingSerialRpcService (org.apache.flink.runtime.rpc.TestingSerialRpcService)9