Search in sources :

Example 16 with JobMasterBuilder

use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.

the class JobMasterQueryableStateTest method testUnregisterKvState.

/**
 * Registers a KvState at the JobMaster, unregisters it again and then verifies that looking up
 * the location for the same registration name fails with a cause of type
 * {@link UnknownKvStateLocation}.
 *
 * @throws Exception if test setup or one of the RPC calls fails unexpectedly
 */
@Test
public void testUnregisterKvState() throws Exception {
    final JobMaster jobMaster = new JobMasterBuilder(JOB_GRAPH, rpcService).createJobMaster();
    jobMaster.start();
    try {
        // Gateway lookup and slot registration run inside the try block so that the started
        // endpoint is still terminated in the finally clause if this setup throws.
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        registerSlotsRequiredForJobExecution(jobMasterGateway, JOB_GRAPH.getJobID());

        final String registrationName = "register-me";
        final KvStateID kvStateID = new KvStateID();
        final KeyGroupRange keyGroupRange = new KeyGroupRange(0, 0);
        final InetSocketAddress address = new InetSocketAddress(InetAddress.getLocalHost(), 1029);

        // register the state first, then remove the registration again
        jobMasterGateway.notifyKvStateRegistered(JOB_GRAPH.getJobID(), JOB_VERTEX_1.getID(), keyGroupRange, registrationName, kvStateID, address).get();
        jobMasterGateway.notifyKvStateUnregistered(JOB_GRAPH.getJobID(), JOB_VERTEX_1.getID(), keyGroupRange, registrationName).get();

        // the lookup is now expected to fail
        try {
            jobMasterGateway.requestKvStateLocation(JOB_GRAPH.getJobID(), registrationName).get();
            fail("Expected to fail with an UnknownKvStateLocation.");
        } catch (Exception e) {
            assertThat(e, containsCause(UnknownKvStateLocation.class));
        }
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : InetSocketAddress(java.net.InetSocketAddress) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) KvStateID(org.apache.flink.queryablestate.KvStateID) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) UnknownHostException(java.net.UnknownHostException) ExecutionException(java.util.concurrent.ExecutionException) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) Test(org.junit.Test)

Example 17 with JobMasterBuilder

use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.

the class JobMasterQueryableStateTest method testRegisterKvState.

/**
 * Registers a KvState at the JobMaster and verifies that the location returned by a subsequent
 * lookup carries the job id, job vertex id, key group count, KvState id and server address that
 * were used for the registration.
 *
 * @throws Exception if test setup or one of the RPC calls fails unexpectedly
 */
@Test
public void testRegisterKvState() throws Exception {
    final JobMaster jobMaster = new JobMasterBuilder(JOB_GRAPH, rpcService).createJobMaster();
    jobMaster.start();
    try {
        // Gateway lookup and slot registration run inside the try block so that the started
        // endpoint is still terminated in the finally clause if this setup throws.
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        registerSlotsRequiredForJobExecution(jobMasterGateway, JOB_GRAPH.getJobID());

        final String registrationName = "register-me";
        final KvStateID kvStateID = new KvStateID();
        final KeyGroupRange keyGroupRange = new KeyGroupRange(0, 0);
        final InetSocketAddress address = new InetSocketAddress(InetAddress.getLocalHost(), 1029);

        jobMasterGateway.notifyKvStateRegistered(JOB_GRAPH.getJobID(), JOB_VERTEX_1.getID(), keyGroupRange, registrationName, kvStateID, address).get();

        final KvStateLocation location = jobMasterGateway.requestKvStateLocation(JOB_GRAPH.getJobID(), registrationName).get();

        // the location must describe the job and vertex the state was registered for
        assertEquals(JOB_GRAPH.getJobID(), location.getJobId());
        assertEquals(JOB_VERTEX_1.getID(), location.getJobVertexId());
        assertEquals(JOB_VERTEX_1.getMaxParallelism(), location.getNumKeyGroups());

        // exactly one key group was registered, and it maps to the registered id and address
        assertEquals(1, location.getNumRegisteredKeyGroups());
        assertEquals(1, keyGroupRange.getNumberOfKeyGroups());
        assertEquals(kvStateID, location.getKvStateID(keyGroupRange.getStartKeyGroup()));
        assertEquals(address, location.getKvStateServerAddress(keyGroupRange.getStartKeyGroup()));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : InetSocketAddress(java.net.InetSocketAddress) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) KvStateID(org.apache.flink.queryablestate.KvStateID) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) UnknownKvStateLocation(org.apache.flink.runtime.query.UnknownKvStateLocation) KvStateLocation(org.apache.flink.runtime.query.KvStateLocation) Test(org.junit.Test)

Example 18 with JobMasterBuilder

use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.

the class JobMasterQueryableStateTest method testRequestKvStateOfWrongJob.

/**
 * Verifies that requesting a KvState location with a freshly created {@link JobID} (i.e. not
 * the id of {@code JOB_GRAPH}) fails with a cause of type {@link FlinkJobNotFoundException}.
 *
 * @throws Exception if test setup fails unexpectedly
 */
@Test
public void testRequestKvStateOfWrongJob() throws Exception {
    final JobMaster jobMaster = new JobMasterBuilder(JOB_GRAPH, rpcService).createJobMaster();
    jobMaster.start();
    try {
        // Gateway lookup and slot registration run inside the try block so that the started
        // endpoint is still terminated in the finally clause if this setup throws.
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        registerSlotsRequiredForJobExecution(jobMasterGateway, JOB_GRAPH.getJobID());

        // lookup location for a job id this JobMaster is not responsible for
        try {
            jobMasterGateway.requestKvStateLocation(new JobID(), "unknown").get();
            fail("Expected to fail with FlinkJobNotFoundException");
        } catch (Exception e) {
            assertThat(e, containsCause(FlinkJobNotFoundException.class));
        }
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) JobID(org.apache.flink.api.common.JobID) UnknownHostException(java.net.UnknownHostException) ExecutionException(java.util.concurrent.ExecutionException) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) Test(org.junit.Test)

Example 19 with JobMasterBuilder

use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.

the class JobMasterQueryableStateTest method testDuplicatedKvStateRegistrationsFailTask.

/**
 * Registers the same KvState registration name for two different job vertices and verifies
 * that the second registration fails with a "Registration name clash" message and that the job
 * status is afterwards either {@code FAILED} or {@code FAILING}.
 *
 * @throws Exception if test setup or the first registration fails unexpectedly
 */
@Test
public void testDuplicatedKvStateRegistrationsFailTask() throws Exception {
    final JobMaster jobMaster = new JobMasterBuilder(JOB_GRAPH, rpcService).createJobMaster();
    jobMaster.start();
    try {
        // Gateway lookup and slot registration run inside the try block so that the started
        // endpoint is still terminated in the finally clause if this setup throws.
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        registerSlotsRequiredForJobExecution(jobMasterGateway, JOB_GRAPH.getJobID());

        // duplicate registration fails task
        final String registrationName = "duplicate-me";
        registerKvState(jobMasterGateway, JOB_GRAPH.getJobID(), JOB_VERTEX_1.getID(), registrationName);
        try {
            // same name, different vertex: this one is expected to be rejected
            registerKvState(jobMasterGateway, JOB_GRAPH.getJobID(), JOB_VERTEX_2.getID(), registrationName);
            fail("Expected to fail because of clashing registration message.");
        } catch (Exception e) {
            assertTrue(ExceptionUtils.findThrowableWithMessage(e, "Registration name clash").isPresent());
            assertThat(jobMasterGateway.requestJobStatus(testingTimeout).get(), either(is(JobStatus.FAILED)).or(is(JobStatus.FAILING)));
        }
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) UnknownHostException(java.net.UnknownHostException) ExecutionException(java.util.concurrent.ExecutionException) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) Test(org.junit.Test)

Example 20 with JobMasterBuilder

use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.

the class JobMasterTest method testResourceManagerBecomesUnreachableTriggersDisconnect.

/**
 * Checks the JobMaster's reaction to a ResourceManager whose heartbeat function completes
 * exceptionally with a {@link RecipientUnreachableException}: the JobMaster is expected to
 * disconnect from the ResourceManager (reporting its own job id) and then to attempt a second
 * registration.
 *
 * @throws Exception if waiting for the expected conditions fails
 */
@Test
public void testResourceManagerBecomesUnreachableTriggersDisconnect() throws Exception {
    final String rmAddress = "rm";
    final ResourceManagerId rmId = ResourceManagerId.generate();
    final ResourceID rmResourceID = new ResourceID(rmAddress);
    final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway(rmId, rmResourceID, rmAddress, "localhost");

    final CompletableFuture<JobID> disconnectFuture = new CompletableFuture<>();
    // one count for the initial registration, one for the reconnect attempt
    final CountDownLatch registrationLatch = new CountDownLatch(2);

    // first registration succeeds immediately, the second one is left pending
    final Queue<CompletableFuture<RegistrationResponse>> pendingResponses = new ArrayDeque<>(2);
    pendingResponses.offer(CompletableFuture.completedFuture(rmGateway.getJobMasterRegistrationSuccess()));
    pendingResponses.offer(new CompletableFuture<>());

    rmGateway.setRegisterJobManagerFunction((masterId, jmResource, jmAddress, jobId) -> {
        registrationLatch.countDown();
        return pendingResponses.poll();
    });
    rmGateway.setDisconnectJobManagerConsumer(disconnectInfo -> disconnectFuture.complete(disconnectInfo.f0));
    rmGateway.setJobMasterHeartbeatFunction(
            unused -> FutureUtils.completedExceptionally(
                    new RecipientUnreachableException("sender", "recipient", "resource manager is unreachable")));
    rpcService.registerGateway(rmAddress, rmGateway);

    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService)
            .withJobMasterId(jobMasterId)
            .withResourceId(jmResourceId)
            .withConfiguration(configuration)
            .withHighAvailabilityServices(haServices)
            .withHeartbeatServices(heartbeatServices)
            .createJobMaster();
    jobMaster.start();

    try {
        // announce the RM as leader so that the JobMaster registers with it
        rmLeaderRetrievalService.notifyListener(rmAddress, rmId.toUUID());

        final JobMasterGateway selfGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

        // keep sending heartbeat requests until the disconnect has been observed
        CommonTestUtils.waitUntilCondition(
                () -> {
                    selfGateway.heartbeatFromResourceManager(rmResourceID);
                    return disconnectFuture.isDone();
                },
                Deadline.fromNow(TimeUtils.toDuration(testingTimeout)),
                50L);

        // the disconnect must have been issued for this JobMaster's job
        assertThat(disconnectFuture.join(), equalTo(jobGraph.getJobID()));

        // afterwards the JobMaster should try to register with the RM again
        registrationLatch.await();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : CountDownLatch(java.util.concurrent.CountDownLatch) ArrayDeque(java.util.ArrayDeque) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

JobMasterBuilder (org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder): 28, Test (org.junit.Test): 26, ExecutionException (java.util.concurrent.ExecutionException): 13, JobID (org.apache.flink.api.common.JobID): 13, TestingResourceManagerGateway (org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway): 13, LocalUnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation): 13, CompletableFuture (java.util.concurrent.CompletableFuture): 12, CompletedCheckpoint (org.apache.flink.runtime.checkpoint.CompletedCheckpoint): 12, JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 12, TestingTaskExecutorGatewayBuilder (org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder): 12, ResourceManagerId (org.apache.flink.runtime.resourcemanager.ResourceManagerId): 11, File (java.io.File): 10, CountDownLatch (java.util.concurrent.CountDownLatch): 10, OneShotLatch (org.apache.flink.core.testutils.OneShotLatch): 10, ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID): 10, ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 10, RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse): 10, TaskExecutionState (org.apache.flink.runtime.taskmanager.TaskExecutionState): 10, ArrayDeque (java.util.ArrayDeque): 9, ArrayList (java.util.ArrayList): 9