use of org.apache.flink.util.FlinkException in project flink by apache.
the class ResourceManagerTest method testDisconnectJobManager.
/**
 * Registers a job master at a newly built and started resource manager, disconnects it with
 * the given job status and checks the reaction of the job leader id service callbacks.
 *
 * <p>For a globally terminal status the method waits until the remove-job callback has fired.
 * For every other status it expects the callback NOT to fire within a short wait.
 *
 * @param jobStatus status passed along with the disconnect call
 * @throws Exception if the set-up fails or a latch wait is interrupted
 */
private void testDisconnectJobManager(JobStatus jobStatus) throws Exception {
    final TestingJobMasterGateway jobMaster =
            new TestingJobMasterGatewayBuilder()
                    .setAddress(UUID.randomUUID().toString())
                    .build();
    rpcService.registerGateway(jobMaster.getAddress(), jobMaster);

    // latches that record the add/remove callbacks of the job leader id service
    final OneShotLatch addedLatch = new OneShotLatch();
    final OneShotLatch removedLatch = new OneShotLatch();
    final JobLeaderIdService leaderIdService =
            TestingJobLeaderIdService.newBuilder()
                    .setAddJobConsumer(ignored -> addedLatch.trigger())
                    .setRemoveJobConsumer(ignored -> removedLatch.trigger())
                    .build();

    resourceManager =
            new ResourceManagerBuilder()
                    .withJobLeaderIdService(leaderIdService)
                    .buildAndStart();

    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(
            requestedJobId ->
                    new SettableLeaderRetrievalService(
                            jobMaster.getAddress(), jobMaster.getFencingToken().toUUID()));

    final JobID jobId = JobID.generate();
    final ResourceManagerGateway rmGateway =
            resourceManager.getSelfGateway(ResourceManagerGateway.class);
    rmGateway.registerJobMaster(
            jobMaster.getFencingToken(),
            ResourceID.generate(),
            jobMaster.getAddress(),
            jobId,
            TIMEOUT);
    addedLatch.await();

    rmGateway.disconnectJobManager(jobId, jobStatus, new FlinkException("Test exception"));

    if (jobStatus.isGloballyTerminalState()) {
        removedLatch.await();
        return;
    }

    // job should not get removed: the short wait is expected to time out
    boolean removedWithinWait;
    try {
        removedLatch.await(10L, TimeUnit.MILLISECONDS);
        removedWithinWait = true;
    } catch (TimeoutException expected) {
        removedWithinWait = false;
    }
    if (removedWithinWait) {
        fail("We should not have removed the job.");
    }
}
use of org.apache.flink.util.FlinkException in project flink by apache.
the class RetryingRegistrationTest method testFailure.
/**
 * Runs a registration against a gateway whose responses are scripted as: no response
 * (marked "timeout" below), an explicit failure, no response again, and finally a success.
 * Asserts that the registration completes with the success response and that the elapsed
 * time is at least two initial timeouts plus one failure delay.
 */
@Test
public void testFailure() throws Exception {
    final String testId = "qui a coupe le fromage";
    final String testEndpointAddress = "<test-address>";
    final UUID leaderId = UUID.randomUUID();

    ManualResponseTestRegistrationGateway gateway =
            new ManualResponseTestRegistrationGateway(
                    null, // timeout
                    new RegistrationResponse.Failure(new FlinkException("no reason")),
                    null, // timeout
                    new TestRegistrationSuccess(testId)); // success

    try {
        rpcService.registerGateway(testEndpointAddress, gateway);
        TestRetryingRegistration registration =
                new TestRetryingRegistration(rpcService, testEndpointAddress, leaderId);

        final long startNanos = System.nanoTime();
        registration.startRegistration();
        RetryingRegistration.RetryingRegistrationResult<
                        TestRegistrationGateway,
                        TestRegistrationSuccess,
                        TestRegistrationRejection>
                result = registration.getFuture().get(10L, TimeUnit.SECONDS);
        final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);

        // validate correct invocation and result
        assertEquals(testId, result.getSuccess().getCorrelationId());
        assertEquals(leaderId, gateway.getInvocations().take().leaderId());

        // validate that some retry-delay / back-off behavior happened
        assertTrue(
                "retries did not properly back off",
                elapsedMillis
                        >= 2 * TestRetryingRegistration.INITIAL_TIMEOUT
                                + TestRetryingRegistration.DELAY_ON_FAILURE);
    } finally {
        gateway.stop();
    }
}
use of org.apache.flink.util.FlinkException in project flink by apache.
the class DeclarativeSlotManagerTest method testSlotRequestRemovedIfTMReportsAllocation.
/**
 * Tests that pending request is removed if task executor reports a slot with the same job id.
 *
 * <p>The task executor gateway answers slot requests with manually controlled futures. The
 * first request is failed with a {@link TimeoutException}; the test then expects a second
 * request for the same slot, which is failed with a {@link SlotOccupiedException} carrying
 * the job id. Afterwards the slot must be tracked as allocated for that job and exactly one
 * resource must be counted as acquired.
 */
@Test
public void testSlotRequestRemovedIfTMReportsAllocation() throws Exception {
    final ResourceTracker resourceTracker = new DefaultResourceTracker();
    final DefaultSlotTracker slotTracker = new DefaultSlotTracker();

    try (final DeclarativeSlotManager slotManager =
            createDeclarativeSlotManagerBuilder()
                    .setResourceTracker(resourceTracker)
                    .setSlotTracker(slotTracker)
                    .buildAndStartWithDirectExec()) {

        final JobID jobID = new JobID();
        slotManager.processResourceRequirements(createResourceRequirementsForSingleSlot(jobID));

        // records every slot request that reaches the task executor gateway
        final BlockingQueue<
                        Tuple6<
                                SlotID,
                                JobID,
                                AllocationID,
                                ResourceProfile,
                                String,
                                ResourceManagerId>>
                requestSlotQueue = new ArrayBlockingQueue<>(1);

        // responses handed out, in order, to the slot requests
        final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue =
                new ArrayBlockingQueue<>(2);

        final CompletableFuture<Acknowledge> firstManualSlotRequestResponse =
                new CompletableFuture<>();
        responseQueue.offer(firstManualSlotRequestResponse);

        final CompletableFuture<Acknowledge> secondManualSlotRequestResponse =
                new CompletableFuture<>();
        responseQueue.offer(secondManualSlotRequestResponse);

        final TestingTaskExecutorGateway testingTaskExecutorGateway =
                new TestingTaskExecutorGatewayBuilder()
                        .setRequestSlotFunction(
                                slotRequest -> {
                                    requestSlotQueue.offer(slotRequest);
                                    try {
                                        return responseQueue.take();
                                    } catch (InterruptedException e) {
                                        // Restore the interrupt status so that later blocking
                                        // calls on this thread still observe the interruption.
                                        Thread.currentThread().interrupt();
                                        return FutureUtils.completedExceptionally(
                                                new FlinkException(
                                                        "Response queue was interrupted.", e));
                                    }
                                })
                        .createTestingTaskExecutorGateway();

        final ResourceID taskExecutorResourceId = ResourceID.generate();
        final TaskExecutorConnection taskExecutionConnection =
                new TaskExecutorConnection(taskExecutorResourceId, testingTaskExecutorGateway);
        final SlotReport slotReport =
                new SlotReport(createFreeSlotStatus(new SlotID(taskExecutorResourceId, 0)));

        slotManager.registerTaskManager(
                taskExecutionConnection, slotReport, ResourceProfile.ANY, ResourceProfile.ANY);

        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>
                firstRequest = requestSlotQueue.take();

        // fail first request
        firstManualSlotRequestResponse.completeExceptionally(
                new TimeoutException("Test exception to fail first allocation"));

        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>
                secondRequest = requestSlotQueue.take();

        // fail second request
        secondManualSlotRequestResponse.completeExceptionally(
                new SlotOccupiedException("Test exception", new AllocationID(), jobID));

        // both requests target the same job and the same slot
        assertThat(firstRequest.f1, equalTo(jobID));
        assertThat(secondRequest.f1, equalTo(jobID));
        assertThat(secondRequest.f0, equalTo(firstRequest.f0));

        final DeclarativeTaskManagerSlot slot = slotTracker.getSlot(secondRequest.f0);
        assertThat(slot.getState(), equalTo(SlotState.ALLOCATED));
        assertThat(slot.getJobId(), equalTo(firstRequest.f1));

        assertThat(slotManager.getNumberRegisteredSlots(), is(1));
        assertThat(getTotalResourceCount(resourceTracker.getAcquiredResources(jobID)), is(1));
    }
}
use of org.apache.flink.util.FlinkException in project flink by apache.
the class DefaultSchedulerTest method failJobWillIncrementVertexVersions.
/**
 * Checks that failing the whole job causes the versioner to report the execution vertex
 * version recorded beforehand as modified.
 */
@Test
public void failJobWillIncrementVertexVersions() {
    final JobGraph graph = singleNonParallelJobVertexJobGraph();
    final ExecutionVertexID vertexId =
            new ExecutionVertexID(getOnlyJobVertex(graph).getID(), 0);

    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(graph);

    // snapshot the version before the failure so it can be compared afterwards
    final ExecutionVertexVersion versionBeforeFailure =
            executionVertexVersioner.getExecutionVertexVersion(vertexId);

    scheduler.failJob(new FlinkException("Test failure."), System.currentTimeMillis());

    assertTrue(executionVertexVersioner.isModified(versionBeforeFailure));
}
use of org.apache.flink.util.FlinkException in project flink by apache.
the class OperatorEventDispatcherImpl method dispatchEventToHandlers.
/**
 * Deserializes the given operator event and passes it to the handler registered for the
 * given operator id.
 *
 * @param operatorID id used to look up the registered handler
 * @param serializedEvent the operator event in serialized form
 * @throws FlinkException if the event cannot be deserialized, or if no handler is registered
 *     for the operator id
 */
void dispatchEventToHandlers(OperatorID operatorID, SerializedValue<OperatorEvent> serializedEvent) throws FlinkException {
    final OperatorEvent event;
    try {
        event = serializedEvent.deserializeValue(classLoader);
    } catch (IOException | ClassNotFoundException e) {
        throw new FlinkException("Could not deserialize operator event", e);
    }

    // the handler lookup happens only after the event was deserialized successfully
    final OperatorEventHandler target = handlers.get(operatorID);
    if (target == null) {
        throw new FlinkException("Operator not registered for operator events");
    }

    target.handleOperatorEvent(event);
}
Aggregations