Search in sources :

Example 81 with FlinkException

use of org.apache.flink.util.FlinkException in project flink by apache.

the class ResourceManagerTest method testDisconnectJobManager.

private void testDisconnectJobManager(JobStatus jobStatus) throws Exception {
    final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setAddress(UUID.randomUUID().toString()).build();
    rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
    final OneShotLatch jobAdded = new OneShotLatch();
    final OneShotLatch jobRemoved = new OneShotLatch();
    final JobLeaderIdService jobLeaderIdService = TestingJobLeaderIdService.newBuilder().setAddJobConsumer(ignored -> jobAdded.trigger()).setRemoveJobConsumer(ignored -> jobRemoved.trigger()).build();
    resourceManager = new ResourceManagerBuilder().withJobLeaderIdService(jobLeaderIdService).buildAndStart();
    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(requestedJobId -> new SettableLeaderRetrievalService(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()));
    final JobID jobId = JobID.generate();
    final ResourceManagerGateway resourceManagerGateway = resourceManager.getSelfGateway(ResourceManagerGateway.class);
    resourceManagerGateway.registerJobMaster(jobMasterGateway.getFencingToken(), ResourceID.generate(), jobMasterGateway.getAddress(), jobId, TIMEOUT);
    jobAdded.await();
    resourceManagerGateway.disconnectJobManager(jobId, jobStatus, new FlinkException("Test exception"));
    if (jobStatus.isGloballyTerminalState()) {
        jobRemoved.await();
    } else {
        // job should not get removed
        try {
            jobRemoved.await(10L, TimeUnit.MILLISECONDS);
            fail("We should not have removed the job.");
        } catch (TimeoutException expected) {
        }
    }
}
Also used : RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) TimeoutException(java.util.concurrent.TimeoutException) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) After(org.junit.After) Matchers.nullValue(org.hamcrest.Matchers.nullValue) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Assert.fail(org.junit.Assert.fail) AfterClass(org.junit.AfterClass) UUID(java.util.UUID) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) TestingUtils(org.apache.flink.testutils.TestingUtils) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.is(org.hamcrest.Matchers.is) Matchers.anyOf(org.hamcrest.Matchers.anyOf) Time(org.apache.flink.api.common.time.Time) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) BeforeClass(org.junit.BeforeClass) TaskExecutorMemoryConfiguration(org.apache.flink.runtime.taskexecutor.TaskExecutorMemoryConfiguration) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) Function(java.util.function.Function) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) DeclarativeSlotManagerBuilder(org.apache.flink.runtime.resourcemanager.slotmanager.DeclarativeSlotManagerBuilder) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException) NoOpResourceManagerPartitionTracker(org.apache.flink.runtime.io.network.partition.NoOpResourceManagerPartitionTracker) SlotManager(org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) ThrowingConsumer(org.apache.flink.util.function.ThrowingConsumer) Before(org.junit.Before) Matchers.empty(org.hamcrest.Matchers.empty) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) HardwareDescription(org.apache.flink.runtime.instance.HardwareDescription) TaskManagerInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo) Test(org.junit.Test) TaskExecutorThreadInfoGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorThreadInfoGateway) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) TestingSlotManagerBuilder(org.apache.flink.runtime.resourcemanager.slotmanager.TestingSlotManagerBuilder) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) JobID(org.apache.flink.api.common.JobID) FlinkException(org.apache.flink.util.FlinkException) TimeoutException(java.util.concurrent.TimeoutException)

Example 82 with FlinkException

use of org.apache.flink.util.FlinkException in project flink by apache.

the class RetryingRegistrationTest method testFailure.

@Test
public void testFailure() throws Exception {
    final String testId = "qui a coupe le fromage";
    final String testEndpointAddress = "<test-address>";
    final UUID leaderId = UUID.randomUUID();
    ManualResponseTestRegistrationGateway testGateway = new ManualResponseTestRegistrationGateway(// timeout
    null, new RegistrationResponse.Failure(new FlinkException("no reason")), // timeout
    null, // success
    new TestRegistrationSuccess(testId));
    try {
        rpcService.registerGateway(testEndpointAddress, testGateway);
        TestRetryingRegistration registration = new TestRetryingRegistration(rpcService, testEndpointAddress, leaderId);
        long started = System.nanoTime();
        registration.startRegistration();
        CompletableFuture<RetryingRegistration.RetryingRegistrationResult<TestRegistrationGateway, TestRegistrationSuccess, TestRegistrationRejection>> future = registration.getFuture();
        RetryingRegistration.RetryingRegistrationResult<TestRegistrationGateway, TestRegistrationSuccess, TestRegistrationRejection> registrationResponse = future.get(10L, TimeUnit.SECONDS);
        long finished = System.nanoTime();
        long elapsedMillis = (finished - started) / 1000000;
        // validate correct invocation and result
        assertEquals(testId, registrationResponse.getSuccess().getCorrelationId());
        assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
        // validate that some retry-delay / back-off behavior happened
        assertTrue("retries did not properly back off", elapsedMillis >= 2 * TestRetryingRegistration.INITIAL_TIMEOUT + TestRetryingRegistration.DELAY_ON_FAILURE);
    } finally {
        testGateway.stop();
    }
}
Also used : Mockito.anyString(org.mockito.Mockito.anyString) FlinkException(org.apache.flink.util.FlinkException) UUID(java.util.UUID) Test(org.junit.Test)

Example 83 with FlinkException

use of org.apache.flink.util.FlinkException in project flink by apache.

the class DeclarativeSlotManagerTest method testSlotRequestRemovedIfTMReportsAllocation.

/**
 * Tests that pending request is removed if task executor reports a slot with the same job id.
 */
@Test
public void testSlotRequestRemovedIfTMReportsAllocation() throws Exception {
    final ResourceTracker resourceTracker = new DefaultResourceTracker();
    final DefaultSlotTracker slotTracker = new DefaultSlotTracker();
    try (final DeclarativeSlotManager slotManager = createDeclarativeSlotManagerBuilder().setResourceTracker(resourceTracker).setSlotTracker(slotTracker).buildAndStartWithDirectExec()) {
        final JobID jobID = new JobID();
        slotManager.processResourceRequirements(createResourceRequirementsForSingleSlot(jobID));
        final BlockingQueue<Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>> requestSlotQueue = new ArrayBlockingQueue<>(1);
        final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(2);
        final CompletableFuture<Acknowledge> firstManualSlotRequestResponse = new CompletableFuture<>();
        responseQueue.offer(firstManualSlotRequestResponse);
        final CompletableFuture<Acknowledge> secondManualSlotRequestResponse = new CompletableFuture<>();
        responseQueue.offer(secondManualSlotRequestResponse);
        final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder().setRequestSlotFunction(slotIDJobIDAllocationIDStringResourceManagerIdTuple6 -> {
            requestSlotQueue.offer(slotIDJobIDAllocationIDStringResourceManagerIdTuple6);
            try {
                return responseQueue.take();
            } catch (InterruptedException ignored) {
                return FutureUtils.completedExceptionally(new FlinkException("Response queue was interrupted."));
            }
        }).createTestingTaskExecutorGateway();
        final ResourceID taskExecutorResourceId = ResourceID.generate();
        final TaskExecutorConnection taskExecutionConnection = new TaskExecutorConnection(taskExecutorResourceId, testingTaskExecutorGateway);
        final SlotReport slotReport = new SlotReport(createFreeSlotStatus(new SlotID(taskExecutorResourceId, 0)));
        slotManager.registerTaskManager(taskExecutionConnection, slotReport, ResourceProfile.ANY, ResourceProfile.ANY);
        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId> firstRequest = requestSlotQueue.take();
        // fail first request
        firstManualSlotRequestResponse.completeExceptionally(new TimeoutException("Test exception to fail first allocation"));
        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId> secondRequest = requestSlotQueue.take();
        // fail second request
        secondManualSlotRequestResponse.completeExceptionally(new SlotOccupiedException("Test exception", new AllocationID(), jobID));
        assertThat(firstRequest.f1, equalTo(jobID));
        assertThat(secondRequest.f1, equalTo(jobID));
        assertThat(secondRequest.f0, equalTo(firstRequest.f0));
        final DeclarativeTaskManagerSlot slot = slotTracker.getSlot(secondRequest.f0);
        assertThat(slot.getState(), equalTo(SlotState.ALLOCATED));
        assertThat(slot.getJobId(), equalTo(firstRequest.f1));
        assertThat(slotManager.getNumberRegisteredSlots(), is(1));
        assertThat(getTotalResourceCount(resourceTracker.getAcquiredResources(jobID)), is(1));
    }
}
Also used : ComponentMainThreadExecutorServiceAdapter(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutorServiceAdapter) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Arrays(java.util.Arrays) CoreMatchers.hasItem(org.hamcrest.CoreMatchers.hasItem) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple6(org.apache.flink.api.java.tuple.Tuple6) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) TimeoutException(java.util.concurrent.TimeoutException) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) Assert.assertThat(org.junit.Assert.assertThat) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) FunctionUtils(org.apache.flink.util.function.FunctionUtils) TestLogger(org.apache.flink.util.TestLogger) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) SlotOccupiedException(org.apache.flink.runtime.taskexecutor.exceptions.SlotOccupiedException) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Collection(java.util.Collection) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) Set(java.util.Set) BlockingQueue(java.util.concurrent.BlockingQueue) SlotManagerMetricGroup(org.apache.flink.runtime.metrics.groups.SlotManagerMetricGroup) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) TestingUtils(org.apache.flink.testutils.TestingUtils) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) List(java.util.List) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Assert.assertFalse(org.junit.Assert.assertFalse) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Matchers.is(org.hamcrest.Matchers.is) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) SlotAllocationException(org.apache.flink.runtime.taskexecutor.exceptions.SlotAllocationException) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) FlinkException(org.apache.flink.util.FlinkException) WorkerResourceSpec(org.apache.flink.runtime.resourcemanager.WorkerResourceSpec) CoreMatchers.not(org.hamcrest.CoreMatchers.not) CompletableFuture(java.util.concurrent.CompletableFuture) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Assert.assertSame(org.junit.Assert.assertSame) ManuallyTriggeredScheduledExecutorService(org.apache.flink.core.testutils.ManuallyTriggeredScheduledExecutorService) TestingMetricRegistry(org.apache.flink.runtime.metrics.util.TestingMetricRegistry) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) Matchers.hasSize(org.hamcrest.Matchers.hasSize) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) ThrowingConsumer(org.apache.flink.util.function.ThrowingConsumer) Matchers.empty(org.hamcrest.Matchers.empty) Iterator(java.util.Iterator) Executor(java.util.concurrent.Executor) Assert.assertNotNull(org.junit.Assert.assertNotNull) Assert.assertTrue(org.junit.Assert.assertTrue) SystemExitTrackingSecurityManager(org.apache.flink.runtime.testutils.SystemExitTrackingSecurityManager) Test(org.junit.Test) InstanceID(org.apache.flink.runtime.instance.InstanceID) Iterators(org.apache.flink.shaded.guava30.com.google.common.collect.Iterators) TimeUnit(java.util.concurrent.TimeUnit) JobID(org.apache.flink.api.common.JobID) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) SlotOccupiedException(org.apache.flink.runtime.taskexecutor.exceptions.SlotOccupiedException) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection) TimeoutException(java.util.concurrent.TimeoutException) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) FlinkException(org.apache.flink.util.FlinkException) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) Tuple6(org.apache.flink.api.java.tuple.Tuple6) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 84 with FlinkException

use of org.apache.flink.util.FlinkException in project flink by apache.

the class DefaultSchedulerTest method failJobWillIncrementVertexVersions.

@Test
public void failJobWillIncrementVertexVersions() {
    final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
    final JobVertex onlyJobVertex = getOnlyJobVertex(jobGraph);
    final ExecutionVertexID onlyExecutionVertexId = new ExecutionVertexID(onlyJobVertex.getID(), 0);
    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);
    final ExecutionVertexVersion executionVertexVersion = executionVertexVersioner.getExecutionVertexVersion(onlyExecutionVertexId);
    scheduler.failJob(new FlinkException("Test failure."), System.currentTimeMillis());
    assertTrue(executionVertexVersioner.isModified(executionVertexVersion));
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) FlinkException(org.apache.flink.util.FlinkException) AdaptiveSchedulerTest(org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerTest) Test(org.junit.Test)

Example 85 with FlinkException

use of org.apache.flink.util.FlinkException in project flink by apache.

the class OperatorEventDispatcherImpl method dispatchEventToHandlers.

void dispatchEventToHandlers(OperatorID operatorID, SerializedValue<OperatorEvent> serializedEvent) throws FlinkException {
    final OperatorEvent evt;
    try {
        evt = serializedEvent.deserializeValue(classLoader);
    } catch (IOException | ClassNotFoundException e) {
        throw new FlinkException("Could not deserialize operator event", e);
    }
    final OperatorEventHandler handler = handlers.get(operatorID);
    if (handler != null) {
        handler.handleOperatorEvent(evt);
    } else {
        throw new FlinkException("Operator not registered for operator events");
    }
}
Also used : OperatorEventHandler(org.apache.flink.runtime.operators.coordination.OperatorEventHandler) OperatorEvent(org.apache.flink.runtime.operators.coordination.OperatorEvent) IOException(java.io.IOException) FlinkException(org.apache.flink.util.FlinkException)

Aggregations

FlinkException (org.apache.flink.util.FlinkException)197 Test (org.junit.Test)91 CompletableFuture (java.util.concurrent.CompletableFuture)59 IOException (java.io.IOException)38 ExecutionException (java.util.concurrent.ExecutionException)26 ArrayList (java.util.ArrayList)25 JobID (org.apache.flink.api.common.JobID)24 Collection (java.util.Collection)22 CompletionException (java.util.concurrent.CompletionException)22 Configuration (org.apache.flink.configuration.Configuration)21 TimeoutException (java.util.concurrent.TimeoutException)19 FutureUtils (org.apache.flink.util.concurrent.FutureUtils)19 Time (org.apache.flink.api.common.time.Time)16 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)16 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)16 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)15 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)14 Collections (java.util.Collections)13 List (java.util.List)13 ExecutorService (java.util.concurrent.ExecutorService)13