Search in sources :

Example 91 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class TaskManagerTest method testSubmitTaskFailure.

/**
	 * Tests that the TaskManager sends a proper exception back to the sender if the submit task
	 * message fails.
	 */
@Test
public void testSubmitTaskFailure() throws Exception {
    ActorGateway jobManager = null;
    ActorGateway taskManager = null;
    try {
        ActorRef jm = system.actorOf(Props.create(SimpleJobManager.class, leaderSessionID));
        jobManager = new AkkaActorGateway(jm, leaderSessionID);
        taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, true);
        TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(new JobID(), "test job", new JobVertexID(), new ExecutionAttemptID(), new SerializedValue<>(new ExecutionConfig()), "test task", // this will make the submission fail because the number of key groups must be >= 1
        0, 0, 1, 0, new Configuration(), new Configuration(), "Foobar", Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), 0);
        Future<Object> submitResponse = taskManager.ask(new SubmitTask(tdd), timeout);
        try {
            Await.result(submitResponse, timeout);
            fail("The submit task message should have failed.");
        } catch (IllegalArgumentException e) {
        // expected
        }
    } finally {
        TestingUtils.stopActor(jobManager);
        TestingUtils.stopActor(taskManager);
    }
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskManagerServicesConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) SubmitTask(org.apache.flink.runtime.messages.TaskMessages.SubmitTask) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 92 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class TaskManagerTest method testRemotePartitionNotFound.

/**
	 * Tests that repeated remote {@link PartitionNotFoundException}s ultimately fail the receiver.
	 */
@Test
public void testRemotePartitionNotFound() throws Exception {
    new JavaTestKit(system) {

        {
            ActorGateway jobManager = null;
            ActorGateway taskManager = null;
            final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
            try {
                final IntermediateDataSetID resultId = new IntermediateDataSetID();
                // Create the JM
                ActorRef jm = system.actorOf(Props.create(new SimplePartitionStateLookupJobManagerCreator(leaderSessionID, getTestActor())));
                jobManager = new AkkaActorGateway(jm, leaderSessionID);
                final int dataPort = NetUtils.getAvailablePort();
                Configuration config = new Configuration();
                config.setInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, dataPort);
                config.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
                config.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);
                taskManager = TestingUtils.createTaskManager(system, jobManager, config, false, true);
                // ---------------------------------------------------------------------------------
                final ActorGateway tm = taskManager;
                final JobID jid = new JobID();
                final JobVertexID vid = new JobVertexID();
                final ExecutionAttemptID eid = new ExecutionAttemptID();
                final ResultPartitionID partitionId = new ResultPartitionID();
                // Remote location (on the same TM though) for the partition
                final ResultPartitionLocation loc = ResultPartitionLocation.createRemote(new ConnectionID(new InetSocketAddress("localhost", dataPort), 0));
                final InputChannelDeploymentDescriptor[] icdd = new InputChannelDeploymentDescriptor[] { new InputChannelDeploymentDescriptor(partitionId, loc) };
                final InputGateDeploymentDescriptor igdd = new InputGateDeploymentDescriptor(resultId, ResultPartitionType.PIPELINED, 0, icdd);
                final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jid, "TestJob", vid, eid, new SerializedValue<>(new ExecutionConfig()), "Receiver", 1, 0, 1, 0, new Configuration(), new Configuration(), Tasks.AgnosticReceiver.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.singletonList(igdd), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), 0);
                new Within(d) {

                    @Override
                    protected void run() {
                        // Submit the task
                        tm.tell(new SubmitTask(tdd), testActorGateway);
                        expectMsgClass(Acknowledge.get().getClass());
                        // Wait to be notified about the final execution state by the mock JM
                        TaskExecutionState msg = expectMsgClass(TaskExecutionState.class);
                        // The task should fail after repeated requests
                        assertEquals(ExecutionState.FAILED, msg.getExecutionState());
                        Throwable t = msg.getError(ClassLoader.getSystemClassLoader());
                        assertEquals("Thrown exception was not a PartitionNotFoundException: " + t.getMessage(), PartitionNotFoundException.class, t.getClass());
                    }
                };
            } catch (Exception e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                TestingUtils.stopActor(taskManager);
                TestingUtils.stopActor(jobManager);
            }
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) TaskManagerServicesConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) InetSocketAddress(java.net.InetSocketAddress) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) ResultPartitionLocation(org.apache.flink.runtime.deployment.ResultPartitionLocation) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) SubmitTask(org.apache.flink.runtime.messages.TaskMessages.SubmitTask) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) IOException(java.io.IOException) ConnectionID(org.apache.flink.runtime.io.network.ConnectionID) InputChannelDeploymentDescriptor(org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 93 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class TaskManagerTest method testJobSubmissionAndCanceling.

@Test
public void testJobSubmissionAndCanceling() {
    new JavaTestKit(system) {

        {
            ActorGateway jobManager = null;
            ActorGateway taskManager = null;
            final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
            try {
                ActorRef jm = system.actorOf(Props.create(SimpleJobManager.class, leaderSessionID));
                jobManager = new AkkaActorGateway(jm, leaderSessionID);
                taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, true);
                final JobID jid1 = new JobID();
                final JobID jid2 = new JobID();
                JobVertexID vid1 = new JobVertexID();
                JobVertexID vid2 = new JobVertexID();
                final ExecutionAttemptID eid1 = new ExecutionAttemptID();
                final ExecutionAttemptID eid2 = new ExecutionAttemptID();
                final TaskDeploymentDescriptor tdd1 = createTaskDeploymentDescriptor(jid1, "TestJob1", vid1, eid1, new SerializedValue<>(new ExecutionConfig()), "TestTask1", 5, 1, 5, 0, new Configuration(), new Configuration(), TestInvokableBlockingCancelable.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
                final TaskDeploymentDescriptor tdd2 = createTaskDeploymentDescriptor(jid2, "TestJob2", vid2, eid2, new SerializedValue<>(new ExecutionConfig()), "TestTask2", 7, 2, 7, 0, new Configuration(), new Configuration(), TestInvokableBlockingCancelable.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
                final ActorGateway tm = taskManager;
                new Within(d) {

                    @Override
                    protected void run() {
                        try {
                            Future<Object> t1Running = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(eid1), timeout);
                            Future<Object> t2Running = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(eid2), timeout);
                            tm.tell(new SubmitTask(tdd1), testActorGateway);
                            tm.tell(new SubmitTask(tdd2), testActorGateway);
                            expectMsgEquals(Acknowledge.get());
                            expectMsgEquals(Acknowledge.get());
                            Await.ready(t1Running, d);
                            Await.ready(t2Running, d);
                            tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
                            Map<ExecutionAttemptID, Task> runningTasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
                            assertEquals(2, runningTasks.size());
                            Task t1 = runningTasks.get(eid1);
                            Task t2 = runningTasks.get(eid2);
                            assertNotNull(t1);
                            assertNotNull(t2);
                            assertEquals(ExecutionState.RUNNING, t1.getExecutionState());
                            assertEquals(ExecutionState.RUNNING, t2.getExecutionState());
                            tm.tell(new CancelTask(eid1), testActorGateway);
                            expectMsgEquals(Acknowledge.get());
                            Future<Object> response = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskRemoved(eid1), timeout);
                            Await.ready(response, d);
                            assertEquals(ExecutionState.CANCELED, t1.getExecutionState());
                            tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
                            runningTasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
                            assertEquals(1, runningTasks.size());
                            tm.tell(new CancelTask(eid1), testActorGateway);
                            expectMsgEquals(Acknowledge.get());
                            tm.tell(new CancelTask(eid2), testActorGateway);
                            expectMsgEquals(Acknowledge.get());
                            response = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskRemoved(eid2), timeout);
                            Await.ready(response, d);
                            assertEquals(ExecutionState.CANCELED, t2.getExecutionState());
                            tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
                            runningTasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
                            assertEquals(0, runningTasks.size());
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
            } catch (Exception e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                TestingUtils.stopActor(taskManager);
                TestingUtils.stopActor(jobManager);
            }
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) StopTask(org.apache.flink.runtime.messages.TaskMessages.StopTask) CancelTask(org.apache.flink.runtime.messages.TaskMessages.CancelTask) SubmitTask(org.apache.flink.runtime.messages.TaskMessages.SubmitTask) TaskManagerServicesConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) BlobKey(org.apache.flink.runtime.blob.BlobKey) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) SubmitTask(org.apache.flink.runtime.messages.TaskMessages.SubmitTask) CancelTask(org.apache.flink.runtime.messages.TaskMessages.CancelTask) TestingTaskManagerMessages(org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) IOException(java.io.IOException) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 94 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class TaskManagerTest method testFailingScheduleOrUpdateConsumersMessage.

/**
	 * Test that a failing schedule or update consumers call leads to the failing of the respective
	 * task.
	 *
	 * IMPORTANT: We have to make sure that the invokable's cancel method is called, because only
	 * then the future is completed. We do this by not eagerly deploy consumer tasks and requiring
	 * the invokable to fill one memory segment. The completed memory segment will trigger the
	 * scheduling of the downstream operator since it is in pipeline mode. After we've filled the
	 * memory segment, we'll block the invokable and wait for the task failure due to the failed
	 * schedule or update consumers call.
	 */
@Test(timeout = 10000L)
public void testFailingScheduleOrUpdateConsumersMessage() throws Exception {
    new JavaTestKit(system) {

        {
            final Configuration configuration = new Configuration();
            // set the memory segment to the smallest size possible, because we have to fill one
            // memory buffer to trigger the schedule or update consumers message to the downstream
            // operators
            configuration.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY, 4096);
            final JobID jid = new JobID();
            final JobVertexID vid = new JobVertexID();
            final ExecutionAttemptID eid = new ExecutionAttemptID();
            final SerializedValue<ExecutionConfig> executionConfig = new SerializedValue<>(new ExecutionConfig());
            final ResultPartitionDeploymentDescriptor resultPartitionDeploymentDescriptor = new ResultPartitionDeploymentDescriptor(new IntermediateDataSetID(), new IntermediateResultPartitionID(), ResultPartitionType.PIPELINED, 1, 1, true);
            final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jid, "TestJob", vid, eid, executionConfig, "TestTask", 1, 0, 1, 0, new Configuration(), new Configuration(), TestInvokableRecordCancel.class.getName(), Collections.singletonList(resultPartitionDeploymentDescriptor), Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
            ActorRef jmActorRef = system.actorOf(Props.create(FailingScheduleOrUpdateConsumersJobManager.class, leaderSessionID), "jobmanager");
            ActorGateway jobManager = new AkkaActorGateway(jmActorRef, leaderSessionID);
            final ActorGateway taskManager = TestingUtils.createTaskManager(system, jobManager, configuration, true, true);
            try {
                TestInvokableRecordCancel.resetGotCanceledFuture();
                Future<Object> result = taskManager.ask(new SubmitTask(tdd), timeout);
                Await.result(result, timeout);
                org.apache.flink.runtime.concurrent.Future<Boolean> cancelFuture = TestInvokableRecordCancel.gotCanceled();
                assertEquals(true, cancelFuture.get());
            } finally {
                TestingUtils.stopActor(taskManager);
                TestingUtils.stopActor(jobManager);
            }
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) TaskManagerServicesConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) BlobKey(org.apache.flink.runtime.blob.BlobKey) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) SubmitTask(org.apache.flink.runtime.messages.TaskMessages.SubmitTask) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) SerializedValue(org.apache.flink.util.SerializedValue) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 95 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class TaskExecutionStateTest method testEqualsHashCode.

@Test
public void testEqualsHashCode() {
    try {
        final JobID jid = new JobID();
        final ExecutionAttemptID executionId = new ExecutionAttemptID();
        final ExecutionState state = ExecutionState.RUNNING;
        final Throwable error = new RuntimeException("some test error message");
        TaskExecutionState s1 = new TaskExecutionState(jid, executionId, state, error);
        TaskExecutionState s2 = new TaskExecutionState(jid, executionId, state, error);
        assertEquals(s1.hashCode(), s2.hashCode());
        assertEquals(s1, s2);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

JobID (org.apache.flink.api.common.JobID)335 Test (org.junit.Test)274 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)88 IOException (java.io.IOException)74 Configuration (org.apache.flink.configuration.Configuration)72 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)61 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)48 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)47 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)44 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)42 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)38 ArrayList (java.util.ArrayList)37 MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry)32 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)31 HashMap (java.util.HashMap)29 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)29 FiniteDuration (scala.concurrent.duration.FiniteDuration)28 IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)24 File (java.io.File)23 UUID (java.util.UUID)23