Search in sources :

Example 6 with BlobKey

use of org.apache.flink.runtime.blob.BlobKey in project flink by apache.

the class TaskManagerTest method testJobSubmissionAndCanceling.

@Test
public void testJobSubmissionAndCanceling() {
    new JavaTestKit(system) {

        {
            ActorGateway jobManager = null;
            ActorGateway taskManager = null;
            final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
            try {
                ActorRef jm = system.actorOf(Props.create(SimpleJobManager.class, leaderSessionID));
                jobManager = new AkkaActorGateway(jm, leaderSessionID);
                taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, true);
                final JobID jid1 = new JobID();
                final JobID jid2 = new JobID();
                JobVertexID vid1 = new JobVertexID();
                JobVertexID vid2 = new JobVertexID();
                final ExecutionAttemptID eid1 = new ExecutionAttemptID();
                final ExecutionAttemptID eid2 = new ExecutionAttemptID();
                final TaskDeploymentDescriptor tdd1 = createTaskDeploymentDescriptor(jid1, "TestJob1", vid1, eid1, new SerializedValue<>(new ExecutionConfig()), "TestTask1", 5, 1, 5, 0, new Configuration(), new Configuration(), TestInvokableBlockingCancelable.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
                final TaskDeploymentDescriptor tdd2 = createTaskDeploymentDescriptor(jid2, "TestJob2", vid2, eid2, new SerializedValue<>(new ExecutionConfig()), "TestTask2", 7, 2, 7, 0, new Configuration(), new Configuration(), TestInvokableBlockingCancelable.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
                final ActorGateway tm = taskManager;
                new Within(d) {

                    @Override
                    protected void run() {
                        try {
                            Future<Object> t1Running = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(eid1), timeout);
                            Future<Object> t2Running = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(eid2), timeout);
                            tm.tell(new SubmitTask(tdd1), testActorGateway);
                            tm.tell(new SubmitTask(tdd2), testActorGateway);
                            expectMsgEquals(Acknowledge.get());
                            expectMsgEquals(Acknowledge.get());
                            Await.ready(t1Running, d);
                            Await.ready(t2Running, d);
                            tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
                            Map<ExecutionAttemptID, Task> runningTasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
                            assertEquals(2, runningTasks.size());
                            Task t1 = runningTasks.get(eid1);
                            Task t2 = runningTasks.get(eid2);
                            assertNotNull(t1);
                            assertNotNull(t2);
                            assertEquals(ExecutionState.RUNNING, t1.getExecutionState());
                            assertEquals(ExecutionState.RUNNING, t2.getExecutionState());
                            tm.tell(new CancelTask(eid1), testActorGateway);
                            expectMsgEquals(Acknowledge.get());
                            Future<Object> response = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskRemoved(eid1), timeout);
                            Await.ready(response, d);
                            assertEquals(ExecutionState.CANCELED, t1.getExecutionState());
                            tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
                            runningTasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
                            assertEquals(1, runningTasks.size());
                            tm.tell(new CancelTask(eid1), testActorGateway);
                            expectMsgEquals(Acknowledge.get());
                            tm.tell(new CancelTask(eid2), testActorGateway);
                            expectMsgEquals(Acknowledge.get());
                            response = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskRemoved(eid2), timeout);
                            Await.ready(response, d);
                            assertEquals(ExecutionState.CANCELED, t2.getExecutionState());
                            tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
                            runningTasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
                            assertEquals(0, runningTasks.size());
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
            } catch (Exception e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                TestingUtils.stopActor(taskManager);
                TestingUtils.stopActor(jobManager);
            }
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) StopTask(org.apache.flink.runtime.messages.TaskMessages.StopTask) CancelTask(org.apache.flink.runtime.messages.TaskMessages.CancelTask) SubmitTask(org.apache.flink.runtime.messages.TaskMessages.SubmitTask) TaskManagerServicesConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) BlobKey(org.apache.flink.runtime.blob.BlobKey) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) SubmitTask(org.apache.flink.runtime.messages.TaskMessages.SubmitTask) CancelTask(org.apache.flink.runtime.messages.TaskMessages.CancelTask) TestingTaskManagerMessages(org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) IOException(java.io.IOException) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 7 with BlobKey

use of org.apache.flink.runtime.blob.BlobKey in project flink by apache.

the class TaskManagerTest method testFailingScheduleOrUpdateConsumersMessage.

/**
	 * Test that a failing schedule or update consumers call leads to the failing of the respective
	 * task.
	 *
	 * IMPORTANT: We have to make sure that the invokable's cancel method is called, because only
	 * then the future is completed. We do this by not eagerly deploy consumer tasks and requiring
	 * the invokable to fill one memory segment. The completed memory segment will trigger the
	 * scheduling of the downstream operator since it is in pipeline mode. After we've filled the
	 * memory segment, we'll block the invokable and wait for the task failure due to the failed
	 * schedule or update consumers call.
	 */
@Test(timeout = 10000L)
public void testFailingScheduleOrUpdateConsumersMessage() throws Exception {
    new JavaTestKit(system) {

        {
            final Configuration configuration = new Configuration();
            // set the memory segment to the smallest size possible, because we have to fill one
            // memory buffer to trigger the schedule or update consumers message to the downstream
            // operators
            configuration.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY, 4096);
            final JobID jid = new JobID();
            final JobVertexID vid = new JobVertexID();
            final ExecutionAttemptID eid = new ExecutionAttemptID();
            final SerializedValue<ExecutionConfig> executionConfig = new SerializedValue<>(new ExecutionConfig());
            final ResultPartitionDeploymentDescriptor resultPartitionDeploymentDescriptor = new ResultPartitionDeploymentDescriptor(new IntermediateDataSetID(), new IntermediateResultPartitionID(), ResultPartitionType.PIPELINED, 1, 1, true);
            final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jid, "TestJob", vid, eid, executionConfig, "TestTask", 1, 0, 1, 0, new Configuration(), new Configuration(), TestInvokableRecordCancel.class.getName(), Collections.singletonList(resultPartitionDeploymentDescriptor), Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
            ActorRef jmActorRef = system.actorOf(Props.create(FailingScheduleOrUpdateConsumersJobManager.class, leaderSessionID), "jobmanager");
            ActorGateway jobManager = new AkkaActorGateway(jmActorRef, leaderSessionID);
            final ActorGateway taskManager = TestingUtils.createTaskManager(system, jobManager, configuration, true, true);
            try {
                TestInvokableRecordCancel.resetGotCanceledFuture();
                Future<Object> result = taskManager.ask(new SubmitTask(tdd), timeout);
                Await.result(result, timeout);
                org.apache.flink.runtime.concurrent.Future<Boolean> cancelFuture = TestInvokableRecordCancel.gotCanceled();
                assertEquals(true, cancelFuture.get());
            } finally {
                TestingUtils.stopActor(taskManager);
                TestingUtils.stopActor(jobManager);
            }
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) TaskManagerServicesConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) BlobKey(org.apache.flink.runtime.blob.BlobKey) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) SubmitTask(org.apache.flink.runtime.messages.TaskMessages.SubmitTask) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) SerializedValue(org.apache.flink.util.SerializedValue) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 8 with BlobKey

use of org.apache.flink.runtime.blob.BlobKey in project flink by apache.

the class TaskDeploymentDescriptorTest method testSerialization.

@Test
public void testSerialization() {
    try {
        final JobID jobID = new JobID();
        final JobVertexID vertexID = new JobVertexID();
        final ExecutionAttemptID execId = new ExecutionAttemptID();
        final AllocationID allocationId = new AllocationID();
        final String jobName = "job name";
        final String taskName = "task name";
        final int numberOfKeyGroups = 1;
        final int indexInSubtaskGroup = 0;
        final int currentNumberOfSubtasks = 1;
        final int attemptNumber = 0;
        final Configuration jobConfiguration = new Configuration();
        final Configuration taskConfiguration = new Configuration();
        final Class<? extends AbstractInvokable> invokableClass = BatchTask.class;
        final List<ResultPartitionDeploymentDescriptor> producedResults = new ArrayList<ResultPartitionDeploymentDescriptor>(0);
        final List<InputGateDeploymentDescriptor> inputGates = new ArrayList<InputGateDeploymentDescriptor>(0);
        final List<BlobKey> requiredJars = new ArrayList<BlobKey>(0);
        final List<URL> requiredClasspaths = new ArrayList<URL>(0);
        final SerializedValue<ExecutionConfig> executionConfig = new SerializedValue<>(new ExecutionConfig());
        final SerializedValue<JobInformation> serializedJobInformation = new SerializedValue<>(new JobInformation(jobID, jobName, executionConfig, jobConfiguration, requiredJars, requiredClasspaths));
        final SerializedValue<TaskInformation> serializedJobVertexInformation = new SerializedValue<>(new TaskInformation(vertexID, taskName, currentNumberOfSubtasks, numberOfKeyGroups, invokableClass.getName(), taskConfiguration));
        final int targetSlotNumber = 47;
        final TaskStateHandles taskStateHandles = new TaskStateHandles();
        final TaskDeploymentDescriptor orig = new TaskDeploymentDescriptor(serializedJobInformation, serializedJobVertexInformation, execId, allocationId, indexInSubtaskGroup, attemptNumber, targetSlotNumber, taskStateHandles, producedResults, inputGates);
        final TaskDeploymentDescriptor copy = CommonTestUtils.createCopySerializable(orig);
        assertFalse(orig.getSerializedJobInformation() == copy.getSerializedJobInformation());
        assertFalse(orig.getSerializedTaskInformation() == copy.getSerializedTaskInformation());
        assertFalse(orig.getExecutionAttemptId() == copy.getExecutionAttemptId());
        assertFalse(orig.getTaskStateHandles() == copy.getTaskStateHandles());
        assertFalse(orig.getProducedPartitions() == copy.getProducedPartitions());
        assertFalse(orig.getInputGates() == copy.getInputGates());
        assertEquals(orig.getSerializedJobInformation(), copy.getSerializedJobInformation());
        assertEquals(orig.getSerializedTaskInformation(), copy.getSerializedTaskInformation());
        assertEquals(orig.getExecutionAttemptId(), copy.getExecutionAttemptId());
        assertEquals(orig.getAllocationId(), copy.getAllocationId());
        assertEquals(orig.getSubtaskIndex(), copy.getSubtaskIndex());
        assertEquals(orig.getAttemptNumber(), copy.getAttemptNumber());
        assertEquals(orig.getTargetSlotNumber(), copy.getTargetSlotNumber());
        assertEquals(orig.getTaskStateHandles(), copy.getTaskStateHandles());
        assertEquals(orig.getProducedPartitions(), copy.getProducedPartitions());
        assertEquals(orig.getInputGates(), copy.getInputGates());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) URL(java.net.URL) BlobKey(org.apache.flink.runtime.blob.BlobKey) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) BatchTask(org.apache.flink.runtime.operators.BatchTask) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) SerializedValue(org.apache.flink.util.SerializedValue) TaskStateHandles(org.apache.flink.runtime.state.TaskStateHandles) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 9 with BlobKey

use of org.apache.flink.runtime.blob.BlobKey in project flink by apache.

the class BlobLibraryCacheManagerTest method testLibraryCacheManagerCleanup.

@Test
public void testLibraryCacheManagerCleanup() {
    JobID jid = new JobID();
    List<BlobKey> keys = new ArrayList<BlobKey>();
    BlobServer server = null;
    BlobLibraryCacheManager libraryCacheManager = null;
    final byte[] buf = new byte[128];
    try {
        Configuration config = new Configuration();
        server = new BlobServer(config);
        InetSocketAddress blobSocketAddress = new InetSocketAddress(server.getPort());
        BlobClient bc = new BlobClient(blobSocketAddress, config);
        keys.add(bc.put(buf));
        buf[0] += 1;
        keys.add(bc.put(buf));
        long cleanupInterval = 1000l;
        libraryCacheManager = new BlobLibraryCacheManager(server, cleanupInterval);
        libraryCacheManager.registerJob(jid, keys, Collections.<URL>emptyList());
        List<File> files = new ArrayList<File>();
        for (BlobKey key : keys) {
            files.add(libraryCacheManager.getFile(key));
        }
        assertEquals(2, files.size());
        files.clear();
        libraryCacheManager.unregisterJob(jid);
        // because we cannot guarantee that there are not thread races in the build system, we
        // loop for a certain while until the references disappear
        {
            long deadline = System.currentTimeMillis() + 30000;
            do {
                Thread.sleep(500);
            } while (libraryCacheManager.getNumberOfCachedLibraries() > 0 && System.currentTimeMillis() < deadline);
        }
        // this fails if we exited via a timeout
        assertEquals(0, libraryCacheManager.getNumberOfCachedLibraries());
        int caughtExceptions = 0;
        for (BlobKey key : keys) {
            // the blob cache should no longer contain the files
            try {
                files.add(libraryCacheManager.getFile(key));
            } catch (IOException ioe) {
                caughtExceptions++;
            }
        }
        assertEquals(2, caughtExceptions);
        bc.close();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        if (server != null) {
            server.shutdown();
        }
        if (libraryCacheManager != null) {
            try {
                libraryCacheManager.shutdown();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
Also used : BlobClient(org.apache.flink.runtime.blob.BlobClient) Configuration(org.apache.flink.configuration.Configuration) GlobalConfiguration(org.apache.flink.configuration.GlobalConfiguration) InetSocketAddress(java.net.InetSocketAddress) ArrayList(java.util.ArrayList) IOException(java.io.IOException) IOException(java.io.IOException) BlobKey(org.apache.flink.runtime.blob.BlobKey) BlobServer(org.apache.flink.runtime.blob.BlobServer) File(java.io.File) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 10 with BlobKey

use of org.apache.flink.runtime.blob.BlobKey in project flink by apache.

the class BlobLibraryCacheManagerTest method testRegisterAndDownload.

@Test
public void testRegisterAndDownload() {
    //setWritable doesn't work on Windows.
    assumeTrue(!OperatingSystem.isWindows());
    BlobServer server = null;
    BlobCache cache = null;
    File cacheDir = null;
    try {
        // create the blob transfer services
        Configuration config = new Configuration();
        server = new BlobServer(config);
        InetSocketAddress serverAddress = new InetSocketAddress("localhost", server.getPort());
        cache = new BlobCache(serverAddress, config);
        // upload some meaningless data to the server
        BlobClient uploader = new BlobClient(serverAddress, config);
        BlobKey dataKey1 = uploader.put(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 });
        BlobKey dataKey2 = uploader.put(new byte[] { 11, 12, 13, 14, 15, 16, 17, 18 });
        uploader.close();
        BlobLibraryCacheManager libCache = new BlobLibraryCacheManager(cache, 1000000000L);
        assertEquals(0, libCache.getNumberOfCachedLibraries());
        // first try to access a non-existing entry
        try {
            libCache.getClassLoader(new JobID());
            fail("Should fail with an IllegalStateException");
        } catch (IllegalStateException e) {
        // that#s what we want
        }
        // now register some BLOBs as libraries
        {
            JobID jid = new JobID();
            ExecutionAttemptID executionId = new ExecutionAttemptID();
            Collection<BlobKey> keys = Collections.singleton(dataKey1);
            libCache.registerTask(jid, executionId, keys, Collections.<URL>emptyList());
            assertEquals(1, libCache.getNumberOfReferenceHolders(jid));
            assertEquals(1, libCache.getNumberOfCachedLibraries());
            assertNotNull(libCache.getClassLoader(jid));
            // un-register them again
            libCache.unregisterTask(jid, executionId);
            // Don't fail if called again
            libCache.unregisterTask(jid, executionId);
            assertEquals(0, libCache.getNumberOfReferenceHolders(jid));
            // library is still cached (but not associated with job any more)
            assertEquals(1, libCache.getNumberOfCachedLibraries());
            // should not be able to access the classloader any more
            try {
                libCache.getClassLoader(jid);
                fail("Should fail with an IllegalStateException");
            } catch (IllegalStateException e) {
            // that#s what we want
            }
        }
        cacheDir = new File(cache.getStorageDir(), "cache");
        assertTrue(cacheDir.exists());
        // make sure no further blobs can be downloaded by removing the write
        // permissions from the directory
        assertTrue("Could not remove write permissions from cache directory", cacheDir.setWritable(false, false));
        // since we cannot download this library any more, this call should fail
        try {
            libCache.registerTask(new JobID(), new ExecutionAttemptID(), Collections.singleton(dataKey2), Collections.<URL>emptyList());
            fail("This should fail with an IOException");
        } catch (IOException e) {
        // splendid!
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        if (cacheDir != null) {
            if (!cacheDir.setWritable(true, false)) {
                System.err.println("Could not re-add write permissions to cache directory.");
            }
        }
        if (cache != null) {
            cache.shutdown();
        }
        if (server != null) {
            server.shutdown();
        }
    }
}
Also used : BlobClient(org.apache.flink.runtime.blob.BlobClient) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) Configuration(org.apache.flink.configuration.Configuration) GlobalConfiguration(org.apache.flink.configuration.GlobalConfiguration) InetSocketAddress(java.net.InetSocketAddress) BlobCache(org.apache.flink.runtime.blob.BlobCache) IOException(java.io.IOException) URL(java.net.URL) IOException(java.io.IOException) BlobKey(org.apache.flink.runtime.blob.BlobKey) Collection(java.util.Collection) BlobServer(org.apache.flink.runtime.blob.BlobServer) File(java.io.File) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

BlobKey (org.apache.flink.runtime.blob.BlobKey)22 Test (org.junit.Test)15 IOException (java.io.IOException)13 JobID (org.apache.flink.api.common.JobID)13 Configuration (org.apache.flink.configuration.Configuration)13 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)10 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)10 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)9 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)8 JavaTestKit (akka.testkit.JavaTestKit)7 TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)7 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)7 SubmitTask (org.apache.flink.runtime.messages.TaskMessages.SubmitTask)7 TaskManagerServicesConfiguration (org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration)7 ActorRef (akka.actor.ActorRef)6 File (java.io.File)6 InetSocketAddress (java.net.InetSocketAddress)6 ArrayList (java.util.ArrayList)6 TestingTaskManagerMessages (org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages)6 BlobClient (org.apache.flink.runtime.blob.BlobClient)5