Search in sources :

Example 76 with ExecutionAttemptID

use of org.apache.flink.runtime.executiongraph.ExecutionAttemptID in project flink by apache.

the class TaskManagerGroupTest method addAndRemoveJobs.

// ------------------------------------------------------------------------
//  adding and removing jobs
// ------------------------------------------------------------------------
/**
 * Verifies that a job metric group stays registered while at least one of its task metric
 * groups is open, and is closed and unregistered once its last task metric group is closed.
 */
@Test
public void addAndRemoveJobs() throws IOException {
    MetricRegistry registry = new MetricRegistry(MetricRegistryConfiguration.defaultMetricRegistryConfiguration());
    // Shut the registry down in a finally block so that a failing assertion
    // does not skip the shutdown call.
    try {
        final TaskManagerMetricGroup group = new TaskManagerMetricGroup(registry, "localhost", new AbstractID().toString());
        final JobID jid1 = new JobID();
        final JobID jid2 = new JobID();
        final String jobName1 = "testjob";
        final String jobName2 = "anotherJob";
        final JobVertexID vertex11 = new JobVertexID();
        final JobVertexID vertex12 = new JobVertexID();
        final JobVertexID vertex13 = new JobVertexID();
        final JobVertexID vertex21 = new JobVertexID();
        final ExecutionAttemptID execution11 = new ExecutionAttemptID();
        final ExecutionAttemptID execution12 = new ExecutionAttemptID();
        final ExecutionAttemptID execution13 = new ExecutionAttemptID();
        final ExecutionAttemptID execution21 = new ExecutionAttemptID();
        // two tasks for job 1, one task for job 2
        TaskMetricGroup tmGroup11 = group.addTaskForJob(jid1, jobName1, vertex11, execution11, "test", 17, 0);
        TaskMetricGroup tmGroup12 = group.addTaskForJob(jid1, jobName1, vertex12, execution12, "test", 13, 1);
        TaskMetricGroup tmGroup21 = group.addTaskForJob(jid2, jobName2, vertex21, execution21, "test", 7, 2);
        assertEquals(2, group.numRegisteredJobMetricGroups());
        assertFalse(tmGroup11.parent().isClosed());
        assertFalse(tmGroup12.parent().isClosed());
        assertFalse(tmGroup21.parent().isClosed());
        // close all for job 2 and one from job 1
        tmGroup11.close();
        tmGroup21.close();
        assertTrue(tmGroup11.isClosed());
        assertTrue(tmGroup21.isClosed());
        // job 2 should be removed, job 1 should still be there
        assertFalse(tmGroup11.parent().isClosed());
        assertFalse(tmGroup12.parent().isClosed());
        assertTrue(tmGroup21.parent().isClosed());
        assertEquals(1, group.numRegisteredJobMetricGroups());
        // add one more to job one
        TaskMetricGroup tmGroup13 = group.addTaskForJob(jid1, jobName1, vertex13, execution13, "test", 0, 0);
        tmGroup12.close();
        tmGroup13.close();
        // all tasks of job 1 are closed now, so job 1 should be removed as well
        assertTrue(tmGroup11.parent().isClosed());
        assertTrue(tmGroup12.parent().isClosed());
        assertTrue(tmGroup13.parent().isClosed());
        assertEquals(0, group.numRegisteredJobMetricGroups());
    } finally {
        registry.shutdown();
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) AbstractID(org.apache.flink.util.AbstractID) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 77 with ExecutionAttemptID

use of org.apache.flink.runtime.executiongraph.ExecutionAttemptID in project flink by apache.

the class TaskManagerGroupTest method testCloseClosesAll.

/**
 * Verifies that closing the task manager metric group also closes every task metric group
 * that was added through it, across multiple jobs.
 */
@Test
public void testCloseClosesAll() throws IOException {
    MetricRegistry registry = new MetricRegistry(MetricRegistryConfiguration.defaultMetricRegistryConfiguration());
    // Shut the registry down in a finally block so that a failing assertion
    // does not skip the shutdown call.
    try {
        final TaskManagerMetricGroup group = new TaskManagerMetricGroup(registry, "localhost", new AbstractID().toString());
        final JobID jid1 = new JobID();
        final JobID jid2 = new JobID();
        final String jobName1 = "testjob";
        final String jobName2 = "anotherJob";
        final JobVertexID vertex11 = new JobVertexID();
        final JobVertexID vertex12 = new JobVertexID();
        final JobVertexID vertex21 = new JobVertexID();
        final ExecutionAttemptID execution11 = new ExecutionAttemptID();
        final ExecutionAttemptID execution12 = new ExecutionAttemptID();
        final ExecutionAttemptID execution21 = new ExecutionAttemptID();
        // two tasks for job 1, one task for job 2
        TaskMetricGroup tmGroup11 = group.addTaskForJob(jid1, jobName1, vertex11, execution11, "test", 17, 0);
        TaskMetricGroup tmGroup12 = group.addTaskForJob(jid1, jobName1, vertex12, execution12, "test", 13, 1);
        TaskMetricGroup tmGroup21 = group.addTaskForJob(jid2, jobName2, vertex21, execution21, "test", 7, 1);
        group.close();
        assertTrue(tmGroup11.isClosed());
        assertTrue(tmGroup12.isClosed());
        assertTrue(tmGroup21.isClosed());
    } finally {
        registry.shutdown();
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) AbstractID(org.apache.flink.util.AbstractID) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 78 with ExecutionAttemptID

use of org.apache.flink.runtime.executiongraph.ExecutionAttemptID in project flink by apache.

the class BlobLibraryCacheRecoveryITCase method testRecoveryRegisterAndDownload.

/**
 * Tests that with {@link HighAvailabilityMode#ZOOKEEPER} distributed JARs are recoverable from any
 * participating BlobLibraryCacheManager.
 *
 * <p>Two blob servers are started against the same HA storage path. Data is uploaded through the
 * first server and then read back through a cache connected to the first server and, after a
 * restart of the cache, through a cache connected to the second server.
 */
@Test
public void testRecoveryRegisterAndDownload() throws Exception {
    Random rand = new Random();
    BlobServer[] server = new BlobServer[2];
    InetSocketAddress[] serverAddress = new InetSocketAddress[2];
    BlobLibraryCacheManager[] libServer = new BlobLibraryCacheManager[2];
    BlobCache cache = null;
    BlobLibraryCacheManager libCache = null;
    Configuration config = new Configuration();
    config.setString(HighAvailabilityOptions.HA_MODE, "ZOOKEEPER");
    config.setString(CoreOptions.STATE_BACKEND, "FILESYSTEM");
    config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.getRoot().getAbsolutePath());
    try {
        for (int i = 0; i < server.length; i++) {
            server[i] = new BlobServer(config);
            serverAddress[i] = new InetSocketAddress("localhost", server[i].getPort());
            libServer[i] = new BlobLibraryCacheManager(server[i], 3600 * 1000);
        }
        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);
        List<BlobKey> keys = new ArrayList<>(2);
        // Upload some data (libraries)
        try (BlobClient client = new BlobClient(serverAddress[0], config)) {
            // Request 1: the full array
            keys.add(client.put(expected));
            // Request 2: the 256 bytes starting at offset 32
            keys.add(client.put(expected, 32, 256));
        }
        // The cache
        cache = new BlobCache(serverAddress[0], config);
        libCache = new BlobLibraryCacheManager(cache, 3600 * 1000);
        // Register uploaded libraries
        JobID jobId = new JobID();
        ExecutionAttemptID executionId = new ExecutionAttemptID();
        libServer[0].registerTask(jobId, executionId, keys, Collections.<URL>emptyList());
        // Verify key 1
        assertFileContentEquals(expected, 0, expected.length, libCache.getFile(keys.get(0)));
        // Shutdown cache and start with other server
        cache.shutdown();
        libCache.shutdown();
        cache = new BlobCache(serverAddress[1], config);
        libCache = new BlobLibraryCacheManager(cache, 3600 * 1000);
        // Verify key 1
        assertFileContentEquals(expected, 0, expected.length, libCache.getFile(keys.get(0)));
        // Verify key 2
        assertFileContentEquals(expected, 32, 256, libCache.getFile(keys.get(1)));
        // Remove blobs again
        try (BlobClient client = new BlobClient(serverAddress[1], config)) {
            client.delete(keys.get(0));
            client.delete(keys.get(1));
        }
        // Verify everything is clean below recoveryDir/<cluster_id>
        final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
        File haBlobStoreDir = new File(temporaryFolder.getRoot(), clusterId);
        File[] recoveryFiles = haBlobStoreDir.listFiles();
        assertNotNull("HA storage directory does not exist", recoveryFiles);
        assertEquals("Unclean state backend: " + Arrays.toString(recoveryFiles), 0, recoveryFiles.length);
    } finally {
        // The server-side library cache managers were previously never shut down.
        // NOTE(review): assumes shutting a manager down and then shutting down the
        // BlobServer it wraps is harmless, as with cache/libCache below - confirm.
        for (BlobLibraryCacheManager manager : libServer) {
            if (manager != null) {
                manager.shutdown();
            }
        }
        for (BlobServer s : server) {
            if (s != null) {
                s.shutdown();
            }
        }
        if (cache != null) {
            cache.shutdown();
        }
        if (libCache != null) {
            libCache.shutdown();
        }
    }
}

/**
 * Checks that {@code file} has a length of {@code length} bytes and that the bytes read from
 * it match {@code expected}, starting at index {@code offset}.
 *
 * @param expected array holding the reference data
 * @param offset index in {@code expected} of the first byte to compare
 * @param length number of bytes the file is expected to contain
 * @param file file whose content is compared
 */
private static void assertFileContentEquals(byte[] expected, int offset, int length, File file) throws Exception {
    assertEquals(length, file.length());
    try (FileInputStream fis = new FileInputStream(file)) {
        for (int i = 0; i < length && fis.available() > 0; i++) {
            assertEquals(expected[offset + i], (byte) fis.read());
        }
        // nothing may be left over after the expected bytes were consumed
        assertEquals(0, fis.available());
    }
}
Also used : BlobClient(org.apache.flink.runtime.blob.BlobClient) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) Configuration(org.apache.flink.configuration.Configuration) InetSocketAddress(java.net.InetSocketAddress) ArrayList(java.util.ArrayList) BlobCache(org.apache.flink.runtime.blob.BlobCache) FileInputStream(java.io.FileInputStream) BlobKey(org.apache.flink.runtime.blob.BlobKey) Random(java.util.Random) BlobServer(org.apache.flink.runtime.blob.BlobServer) File(java.io.File) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 79 with ExecutionAttemptID

use of org.apache.flink.runtime.executiongraph.ExecutionAttemptID in project flink by apache.

the class SingleInputGateTest method testRequestBackoffConfiguration.

/**
 * Verifies that the partition request backoff values reported by the network environment are
 * applied to the local, remote and unknown input channels created by the gate.
 */
@Test
public void testRequestBackoffConfiguration() throws Exception {
    final ResultPartitionID localPartition = new ResultPartitionID();
    final ResultPartitionID remotePartition = new ResultPartitionID();
    final ResultPartitionID unknownPartition = new ResultPartitionID();

    // One descriptor per channel flavour: local, remote, unknown.
    final InputChannelDeploymentDescriptor localDesc =
        new InputChannelDeploymentDescriptor(localPartition, ResultPartitionLocation.createLocal());
    final InputChannelDeploymentDescriptor remoteDesc =
        new InputChannelDeploymentDescriptor(
            remotePartition,
            ResultPartitionLocation.createRemote(new ConnectionID(new InetSocketAddress("localhost", 5000), 0)));
    final InputChannelDeploymentDescriptor unknownDesc =
        new InputChannelDeploymentDescriptor(unknownPartition, ResultPartitionLocation.createUnknown());

    final InputGateDeploymentDescriptor gateDescriptor =
        new InputGateDeploymentDescriptor(
            new IntermediateDataSetID(),
            ResultPartitionType.PIPELINED,
            0,
            new InputChannelDeploymentDescriptor[] { localDesc, remoteDesc, unknownDesc });

    final int initialBackoff = 137;
    final int maxBackoff = 1001;

    // Mocked environment that hands out the backoff values under test.
    final NetworkEnvironment network = mock(NetworkEnvironment.class);
    when(network.getResultPartitionManager()).thenReturn(new ResultPartitionManager());
    when(network.getTaskEventDispatcher()).thenReturn(new TaskEventDispatcher());
    when(network.getPartitionRequestInitialBackoff()).thenReturn(initialBackoff);
    when(network.getPartitionRequestMaxBackoff()).thenReturn(maxBackoff);
    when(network.getConnectionManager()).thenReturn(new LocalConnectionManager());

    final SingleInputGate inputGate = SingleInputGate.create(
        "TestTask",
        new JobID(),
        new ExecutionAttemptID(),
        gateDescriptor,
        network,
        mock(TaskActions.class),
        new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());

    assertEquals(gateDescriptor.getConsumedPartitionType(), inputGate.getConsumedPartitionType());

    final Map<IntermediateResultPartitionID, InputChannel> channelsById = inputGate.getInputChannels();
    assertEquals(3, channelsById.size());

    // Each descriptor must have produced a channel of the matching type.
    final InputChannel local = channelsById.get(localPartition.getPartitionId());
    assertEquals(LocalInputChannel.class, local.getClass());
    final InputChannel remote = channelsById.get(remotePartition.getPartitionId());
    assertEquals(RemoteInputChannel.class, remote.getClass());
    final InputChannel unknown = channelsById.get(unknownPartition.getPartitionId());
    assertEquals(UnknownInputChannel.class, unknown.getClass());

    // Backoff expected after each successful increase: it doubles twice, then hits the maximum.
    final int[] expectedBackoffs = { initialBackoff, initialBackoff * 2, initialBackoff * 2 * 2, maxBackoff };

    for (InputChannel channel : new InputChannel[] { local, remote, unknown }) {
        assertEquals(0, channel.getCurrentBackoff());
        for (int expectedBackoff : expectedBackoffs) {
            assertTrue(channel.increaseBackoff());
            assertEquals(expectedBackoff, channel.getCurrentBackoff());
        }
        // Once the maximum is reached, a further increase is refused.
        assertFalse(channel.increaseBackoff());
    }
}
Also used : UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) InetSocketAddress(java.net.InetSocketAddress) TaskActions(org.apache.flink.runtime.taskmanager.TaskActions) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) ConnectionID(org.apache.flink.runtime.io.network.ConnectionID) LocalConnectionManager(org.apache.flink.runtime.io.network.LocalConnectionManager) InputChannelDeploymentDescriptor(org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) JobID(org.apache.flink.api.common.JobID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 80 with ExecutionAttemptID

use of org.apache.flink.runtime.executiongraph.ExecutionAttemptID in project flink by apache.

the class BlockingCheckpointsTest method createTask.

// ------------------------------------------------------------------------
//  Utilities
// ------------------------------------------------------------------------
/**
 * Builds a {@link Task} for these tests from the given task configuration.
 *
 * <p>The task information names {@code TestStreamTask} as its class; most collaborators handed
 * to the task are Mockito mocks or testing stand-ins.
 *
 * @param taskConfig configuration passed into the {@link TaskInformation}
 * @return the newly constructed task
 */
private static Task createTask(Configuration taskConfig) throws IOException {
    final JobInformation jobInfo = new JobInformation(
        new JobID(),
        "test job name",
        new SerializedValue<>(new ExecutionConfig()),
        new Configuration(),
        Collections.<BlobKey>emptyList(),
        Collections.<URL>emptyList());

    final TaskInformation taskInfo = new TaskInformation(
        new JobVertexID(),
        "test task name",
        1,
        11,
        TestStreamTask.class.getName(),
        taskConfig);

    // The mocked network environment returns a mocked KvState registry for any job/vertex.
    final TaskKvStateRegistry kvStateRegistry = mock(TaskKvStateRegistry.class);
    final NetworkEnvironment networkEnvironment = mock(NetworkEnvironment.class);
    when(networkEnvironment.createKvStateTaskRegistry(any(JobID.class), any(JobVertexID.class)))
        .thenReturn(kvStateRegistry);

    return new Task(
        jobInfo,
        taskInfo,
        new ExecutionAttemptID(),
        new AllocationID(),
        0,
        0,
        Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
        Collections.<InputGateDeploymentDescriptor>emptyList(),
        0,
        null,
        mock(MemoryManager.class),
        mock(IOManager.class),
        networkEnvironment,
        mock(BroadcastVariableManager.class),
        mock(TaskManagerActions.class),
        mock(InputSplitProvider.class),
        mock(CheckpointResponder.class),
        new FallbackLibraryCacheManager(),
        new FileCache(new String[] { EnvironmentInformation.getTemporaryFileDirectory() }),
        new TestingTaskManagerRuntimeInfo(),
        new UnregisteredTaskMetricsGroup(),
        mock(ResultPartitionConsumableNotifier.class),
        mock(PartitionProducerStateChecker.class),
        Executors.directExecutor());
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) Configuration(org.apache.flink.configuration.Configuration) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TaskKvStateRegistry(org.apache.flink.runtime.query.TaskKvStateRegistry) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions) TestingTaskManagerRuntimeInfo(org.apache.flink.runtime.util.TestingTaskManagerRuntimeInfo) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) PartitionProducerStateChecker(org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker) InputSplitProvider(org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) FallbackLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.FallbackLibraryCacheManager) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) FileCache(org.apache.flink.runtime.filecache.FileCache) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) JobID(org.apache.flink.api.common.JobID)

Aggregations

ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)81 Test (org.junit.Test)66 JobID (org.apache.flink.api.common.JobID)61 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)41 IOException (java.io.IOException)31 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)30 Configuration (org.apache.flink.configuration.Configuration)24 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)21 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)19 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)17 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)16 TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)15 TaskManagerServicesConfiguration (org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration)14 ActorRef (akka.actor.ActorRef)13 SubmitTask (org.apache.flink.runtime.messages.TaskMessages.SubmitTask)13 JavaTestKit (akka.testkit.JavaTestKit)12 BlobKey (org.apache.flink.runtime.blob.BlobKey)10 TriggerStackTraceSample (org.apache.flink.runtime.messages.StackTraceSampleMessages.TriggerStackTraceSample)10 PartitionNotFoundException (org.apache.flink.runtime.io.network.partition.PartitionNotFoundException)9 ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID)9