Example 11 with PermanentBlobCache

Use of org.apache.flink.runtime.blob.PermanentBlobCache in project flink by splunk.

From the class DefaultExecutionGraphDeploymentWithBlobCacheTest, method setupBlobServer.

@Before
@Override
public void setupBlobServer() throws IOException {
    Configuration config = new Configuration();
    // always offload the serialized job and task information
    config.setInteger(BlobServerOptions.OFFLOAD_MINSIZE, 0);
    blobServer = new BlobServer(config, TEMPORARY_FOLDER.newFolder(), new VoidBlobStore());
    blobServer.start();
    blobWriter = blobServer;
    InetSocketAddress serverAddress = new InetSocketAddress("localhost", blobServer.getPort());
    blobCache = new PermanentBlobCache(config, TEMPORARY_FOLDER.newFolder(), new VoidBlobStore(), serverAddress);
}
Also used : VoidBlobStore(org.apache.flink.runtime.blob.VoidBlobStore) PermanentBlobCache(org.apache.flink.runtime.blob.PermanentBlobCache) Configuration(org.apache.flink.configuration.Configuration) InetSocketAddress(java.net.InetSocketAddress) BlobServer(org.apache.flink.runtime.blob.BlobServer) Before(org.junit.Before)
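
A minimal sketch of how a cache wired up this way is typically used: bytes go in through the server's putPermanent and come back out through the cache's getFile, which downloads the BLOB on first access and serves it from local storage afterwards. The job ID and payload below are illustrative and not part of the test above.

// Illustrative usage sketch, assuming blobServer and blobCache were created
// as in setupBlobServer above. Not part of the original test.
JobID jobId = new JobID();
byte[] data = new byte[] { 1, 2, 3, 4 };
// Upload through the server; the returned key identifies the permanent BLOB.
PermanentBlobKey key = blobServer.putPermanent(jobId, data);
// The first getFile call downloads the BLOB from the server and caches it in
// the cache's local storage directory; later calls are served locally.
File localFile = blobCache.getFile(jobId, key);
assertEquals(data.length, localFile.length());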

Example 12 with PermanentBlobCache

Use of org.apache.flink.runtime.blob.PermanentBlobCache in project flink by splunk.

From the class DefaultExecutionGraphDeploymentWithSmallBlobCacheSizeLimitTest, method testDeployMultipleTasksWithSmallBlobCacheSizeLimit.

/**
 * Tests that deployment works correctly even when the size limit of {@link BlobCacheSizeTracker}
 * in {@link PermanentBlobCache} is set to the minimum value.
 *
 * <p>In this extreme case, since the size limit is 1, every time a task is deployed, all the
 * existing **tracked** BLOBs in the cache must be untracked and deleted before the new BLOB is
 * stored in the cache.
 *
 * <p>This extreme case subsumes the normal case, where the size limit is much larger than 1 and
 * deletions happen far less frequently.
 */
@Test
public void testDeployMultipleTasksWithSmallBlobCacheSizeLimit() throws Exception {
    final int numberOfVertices = 4;
    final int parallelism = 10;
    final ExecutionGraph eg = createAndSetupExecutionGraph(numberOfVertices, parallelism);
    final SimpleAckingTaskManagerGateway taskManagerGateway = new SimpleAckingTaskManagerGateway();
    final BlockingQueue<TaskDeploymentDescriptor> tdds = new ArrayBlockingQueue<>(numberOfVertices * parallelism);
    taskManagerGateway.setSubmitConsumer(FunctionUtils.uncheckedConsumer(taskDeploymentDescriptor -> {
        taskDeploymentDescriptor.loadBigData(blobCache);
        tdds.offer(taskDeploymentDescriptor);
    }));
    for (ExecutionJobVertex ejv : eg.getVerticesTopologically()) {
        for (ExecutionVertex ev : ejv.getTaskVertices()) {
            assertEquals(ExecutionState.CREATED, ev.getExecutionState());
            LogicalSlot slot = new TestingLogicalSlotBuilder().setTaskManagerGateway(taskManagerGateway).createTestingLogicalSlot();
            final Execution execution = ev.getCurrentExecutionAttempt();
            execution.transitionState(ExecutionState.SCHEDULED);
            execution.registerProducedPartitions(slot.getTaskManagerLocation(), true).get();
            ev.deployToSlot(slot);
            assertEquals(ExecutionState.DEPLOYING, ev.getExecutionState());
            TaskDeploymentDescriptor tdd = tdds.take();
            assertNotNull(tdd);
            List<InputGateDeploymentDescriptor> igdds = tdd.getInputGates();
            assertEquals(ev.getAllConsumedPartitionGroups().size(), igdds.size());
            if (igdds.size() > 0) {
                checkShuffleDescriptors(igdds.get(0), ev.getConsumedPartitionGroup(0));
            }
        }
    }
}
Also used : ComponentMainThreadExecutorServiceAdapter(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutorServiceAdapter) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) BlobServer(org.apache.flink.runtime.blob.BlobServer) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) BlobCacheSizeTracker(org.apache.flink.runtime.blob.BlobCacheSizeTracker) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) ArrayList(java.util.ArrayList) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestingLogicalSlotBuilder(org.apache.flink.runtime.jobmaster.TestingLogicalSlotBuilder) DirectScheduledExecutorService(org.apache.flink.runtime.testutils.DirectScheduledExecutorService) JobException(org.apache.flink.runtime.JobException) FunctionUtils(org.apache.flink.util.function.FunctionUtils) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) Before(org.junit.Before) BlobServerOptions(org.apache.flink.configuration.BlobServerOptions) VoidBlobStore(org.apache.flink.runtime.blob.VoidBlobStore) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) LogicalSlot(org.apache.flink.runtime.jobmaster.LogicalSlot) Test(org.junit.Test) IOException(java.io.IOException) BlockingQueue(java.util.concurrent.BlockingQueue) InetSocketAddress(java.net.InetSocketAddress) SimpleAckingTaskManagerGateway(org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) List(java.util.List) ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) PermanentBlobCache(org.apache.flink.runtime.blob.PermanentBlobCache) BatchTask(org.apache.flink.runtime.operators.BatchTask) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) Assert.assertEquals(org.junit.Assert.assertEquals)
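
To isolate the eviction behavior the javadoc describes, here is a hedged sketch at the level of the blob cache alone, assuming blobServer and blobCache were created as in the setupBlobServer method of this test class (Example 13 below) with new BlobCacheSizeTracker(1L). With a one-byte limit, storing each new BLOB first untracks and deletes the previously tracked one; an evicted BLOB is not lost, since the next access simply re-downloads it from the server.

// Illustrative sketch, assuming blobServer and blobCache as in Example 13
// (size limit 1). Not part of the original test.
JobID jobId = new JobID();
PermanentBlobKey key1 = blobServer.putPermanent(jobId, new byte[] { 1, 2, 3 });
PermanentBlobKey key2 = blobServer.putPermanent(jobId, new byte[] { 4, 5, 6 });
// First access downloads and tracks BLOB 1 in the cache.
assertTrue(blobCache.getFile(jobId, key1).exists());
// Fetching BLOB 2 exceeds the 1-byte limit, so the tracked copy of BLOB 1
// is untracked and deleted before BLOB 2 is stored.
assertTrue(blobCache.getFile(jobId, key2).exists());
// BLOB 1 is gone locally but recoverable: this call re-downloads it.
assertTrue(blobCache.getFile(jobId, key1).exists());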

Example 13 with PermanentBlobCache

Use of org.apache.flink.runtime.blob.PermanentBlobCache in project flink by splunk.

From the class DefaultExecutionGraphDeploymentWithSmallBlobCacheSizeLimitTest, method setupBlobServer.

@Before
@Override
public void setupBlobServer() throws IOException {
    Configuration config = new Configuration();
    // Always offload the serialized JobInformation, TaskInformation and cached
    // ShuffleDescriptors
    config.setInteger(BlobServerOptions.OFFLOAD_MINSIZE, 0);
    blobServer = new BlobServer(config, TEMPORARY_FOLDER.newFolder(), new VoidBlobStore());
    blobServer.start();
    blobWriter = blobServer;
    InetSocketAddress serverAddress = new InetSocketAddress("localhost", blobServer.getPort());
    // Set the size limit of the blob cache to 1
    BlobCacheSizeTracker blobCacheSizeTracker = new BlobCacheSizeTracker(1L);
    blobCache = new PermanentBlobCache(config, TEMPORARY_FOLDER.newFolder(), new VoidBlobStore(), serverAddress, blobCacheSizeTracker);
}
Also used : VoidBlobStore(org.apache.flink.runtime.blob.VoidBlobStore) PermanentBlobCache(org.apache.flink.runtime.blob.PermanentBlobCache) Configuration(org.apache.flink.configuration.Configuration) InetSocketAddress(java.net.InetSocketAddress) BlobCacheSizeTracker(org.apache.flink.runtime.blob.BlobCacheSizeTracker) BlobServer(org.apache.flink.runtime.blob.BlobServer) Before(org.junit.Before)

Example 14 with PermanentBlobCache

Use of org.apache.flink.runtime.blob.PermanentBlobCache in project flink by splunk.

From the class BlobLibraryCacheRecoveryITCase, method testRecoveryRegisterAndDownload.

/**
 * Tests that, with {@link HighAvailabilityMode#ZOOKEEPER}, distributed JARs are recoverable
 * from any participating BlobLibraryCacheManager.
 */
@Test
public void testRecoveryRegisterAndDownload() throws Exception {
    Random rand = new Random();
    BlobServer[] server = new BlobServer[2];
    InetSocketAddress[] serverAddress = new InetSocketAddress[2];
    BlobLibraryCacheManager[] libServer = new BlobLibraryCacheManager[2];
    PermanentBlobCache cache = null;
    BlobStoreService blobStoreService = null;
    Configuration config = new Configuration();
    config.setString(HighAvailabilityOptions.HA_MODE, "ZOOKEEPER");
    config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.newFolder().getAbsolutePath());
    config.setLong(BlobServerOptions.CLEANUP_INTERVAL, 3_600L);
    final ExecutorService executorService = Executors.newSingleThreadExecutor();
    try {
        blobStoreService = BlobUtils.createBlobStoreFromConfig(config);
        final BlobLibraryCacheManager.ClassLoaderFactory classLoaderFactory = BlobLibraryCacheManager.defaultClassLoaderFactory(FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST, new String[0], null, true);
        for (int i = 0; i < server.length; i++) {
            server[i] = new BlobServer(config, temporaryFolder.newFolder(), blobStoreService);
            server[i].start();
            serverAddress[i] = new InetSocketAddress("localhost", server[i].getPort());
            libServer[i] = new BlobLibraryCacheManager(server[i], classLoaderFactory);
        }
        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);
        ArrayList<PermanentBlobKey> keys = new ArrayList<>(2);
        JobID jobId = new JobID();
        // Upload some data (libraries)
        // Request 1
        keys.add(server[0].putPermanent(jobId, expected));
        byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);
        // Request 2
        keys.add(server[0].putPermanent(jobId, expected2));
        // The cache
        cache = new PermanentBlobCache(config, temporaryFolder.newFolder(), blobStoreService, serverAddress[0]);
        // Register uploaded libraries
        final LibraryCacheManager.ClassLoaderLease classLoaderLease = libServer[0].registerClassLoaderLease(jobId);
        classLoaderLease.getOrResolveClassLoader(keys, Collections.emptyList());
        // Verify key 1
        File f = cache.getFile(jobId, keys.get(0));
        assertEquals(expected.length, f.length());
        try (FileInputStream fis = new FileInputStream(f)) {
            for (int i = 0; i < expected.length && fis.available() > 0; i++) {
                assertEquals(expected[i], (byte) fis.read());
            }
            assertEquals(0, fis.available());
        }
        // Shutdown cache and start with other server
        cache.close();
        cache = new PermanentBlobCache(config, temporaryFolder.newFolder(), blobStoreService, serverAddress[1]);
        // Verify key 1
        f = cache.getFile(jobId, keys.get(0));
        assertEquals(expected.length, f.length());
        try (FileInputStream fis = new FileInputStream(f)) {
            for (int i = 0; i < expected.length && fis.available() > 0; i++) {
                assertEquals(expected[i], (byte) fis.read());
            }
            assertEquals(0, fis.available());
        }
        // Verify key 2
        f = cache.getFile(jobId, keys.get(1));
        assertEquals(expected2.length, f.length());
        try (FileInputStream fis = new FileInputStream(f)) {
            for (int i = 0; i < expected2.length && fis.available() > 0; i++) {
                assertEquals(expected2[i], (byte) fis.read());
            }
            assertEquals(0, fis.available());
        }
        // Remove blobs again
        server[1].globalCleanupAsync(jobId, executorService).join();
        // Verify everything is clean below recoveryDir/<cluster_id>
        final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
        String haBlobStorePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH);
        File haBlobStoreDir = new File(haBlobStorePath, clusterId);
        File[] recoveryFiles = haBlobStoreDir.listFiles();
        assertNotNull("HA storage directory does not exist", recoveryFiles);
        assertEquals("Unclean state backend: " + Arrays.toString(recoveryFiles), 0, recoveryFiles.length);
    } finally {
        assertThat(executorService.shutdownNow(), IsEmptyCollection.empty());
        for (BlobLibraryCacheManager s : libServer) {
            if (s != null) {
                s.shutdown();
            }
        }
        for (BlobServer s : server) {
            if (s != null) {
                s.close();
            }
        }
        if (cache != null) {
            cache.close();
        }
        if (blobStoreService != null) {
            blobStoreService.closeAndCleanupAllData();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) InetSocketAddress(java.net.InetSocketAddress) ArrayList(java.util.ArrayList) BlobStoreService(org.apache.flink.runtime.blob.BlobStoreService) FileInputStream(java.io.FileInputStream) PermanentBlobCache(org.apache.flink.runtime.blob.PermanentBlobCache) Random(java.util.Random) PermanentBlobKey(org.apache.flink.runtime.blob.PermanentBlobKey) ExecutorService(java.util.concurrent.ExecutorService) BlobServer(org.apache.flink.runtime.blob.BlobServer) File(java.io.File) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
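
The property this test relies on is that permanent BLOBs uploaded through either server are also persisted through the shared BlobStoreService into the HA storage path, so a cache pointed at any participating server can resolve a key regardless of where it was uploaded. A condensed sketch of that failover path, reusing config, blobStoreService, server and serverAddress from the test above (illustrative fragment, not a standalone test):

// Condensed failover sketch, assuming the setup from the test above.
JobID jobId = new JobID();
// Upload through the first server; the BLOB is also written to the HA
// blob store backing both servers.
PermanentBlobKey key = server[0].putPermanent(jobId, new byte[] { 42 });
// A cache pointed at the second server can still materialize the key,
// because that server falls back to the shared HA storage.
PermanentBlobCache failoverCache = new PermanentBlobCache(config, temporaryFolder.newFolder(), blobStoreService, serverAddress[1]);
try {
    assertEquals(1, failoverCache.getFile(jobId, key).length());
} finally {
    failoverCache.close();
}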

Example 15 with PermanentBlobCache

Use of org.apache.flink.runtime.blob.PermanentBlobCache in project flink by splunk.

From the class BlobLibraryCacheManagerTest, method testRegisterAndDownload.

@Test
public void testRegisterAndDownload() throws IOException {
    // setWritable doesn't work on Windows.
    assumeTrue(!OperatingSystem.isWindows());
    JobID jobId = new JobID();
    BlobServer server = null;
    PermanentBlobCache cache = null;
    BlobLibraryCacheManager libCache = null;
    File cacheDir = null;
    try {
        // create the blob transfer services
        Configuration config = new Configuration();
        config.setLong(BlobServerOptions.CLEANUP_INTERVAL, 1_000_000L);
        server = new BlobServer(config, temporaryFolder.newFolder(), new VoidBlobStore());
        server.start();
        InetSocketAddress serverAddress = new InetSocketAddress("localhost", server.getPort());
        cache = new PermanentBlobCache(config, temporaryFolder.newFolder(), new VoidBlobStore(), serverAddress);
        // upload some meaningless data to the server
        PermanentBlobKey dataKey1 = server.putPermanent(jobId, new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 });
        PermanentBlobKey dataKey2 = server.putPermanent(jobId, new byte[] { 11, 12, 13, 14, 15, 16, 17, 18 });
        libCache = createBlobLibraryCacheManager(cache);
        assertEquals(0, libCache.getNumberOfManagedJobs());
        checkFileCountForJob(2, jobId, server);
        checkFileCountForJob(0, jobId, cache);
        // first try to access a non-existing entry
        assertEquals(0, libCache.getNumberOfReferenceHolders(new JobID()));
        // register some BLOBs as libraries
        {
            Collection<PermanentBlobKey> keys = Collections.singleton(dataKey1);
            cache.registerJob(jobId);
            final LibraryCacheManager.ClassLoaderLease classLoaderLease1 = libCache.registerClassLoaderLease(jobId);
            final UserCodeClassLoader classLoader1 = classLoaderLease1.getOrResolveClassLoader(keys, Collections.emptyList());
            assertEquals(1, libCache.getNumberOfManagedJobs());
            assertEquals(1, libCache.getNumberOfReferenceHolders(jobId));
            assertEquals(1, checkFilesExist(jobId, keys, cache, true));
            checkFileCountForJob(2, jobId, server);
            checkFileCountForJob(1, jobId, cache);
            final LibraryCacheManager.ClassLoaderLease classLoaderLease2 = libCache.registerClassLoaderLease(jobId);
            final UserCodeClassLoader classLoader2 = classLoaderLease2.getOrResolveClassLoader(keys, Collections.emptyList());
            assertThat(classLoader1, sameInstance(classLoader2));
            assertEquals(1, libCache.getNumberOfManagedJobs());
            assertEquals(2, libCache.getNumberOfReferenceHolders(jobId));
            assertEquals(1, checkFilesExist(jobId, keys, cache, true));
            checkFileCountForJob(2, jobId, server);
            checkFileCountForJob(1, jobId, cache);
            // un-register the job
            classLoaderLease1.release();
            // still one task
            assertEquals(1, libCache.getNumberOfManagedJobs());
            assertEquals(1, libCache.getNumberOfReferenceHolders(jobId));
            assertEquals(1, checkFilesExist(jobId, keys, cache, true));
            checkFileCountForJob(2, jobId, server);
            checkFileCountForJob(1, jobId, cache);
            // unregister the task registration
            classLoaderLease2.release();
            assertEquals(0, libCache.getNumberOfManagedJobs());
            assertEquals(0, libCache.getNumberOfReferenceHolders(jobId));
            // changing the libCache registration does not influence the BLOB stores...
            checkFileCountForJob(2, jobId, server);
            checkFileCountForJob(1, jobId, cache);
            cache.releaseJob(jobId);
            // library is still cached (but not associated with job any more)
            checkFileCountForJob(2, jobId, server);
            checkFileCountForJob(1, jobId, cache);
        }
        // see BlobUtils for the directory layout
        cacheDir = cache.getStorageLocation(jobId, new PermanentBlobKey()).getParentFile();
        assertTrue(cacheDir.exists());
        // make sure no further blobs can be downloaded by removing the write
        // permissions from the directory
        assertTrue("Could not remove write permissions from cache directory", cacheDir.setWritable(false, false));
        // since we cannot download this library any more, this call should fail
        try {
            cache.registerJob(jobId);
            final LibraryCacheManager.ClassLoaderLease classLoaderLease = libCache.registerClassLoaderLease(jobId);
            classLoaderLease.getOrResolveClassLoader(Collections.singleton(dataKey2), Collections.emptyList());
            fail("This should fail with an IOException");
        } catch (IOException e) {
            // splendid!
            cache.releaseJob(jobId);
        }
    } finally {
        if (cacheDir != null) {
            if (!cacheDir.setWritable(true, false)) {
                System.err.println("Could not re-add write permissions to cache directory.");
            }
        }
        if (cache != null) {
            cache.close();
        }
        if (libCache != null) {
            libCache.shutdown();
        }
        if (server != null) {
            server.close();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) InetSocketAddress(java.net.InetSocketAddress) IOException(java.io.IOException) UserCodeClassLoader(org.apache.flink.util.UserCodeClassLoader) VoidBlobStore(org.apache.flink.runtime.blob.VoidBlobStore) PermanentBlobCache(org.apache.flink.runtime.blob.PermanentBlobCache) PermanentBlobKey(org.apache.flink.runtime.blob.PermanentBlobKey) Collection(java.util.Collection) BlobServer(org.apache.flink.runtime.blob.BlobServer) File(java.io.File) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
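
Stripped of the assertions, the reference-counting protocol the test exercises can be distilled as follows. The sketch reuses cache, libCache, jobId and dataKey1 from the test above and is illustrative rather than part of it.

// Distilled lease lifecycle, assuming cache, libCache, jobId and dataKey1
// from the test above. Illustrative only.
cache.registerJob(jobId);
// Each lease is one reference holder; the first resolution downloads the
// BLOBs and builds the user-code class loader for the job.
LibraryCacheManager.ClassLoaderLease lease = libCache.registerClassLoaderLease(jobId);
UserCodeClassLoader loader = lease.getOrResolveClassLoader(Collections.singleton(dataKey1), Collections.emptyList());
// Releasing the last lease drops the managed job from the library cache,
// but the downloaded BLOB files stay in the blob cache until the job is
// released there as well.
lease.release();
cache.releaseJob(jobId);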

Aggregations

InetSocketAddress (java.net.InetSocketAddress) 21
Configuration (org.apache.flink.configuration.Configuration) 21
BlobServer (org.apache.flink.runtime.blob.BlobServer) 21
PermanentBlobCache (org.apache.flink.runtime.blob.PermanentBlobCache) 21
VoidBlobStore (org.apache.flink.runtime.blob.VoidBlobStore) 18
Test (org.junit.Test) 15
ArrayList (java.util.ArrayList) 12
JobID (org.apache.flink.api.common.JobID) 12
PermanentBlobKey (org.apache.flink.runtime.blob.PermanentBlobKey) 12
UserCodeClassLoader (org.apache.flink.util.UserCodeClassLoader) 9
Before (org.junit.Before) 9
File (java.io.File) 6
IOException (java.io.IOException) 6
URL (java.net.URL) 6
BlobCacheSizeTracker (org.apache.flink.runtime.blob.BlobCacheSizeTracker) 6
FileInputStream (java.io.FileInputStream) 3
Collection (java.util.Collection) 3
List (java.util.List) 3
Random (java.util.Random) 3
ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue) 3