Example 11 with PermanentBlobKey

Use of org.apache.flink.runtime.blob.PermanentBlobKey in project flink by apache.

From the class IntermediateResult, the method clearCachedInformationForPartitionGroup:

public void clearCachedInformationForPartitionGroup(ConsumedPartitionGroup consumedPartitionGroup) {
    // When a ConsumedPartitionGroup changes, the cache of ShuffleDescriptors for this
    // partition group is no longer valid and needs to be removed.
    // 
    // Currently, there are two scenarios:
    // 1. The ConsumedPartitionGroup is released
    // 2. Its producer encounters a failover
    // Remove the cache for the ConsumedPartitionGroup and notify the BLOB writer to delete the
    // cache if it is offloaded
    final MaybeOffloaded<ShuffleDescriptor[]> cache = this.shuffleDescriptorCache.remove(consumedPartitionGroup);
    if (cache instanceof Offloaded) {
        PermanentBlobKey blobKey = ((Offloaded<ShuffleDescriptor[]>) cache).serializedValueKey;
        this.producer.getGraph().deleteBlobs(Collections.singletonList(blobKey));
    }
}
Also used: Offloaded (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor.Offloaded), MaybeOffloaded (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor.MaybeOffloaded), PermanentBlobKey (org.apache.flink.runtime.blob.PermanentBlobKey), ShuffleDescriptor (org.apache.flink.runtime.shuffle.ShuffleDescriptor)
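The instanceof check in this method is the standard way to unwrap a MaybeOffloaded value. As a minimal sketch of that pattern (the class and method names here are hypothetical, introduced only for illustration; the public serializedValueKey field is the one accessed above):

import java.util.Optional;

import org.apache.flink.runtime.blob.PermanentBlobKey;
import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor.MaybeOffloaded;
import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor.Offloaded;

class OffloadedKeySketch {

    // If a cached value was offloaded to the BLOB store, surface its key so
    // the caller can ask the BLOB writer to delete the backing BLOB.
    static <T> Optional<PermanentBlobKey> offloadedKeyOf(MaybeOffloaded<T> value) {
        if (value instanceof Offloaded) {
            return Optional.of(((Offloaded<T>) value).serializedValueKey);
        }
        return Optional.empty();
    }
}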

Example 12 with PermanentBlobKey

Use of org.apache.flink.runtime.blob.PermanentBlobKey in project flink by apache.

From the class DispatcherTest, the method testOnlyRecoveredJobsAreRetainedInTheBlobServer:

@Test
public void testOnlyRecoveredJobsAreRetainedInTheBlobServer() throws Exception {
    final JobID jobId1 = new JobID();
    final JobID jobId2 = new JobID();
    final byte[] fileContent = { 1, 2, 3, 4 };
    final BlobServer blobServer = getBlobServer();
    final PermanentBlobKey blobKey1 = blobServer.putPermanent(jobId1, fileContent);
    final PermanentBlobKey blobKey2 = blobServer.putPermanent(jobId2, fileContent);
    dispatcher = createTestingDispatcherBuilder().setRecoveredJobs(Collections.singleton(new JobGraph(jobId1, "foobar"))).build();
    Assertions.assertThat(blobServer.getFile(jobId1, blobKey1)).hasBinaryContent(fileContent);
    Assertions.assertThatThrownBy(() -> blobServer.getFile(jobId2, blobKey2)).isInstanceOf(NoSuchFileException.class);
}
Also used: JobGraph (org.apache.flink.runtime.jobgraph.JobGraph), PermanentBlobKey (org.apache.flink.runtime.blob.PermanentBlobKey), BlobServer (org.apache.flink.runtime.blob.BlobServer), JobID (org.apache.flink.api.common.JobID), Test (org.junit.Test)
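Stripped of the test harness, the put/get round trip this test exercises looks roughly like the sketch below. It uses only calls that appear in the examples on this page (putPermanent, getFile, close); the storage directory is a placeholder.

import java.io.File;

import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.blob.BlobServer;
import org.apache.flink.runtime.blob.PermanentBlobKey;
import org.apache.flink.runtime.blob.VoidBlobStore;

class PermanentBlobRoundTripSketch {

    public static void main(String[] args) throws Exception {
        // VoidBlobStore: no high-availability backing store.
        BlobServer server =
                new BlobServer(new Configuration(), new File("/tmp/blob-storage"), new VoidBlobStore());
        try {
            server.start();
            JobID jobId = new JobID();
            // Upload returns a content-addressed permanent key scoped to the job.
            PermanentBlobKey key = server.putPermanent(jobId, new byte[] { 1, 2, 3, 4 });
            // The same (jobId, key) pair resolves back to a local file.
            File stored = server.getFile(jobId, key);
            System.out.println(stored + ": " + stored.length() + " bytes");
        } finally {
            server.close();
        }
    }
}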

Example 13 with PermanentBlobKey

Use of org.apache.flink.runtime.blob.PermanentBlobKey in project flink by apache.

From the class BlobLibraryCacheManagerTest, the method testLibraryCacheManagerDifferentJobsCleanup:

/**
 * Tests that the {@link BlobLibraryCacheManager} cleans up after the class loader leases for
 * different jobs are closed.
 */
@Test
public void testLibraryCacheManagerDifferentJobsCleanup() throws Exception {
    JobID jobId1 = new JobID();
    JobID jobId2 = new JobID();
    List<PermanentBlobKey> keys1 = new ArrayList<>();
    List<PermanentBlobKey> keys2 = new ArrayList<>();
    BlobServer server = null;
    PermanentBlobCache cache = null;
    BlobLibraryCacheManager libCache = null;
    final byte[] buf = new byte[128];
    try {
        Configuration config = new Configuration();
        config.setLong(BlobServerOptions.CLEANUP_INTERVAL, 1L);
        server = new BlobServer(config, temporaryFolder.newFolder(), new VoidBlobStore());
        server.start();
        InetSocketAddress serverAddress = new InetSocketAddress("localhost", server.getPort());
        cache = new PermanentBlobCache(config, temporaryFolder.newFolder(), new VoidBlobStore(), serverAddress);
        keys1.add(server.putPermanent(jobId1, buf));
        buf[0] += 1;
        keys1.add(server.putPermanent(jobId1, buf));
        keys2.add(server.putPermanent(jobId2, buf));
        libCache = createBlobLibraryCacheManager(cache);
        cache.registerJob(jobId1);
        cache.registerJob(jobId2);
        assertEquals(0, libCache.getNumberOfManagedJobs());
        assertEquals(0, libCache.getNumberOfReferenceHolders(jobId1));
        checkFileCountForJob(2, jobId1, server);
        checkFileCountForJob(0, jobId1, cache);
        checkFileCountForJob(1, jobId2, server);
        checkFileCountForJob(0, jobId2, cache);
        final LibraryCacheManager.ClassLoaderLease classLoaderLeaseJob1 = libCache.registerClassLoaderLease(jobId1);
        final UserCodeClassLoader classLoader1 = classLoaderLeaseJob1.getOrResolveClassLoader(keys1, Collections.emptyList());
        assertEquals(1, libCache.getNumberOfManagedJobs());
        assertEquals(1, libCache.getNumberOfReferenceHolders(jobId1));
        assertEquals(0, libCache.getNumberOfReferenceHolders(jobId2));
        assertEquals(2, checkFilesExist(jobId1, keys1, cache, true));
        checkFileCountForJob(2, jobId1, server);
        checkFileCountForJob(2, jobId1, cache);
        assertEquals(0, checkFilesExist(jobId2, keys2, cache, false));
        checkFileCountForJob(1, jobId2, server);
        checkFileCountForJob(0, jobId2, cache);
        final LibraryCacheManager.ClassLoaderLease classLoaderLeaseJob2 = libCache.registerClassLoaderLease(jobId2);
        final UserCodeClassLoader classLoader2 = classLoaderLeaseJob2.getOrResolveClassLoader(keys2, Collections.emptyList());
        assertThat(classLoader1, not(sameInstance(classLoader2)));
        try {
            classLoaderLeaseJob2.getOrResolveClassLoader(keys1, Collections.<URL>emptyList());
            fail("Should fail with an IllegalStateException");
        } catch (IllegalStateException e) {
        // that's what we want
        }
        try {
            classLoaderLeaseJob2.getOrResolveClassLoader(keys2, Collections.singletonList(new URL("file:///tmp/does-not-exist")));
            fail("Should fail with an IllegalStateException");
        } catch (IllegalStateException e) {
        // that's what we want
        }
        assertEquals(2, libCache.getNumberOfManagedJobs());
        assertEquals(1, libCache.getNumberOfReferenceHolders(jobId1));
        assertEquals(1, libCache.getNumberOfReferenceHolders(jobId2));
        assertEquals(2, checkFilesExist(jobId1, keys1, cache, true));
        checkFileCountForJob(2, jobId1, server);
        checkFileCountForJob(2, jobId1, cache);
        assertEquals(1, checkFilesExist(jobId2, keys2, cache, true));
        checkFileCountForJob(1, jobId2, server);
        checkFileCountForJob(1, jobId2, cache);
        classLoaderLeaseJob1.release();
        assertEquals(1, libCache.getNumberOfManagedJobs());
        assertEquals(0, libCache.getNumberOfReferenceHolders(jobId1));
        assertEquals(1, libCache.getNumberOfReferenceHolders(jobId2));
        assertEquals(2, checkFilesExist(jobId1, keys1, cache, true));
        checkFileCountForJob(2, jobId1, server);
        checkFileCountForJob(2, jobId1, cache);
        assertEquals(1, checkFilesExist(jobId2, keys2, cache, true));
        checkFileCountForJob(1, jobId2, server);
        checkFileCountForJob(1, jobId2, cache);
        classLoaderLeaseJob2.release();
        assertEquals(0, libCache.getNumberOfManagedJobs());
        assertEquals(0, libCache.getNumberOfReferenceHolders(jobId1));
        assertEquals(0, libCache.getNumberOfReferenceHolders(jobId2));
        assertEquals(2, checkFilesExist(jobId1, keys1, cache, true));
        checkFileCountForJob(2, jobId1, server);
        checkFileCountForJob(2, jobId1, cache);
        assertEquals(1, checkFilesExist(jobId2, keys2, cache, true));
        checkFileCountForJob(1, jobId2, server);
        checkFileCountForJob(1, jobId2, cache);
    // Only PermanentBlobCache#releaseJob() cleans up files (tested in
    // BlobCacheCleanupTest, among others).
    } finally {
        if (libCache != null) {
            libCache.shutdown();
        }
        // should have been closed by the libraryCacheManager, but just in case
        if (cache != null) {
            cache.close();
        }
        if (server != null) {
            server.close();
        }
    }
}
Also used: Configuration (org.apache.flink.configuration.Configuration), InetSocketAddress (java.net.InetSocketAddress), ArrayList (java.util.ArrayList), URL (java.net.URL), UserCodeClassLoader (org.apache.flink.util.UserCodeClassLoader), VoidBlobStore (org.apache.flink.runtime.blob.VoidBlobStore), PermanentBlobCache (org.apache.flink.runtime.blob.PermanentBlobCache), PermanentBlobKey (org.apache.flink.runtime.blob.PermanentBlobKey), BlobServer (org.apache.flink.runtime.blob.BlobServer), JobID (org.apache.flink.api.common.JobID), Test (org.junit.Test)
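The lease lifecycle the test walks through, reduced to its essentials: register a lease, resolve the class loader once, release when done. A minimal sketch assuming an already constructed BlobLibraryCacheManager (the test builds one through a createBlobLibraryCacheManager helper); the user class name is hypothetical.

import java.util.Collections;
import java.util.List;

import org.apache.flink.api.common.JobID;
import org.apache.flink.runtime.blob.PermanentBlobKey;
import org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager;
import org.apache.flink.runtime.execution.librarycache.LibraryCacheManager;
import org.apache.flink.util.UserCodeClassLoader;

class ClassLoaderLeaseSketch {

    static void runWithUserCode(
            BlobLibraryCacheManager libCache, JobID jobId, List<PermanentBlobKey> keys)
            throws Exception {
        LibraryCacheManager.ClassLoaderLease lease = libCache.registerClassLoaderLease(jobId);
        try {
            // The first resolve pins this set of keys; resolving the same lease with
            // different keys or classpaths throws IllegalStateException, as asserted above.
            UserCodeClassLoader loader = lease.getOrResolveClassLoader(keys, Collections.emptyList());
            // Hypothetical user entry point, loaded through the leased class loader.
            Class<?> entry = Class.forName("com.example.UserJob", true, loader.asClassLoader());
        } finally {
            // Releasing the last lease for a job lets the manager stop managing it,
            // which is what getNumberOfManagedJobs() tracks in the assertions above.
            lease.release();
        }
    }
}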

Example 14 with PermanentBlobKey

Use of org.apache.flink.runtime.blob.PermanentBlobKey in project flink by apache.

From the class BlobLibraryCacheRecoveryITCase, the method testRecoveryRegisterAndDownload:

/**
 * Tests that with {@link HighAvailabilityMode#ZOOKEEPER} distributed JARs are recoverable from
 * any participating BlobLibraryCacheManager.
 */
@Test
public void testRecoveryRegisterAndDownload() throws Exception {
    Random rand = new Random();
    BlobServer[] server = new BlobServer[2];
    InetSocketAddress[] serverAddress = new InetSocketAddress[2];
    BlobLibraryCacheManager[] libServer = new BlobLibraryCacheManager[2];
    PermanentBlobCache cache = null;
    BlobStoreService blobStoreService = null;
    Configuration config = new Configuration();
    config.setString(HighAvailabilityOptions.HA_MODE, "ZOOKEEPER");
    config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.newFolder().getAbsolutePath());
    config.setLong(BlobServerOptions.CLEANUP_INTERVAL, 3_600L);
    final ExecutorService executorService = Executors.newSingleThreadExecutor();
    try {
        blobStoreService = BlobUtils.createBlobStoreFromConfig(config);
        final BlobLibraryCacheManager.ClassLoaderFactory classLoaderFactory = BlobLibraryCacheManager.defaultClassLoaderFactory(FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST, new String[0], null, true);
        for (int i = 0; i < server.length; i++) {
            server[i] = new BlobServer(config, temporaryFolder.newFolder(), blobStoreService);
            server[i].start();
            serverAddress[i] = new InetSocketAddress("localhost", server[i].getPort());
            libServer[i] = new BlobLibraryCacheManager(server[i], classLoaderFactory);
        }
        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);
        ArrayList<PermanentBlobKey> keys = new ArrayList<>(2);
        JobID jobId = new JobID();
        // Upload some data (libraries)
        // Request 1
        keys.add(server[0].putPermanent(jobId, expected));
        byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);
        // Request 2
        keys.add(server[0].putPermanent(jobId, expected2));
        // The cache
        cache = new PermanentBlobCache(config, temporaryFolder.newFolder(), blobStoreService, serverAddress[0]);
        // Register uploaded libraries
        final LibraryCacheManager.ClassLoaderLease classLoaderLease = libServer[0].registerClassLoaderLease(jobId);
        classLoaderLease.getOrResolveClassLoader(keys, Collections.emptyList());
        // Verify key 1
        File f = cache.getFile(jobId, keys.get(0));
        assertEquals(expected.length, f.length());
        try (FileInputStream fis = new FileInputStream(f)) {
            for (int i = 0; i < expected.length && fis.available() > 0; i++) {
                assertEquals(expected[i], (byte) fis.read());
            }
            assertEquals(0, fis.available());
        }
        // Shutdown cache and start with other server
        cache.close();
        cache = new PermanentBlobCache(config, temporaryFolder.newFolder(), blobStoreService, serverAddress[1]);
        // Verify key 1
        f = cache.getFile(jobId, keys.get(0));
        assertEquals(expected.length, f.length());
        try (FileInputStream fis = new FileInputStream(f)) {
            for (int i = 0; i < expected.length && fis.available() > 0; i++) {
                assertEquals(expected[i], (byte) fis.read());
            }
            assertEquals(0, fis.available());
        }
        // Verify key 2
        f = cache.getFile(jobId, keys.get(1));
        assertEquals(expected2.length, f.length());
        try (FileInputStream fis = new FileInputStream(f)) {
            for (int i = 0; i < 256 && fis.available() > 0; i++) {
                assertEquals(expected2[i], (byte) fis.read());
            }
            assertEquals(0, fis.available());
        }
        // Remove blobs again
        server[1].globalCleanupAsync(jobId, executorService).join();
        // Verify everything is clean below recoveryDir/<cluster_id>
        final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
        String haBlobStorePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH);
        File haBlobStoreDir = new File(haBlobStorePath, clusterId);
        File[] recoveryFiles = haBlobStoreDir.listFiles();
        assertNotNull("HA storage directory does not exist", recoveryFiles);
        assertEquals("Unclean state backend: " + Arrays.toString(recoveryFiles), 0, recoveryFiles.length);
    } finally {
        assertThat(executorService.shutdownNow(), IsEmptyCollection.empty());
        for (BlobLibraryCacheManager s : libServer) {
            if (s != null) {
                s.shutdown();
            }
        }
        for (BlobServer s : server) {
            if (s != null) {
                s.close();
            }
        }
        if (cache != null) {
            cache.close();
        }
        if (blobStoreService != null) {
            blobStoreService.closeAndCleanupAllData();
        }
    }
}
Also used: Configuration (org.apache.flink.configuration.Configuration), InetSocketAddress (java.net.InetSocketAddress), ArrayList (java.util.ArrayList), BlobStoreService (org.apache.flink.runtime.blob.BlobStoreService), FileInputStream (java.io.FileInputStream), PermanentBlobCache (org.apache.flink.runtime.blob.PermanentBlobCache), Random (java.util.Random), PermanentBlobKey (org.apache.flink.runtime.blob.PermanentBlobKey), ExecutorService (java.util.concurrent.ExecutorService), BlobServer (org.apache.flink.runtime.blob.BlobServer), File (java.io.File), JobID (org.apache.flink.api.common.JobID), Test (org.junit.Test)
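The failover step at the heart of this test, as a standalone sketch: fetch a previously uploaded key through a different BlobServer that shares the same HA store. The constructor and getFile calls mirror the test; the cache directory is a placeholder.

import java.io.File;
import java.net.InetSocketAddress;

import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.blob.BlobStoreService;
import org.apache.flink.runtime.blob.PermanentBlobCache;
import org.apache.flink.runtime.blob.PermanentBlobKey;

class HaRecoveryFetchSketch {

    // Resolve a BLOB through a server other than the one it was uploaded to; with
    // ZooKeeper HA the bytes are recovered from the shared HA_STORAGE_PATH.
    static long fetchViaOtherServer(
            Configuration config,
            BlobStoreService blobStoreService,
            InetSocketAddress otherServerAddress,
            JobID jobId,
            PermanentBlobKey key)
            throws Exception {
        PermanentBlobCache cache =
                new PermanentBlobCache(config, new File("/tmp/blob-cache"), blobStoreService, otherServerAddress);
        try {
            // The returned file lives in the cache's local storage directory.
            File recovered = cache.getFile(jobId, key);
            return recovered.length();
        } finally {
            cache.close();
        }
    }
}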

Example 15 with PermanentBlobKey

Use of org.apache.flink.runtime.blob.PermanentBlobKey in project flink by apache.

From the class TaskDeploymentDescriptor, the method loadBigData:

/**
 * Loads externalized data from the BLOB store back to the object.
 *
 * @param blobService the blob store to use (may be <tt>null</tt> if {@link
 *     #serializedJobInformation} and {@link #serializedTaskInformation} are non-<tt>null</tt>)
 * @throws IOException during errors retrieving or reading the BLOBs
 * @throws ClassNotFoundException Class of a serialized object cannot be found.
 */
public void loadBigData(@Nullable PermanentBlobService blobService) throws IOException, ClassNotFoundException {
    // here, if this fails, we need to throw the exception as there is no backup path anymore
    if (serializedJobInformation instanceof Offloaded) {
        PermanentBlobKey jobInfoKey = ((Offloaded<JobInformation>) serializedJobInformation).serializedValueKey;
        Preconditions.checkNotNull(blobService);
        final File dataFile = blobService.getFile(jobId, jobInfoKey);
        // NOTE: Do not delete the job info BLOB since it may be needed again during recovery.
        // (it is deleted automatically on the BLOB server and cache when the job
        // enters a terminal state)
        SerializedValue<JobInformation> serializedValue = SerializedValue.fromBytes(FileUtils.readAllBytes(dataFile.toPath()));
        serializedJobInformation = new NonOffloaded<>(serializedValue);
    }
    // re-integrate offloaded task info from blob
    if (serializedTaskInformation instanceof Offloaded) {
        PermanentBlobKey taskInfoKey = ((Offloaded<TaskInformation>) serializedTaskInformation).serializedValueKey;
        Preconditions.checkNotNull(blobService);
        final File dataFile = blobService.getFile(jobId, taskInfoKey);
        // NOTE: Do not delete the task info BLOB since it may be needed again during recovery.
        // (it is deleted automatically on the BLOB server and cache when the job
        // enters a terminal state)
        SerializedValue<TaskInformation> serializedValue = SerializedValue.fromBytes(FileUtils.readAllBytes(dataFile.toPath()));
        serializedTaskInformation = new NonOffloaded<>(serializedValue);
    }
    for (InputGateDeploymentDescriptor inputGate : inputGates) {
        inputGate.loadBigData(blobService, jobId);
    }
    // make sure that the serialized job and task information fields are filled
    Preconditions.checkNotNull(serializedJobInformation);
    Preconditions.checkNotNull(serializedTaskInformation);
}
Also used: JobInformation (org.apache.flink.runtime.executiongraph.JobInformation), TaskInformation (org.apache.flink.runtime.executiongraph.TaskInformation), PermanentBlobKey (org.apache.flink.runtime.blob.PermanentBlobKey), File (java.io.File)
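The read-back step in isolation, as a minimal sketch. It uses only calls shown above (getFile, FileUtils.readAllBytes, SerializedValue.fromBytes); the wrapping class and method are hypothetical.

import java.io.File;

import org.apache.flink.api.common.JobID;
import org.apache.flink.runtime.blob.PermanentBlobKey;
import org.apache.flink.runtime.blob.PermanentBlobService;
import org.apache.flink.util.FileUtils;
import org.apache.flink.util.SerializedValue;

class OffloadReadBackSketch {

    // Re-materialize an offloaded value: resolve the key to a local file via the
    // BLOB service, then wrap the raw bytes back into a SerializedValue.
    static <T> SerializedValue<T> readBack(
            PermanentBlobService blobService, JobID jobId, PermanentBlobKey key) throws Exception {
        File dataFile = blobService.getFile(jobId, key);
        return SerializedValue.fromBytes(FileUtils.readAllBytes(dataFile.toPath()));
    }
}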

Aggregations

PermanentBlobKey (org.apache.flink.runtime.blob.PermanentBlobKey): 15 uses
JobID (org.apache.flink.api.common.JobID): 8 uses
Test (org.junit.Test): 8 uses
InetSocketAddress (java.net.InetSocketAddress): 6 uses
Configuration (org.apache.flink.configuration.Configuration): 6 uses
BlobServer (org.apache.flink.runtime.blob.BlobServer): 6 uses
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 5 uses
File (java.io.File): 4 uses
PermanentBlobCache (org.apache.flink.runtime.blob.PermanentBlobCache): 4 uses
VoidBlobStore (org.apache.flink.runtime.blob.VoidBlobStore): 4 uses
ArrayList (java.util.ArrayList): 3 uses
UserCodeClassLoader (org.apache.flink.util.UserCodeClassLoader): 3 uses
URL (java.net.URL): 2 uses
Path (org.apache.flink.core.fs.Path): 2 uses
MaybeOffloaded (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor.MaybeOffloaded): 2 uses
Offloaded (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor.Offloaded): 2 uses
ShuffleDescriptor (org.apache.flink.runtime.shuffle.ShuffleDescriptor): 2 uses
FileInputStream (java.io.FileInputStream): 1 use
IOException (java.io.IOException): 1 use
Collection (java.util.Collection): 1 use