Use of org.apache.flink.runtime.blob.BlobServer in project flink by apache.
The class AbstractTaskManagerFileHandlerTest, method setup:
@BeforeClass
public static void setup() throws IOException, HandlerRequestException {
    final Configuration configuration = new Configuration();
    // BlobServer backed by a local temporary folder; VoidBlobStore disables HA storage
    blobServer = new BlobServer(configuration, temporaryFolder.newFolder(), new VoidBlobStore());
    handlerRequest = HandlerRequest.resolveParametersAndCreate(
            EmptyRequestBody.getInstance(),
            new TaskManagerFileMessageParameters(),
            Collections.singletonMap(TaskManagerIdPathParameter.KEY, EXPECTED_TASK_MANAGER_ID.getResourceIdString()),
            Collections.emptyMap(),
            Collections.emptyList());
}
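For context, here is the BlobServer lifecycle that fixtures like this rely on, as a minimal sketch (assuming a JUnit TemporaryFolder rule named temporaryFolder; start(), getPort(), and close() are the standard BlobServer API, the surrounding wiring is illustrative only):

Configuration configuration = new Configuration();
BlobServer blobServer = new BlobServer(configuration, temporaryFolder.newFolder(), new VoidBlobStore());
blobServer.start();                        // bind a server socket on a free port
InetSocketAddress address = new InetSocketAddress("localhost", blobServer.getPort());
// ... blob clients and caches connect to 'address' to fetch blobs ...
blobServer.close();                        // release the port and delete local blob storage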
Use of org.apache.flink.runtime.blob.BlobServer in project flink by apache.
The class AbstractDispatcherTest, method setUp:
@Before
public void setUp() throws Exception {
    // heartbeat interval and timeout in milliseconds
    heartbeatServices = new HeartbeatServices(1000L, 10000L);
    haServices = new TestingHighAvailabilityServices();
    haServices.setCheckpointRecoveryFactory(new StandaloneCheckpointRecoveryFactory());
    haServices.setResourceManagerLeaderRetriever(new SettableLeaderRetrievalService());
    haServices.setJobGraphStore(new StandaloneJobGraphStore());
    haServices.setJobResultStore(new EmbeddedJobResultStore());
    configuration = new Configuration();
    blobServer = new BlobServer(configuration, temporaryFolder.newFolder(), new VoidBlobStore());
}
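The fixture above opens a BlobServer, but the matching teardown is not shown on this page. A plausible counterpart (a sketch, not necessarily the class's actual @After method) closes the server so the temporary folder can be deleted:

@After
public void tearDown() throws Exception {
    // BlobServer implements Closeable; closing it frees the port and local storage
    if (blobServer != null) {
        blobServer.close();
    }
}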
Use of org.apache.flink.runtime.blob.BlobServer in project flink by apache.
The class ZooKeeperDefaultDispatcherRunnerTest, method testResourceCleanupUnderLeadershipChange:
/**
 * Tests that job resources are properly cleaned up when dispatcher leadership changes. See
 * FLINK-11665.
 */
@Test
public void testResourceCleanupUnderLeadershipChange() throws Exception {
    final TestingRpcService rpcService = testingRpcServiceResource.getTestingRpcService();
    final TestingLeaderElectionService dispatcherLeaderElectionService = new TestingLeaderElectionService();
    final CuratorFramework client =
            ZooKeeperUtils.startCuratorFramework(configuration, fatalErrorHandler).asCuratorFramework();
    try (final TestingHighAvailabilityServices highAvailabilityServices =
            new TestingHighAvailabilityServicesBuilder()
                    .setDispatcherLeaderElectionService(dispatcherLeaderElectionService)
                    .setJobMasterLeaderRetrieverFunction(jobId -> ZooKeeperUtils.createLeaderRetrievalService(client))
                    .build()) {
        final PartialDispatcherServices partialDispatcherServices =
                new PartialDispatcherServices(
                        configuration,
                        highAvailabilityServices,
                        CompletableFuture::new,
                        blobServer,
                        new TestingHeartbeatServices(),
                        UnregisteredMetricGroups::createUnregisteredJobManagerMetricGroup,
                        new MemoryExecutionGraphInfoStore(),
                        fatalErrorHandler,
                        VoidHistoryServerArchivist.INSTANCE,
                        null,
                        ForkJoinPool.commonPool(),
                        new DispatcherOperationCaches());
        final DefaultDispatcherRunnerFactory defaultDispatcherRunnerFactory =
                DefaultDispatcherRunnerFactory.createSessionRunner(SessionDispatcherFactory.INSTANCE);
        try (final DispatcherRunner dispatcherRunner =
                createDispatcherRunner(
                        rpcService,
                        dispatcherLeaderElectionService,
                        new JobPersistenceComponentFactory() {
                            @Override
                            public JobGraphStore createJobGraphStore() {
                                return createZooKeeperJobGraphStore(client);
                            }

                            @Override
                            public JobResultStore createJobResultStore() {
                                return new EmbeddedJobResultStore();
                            }
                        },
                        partialDispatcherServices,
                        defaultDispatcherRunnerFactory)) {
            // initial run
            DispatcherGateway dispatcherGateway = grantLeadership(dispatcherLeaderElectionService);
            final JobGraph jobGraph = createJobGraphWithBlobs();
            LOG.info("Initial job submission {}.", jobGraph.getJobID());
            dispatcherGateway.submitJob(jobGraph, TESTING_TIMEOUT).get();
            dispatcherLeaderElectionService.notLeader();

            // recovering submitted jobs
            LOG.info("Re-grant leadership first time.");
            dispatcherGateway = grantLeadership(dispatcherLeaderElectionService);

            LOG.info("Cancel recovered job {}.", jobGraph.getJobID());
            // cancellation of the job should remove everything
            final CompletableFuture<JobResult> jobResultFuture =
                    dispatcherGateway.requestJobResult(jobGraph.getJobID(), TESTING_TIMEOUT);
            dispatcherGateway.cancelJob(jobGraph.getJobID(), TESTING_TIMEOUT).get();

            // a successful cancellation should eventually remove all job information
            final JobResult jobResult = jobResultFuture.get();
            assertThat(jobResult.getApplicationStatus(), is(ApplicationStatus.CANCELED));

            dispatcherLeaderElectionService.notLeader();

            // check that the job has been removed from ZooKeeper
            final JobGraphStore submittedJobGraphStore = createZooKeeperJobGraphStore(client);
            CommonTestUtils.waitUntilCondition(
                    () -> submittedJobGraphStore.getJobIds().isEmpty(),
                    Deadline.fromNow(VERIFICATION_TIMEOUT),
                    20L);
        }
    }
    // check resource clean up
    assertThat(clusterHaStorageDir.listFiles(), is(emptyArray()));
}
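The helper createJobGraphWithBlobs() is not shown on this page. One plausible shape for it (a hypothetical sketch; the test's real helper may differ) builds a trivial JobGraph and attaches a permanent blob, so that a successful cancellation must also clean up blob storage:

private JobGraph createJobGraphWithBlobs() throws Exception {
    // a single no-op vertex is enough; the interesting part is the attached blob
    final JobVertex vertex = new JobVertex("test vertex");
    vertex.setInvokableClass(NoOpInvokable.class);
    vertex.setParallelism(1);
    final JobGraph jobGraph = JobGraphTestUtils.streamingJobGraph(vertex);
    // upload user-code bytes to the BlobServer and reference them from the job
    final PermanentBlobKey blobKey = blobServer.putPermanent(jobGraph.getJobID(), new byte[128]);
    jobGraph.addUserJarBlobKey(blobKey);
    return jobGraph;
}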
Use of org.apache.flink.runtime.blob.BlobServer in project flink by apache.
The class BlobLibraryCacheManagerTest, method testLibraryCacheManagerDifferentJobsCleanup:
/**
* Tests that the {@link BlobLibraryCacheManager} cleans up after the class loader leases for
* different jobs are closed.
*/
@Test
public void testLibraryCacheManagerDifferentJobsCleanup() throws Exception {
    JobID jobId1 = new JobID();
    JobID jobId2 = new JobID();
    List<PermanentBlobKey> keys1 = new ArrayList<>();
    List<PermanentBlobKey> keys2 = new ArrayList<>();
    BlobServer server = null;
    PermanentBlobCache cache = null;
    BlobLibraryCacheManager libCache = null;
    final byte[] buf = new byte[128];
    try {
        Configuration config = new Configuration();
        config.setLong(BlobServerOptions.CLEANUP_INTERVAL, 1L);
        server = new BlobServer(config, temporaryFolder.newFolder(), new VoidBlobStore());
        server.start();
        InetSocketAddress serverAddress = new InetSocketAddress("localhost", server.getPort());
        cache = new PermanentBlobCache(config, temporaryFolder.newFolder(), new VoidBlobStore(), serverAddress);

        // upload blobs: two for job 1, one for job 2
        keys1.add(server.putPermanent(jobId1, buf));
        buf[0] += 1;
        keys1.add(server.putPermanent(jobId1, buf));
        keys2.add(server.putPermanent(jobId2, buf));

        libCache = createBlobLibraryCacheManager(cache);
        cache.registerJob(jobId1);
        cache.registerJob(jobId2);

        // no lease registered yet: nothing is managed and nothing is cached locally
        assertEquals(0, libCache.getNumberOfManagedJobs());
        assertEquals(0, libCache.getNumberOfReferenceHolders(jobId1));
        checkFileCountForJob(2, jobId1, server);
        checkFileCountForJob(0, jobId1, cache);
        checkFileCountForJob(1, jobId2, server);
        checkFileCountForJob(0, jobId2, cache);

        final LibraryCacheManager.ClassLoaderLease classLoaderLeaseJob1 =
                libCache.registerClassLoaderLease(jobId1);
        final UserCodeClassLoader classLoader1 =
                classLoaderLeaseJob1.getOrResolveClassLoader(keys1, Collections.emptyList());

        // resolving the class loader for job 1 downloads its two blobs into the cache
        assertEquals(1, libCache.getNumberOfManagedJobs());
        assertEquals(1, libCache.getNumberOfReferenceHolders(jobId1));
        assertEquals(0, libCache.getNumberOfReferenceHolders(jobId2));
        assertEquals(2, checkFilesExist(jobId1, keys1, cache, true));
        checkFileCountForJob(2, jobId1, server);
        checkFileCountForJob(2, jobId1, cache);
        assertEquals(0, checkFilesExist(jobId2, keys2, cache, false));
        checkFileCountForJob(1, jobId2, server);
        checkFileCountForJob(0, jobId2, cache);

        final LibraryCacheManager.ClassLoaderLease classLoaderLeaseJob2 =
                libCache.registerClassLoaderLease(jobId2);
        final UserCodeClassLoader classLoader2 =
                classLoaderLeaseJob2.getOrResolveClassLoader(keys2, Collections.emptyList());
        assertThat(classLoader1, not(sameInstance(classLoader2)));

        // a lease's class loader is resolved only once; re-resolving with different
        // blob keys or class paths must fail
        try {
            classLoaderLeaseJob2.getOrResolveClassLoader(keys1, Collections.<URL>emptyList());
            fail("Should fail with an IllegalStateException");
        } catch (IllegalStateException e) {
            // that's what we want
        }
        try {
            classLoaderLeaseJob2.getOrResolveClassLoader(
                    keys2, Collections.singletonList(new URL("file:///tmp/does-not-exist")));
            fail("Should fail with an IllegalStateException");
        } catch (IllegalStateException e) {
            // that's what we want
        }

        assertEquals(2, libCache.getNumberOfManagedJobs());
        assertEquals(1, libCache.getNumberOfReferenceHolders(jobId1));
        assertEquals(1, libCache.getNumberOfReferenceHolders(jobId2));
        assertEquals(2, checkFilesExist(jobId1, keys1, cache, true));
        checkFileCountForJob(2, jobId1, server);
        checkFileCountForJob(2, jobId1, cache);
        assertEquals(1, checkFilesExist(jobId2, keys2, cache, true));
        checkFileCountForJob(1, jobId2, server);
        checkFileCountForJob(1, jobId2, cache);

        // releasing a lease drops the reference count but keeps the cached files
        classLoaderLeaseJob1.release();
        assertEquals(1, libCache.getNumberOfManagedJobs());
        assertEquals(0, libCache.getNumberOfReferenceHolders(jobId1));
        assertEquals(1, libCache.getNumberOfReferenceHolders(jobId2));
        assertEquals(2, checkFilesExist(jobId1, keys1, cache, true));
        checkFileCountForJob(2, jobId1, server);
        checkFileCountForJob(2, jobId1, cache);
        assertEquals(1, checkFilesExist(jobId2, keys2, cache, true));
        checkFileCountForJob(1, jobId2, server);
        checkFileCountForJob(1, jobId2, cache);

        classLoaderLeaseJob2.release();
        assertEquals(0, libCache.getNumberOfManagedJobs());
        assertEquals(0, libCache.getNumberOfReferenceHolders(jobId1));
        assertEquals(0, libCache.getNumberOfReferenceHolders(jobId2));
        assertEquals(2, checkFilesExist(jobId1, keys1, cache, true));
        checkFileCountForJob(2, jobId1, server);
        checkFileCountForJob(2, jobId1, cache);
        assertEquals(1, checkFilesExist(jobId2, keys2, cache, true));
        checkFileCountForJob(1, jobId2, server);
        checkFileCountForJob(1, jobId2, cache);

        // only PermanentBlobCache#releaseJob() cleans up the files (tested in
        // BlobCacheCleanupTest etc.)
    } finally {
        if (libCache != null) {
            libCache.shutdown();
        }
        // should have been closed by the libraryCacheManager, but just in case
        if (cache != null) {
            cache.close();
        }
        if (server != null) {
            server.close();
        }
    }
}
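Reduced to its essentials, the lease protocol this test exercises looks like the following sketch (hypothetical names jobId and blobKeys stand in for a job and its permanent blob keys; the calls are the same APIs used above):

// 1. take out a lease, which makes the job "managed" by the cache manager
LibraryCacheManager.ClassLoaderLease lease = libCache.registerClassLoaderLease(jobId);
// 2. resolve the class loader once; this downloads the job's blobs into the local cache
UserCodeClassLoader loader = lease.getOrResolveClassLoader(blobKeys, Collections.emptyList());
// 3. release the lease; the reference count drops and the cache manager forgets the job
//    once it reaches zero, but the cached files survive until PermanentBlobCache#releaseJob()
lease.release();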
Use of org.apache.flink.runtime.blob.BlobServer in project flink by apache.
The class BlobLibraryCacheRecoveryITCase, method testRecoveryRegisterAndDownload:
/**
* Tests that with {@link HighAvailabilityMode#ZOOKEEPER} distributed JARs are recoverable from
* any participating BlobLibraryCacheManager.
*/
@Test
public void testRecoveryRegisterAndDownload() throws Exception {
    Random rand = new Random();
    BlobServer[] server = new BlobServer[2];
    InetSocketAddress[] serverAddress = new InetSocketAddress[2];
    BlobLibraryCacheManager[] libServer = new BlobLibraryCacheManager[2];
    PermanentBlobCache cache = null;
    BlobStoreService blobStoreService = null;

    Configuration config = new Configuration();
    config.setString(HighAvailabilityOptions.HA_MODE, "ZOOKEEPER");
    config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.newFolder().getAbsolutePath());
    config.setLong(BlobServerOptions.CLEANUP_INTERVAL, 3_600L);

    final ExecutorService executorService = Executors.newSingleThreadExecutor();
    try {
        blobStoreService = BlobUtils.createBlobStoreFromConfig(config);
        final BlobLibraryCacheManager.ClassLoaderFactory classLoaderFactory =
                BlobLibraryCacheManager.defaultClassLoaderFactory(
                        FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST, new String[0], null, true);
        for (int i = 0; i < server.length; i++) {
            server[i] = new BlobServer(config, temporaryFolder.newFolder(), blobStoreService);
            server[i].start();
            serverAddress[i] = new InetSocketAddress("localhost", server[i].getPort());
            libServer[i] = new BlobLibraryCacheManager(server[i], classLoaderFactory);
        }

        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);

        ArrayList<PermanentBlobKey> keys = new ArrayList<>(2);
        JobID jobId = new JobID();

        // Upload some data (libraries)
        // Request 1
        keys.add(server[0].putPermanent(jobId, expected));
        byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);
        // Request 2
        keys.add(server[0].putPermanent(jobId, expected2));

        // The cache
        cache = new PermanentBlobCache(config, temporaryFolder.newFolder(), blobStoreService, serverAddress[0]);

        // Register uploaded libraries
        final LibraryCacheManager.ClassLoaderLease classLoaderLease =
                libServer[0].registerClassLoaderLease(jobId);
        classLoaderLease.getOrResolveClassLoader(keys, Collections.emptyList());

        // Verify key 1
        File f = cache.getFile(jobId, keys.get(0));
        assertEquals(expected.length, f.length());
        try (FileInputStream fis = new FileInputStream(f)) {
            for (int i = 0; i < expected.length && fis.available() > 0; i++) {
                assertEquals(expected[i], (byte) fis.read());
            }
            assertEquals(0, fis.available());
        }

        // Shutdown cache and start with other server
        cache.close();
        cache = new PermanentBlobCache(config, temporaryFolder.newFolder(), blobStoreService, serverAddress[1]);

        // Verify key 1
        f = cache.getFile(jobId, keys.get(0));
        assertEquals(expected.length, f.length());
        try (FileInputStream fis = new FileInputStream(f)) {
            for (int i = 0; i < expected.length && fis.available() > 0; i++) {
                assertEquals(expected[i], (byte) fis.read());
            }
            assertEquals(0, fis.available());
        }

        // Verify key 2
        f = cache.getFile(jobId, keys.get(1));
        assertEquals(expected2.length, f.length());
        try (FileInputStream fis = new FileInputStream(f)) {
            for (int i = 0; i < 256 && fis.available() > 0; i++) {
                assertEquals(expected2[i], (byte) fis.read());
            }
            assertEquals(0, fis.available());
        }

        // Remove blobs again
        server[1].globalCleanupAsync(jobId, executorService).join();

        // Verify everything is clean below recoveryDir/<cluster_id>
        final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
        String haBlobStorePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH);
        File haBlobStoreDir = new File(haBlobStorePath, clusterId);
        File[] recoveryFiles = haBlobStoreDir.listFiles();
        assertNotNull("HA storage directory does not exist", recoveryFiles);
        assertEquals("Unclean state backend: " + Arrays.toString(recoveryFiles), 0, recoveryFiles.length);
    } finally {
        assertThat(executorService.shutdownNow(), IsEmptyCollection.empty());
        for (BlobLibraryCacheManager s : libServer) {
            if (s != null) {
                s.shutdown();
            }
        }
        for (BlobServer s : server) {
            if (s != null) {
                s.close();
            }
        }
        if (cache != null) {
            cache.close();
        }
        if (blobStoreService != null) {
            blobStoreService.closeAndCleanupAllData();
        }
    }
}
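The recovery path this test relies on, in miniature (a sketch reusing the test's variables; 'data' is a hypothetical byte array): every putPermanent() on an HA-configured BlobServer also writes the blob to the shared HA store, so any other server in the cluster can serve it.

// upload through the first server; the blob is mirrored to the configured HA storage path
PermanentBlobKey key = server[0].putPermanent(jobId, data);
// a cache pointed at the second server still resolves the key, because server[1]
// recovers the bytes from the shared HA store rather than from server[0]
PermanentBlobCache cache2 = new PermanentBlobCache(config, temporaryFolder.newFolder(), blobStoreService, serverAddress[1]);
File recovered = cache2.getFile(jobId, key);
cache2.close();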