Example use of org.apache.flink.runtime.registration.RegistrationResponse in the Apache Flink project.
From the class JobMasterTest, method testAllocatedSlotReportDoesNotContainStaleInformation.
/**
 * Verifies that the {@link AllocatedSlotReport} passed to the task executor's heartbeat callback
 * by the {@link JobMaster} is up to date rather than stale: while the slot-offer latch has not
 * been triggered the report must be empty, and once it has been triggered the report must
 * contain exactly one slot.
 *
 * <p>This is a probabilistic test case; without the fix for FLINK-12863 it only fails when it is
 * executed repeatedly.
 */
@Test
public void testAllocatedSlotReportDoesNotContainStaleInformation() throws Exception {
    // Completed normally once verification is allowed to stop, or exceptionally with the first
    // assertion failure observed inside the heartbeat callback.
    final CompletableFuture<Void> verificationResult = new CompletableFuture<>();
    final UnresolvedTaskManagerLocation taskManagerLocation =
            new LocalUnresolvedTaskManagerLocation();
    final AtomicBoolean stopVerification = new AtomicBoolean(false);
    final OneShotLatch slotOffersReceived = new OneShotLatch();

    final TestingTaskExecutorGateway heartbeatCheckingGateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setHeartbeatJobManagerFunction(
                            (ignoredTaskManagerId, slotReport) -> {
                                try {
                                    if (!slotOffersReceived.isTriggered()) {
                                        assertThat(slotReport.getAllocatedSlotInfos(), empty());
                                    } else {
                                        assertThat(
                                                slotReport.getAllocatedSlotInfos(), hasSize(1));
                                    }
                                } catch (AssertionError violation) {
                                    // surface the failure to the test thread instead of losing
                                    // it inside the callback
                                    verificationResult.completeExceptionally(violation);
                                }
                                if (stopVerification.get()) {
                                    // no effect if a violation has already been recorded
                                    verificationResult.complete(null);
                                }
                                return FutureUtils.completedVoidFuture();
                            })
                    .createTestingTaskExecutorGateway();
    rpcService.registerGateway(heartbeatCheckingGateway.getAddress(), heartbeatCheckingGateway);

    final JobManagerSharedServices sharedServices =
            new TestingJobManagerSharedServicesBuilder().build();
    final JobGraph jobGraph = JobGraphTestUtils.singleNoOpJobGraph();
    final HeartbeatServices heartbeatServices = new HeartbeatServices(5L, 1000L);
    final TestingSlotPoolFactory slotPoolFactory =
            new TestingSlotPoolFactory(slotOffersReceived);
    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withHeartbeatServices(heartbeatServices)
                    .withSlotPoolServiceSchedulerFactory(
                            DefaultSlotPoolServiceSchedulerFactory.create(
                                    slotPoolFactory, new DefaultSchedulerFactory()))
                    .createJobMaster();
    jobMaster.start();

    try {
        final JobMasterGateway gateway = jobMaster.getSelfGateway(JobMasterGateway.class);

        // register task manager will trigger monitor heartbeat target, schedule heartbeat
        // request at interval time
        final TaskManagerRegistrationInformation registrationInformation =
                TaskManagerRegistrationInformation.create(
                        heartbeatCheckingGateway.getAddress(),
                        taskManagerLocation,
                        TestingUtils.zeroUUID());
        final CompletableFuture<RegistrationResponse> registration =
                gateway.registerTaskManager(
                        jobGraph.getJobID(), registrationInformation, testingTimeout);
        // block until the registration has completed
        registration.get();

        final SlotOffer offeredSlot = new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY);
        final CompletableFuture<Collection<SlotOffer>> acceptedOffersFuture =
                gateway.offerSlots(
                        taskManagerLocation.getResourceID(),
                        Collections.singleton(offeredSlot),
                        testingTimeout);
        final Collection<SlotOffer> acceptedOffers = acceptedOffersFuture.get();
        assertThat(acceptedOffers, containsInAnyOrder(offeredSlot));

        stopVerification.set(true);

        // rethrows any assertion violation recorded by the heartbeat callback
        verificationResult.get();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
        sharedServices.shutdown();
    }
}
Example use of org.apache.flink.runtime.registration.RegistrationResponse in the Apache Flink project.
From the class ActiveResourceManagerTest, method testStartNewWorker.
/**
 * Tests the happy path: a worker is requested from the driver, the request succeeds, and the
 * resulting task executor registers successfully.
 */
@Test
public void testStartNewWorker() throws Exception {
    new Context() {
        {
            final ResourceID workerId = ResourceID.generate();
            // captures the process spec the resource manager hands to the driver
            final CompletableFuture<TaskExecutorProcessSpec> driverRequest =
                    new CompletableFuture<>();

            driverBuilder.setRequestResourceFunction(
                    requestedSpec -> {
                        driverRequest.complete(requestedSpec);
                        return CompletableFuture.completedFuture(workerId);
                    });

            runTest(
                    () -> {
                        // received worker request, verify requesting from driver
                        CompletableFuture<Boolean> startResult =
                                runInMainThread(
                                        () ->
                                                getResourceManager()
                                                        .startNewWorker(WORKER_RESOURCE_SPEC));
                        TaskExecutorProcessSpec requestedSpec =
                                driverRequest.get(TIMEOUT_SEC, TimeUnit.SECONDS);

                        Boolean started = startResult.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(started, is(true));
                        assertThat(
                                requestedSpec,
                                is(
                                        TaskExecutorProcessUtils
                                                .processSpecFromWorkerResourceSpec(
                                                        flinkConfig, WORKER_RESOURCE_SPEC)));

                        // worker registered, verify registration succeeded
                        CompletableFuture<RegistrationResponse> registration =
                                registerTaskExecutor(workerId);
                        RegistrationResponse response =
                                registration.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(response, instanceOf(RegistrationResponse.Success.class));
                    });
        }
    };
}
Example use of org.apache.flink.runtime.registration.RegistrationResponse in the Apache Flink project.
From the class ActiveResourceManagerTest, method testStartNewWorkerFailedRequesting.
/**
 * Tests that a worker request which fails at the driver is followed by a second request with the
 * same process spec, and that the worker from the second request registers successfully.
 */
@Test
public void testStartNewWorkerFailedRequesting() throws Exception {
    new Context() {
        {
            final ResourceID workerId = ResourceID.generate();
            final AtomicInteger driverCalls = new AtomicInteger(0);

            // One slot per expected driver call: the future returned to the resource manager
            // and the future capturing the spec that call was made with.
            final List<CompletableFuture<ResourceID>> pendingResourceIds = new ArrayList<>();
            final List<CompletableFuture<TaskExecutorProcessSpec>> driverRequests =
                    new ArrayList<>();
            for (int i = 0; i < 2; i++) {
                pendingResourceIds.add(new CompletableFuture<>());
                driverRequests.add(new CompletableFuture<>());
            }

            driverBuilder.setRequestResourceFunction(
                    requestedSpec -> {
                        int call = driverCalls.getAndIncrement();
                        // at most two requests are expected
                        assertThat(call, lessThan(2));
                        driverRequests.get(call).complete(requestedSpec);
                        return pendingResourceIds.get(call);
                    });
            slotManagerBuilder.setGetRequiredResourcesSupplier(
                    () -> Collections.singletonMap(WORKER_RESOURCE_SPEC, 1));

            runTest(
                    () -> {
                        // received worker request, verify requesting from driver
                        CompletableFuture<Boolean> startResult =
                                runInMainThread(
                                        () ->
                                                getResourceManager()
                                                        .startNewWorker(WORKER_RESOURCE_SPEC));
                        TaskExecutorProcessSpec firstSpec =
                                driverRequests.get(0).get(TIMEOUT_SEC, TimeUnit.SECONDS);

                        Boolean started = startResult.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(started, is(true));
                        assertThat(
                                firstSpec,
                                is(
                                        TaskExecutorProcessUtils
                                                .processSpecFromWorkerResourceSpec(
                                                        flinkConfig, WORKER_RESOURCE_SPEC)));

                        // first request failed, verify requesting another worker from driver
                        runInMainThread(
                                () ->
                                        pendingResourceIds
                                                .get(0)
                                                .completeExceptionally(
                                                        new Throwable("testing error")));
                        TaskExecutorProcessSpec secondSpec =
                                driverRequests.get(1).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(secondSpec, is(firstSpec));

                        // second request allocated, verify registration succeed
                        runInMainThread(() -> pendingResourceIds.get(1).complete(workerId));
                        CompletableFuture<RegistrationResponse> registration =
                                registerTaskExecutor(workerId);
                        RegistrationResponse response =
                                registration.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(response, instanceOf(RegistrationResponse.Success.class));
                    });
        }
    };
}
Example use of org.apache.flink.runtime.registration.RegistrationResponse in the Apache Flink project.
From the class ActiveResourceManagerTest, method testWorkerTerminatedAfterRegister.
/**
 * Tests that when a registered worker is reported as terminated, another worker with the same
 * process spec is requested from the driver and can register successfully.
 */
@Test
public void testWorkerTerminatedAfterRegister() throws Exception {
    new Context() {
        {
            final AtomicInteger driverCalls = new AtomicInteger(0);

            // One entry per expected driver call: the id the driver answers with and the
            // future capturing the spec that call was made with.
            final List<ResourceID> workerIds = new ArrayList<>();
            final List<CompletableFuture<TaskExecutorProcessSpec>> driverRequests =
                    new ArrayList<>();
            for (int i = 0; i < 2; i++) {
                workerIds.add(ResourceID.generate());
                driverRequests.add(new CompletableFuture<>());
            }

            driverBuilder.setRequestResourceFunction(
                    requestedSpec -> {
                        int call = driverCalls.getAndIncrement();
                        // at most two requests are expected
                        assertThat(call, lessThan(2));
                        driverRequests.get(call).complete(requestedSpec);
                        return CompletableFuture.completedFuture(workerIds.get(call));
                    });
            slotManagerBuilder.setGetRequiredResourcesSupplier(
                    () -> Collections.singletonMap(WORKER_RESOURCE_SPEC, 1));

            runTest(
                    () -> {
                        // received worker request, verify requesting from driver
                        CompletableFuture<Boolean> startResult =
                                runInMainThread(
                                        () ->
                                                getResourceManager()
                                                        .startNewWorker(WORKER_RESOURCE_SPEC));
                        TaskExecutorProcessSpec firstSpec =
                                driverRequests.get(0).get(TIMEOUT_SEC, TimeUnit.SECONDS);

                        Boolean started = startResult.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(started, is(true));
                        assertThat(
                                firstSpec,
                                is(
                                        TaskExecutorProcessUtils
                                                .processSpecFromWorkerResourceSpec(
                                                        flinkConfig, WORKER_RESOURCE_SPEC)));

                        // first worker registered, verify registration succeed
                        CompletableFuture<RegistrationResponse> firstRegistration =
                                registerTaskExecutor(workerIds.get(0));
                        RegistrationResponse firstResponse =
                                firstRegistration.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(
                                firstResponse, instanceOf(RegistrationResponse.Success.class));

                        // first worker terminated, verify requesting another worker from driver
                        runInMainThread(
                                () ->
                                        getResourceManager()
                                                .onWorkerTerminated(
                                                        workerIds.get(0),
                                                        "terminate for testing"));
                        TaskExecutorProcessSpec secondSpec =
                                driverRequests.get(1).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(secondSpec, is(firstSpec));

                        // second worker registered, verify registration succeed
                        CompletableFuture<RegistrationResponse> secondRegistration =
                                registerTaskExecutor(workerIds.get(1));
                        RegistrationResponse secondResponse =
                                secondRegistration.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(
                                secondResponse, instanceOf(RegistrationResponse.Success.class));
                    });
        }
    };
}
Example use of org.apache.flink.runtime.registration.RegistrationResponse in the Apache Flink project.
From the class ActiveResourceManagerTest, method testStartWorkerIntervalOnWorkerTerminationExceedFailureRate.
/**
 * Tests that, with the start-worker failure rate set to 1 and a retry interval configured, the
 * request for a replacement worker after a worker termination reaches the driver no sooner than
 * the retry interval after the first request, and that the replacement registers successfully.
 */
@Test
public void testStartWorkerIntervalOnWorkerTerminationExceedFailureRate() throws Exception {
    new Context() {
        {
            flinkConfig.setDouble(ResourceManagerOptions.START_WORKER_MAX_FAILURE_RATE, 1);
            final Duration retryInterval =
                    Duration.ofMillis(TESTING_START_WORKER_INTERVAL.toMilliseconds());
            flinkConfig.set(ResourceManagerOptions.START_WORKER_RETRY_INTERVAL, retryInterval);

            final AtomicInteger driverCalls = new AtomicInteger(0);

            // One entry per expected driver call: the id the driver answers with and the
            // future capturing the wall-clock time (ms) at which that call arrived.
            final List<ResourceID> workerIds = new ArrayList<>();
            final List<CompletableFuture<Long>> requestTimestamps = new ArrayList<>();
            for (int i = 0; i < 2; i++) {
                workerIds.add(ResourceID.generate());
                requestTimestamps.add(new CompletableFuture<>());
            }

            driverBuilder.setRequestResourceFunction(
                    requestedSpec -> {
                        int call = driverCalls.getAndIncrement();
                        // at most two requests are expected
                        assertThat(call, lessThan(2));
                        requestTimestamps.get(call).complete(System.currentTimeMillis());
                        return CompletableFuture.completedFuture(workerIds.get(call));
                    });
            slotManagerBuilder.setGetRequiredResourcesSupplier(
                    () -> Collections.singletonMap(WORKER_RESOURCE_SPEC, 1));

            runTest(
                    () -> {
                        // received worker request, verify requesting from driver
                        CompletableFuture<Boolean> startResult =
                                runInMainThread(
                                        () ->
                                                getResourceManager()
                                                        .startNewWorker(WORKER_RESOURCE_SPEC));
                        long firstRequestMillis =
                                requestTimestamps.get(0).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        Boolean started = startResult.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(started, is(true));

                        // first worker failed before register, verify requesting another
                        // worker from driver
                        runInMainThread(
                                () ->
                                        getResourceManager()
                                                .onWorkerTerminated(
                                                        workerIds.get(0),
                                                        "terminate for testing"));
                        long secondRequestMillis =
                                requestTimestamps.get(1).get(TIMEOUT_SEC, TimeUnit.SECONDS);

                        // validate trying creating worker twice, with proper interval
                        long elapsedMillis = secondRequestMillis - firstRequestMillis;
                        assertThat(
                                elapsedMillis,
                                greaterThanOrEqualTo(
                                        TESTING_START_WORKER_INTERVAL.toMilliseconds()));

                        // second worker registered, verify registration succeed
                        CompletableFuture<RegistrationResponse> registration =
                                registerTaskExecutor(workerIds.get(1));
                        RegistrationResponse response =
                                registration.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(response, instanceOf(RegistrationResponse.Success.class));
                    });
        }
    };
}
Aggregations