Use of org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor in project flink by apache.
From class DefaultSchedulerTest, method testProducedPartitionRegistrationTimeout.
@Test
public void testProducedPartitionRegistrationTimeout() throws Exception {
    ScheduledExecutorService scheduledExecutorService = null;
    try {
        scheduledExecutorService = Executors.newSingleThreadScheduledExecutor();
        final ComponentMainThreadExecutor mainThreadExecutor =
                ComponentMainThreadExecutorServiceAdapter.forSingleThreadExecutor(scheduledExecutorService);

        shuffleMaster.setAutoCompleteRegistration(false);

        final JobGraph jobGraph = nonParallelSourceSinkJobGraph();
        timeout = Time.milliseconds(1);

        createSchedulerAndStartScheduling(jobGraph, mainThreadExecutor);

        testExecutionVertexOperations.awaitCanceledVertices(2);
        testExecutionVertexOperations.awaitFailedVertices(1);
    } finally {
        if (scheduledExecutorService != null) {
            scheduledExecutorService.shutdown();
        }
    }
}
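The wrapping used in this test can be reproduced in isolation. The following is a minimal, self-contained sketch (not taken from the Flink sources above) that wraps a dedicated single-thread executor with ComponentMainThreadExecutorServiceAdapter.forSingleThreadExecutor and hands a task to it; the adapter's package and the class name MainThreadExecutorSketch are assumptions made for illustration.

// Minimal sketch (not from the Flink code above): wrap a dedicated single-thread
// executor as a ComponentMainThreadExecutor and submit work to it.
// The adapter is assumed to live in the same package as the executor interface.
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;

import org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor;
import org.apache.flink.runtime.concurrent.ComponentMainThreadExecutorServiceAdapter;

public class MainThreadExecutorSketch {
    public static void main(String[] args) {
        final ScheduledExecutorService pool = Executors.newSingleThreadScheduledExecutor();
        try {
            final ComponentMainThreadExecutor mainThreadExecutor =
                    ComponentMainThreadExecutorServiceAdapter.forSingleThreadExecutor(pool);

            // Work submitted here runs on the adapter's "main" thread, which is what
            // allows scheduler code with main-thread assertions to run in such tests.
            mainThreadExecutor.execute(() -> System.out.println("running on the main thread executor"));
        } finally {
            pool.shutdown();
        }
    }
}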
Use of org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor in project flink by apache.
From class Execution, method sendCancelRpcCall.
/**
 * This method sends a CancelTask message to the instance of the assigned slot.
 *
 * <p>The sending is tried up to NUM_CANCEL_CALL_TRIES times.
 */
private void sendCancelRpcCall(int numberRetries) {
    final LogicalSlot slot = assignedResource;
    if (slot != null) {
        final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway();
        final ComponentMainThreadExecutor jobMasterMainThreadExecutor =
                getVertex().getExecutionGraphAccessor().getJobMasterMainThreadExecutor();

        CompletableFuture<Acknowledge> cancelResultFuture =
                FutureUtils.retry(
                        () -> taskManagerGateway.cancelTask(attemptId, rpcTimeout),
                        numberRetries,
                        jobMasterMainThreadExecutor);

        cancelResultFuture.whenComplete(
                (ack, failure) -> {
                    if (failure != null) {
                        fail(new Exception("Task could not be canceled.", failure));
                    }
                });
    }
}
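FutureUtils.retry keeps every retry attempt on the executor it is given, which is why the cancel call above stays on the job master's main thread executor. The sketch below is a simplified, self-contained re-implementation of that retry pattern for illustration only; it is not the Flink FutureUtils code, and the class name RetrySketch is made up.

// Illustrative sketch only: a simplified version of the retry-on-executor pattern.
// Each attempt's completion callback is scheduled on the given executor, so when the
// executor is a main thread executor, retries never leave the main thread.
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.function.Supplier;

public final class RetrySketch {

    public static <T> CompletableFuture<T> retry(
            Supplier<CompletableFuture<T>> operation, int retries, Executor executor) {
        final CompletableFuture<T> result = new CompletableFuture<>();
        runAttempt(operation, retries, executor, result);
        return result;
    }

    private static <T> void runAttempt(
            Supplier<CompletableFuture<T>> operation,
            int retriesLeft,
            Executor executor,
            CompletableFuture<T> result) {
        operation.get().whenCompleteAsync(
                (value, failure) -> {
                    if (failure == null) {
                        result.complete(value);
                    } else if (retriesLeft > 0) {
                        // retry on the same executor, e.g. the job master main thread executor
                        runAttempt(operation, retriesLeft - 1, executor, result);
                    } else {
                        result.completeExceptionally(failure);
                    }
                },
                executor);
    }
}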
Use of org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor in project flink by apache.
From class Execution, method deploy.
/**
 * Deploys the execution to the previously assigned resource.
 *
 * @throws JobException if the execution cannot be deployed to the assigned resource
 */
public void deploy() throws JobException {
    assertRunningInJobMasterMainThread();

    final LogicalSlot slot = assignedResource;
    checkNotNull(
            slot,
            "In order to deploy the execution we first have to assign a resource via tryAssignResource.");

    // The more general check is the rpcTimeout of the deployment call
    if (!slot.isAlive()) {
        throw new JobException("Target slot (TaskManager) for deployment is no longer alive.");
    }

    // make sure exactly one deployment call happens from the correct state
    ExecutionState previous = this.state;
    if (previous == SCHEDULED) {
        if (!transitionState(previous, DEPLOYING)) {
            // this should actually not happen and indicates a race somewhere else
            throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race.");
        }
    } else {
        // vertex may have been cancelled, or it was already scheduled
        throw new IllegalStateException(
                "The vertex must be in SCHEDULED state to be deployed. Found state " + previous);
    }

    if (this != slot.getPayload()) {
        throw new IllegalStateException(
                String.format("The execution %s has not been assigned to the assigned slot.", this));
    }

    try {
        // race double check, did we fail/cancel and do we need to release the slot?
        if (this.state != DEPLOYING) {
            slot.releaseSlot(
                    new FlinkException(
                            "Actual state of execution " + this + " (" + state
                                    + ") does not match expected state DEPLOYING."));
            return;
        }

        LOG.info(
                "Deploying {} (attempt #{}) with attempt id {} and vertex id {} to {} with allocation id {}",
                vertex.getTaskNameWithSubtaskIndex(),
                attemptNumber,
                vertex.getCurrentExecutionAttempt().getAttemptId(),
                vertex.getID(),
                getAssignedResourceLocation(),
                slot.getAllocationId());

        final TaskDeploymentDescriptor deployment =
                TaskDeploymentDescriptorFactory.fromExecutionVertex(vertex, attemptNumber)
                        .createDeploymentDescriptor(
                                slot.getAllocationId(), taskRestore, producedPartitions.values());

        // null taskRestore to let it be GC'ed
        taskRestore = null;

        final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway();
        final ComponentMainThreadExecutor jobMasterMainThreadExecutor =
                vertex.getExecutionGraphAccessor().getJobMasterMainThreadExecutor();

        getVertex().notifyPendingDeployment(this);

        // We run the submission in the future executor so that the serialization of large TDDs
        // does not block the main thread, and sync back to the main thread once submission is
        // completed.
        CompletableFuture.supplyAsync(() -> taskManagerGateway.submitTask(deployment, rpcTimeout), executor)
                .thenCompose(Function.identity())
                .whenCompleteAsync(
                        (ack, failure) -> {
                            if (failure == null) {
                                vertex.notifyCompletedDeployment(this);
                            } else {
                                final Throwable actualFailure =
                                        ExceptionUtils.stripCompletionException(failure);
                                if (actualFailure instanceof TimeoutException) {
                                    String taskname =
                                            vertex.getTaskNameWithSubtaskIndex() + " (" + attemptId + ')';
                                    markFailed(
                                            new Exception(
                                                    "Cannot deploy task " + taskname + " - TaskManager ("
                                                            + getAssignedResourceLocation()
                                                            + ") not responding after a rpcTimeout of "
                                                            + rpcTimeout,
                                                    actualFailure));
                                } else {
                                    markFailed(actualFailure);
                                }
                            }
                        },
                        jobMasterMainThreadExecutor);
    } catch (Throwable t) {
        markFailed(t);
    }
}
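The key point of the submission above is the thread hand-off: the expensive descriptor serialization and RPC submission run on a separate future executor, and the completion callback is forced back onto the main thread executor before any execution state is touched. A minimal sketch of that hand-off, using only java.util.concurrent and made-up names (DeploySketch, submitOffMainThread), is shown below; it is not the actual Execution.deploy code.

// Sketch of the hand-off pattern only (not the Flink implementation): heavy work runs on
// an I/O executor, the nested future is flattened, and the completion callback is pinned
// back onto the main thread executor where state changes are allowed.
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.function.Function;

public final class DeploySketch {

    public static CompletableFuture<String> submitOffMainThread(
            Executor ioExecutor, Executor mainThreadExecutor) {
        return CompletableFuture
                // expensive work (e.g. building and serializing a deployment descriptor) off the main thread
                .supplyAsync(() -> CompletableFuture.completedFuture("ack"), ioExecutor)
                // flatten the nested future, mirroring thenCompose(Function.identity()) above
                .thenCompose(Function.identity())
                // sync back: only the main thread executor may react to success or failure
                .whenCompleteAsync(
                        (ack, failure) -> {
                            if (failure != null) {
                                // here the real code would mark the execution as failed
                                System.err.println("submission failed: " + failure);
                            }
                        },
                        mainThreadExecutor);
    }
}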
Use of org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor in project flink by apache.
From class OperatorCoordinatorSchedulerTest, method setupTestJobAndScheduler.
private DefaultScheduler setupTestJobAndScheduler(
        OperatorCoordinator.Provider provider,
        @Nullable TaskExecutorOperatorEventGateway taskExecutorOperatorEventGateway,
        @Nullable Consumer<JobGraph> jobGraphPreProcessing,
        boolean restartAllOnFailover)
        throws Exception {
    final OperatorIDPair opIds = OperatorIDPair.of(new OperatorID(), provider.getOperatorId());
    final JobVertex vertex =
            new JobVertex("Vertex with OperatorCoordinator", testVertexId, Collections.singletonList(opIds));
    vertex.setInvokableClass(NoOpInvokable.class);
    vertex.addOperatorCoordinator(new SerializedValue<>(provider));
    vertex.setParallelism(2);

    final JobGraph jobGraph = JobGraphBuilder.newStreamingJobGraphBuilder().addJobVertex(vertex).build();
    SchedulerTestingUtils.enableCheckpointing(jobGraph);
    if (jobGraphPreProcessing != null) {
        jobGraphPreProcessing.accept(jobGraph);
    }

    final ComponentMainThreadExecutor mainThreadExecutor =
            new ComponentMainThreadExecutorServiceAdapter(
                    (ScheduledExecutorService) executor, Thread.currentThread());

    final SchedulerTestingUtils.DefaultSchedulerBuilder schedulerBuilder =
            taskExecutorOperatorEventGateway == null
                    ? SchedulerTestingUtils.createSchedulerBuilder(jobGraph, mainThreadExecutor)
                    : SchedulerTestingUtils.createSchedulerBuilder(
                            jobGraph, mainThreadExecutor, taskExecutorOperatorEventGateway);
    if (restartAllOnFailover) {
        schedulerBuilder.setFailoverStrategyFactory(new RestartAllFailoverStrategy.Factory());
    }

    final DefaultScheduler scheduler =
            schedulerBuilder.setFutureExecutor(executor).setDelayExecutor(executor).build();

    this.createdScheduler = scheduler;
    return scheduler;
}
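In this test setup the adapter is told that the current test thread is the component's main thread, so the test body can drive the scheduler directly. The sketch below illustrates only that wiring; it assumes the adapter's two-argument constructor shown above and a main-thread assertion method on ComponentMainThreadExecutor, and the class name TestMainThreadWiringSketch is made up for illustration.

// Sketch (assumptions noted above): declare the current thread as the main thread, so
// components with main-thread assertions can be called directly from the test body.
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;

import org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor;
import org.apache.flink.runtime.concurrent.ComponentMainThreadExecutorServiceAdapter;

public class TestMainThreadWiringSketch {
    public static void main(String[] args) {
        final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
        try {
            // Thread.currentThread() is the thread that counts as the "main" thread here.
            final ComponentMainThreadExecutor mainThreadExecutor =
                    new ComponentMainThreadExecutorServiceAdapter(executor, Thread.currentThread());

            // assumed API: passes because we are on the declared main thread
            mainThreadExecutor.assertRunningInMainThread();
        } finally {
            executor.shutdown();
        }
    }
}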
Use of org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor in project flink by apache.
From class OperatorCoordinatorHolderTest, method checkpointEventValueAtomicity.
private void checkpointEventValueAtomicity(
        final Function<OperatorCoordinator.Context, OperatorCoordinator> coordinatorCtor) throws Exception {

    final ManuallyTriggeredScheduledExecutorService executor =
            new ManuallyTriggeredScheduledExecutorService();
    final ComponentMainThreadExecutor mainThreadExecutor =
            new ComponentMainThreadExecutorServiceAdapter(
                    (ScheduledExecutorService) executor, Thread.currentThread());
    final EventReceivingTasks sender = EventReceivingTasks.createForRunningTasks();
    final OperatorCoordinatorHolder holder =
            createCoordinatorHolder(sender, coordinatorCtor, mainThreadExecutor);

    // give the coordinator some time to emit some events. This isn't strictly necessary,
    // but it randomly alters the timings between the coordinator's thread (event sender) and
    // the main thread (holder). This should produce a flaky test if we missed some corner cases.
    Thread.sleep(new Random().nextInt(10));
    executor.triggerAll();

    // trigger the checkpoint - this should also shut the valve as soon as the future is completed
    final CompletableFuture<byte[]> checkpointFuture = new CompletableFuture<>();
    holder.checkpointCoordinator(0L, checkpointFuture);
    executor.triggerAll();

    // give the coordinator some time to emit some events. Same as above, this adds some randomization.
    Thread.sleep(new Random().nextInt(10));

    holder.close();
    executor.triggerAll();

    assertTrue(checkpointFuture.isDone());
    final int checkpointedNumber = bytesToInt(checkpointFuture.get());

    assertEquals(checkpointedNumber, sender.getNumberOfSentEvents());
    for (int i = 0; i < checkpointedNumber; i++) {
        assertEquals(i, ((TestOperatorEvent) sender.getAllSentEvents().get(i).event).getValue());
    }
}
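The deterministic interleaving in this test comes from the manually triggered executor: tasks handed to the main thread executor are queued and only run when triggerAll() is called. The following is an illustrative stand-in written against plain java.util.concurrent, not the Flink ManuallyTriggeredScheduledExecutorService, showing why that pattern makes the coordinator/main-thread interaction reproducible.

// Illustrative stand-in only: a minimal manually triggered Executor. Submitted tasks are
// queued instead of running immediately; the test decides when they run via triggerAll().
import java.util.ArrayDeque;
import java.util.Queue;
import java.util.concurrent.Executor;

public class ManuallyTriggeredExecutorSketch implements Executor {
    private final Queue<Runnable> tasks = new ArrayDeque<>();

    @Override
    public void execute(Runnable command) {
        tasks.add(command); // queue instead of executing right away
    }

    /** Runs everything queued so far, including tasks enqueued while triggering. */
    public void triggerAll() {
        Runnable next;
        while ((next = tasks.poll()) != null) {
            next.run();
        }
    }

    public static void main(String[] args) {
        ManuallyTriggeredExecutorSketch executor = new ManuallyTriggeredExecutorSketch();
        executor.execute(() -> System.out.println("runs only when triggered"));
        executor.triggerAll();
    }
}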