use of org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway in project flink by apache.
the class CheckpointCoordinatorTest method testTasksFinishDuringTriggering.
@Test
public void testTasksFinishDuringTriggering() throws Exception {
JobVertexID jobVertexID1 = new JobVertexID();
JobVertexID jobVertexID2 = new JobVertexID();
ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().setTransitToRunning(false).addJobVertex(jobVertexID1, 1, 256).addJobVertex(jobVertexID2, 1, 256).build();
ExecutionJobVertex jobVertex1 = graph.getJobVertex(jobVertexID1);
ExecutionVertex taskVertex = jobVertex1.getTaskVertices()[0];
ExecutionJobVertex jobVertex2 = graph.getJobVertex(jobVertexID2);
ExecutionVertex taskVertex2 = jobVertex2.getTaskVertices()[0];
AtomicBoolean checkpointAborted = new AtomicBoolean(false);
LogicalSlot slot1 = new TestingLogicalSlotBuilder().setTaskManagerGateway(new SimpleAckingTaskManagerGateway() {
@Override
public CompletableFuture<Acknowledge> triggerCheckpoint(ExecutionAttemptID executionAttemptID, JobID jobId, long checkpointId, long timestamp, CheckpointOptions checkpointOptions) {
taskVertex.getCurrentExecutionAttempt().markFinished();
return FutureUtils.completedExceptionally(new RpcException(""));
}
}).createTestingLogicalSlot();
LogicalSlot slot2 = new TestingLogicalSlotBuilder().setTaskManagerGateway(new SimpleAckingTaskManagerGateway() {
@Override
public void notifyCheckpointAborted(ExecutionAttemptID executionAttemptID, JobID jobId, long checkpointId, long latestCompletedCheckpointId, long timestamp) {
checkpointAborted.set(true);
}
}).createTestingLogicalSlot();
ExecutionGraphTestUtils.setVertexResource(taskVertex, slot1);
taskVertex.getCurrentExecutionAttempt().transitionState(ExecutionState.RUNNING);
ExecutionGraphTestUtils.setVertexResource(taskVertex2, slot2);
taskVertex2.getCurrentExecutionAttempt().transitionState(ExecutionState.RUNNING);
CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setTimer(manuallyTriggeredScheduledExecutor).setAllowCheckpointsAfterTasksFinished(true).build();
// nothing should be happening
assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
// trigger the first checkpoint. this will not fail because we allow checkpointing even with
// finished tasks
final CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
assertTrue(checkpointFuture.isCompletedExceptionally());
assertTrue(checkpointAborted.get());
}
use of org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway in project flink by apache.
the class DefaultExecutionGraphDeploymentTest method testBuildDeploymentDescriptor.
@Test
public void testBuildDeploymentDescriptor() throws Exception {
final JobVertexID jid1 = new JobVertexID();
final JobVertexID jid2 = new JobVertexID();
final JobVertexID jid3 = new JobVertexID();
final JobVertexID jid4 = new JobVertexID();
JobVertex v1 = new JobVertex("v1", jid1);
JobVertex v2 = new JobVertex("v2", jid2);
JobVertex v3 = new JobVertex("v3", jid3);
JobVertex v4 = new JobVertex("v4", jid4);
v1.setParallelism(10);
v2.setParallelism(10);
v3.setParallelism(10);
v4.setParallelism(10);
v1.setInvokableClass(BatchTask.class);
v2.setInvokableClass(BatchTask.class);
v3.setInvokableClass(BatchTask.class);
v4.setInvokableClass(BatchTask.class);
v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
v3.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
final JobGraph jobGraph = JobGraphTestUtils.batchJobGraph(v1, v2, v3, v4);
final JobID jobId = jobGraph.getJobID();
DirectScheduledExecutorService executor = new DirectScheduledExecutorService();
DefaultExecutionGraph eg = TestingDefaultExecutionGraphBuilder.newBuilder().setJobGraph(jobGraph).setFutureExecutor(executor).setIoExecutor(executor).setBlobWriter(blobWriter).build();
eg.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
checkJobOffloaded(eg);
ExecutionJobVertex ejv = eg.getAllVertices().get(jid2);
ExecutionVertex vertex = ejv.getTaskVertices()[3];
final SimpleAckingTaskManagerGateway taskManagerGateway = new SimpleAckingTaskManagerGateway();
final CompletableFuture<TaskDeploymentDescriptor> tdd = new CompletableFuture<>();
taskManagerGateway.setSubmitConsumer(FunctionUtils.uncheckedConsumer(taskDeploymentDescriptor -> {
taskDeploymentDescriptor.loadBigData(blobCache);
tdd.complete(taskDeploymentDescriptor);
}));
final LogicalSlot slot = new TestingLogicalSlotBuilder().setTaskManagerGateway(taskManagerGateway).createTestingLogicalSlot();
assertEquals(ExecutionState.CREATED, vertex.getExecutionState());
vertex.getCurrentExecutionAttempt().transitionState(ExecutionState.SCHEDULED);
vertex.getCurrentExecutionAttempt().registerProducedPartitions(slot.getTaskManagerLocation(), true).get();
vertex.deployToSlot(slot);
assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());
checkTaskOffloaded(eg, vertex.getJobvertexId());
TaskDeploymentDescriptor descr = tdd.get();
assertNotNull(descr);
JobInformation jobInformation = descr.getSerializedJobInformation().deserializeValue(getClass().getClassLoader());
TaskInformation taskInformation = descr.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
assertEquals(jobId, descr.getJobId());
assertEquals(jobId, jobInformation.getJobId());
assertEquals(jid2, taskInformation.getJobVertexId());
assertEquals(3, descr.getSubtaskIndex());
assertEquals(10, taskInformation.getNumberOfSubtasks());
assertEquals(BatchTask.class.getName(), taskInformation.getInvokableClassName());
assertEquals("v2", taskInformation.getTaskName());
Collection<ResultPartitionDeploymentDescriptor> producedPartitions = descr.getProducedPartitions();
Collection<InputGateDeploymentDescriptor> consumedPartitions = descr.getInputGates();
assertEquals(2, producedPartitions.size());
assertEquals(1, consumedPartitions.size());
Iterator<ResultPartitionDeploymentDescriptor> iteratorProducedPartitions = producedPartitions.iterator();
Iterator<InputGateDeploymentDescriptor> iteratorConsumedPartitions = consumedPartitions.iterator();
assertEquals(10, iteratorProducedPartitions.next().getNumberOfSubpartitions());
assertEquals(10, iteratorProducedPartitions.next().getNumberOfSubpartitions());
ShuffleDescriptor[] shuffleDescriptors = iteratorConsumedPartitions.next().getShuffleDescriptors();
assertEquals(10, shuffleDescriptors.length);
Iterator<ConsumedPartitionGroup> iteratorConsumedPartitionGroup = vertex.getAllConsumedPartitionGroups().iterator();
int idx = 0;
for (IntermediateResultPartitionID partitionId : iteratorConsumedPartitionGroup.next()) {
assertEquals(partitionId, shuffleDescriptors[idx++].getResultPartitionID().getPartitionId());
}
}
use of org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway in project flink by apache.
the class ExecutionPartitionLifecycleTest method testPartitionTrackingForStateTransition.
private void testPartitionTrackingForStateTransition(final Consumer<Execution> stateTransition, final PartitionReleaseResult partitionReleaseResult) throws Exception {
CompletableFuture<Tuple2<ResourceID, ResultPartitionDeploymentDescriptor>> partitionStartTrackingFuture = new CompletableFuture<>();
CompletableFuture<Collection<ResultPartitionID>> partitionStopTrackingFuture = new CompletableFuture<>();
CompletableFuture<Collection<ResultPartitionID>> partitionStopTrackingAndReleaseFuture = new CompletableFuture<>();
final TestingJobMasterPartitionTracker partitionTracker = new TestingJobMasterPartitionTracker();
partitionTracker.setStartTrackingPartitionsConsumer((resourceID, resultPartitionDeploymentDescriptor) -> partitionStartTrackingFuture.complete(Tuple2.of(resourceID, resultPartitionDeploymentDescriptor)));
partitionTracker.setStopTrackingPartitionsConsumer(partitionStopTrackingFuture::complete);
partitionTracker.setStopTrackingAndReleasePartitionsConsumer(partitionStopTrackingAndReleaseFuture::complete);
setupExecutionGraphAndStartRunningJob(ResultPartitionType.BLOCKING, partitionTracker, new SimpleAckingTaskManagerGateway(), ShuffleTestUtils.DEFAULT_SHUFFLE_MASTER);
Tuple2<ResourceID, ResultPartitionDeploymentDescriptor> startTrackingCall = partitionStartTrackingFuture.get();
assertThat(startTrackingCall.f0, equalTo(taskExecutorResourceId));
assertThat(startTrackingCall.f1, equalTo(descriptor));
stateTransition.accept(execution);
switch(partitionReleaseResult) {
case NONE:
assertFalse(partitionStopTrackingFuture.isDone());
assertFalse(partitionStopTrackingAndReleaseFuture.isDone());
break;
case STOP_TRACKING:
assertTrue(partitionStopTrackingFuture.isDone());
assertFalse(partitionStopTrackingAndReleaseFuture.isDone());
final Collection<ResultPartitionID> stopTrackingCall = partitionStopTrackingFuture.get();
assertEquals(Collections.singletonList(descriptor.getShuffleDescriptor().getResultPartitionID()), stopTrackingCall);
break;
case STOP_TRACKING_AND_RELEASE:
assertFalse(partitionStopTrackingFuture.isDone());
assertTrue(partitionStopTrackingAndReleaseFuture.isDone());
final Collection<ResultPartitionID> stopTrackingAndReleaseCall = partitionStopTrackingAndReleaseFuture.get();
assertEquals(Collections.singletonList(descriptor.getShuffleDescriptor().getResultPartitionID()), stopTrackingAndReleaseCall);
break;
}
}
use of org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway in project flink by apache.
the class ExecutionPartitionLifecycleTest method testPartitionReleaseOnStateTransitionsAfterRunning.
private void testPartitionReleaseOnStateTransitionsAfterRunning(Consumer<Execution> stateTransition1, Consumer<Execution> stateTransition2) throws Exception {
final SimpleAckingTaskManagerGateway taskManagerGateway = new SimpleAckingTaskManagerGateway();
final CompletableFuture<Tuple2<JobID, Collection<ResultPartitionID>>> releasePartitionsCallFuture = new CompletableFuture<>();
taskManagerGateway.setReleasePartitionsConsumer(((jobID, partitionIds) -> releasePartitionsCallFuture.complete(Tuple2.of(jobID, partitionIds))));
final TestingShuffleMaster testingShuffleMaster = new TestingShuffleMaster();
setupExecutionGraphAndStartRunningJob(ResultPartitionType.PIPELINED, NoOpJobMasterPartitionTracker.INSTANCE, taskManagerGateway, testingShuffleMaster);
stateTransition1.accept(execution);
assertFalse(releasePartitionsCallFuture.isDone());
stateTransition2.accept(execution);
assertTrue(releasePartitionsCallFuture.isDone());
final Tuple2<JobID, Collection<ResultPartitionID>> releasePartitionsCall = releasePartitionsCallFuture.get();
assertEquals(jobId, releasePartitionsCall.f0);
assertThat(releasePartitionsCall.f1, contains(descriptor.getShuffleDescriptor().getResultPartitionID()));
assertEquals(1, testingShuffleMaster.externallyReleasedPartitions.size());
assertEquals(descriptor.getShuffleDescriptor(), testingShuffleMaster.externallyReleasedPartitions.poll());
}
use of org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway in project flink by apache.
the class DeclarativeSlotPoolBridgeTest method testIfJobIsRestartingAllOfferedSlotsWillBeRegistered.
@Test
public void testIfJobIsRestartingAllOfferedSlotsWillBeRegistered() throws Exception {
final CompletableFuture<Void> registerSlotsCalledFuture = new CompletableFuture<>();
final TestingDeclarativeSlotPoolFactory declarativeSlotPoolFactory = new TestingDeclarativeSlotPoolFactory(TestingDeclarativeSlotPool.builder().setRegisterSlotsFunction((slotOffers, taskManagerLocation, taskManagerGateway, aLong) -> registerSlotsCalledFuture.complete(null)));
try (DeclarativeSlotPoolBridge declarativeSlotPoolBridge = createDeclarativeSlotPoolBridge(declarativeSlotPoolFactory, requestSlotMatchingStrategy)) {
declarativeSlotPoolBridge.start(jobMasterId, "localhost", mainThreadExecutor);
declarativeSlotPoolBridge.setIsJobRestarting(true);
final LocalTaskManagerLocation localTaskManagerLocation = new LocalTaskManagerLocation();
declarativeSlotPoolBridge.registerTaskManager(localTaskManagerLocation.getResourceID());
declarativeSlotPoolBridge.offerSlots(localTaskManagerLocation, new SimpleAckingTaskManagerGateway(), Collections.singleton(new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY)));
// make sure that the register slots method is called
registerSlotsCalledFuture.join();
}
}
Aggregations