use of org.apache.flink.runtime.io.network.partition.TestingJobMasterPartitionTracker in project flink by apache.
the class ExecutionGraphPartitionReleaseTest method testStrategyNotifiedOfUnFinishedVertices.
@Test
public void testStrategyNotifiedOfUnFinishedVertices() throws Exception {
    // setup a pipeline of 2 failover regions (f1 -> f2), where
    // f1 is just a source
    // f2 consists of 3 operators (o1,o2,o3), where o1 consumes f1, and o2/o3 consume o1
    final JobVertex sourceVertex = ExecutionGraphTestUtils.createNoOpVertex("source", 1);
    final JobVertex operator1Vertex = ExecutionGraphTestUtils.createNoOpVertex("operator1", 1);
    final JobVertex operator2Vertex = ExecutionGraphTestUtils.createNoOpVertex("operator2", 1);
    final JobVertex operator3Vertex = ExecutionGraphTestUtils.createNoOpVertex("operator3", 1);
    operator1Vertex.connectNewDataSetAsInput(sourceVertex, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING);
    operator2Vertex.connectNewDataSetAsInput(operator1Vertex, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    operator3Vertex.connectNewDataSetAsInput(operator1Vertex, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

    // setup partition tracker to intercept partition release calls
    final TestingJobMasterPartitionTracker partitionTracker = new TestingJobMasterPartitionTracker();
    final Queue<ResultPartitionID> releasedPartitions = new ArrayDeque<>();
    partitionTracker.setStopTrackingAndReleasePartitionsConsumer(
            partitionIds -> releasedPartitions.add(partitionIds.iterator().next()));

    final SchedulerBase scheduler =
            createScheduler(partitionTracker, sourceVertex, operator1Vertex, operator2Vertex, operator3Vertex);
    final ExecutionGraph executionGraph = scheduler.getExecutionGraph();

    mainThreadExecutor.execute(() -> {
        final Execution sourceExecution = getCurrentExecution(sourceVertex, executionGraph);
        // finish the source; this should not result in any release calls since the
        // consumer o1 was not finished
        scheduler.updateTaskExecutionState(
                new TaskExecutionState(sourceExecution.getAttemptId(), ExecutionState.FINISHED));
        assertThat(releasedPartitions, empty());
    });
    mainThreadExecutor.execute(() -> {
        final Execution operator1Execution = getCurrentExecution(operator1Vertex, executionGraph);
        // finish o1; this should not result in any release calls since not all
        // operators of the pipelined region are finished
        for (final IntermediateResultPartitionID partitionId :
                operator1Execution.getVertex().getProducedPartitions().keySet()) {
            scheduler.notifyPartitionDataAvailable(
                    new ResultPartitionID(partitionId, operator1Execution.getAttemptId()));
        }
        scheduler.updateTaskExecutionState(
                new TaskExecutionState(operator1Execution.getAttemptId(), ExecutionState.FINISHED));
        assertThat(releasedPartitions, empty());
    });
    mainThreadExecutor.execute(() -> {
        final Execution operator2Execution = getCurrentExecution(operator2Vertex, executionGraph);
        // finish o2; this should not result in any release calls since o3 was not
        // finished
        scheduler.updateTaskExecutionState(
                new TaskExecutionState(operator2Execution.getAttemptId(), ExecutionState.FINISHED));
        assertThat(releasedPartitions, empty());
    });

    mainThreadExecutor.execute(() -> {
        final Execution operator2Execution = getCurrentExecution(operator2Vertex, executionGraph);
        // reset o2
        operator2Execution.getVertex().resetForNewExecution();
        assertThat(releasedPartitions, empty());
    });

    mainThreadExecutor.execute(() -> {
        final Execution operator3Execution = getCurrentExecution(operator3Vertex, executionGraph);
        // finish o3; this should not result in any release calls since o2 was reset
        scheduler.updateTaskExecutionState(
                new TaskExecutionState(operator3Execution.getAttemptId(), ExecutionState.FINISHED));
        assertThat(releasedPartitions, empty());
    });
}
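The hook wired via setStopTrackingAndReleasePartitionsConsumer above records only the first partition ID of each release call (partitionIds.iterator().next()). A minimal variant that captures every released partition, useful when a vertex produces more than one partition, could look like the following sketch; it reuses the same setter and java.util.ArrayDeque, and is an illustrative alternative rather than the test's own code.

    // Sketch: capture all partition IDs passed to each release call instead of just the first one.
    final Queue<ResultPartitionID> releasedPartitions = new ArrayDeque<>();
    partitionTracker.setStopTrackingAndReleasePartitionsConsumer(releasedPartitions::addAll);
    // the assertions stay the same, e.g.:
    assertThat(releasedPartitions, empty());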
use of org.apache.flink.runtime.io.network.partition.TestingJobMasterPartitionTracker in project flink by apache.
the class ExecutionGraphResultPartitionAvailabilityCheckerTest method testPartitionAvailabilityCheck.
@Test
public void testPartitionAvailabilityCheck() {
    final IntermediateResultPartitionID irp1ID = new IntermediateResultPartitionID();
    final IntermediateResultPartitionID irp2ID = new IntermediateResultPartitionID();
    final IntermediateResultPartitionID irp3ID = new IntermediateResultPartitionID();
    final IntermediateResultPartitionID irp4ID = new IntermediateResultPartitionID();
    final Map<IntermediateResultPartitionID, Boolean> expectedAvailability =
            new HashMap<IntermediateResultPartitionID, Boolean>() {
                {
                    put(irp1ID, true);
                    put(irp2ID, false);
                    put(irp3ID, false);
                    put(irp4ID, true);
                }
            };

    // let the partition tracker respect the expected availability result
    final TestingJobMasterPartitionTracker partitionTracker = new TestingJobMasterPartitionTracker();
    partitionTracker.setIsPartitionTrackedFunction(rpID -> expectedAvailability.get(rpID.getPartitionId()));

    // the execution attempt ID should make no difference in this case
    final Function<IntermediateResultPartitionID, ResultPartitionID> partitionIDMapper =
            intermediateResultPartitionID ->
                    new ResultPartitionID(intermediateResultPartitionID, new ExecutionAttemptID());

    final ResultPartitionAvailabilityChecker resultPartitionAvailabilityChecker =
            new ExecutionGraphResultPartitionAvailabilityChecker(partitionIDMapper, partitionTracker);

    for (IntermediateResultPartitionID irpID : expectedAvailability.keySet()) {
        assertEquals(expectedAvailability.get(irpID), resultPartitionAvailabilityChecker.isAvailable(irpID));
    }
}
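The expected availability map above uses the anonymous-subclass ("double brace") initialization idiom. A flatter alternative, sketched below under the assumption that the test classpath is Java 9 or newer, is Map.of; it yields an immutable map, which is fine here because the map is only read.

    // Sketch: same fixture without the anonymous HashMap subclass (requires Java 9+).
    final Map<IntermediateResultPartitionID, Boolean> expectedAvailability = Map.of(
            irp1ID, true,
            irp2ID, false,
            irp3ID, false,
            irp4ID, true);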
use of org.apache.flink.runtime.io.network.partition.TestingJobMasterPartitionTracker in project flink by apache.
the class ExecutionPartitionLifecycleTest method testPartitionTrackingForStateTransition.
private void testPartitionTrackingForStateTransition(
        final Consumer<Execution> stateTransition,
        final PartitionReleaseResult partitionReleaseResult) throws Exception {
    CompletableFuture<Tuple2<ResourceID, ResultPartitionDeploymentDescriptor>> partitionStartTrackingFuture = new CompletableFuture<>();
    CompletableFuture<Collection<ResultPartitionID>> partitionStopTrackingFuture = new CompletableFuture<>();
    CompletableFuture<Collection<ResultPartitionID>> partitionStopTrackingAndReleaseFuture = new CompletableFuture<>();

    final TestingJobMasterPartitionTracker partitionTracker = new TestingJobMasterPartitionTracker();
    partitionTracker.setStartTrackingPartitionsConsumer(
            (resourceID, resultPartitionDeploymentDescriptor) ->
                    partitionStartTrackingFuture.complete(Tuple2.of(resourceID, resultPartitionDeploymentDescriptor)));
    partitionTracker.setStopTrackingPartitionsConsumer(partitionStopTrackingFuture::complete);
    partitionTracker.setStopTrackingAndReleasePartitionsConsumer(partitionStopTrackingAndReleaseFuture::complete);

    setupExecutionGraphAndStartRunningJob(
            ResultPartitionType.BLOCKING,
            partitionTracker,
            new SimpleAckingTaskManagerGateway(),
            ShuffleTestUtils.DEFAULT_SHUFFLE_MASTER);

    Tuple2<ResourceID, ResultPartitionDeploymentDescriptor> startTrackingCall = partitionStartTrackingFuture.get();
    assertThat(startTrackingCall.f0, equalTo(taskExecutorResourceId));
    assertThat(startTrackingCall.f1, equalTo(descriptor));

    stateTransition.accept(execution);

    switch (partitionReleaseResult) {
        case NONE:
            assertFalse(partitionStopTrackingFuture.isDone());
            assertFalse(partitionStopTrackingAndReleaseFuture.isDone());
            break;
        case STOP_TRACKING:
            assertTrue(partitionStopTrackingFuture.isDone());
            assertFalse(partitionStopTrackingAndReleaseFuture.isDone());
            final Collection<ResultPartitionID> stopTrackingCall = partitionStopTrackingFuture.get();
            assertEquals(Collections.singletonList(descriptor.getShuffleDescriptor().getResultPartitionID()), stopTrackingCall);
            break;
        case STOP_TRACKING_AND_RELEASE:
            assertFalse(partitionStopTrackingFuture.isDone());
            assertTrue(partitionStopTrackingAndReleaseFuture.isDone());
            final Collection<ResultPartitionID> stopTrackingAndReleaseCall = partitionStopTrackingAndReleaseFuture.get();
            assertEquals(Collections.singletonList(descriptor.getShuffleDescriptor().getResultPartitionID()), stopTrackingAndReleaseCall);
            break;
    }
}
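The helper is parameterized by a state transition and the tracker reaction it should provoke. One plausible caller is sketched below; the pairing is an assumption (that finishing a producer of BLOCKING partitions keeps them tracked and unreleased) and the test name is illustrative, not taken from the original test class.

    // Sketch: a producer that finishes normally should keep its blocking partitions
    // tracked so downstream consumers can still read them.
    @Test
    public void testPartitionsKeptWhenProducerFinishes() throws Exception {
        testPartitionTrackingForStateTransition(Execution::markFinished, PartitionReleaseResult.NONE);
    }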
use of org.apache.flink.runtime.io.network.partition.TestingJobMasterPartitionTracker in project flink by apache.
the class DefaultSchedulerTest method setUp.
@Before
public void setUp() throws Exception {
    executor = Executors.newSingleThreadExecutor();
    scheduledExecutorService = new DirectScheduledExecutorService();

    configuration = new Configuration();
    testRestartBackoffTimeStrategy = new TestRestartBackoffTimeStrategy(true, 0);
    testExecutionVertexOperations = new TestExecutionVertexOperationsDecorator(new DefaultExecutionVertexOperations());
    executionVertexVersioner = new ExecutionVertexVersioner();
    executionSlotAllocatorFactory = new TestExecutionSlotAllocatorFactory();
    testExecutionSlotAllocator = executionSlotAllocatorFactory.getTestExecutionSlotAllocator();

    shuffleMaster = new TestingShuffleMaster();
    partitionTracker = new TestingJobMasterPartitionTracker();

    timeout = Time.seconds(60);
}
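setUp allocates a single-thread executor and a DirectScheduledExecutorService, so the test class needs a matching @After hook to release them. A simplified sketch using plain JDK shutdown calls follows; the actual test class may use Flink's executor utilities instead.

    // Sketch: release the executor services created in setUp.
    @After
    public void tearDown() {
        if (scheduledExecutorService != null) {
            scheduledExecutorService.shutdownNow();
        }
        if (executor != null) {
            executor.shutdownNow();
        }
    }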
use of org.apache.flink.runtime.io.network.partition.TestingJobMasterPartitionTracker in project flink by apache.
the class JobMasterTest method testTaskExecutorNotReleasedOnFailedAllocationIfPartitionIsAllocated.
@Test
public void testTaskExecutorNotReleasedOnFailedAllocationIfPartitionIsAllocated() throws Exception {
    final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();
    final JobGraph jobGraph = JobGraphTestUtils.singleNoOpJobGraph();
    final LocalUnresolvedTaskManagerLocation taskManagerUnresolvedLocation = new LocalUnresolvedTaskManagerLocation();

    final AtomicBoolean isTrackingPartitions = new AtomicBoolean(true);
    final TestingJobMasterPartitionTracker partitionTracker = new TestingJobMasterPartitionTracker();
    partitionTracker.setIsTrackingPartitionsForFunction(ignored -> isTrackingPartitions.get());

    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService)
            .withConfiguration(configuration)
            .withHighAvailabilityServices(haServices)
            .withJobManagerSharedServices(jobManagerSharedServices)
            .withHeartbeatServices(heartbeatServices)
            .withPartitionTrackerFactory(ignored -> partitionTracker)
            .createJobMaster();

    final CompletableFuture<JobID> disconnectTaskExecutorFuture = new CompletableFuture<>();
    final CompletableFuture<AllocationID> freedSlotFuture = new CompletableFuture<>();
    final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
            .setFreeSlotFunction((allocationID, throwable) -> {
                freedSlotFuture.complete(allocationID);
                return CompletableFuture.completedFuture(Acknowledge.get());
            })
            .setDisconnectJobManagerConsumer((jobID, throwable) -> disconnectTaskExecutorFuture.complete(jobID))
            .createTestingTaskExecutorGateway();

    try {
        jobMaster.start();
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        final Collection<SlotOffer> slotOffers =
                registerSlotsAtJobMaster(1, jobMasterGateway, jobGraph.getJobID(), testingTaskExecutorGateway, taskManagerUnresolvedLocation);

        // check that we accepted the offered slot
        assertThat(slotOffers, hasSize(1));
        final AllocationID allocationId = slotOffers.iterator().next().getAllocationId();

        jobMasterGateway.failSlot(taskManagerUnresolvedLocation.getResourceID(), allocationId, new FlinkException("Fail allocation test exception"));

        // we should free the slot, but not disconnect from the TaskExecutor as we still have an
        // allocated partition
        assertThat(freedSlotFuture.get(), equalTo(allocationId));
        // trigger another request to ensure that the slot allocation failure has been
        // processed completely
        jobMasterGateway.requestJobStatus(Time.seconds(5)).get();
        assertThat(disconnectTaskExecutorFuture.isDone(), is(false));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
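The AtomicBoolean behind setIsTrackingPartitionsForFunction is what keeps the TaskExecutor connected in this test. A complementary check, sketched below as a hypothetical separate test reusing the same harness and gateway futures, would flip the flag so the tracker reports no partitions for the TaskExecutor and then expect the disconnect to happen.

    // Sketch: with no tracked partitions, failing the allocation should also
    // lead the JobMaster to disconnect from the TaskExecutor.
    isTrackingPartitions.set(false);
    jobMasterGateway.failSlot(taskManagerUnresolvedLocation.getResourceID(), allocationId, new FlinkException("Fail allocation test exception"));
    assertThat(freedSlotFuture.get(), equalTo(allocationId));
    assertThat(disconnectTaskExecutorFuture.get(), equalTo(jobGraph.getJobID()));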