use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.
The class ExecutionGraphMetricsTest, method testExecutionGraphRestartTimeMetric.
/**
* Tests that the restarting time metric correctly reports restarting times.
*/
@Test
public void testExecutionGraphRestartTimeMetric() throws JobException, IOException, InterruptedException {
final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
try {
// set up the execution graph with mocked scheduling logic
int parallelism = 1;
JobVertex jobVertex = new JobVertex("TestVertex");
jobVertex.setParallelism(parallelism);
jobVertex.setInvokableClass(NoOpInvokable.class);
JobGraph jobGraph = new JobGraph("Test Job", jobVertex);
Configuration config = new Configuration();
config.setString(ConfigConstants.METRICS_REPORTERS_LIST, "test");
config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, TestingReporter.class.getName());
Configuration jobConfig = new Configuration();
Time timeout = Time.seconds(10L);
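// the metric registry picks up the TestingReporter configured above; the test reads the restarting-time gauge back through it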
MetricRegistry metricRegistry = new MetricRegistry(MetricRegistryConfiguration.fromConfiguration(config));
assertTrue(metricRegistry.getReporters().size() == 1);
MetricReporter reporter = metricRegistry.getReporters().get(0);
assertTrue(reporter instanceof TestingReporter);
TestingReporter testingReporter = (TestingReporter) reporter;
MetricGroup metricGroup = new JobManagerMetricGroup(metricRegistry, "localhost");
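// mock the scheduler, task manager and slot infrastructure so that tasks can be deployed without a real cluster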
Scheduler scheduler = mock(Scheduler.class);
ResourceID taskManagerId = ResourceID.generate();
TaskManagerLocation taskManagerLocation = mock(TaskManagerLocation.class);
when(taskManagerLocation.getResourceID()).thenReturn(taskManagerId);
when(taskManagerLocation.getHostname()).thenReturn("localhost");
TaskManagerGateway taskManagerGateway = mock(TaskManagerGateway.class);
Instance instance = mock(Instance.class);
when(instance.getTaskManagerLocation()).thenReturn(taskManagerLocation);
when(instance.getTaskManagerID()).thenReturn(taskManagerId);
when(instance.getTaskManagerGateway()).thenReturn(taskManagerGateway);
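// the mocked slot hierarchy below is backed by an AllocatedSlot whose AllocationID is used when creating the deployment descriptor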
Slot rootSlot = mock(Slot.class);
AllocatedSlot mockAllocatedSlot = mock(AllocatedSlot.class);
when(mockAllocatedSlot.getSlotAllocationId()).thenReturn(new AllocationID());
SimpleSlot simpleSlot = mock(SimpleSlot.class);
when(simpleSlot.isAlive()).thenReturn(true);
when(simpleSlot.getTaskManagerLocation()).thenReturn(taskManagerLocation);
when(simpleSlot.getTaskManagerID()).thenReturn(taskManagerId);
when(simpleSlot.getTaskManagerGateway()).thenReturn(taskManagerGateway);
when(simpleSlot.setExecutedVertex(Matchers.any(Execution.class))).thenReturn(true);
when(simpleSlot.getRoot()).thenReturn(rootSlot);
when(simpleSlot.getAllocatedSlot()).thenReturn(mockAllocatedSlot);
FlinkCompletableFuture<SimpleSlot> future = new FlinkCompletableFuture<>();
future.complete(simpleSlot);
when(scheduler.allocateSlot(any(ScheduledUnit.class), anyBoolean())).thenReturn(future);
when(rootSlot.getSlotNumber()).thenReturn(0);
when(taskManagerGateway.submitTask(any(TaskDeploymentDescriptor.class), any(Time.class))).thenReturn(FlinkCompletableFuture.completed(Acknowledge.get()));
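// a manually triggered restart strategy lets the test decide exactly when the job restarts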
TestingRestartStrategy testingRestartStrategy = new TestingRestartStrategy();
ExecutionGraph executionGraph = new ExecutionGraph(executor, executor, jobGraph.getJobID(), jobGraph.getName(), jobConfig, new SerializedValue<ExecutionConfig>(null), timeout, testingRestartStrategy, Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), scheduler, getClass().getClassLoader(), metricGroup);
// get restarting time metric
Metric metric = testingReporter.getMetric(ExecutionGraph.RESTARTING_TIME_METRIC_NAME);
assertNotNull(metric);
assertTrue(metric instanceof Gauge);
@SuppressWarnings("unchecked") Gauge<Long> restartingTime = (Gauge<Long>) metric;
// check that the restarting time is 0 since it's the initial start
assertTrue(0L == restartingTime.getValue());
executionGraph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());
// start execution
executionGraph.scheduleForExecution();
assertTrue(0L == restartingTime.getValue());
List<ExecutionAttemptID> executionIDs = new ArrayList<>();
for (ExecutionVertex executionVertex : executionGraph.getAllExecutionVertices()) {
executionIDs.add(executionVertex.getCurrentExecutionAttempt().getAttemptId());
}
// tell execution graph that the tasks are in state running --> job status switches to state running
for (ExecutionAttemptID executionID : executionIDs) {
executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.RUNNING));
}
assertEquals(JobStatus.RUNNING, executionGraph.getState());
assertTrue(0L == restartingTime.getValue());
// fail the job so that it goes into state restarting
for (ExecutionAttemptID executionID : executionIDs) {
executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.FAILED, new Exception()));
}
assertEquals(JobStatus.RESTARTING, executionGraph.getState());
long firstRestartingTimestamp = executionGraph.getStatusTimestamp(JobStatus.RESTARTING);
// wait some time so that the restarting time gauge shows a value different from 0
Thread.sleep(50);
long previousRestartingTime = restartingTime.getValue();
// check that the restarting time is monotonically non-decreasing
for (int i = 0; i < 10; i++) {
long currentRestartingTime = restartingTime.getValue();
assertTrue(currentRestartingTime >= previousRestartingTime);
previousRestartingTime = currentRestartingTime;
}
// check that we have measured some restarting time
assertTrue(previousRestartingTime > 0);
// restart job
testingRestartStrategy.restartExecutionGraph();
executionIDs.clear();
for (ExecutionVertex executionVertex : executionGraph.getAllExecutionVertices()) {
executionIDs.add(executionVertex.getCurrentExecutionAttempt().getAttemptId());
}
for (ExecutionAttemptID executionID : executionIDs) {
executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.RUNNING));
}
assertEquals(JobStatus.RUNNING, executionGraph.getState());
assertTrue(firstRestartingTimestamp != 0);
previousRestartingTime = restartingTime.getValue();
// check that the restarting time does not increase after we've reached the running state
for (int i = 0; i < 10; i++) {
long currentRestartingTime = restartingTime.getValue();
assertTrue(currentRestartingTime == previousRestartingTime);
previousRestartingTime = currentRestartingTime;
}
// fail job again
for (ExecutionAttemptID executionID : executionIDs) {
executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.FAILED, new Exception()));
}
assertEquals(JobStatus.RESTARTING, executionGraph.getState());
long secondRestartingTimestamp = executionGraph.getStatusTimestamp(JobStatus.RESTARTING);
assertTrue(firstRestartingTimestamp != secondRestartingTimestamp);
Thread.sleep(50);
previousRestartingTime = restartingTime.getValue();
// check that the restarting time is increasing again
for (int i = 0; i < 10; i++) {
long currentRestartingTime = restartingTime.getValue();
assertTrue(currentRestartingTime >= previousRestartingTime);
previousRestartingTime = currentRestartingTime;
}
assertTrue(previousRestartingTime > 0);
// now let's fail the job while it is restarting and check that the restarting time stops increasing
// for this to work, we have to fail with a SuppressRestartsException
executionGraph.fail(new SuppressRestartsException(new Exception()));
assertEquals(JobStatus.FAILED, executionGraph.getState());
previousRestartingTime = restartingTime.getValue();
for (int i = 0; i < 10; i++) {
long currentRestartingTime = restartingTime.getValue();
assertTrue(currentRestartingTime == previousRestartingTime);
previousRestartingTime = currentRestartingTime;
}
} finally {
executor.shutdownNow();
}
}
use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.
The class AvailableSlotsTest, method createAllocatedSlot.
static AllocatedSlot createAllocatedSlot(final ResourceID resourceId) {
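// back the slot with a mocked TaskManagerLocation that reports the given resource ID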
TaskManagerLocation mockTaskManagerLocation = mock(TaskManagerLocation.class);
when(mockTaskManagerLocation.getResourceID()).thenReturn(resourceId);
TaskManagerGateway mockTaskManagerGateway = mock(TaskManagerGateway.class);
return new AllocatedSlot(new AllocationID(), new JobID(), mockTaskManagerLocation, 0, DEFAULT_TESTING_PROFILE, mockTaskManagerGateway);
}
use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.
The class ExecutionVertexDeploymentTest, method testTddProducedPartitionsLazyScheduling.
/**
* Tests that the lazy scheduling flag is correctly forwarded to the produced partition descriptors.
*/
@Test
public void testTddProducedPartitionsLazyScheduling() throws Exception {
TestingUtils.QueuedActionExecutionContext context = TestingUtils.queuedActionExecutionContext();
ExecutionJobVertex jobVertex = getExecutionVertex(new JobVertexID(), context);
IntermediateResult result = new IntermediateResult(new IntermediateDataSetID(), jobVertex, 1, ResultPartitionType.PIPELINED);
ExecutionVertex vertex = new ExecutionVertex(jobVertex, 0, new IntermediateResult[] { result }, Time.minutes(1));
ExecutionEdge mockEdge = createMockExecutionEdge(1);
result.getPartitions()[0].addConsumerGroup();
result.getPartitions()[0].addConsumer(mockEdge, 0);
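// mock a slot hierarchy whose AllocatedSlot carries a fresh AllocationID for the deployment descriptor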
AllocatedSlot allocatedSlot = mock(AllocatedSlot.class);
when(allocatedSlot.getSlotAllocationId()).thenReturn(new AllocationID());
Slot root = mock(Slot.class);
when(root.getSlotNumber()).thenReturn(1);
SimpleSlot slot = mock(SimpleSlot.class);
when(slot.getRoot()).thenReturn(root);
when(slot.getAllocatedSlot()).thenReturn(allocatedSlot);
when(root.getAllocatedSlot()).thenReturn(allocatedSlot);
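// create a deployment descriptor for every schedule mode and verify that the lazy-deployment flag is forwarded to the produced partition descriptor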
for (ScheduleMode mode : ScheduleMode.values()) {
vertex.getExecutionGraph().setScheduleMode(mode);
TaskDeploymentDescriptor tdd = vertex.createDeploymentDescriptor(new ExecutionAttemptID(), slot, null, 1);
Collection<ResultPartitionDeploymentDescriptor> producedPartitions = tdd.getProducedPartitions();
assertEquals(1, producedPartitions.size());
ResultPartitionDeploymentDescriptor desc = producedPartitions.iterator().next();
assertEquals(mode.allowLazyDeployment(), desc.sendScheduleOrUpdateConsumersMessage());
}
}
use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.
The class SlotPoolTest, method testOfferSlot.
@Test
public void testOfferSlot() throws Exception {
ResourceID resourceID = new ResourceID("resource");
slotPool.registerTaskManager(resourceID);
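// request a slot; the returned future completes only once a matching slot is offered to the pool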
Future<SimpleSlot> future = slotPool.allocateSlot(mock(ScheduledUnit.class), DEFAULT_TESTING_PROFILE, null);
assertFalse(future.isDone());
ArgumentCaptor<SlotRequest> slotRequestArgumentCaptor = ArgumentCaptor.forClass(SlotRequest.class);
verify(resourceManagerGateway).requestSlot(any(UUID.class), any(UUID.class), slotRequestArgumentCaptor.capture(), any(Time.class));
final SlotRequest slotRequest = slotRequestArgumentCaptor.getValue();
// slot from unregistered resource
AllocatedSlot invalid = createAllocatedSlot(new ResourceID("unregistered"), slotRequest.getAllocationId(), jobId, DEFAULT_TESTING_PROFILE);
assertFalse(slotPool.offerSlot(invalid));
AllocatedSlot notRequested = createAllocatedSlot(resourceID, new AllocationID(), jobId, DEFAULT_TESTING_PROFILE);
// we'll also accept non-requested slots
assertTrue(slotPool.offerSlot(notRequested));
AllocatedSlot allocatedSlot = createAllocatedSlot(resourceID, slotRequest.getAllocationId(), jobId, DEFAULT_TESTING_PROFILE);
// accepted slot
assertTrue(slotPool.offerSlot(allocatedSlot));
SimpleSlot slot = future.get(1, TimeUnit.SECONDS);
assertTrue(future.isDone());
assertTrue(slot.isAlive());
// duplicate offer while the slot is in use
assertTrue(slotPool.offerSlot(allocatedSlot));
assertTrue(future.isDone());
assertTrue(slot.isAlive());
// duplicate offer after the slot has been released
slot.releaseSlot();
assertTrue(slot.isReleased());
assertTrue(slotPool.offerSlot(allocatedSlot));
}
use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.
The class TaskDeploymentDescriptorTest, method testSerialization.
@Test
public void testSerialization() {
try {
final JobID jobID = new JobID();
final JobVertexID vertexID = new JobVertexID();
final ExecutionAttemptID execId = new ExecutionAttemptID();
final AllocationID allocationId = new AllocationID();
final String jobName = "job name";
final String taskName = "task name";
final int numberOfKeyGroups = 1;
final int indexInSubtaskGroup = 0;
final int currentNumberOfSubtasks = 1;
final int attemptNumber = 0;
final Configuration jobConfiguration = new Configuration();
final Configuration taskConfiguration = new Configuration();
final Class<? extends AbstractInvokable> invokableClass = BatchTask.class;
final List<ResultPartitionDeploymentDescriptor> producedResults = new ArrayList<ResultPartitionDeploymentDescriptor>(0);
final List<InputGateDeploymentDescriptor> inputGates = new ArrayList<InputGateDeploymentDescriptor>(0);
final List<BlobKey> requiredJars = new ArrayList<BlobKey>(0);
final List<URL> requiredClasspaths = new ArrayList<URL>(0);
final SerializedValue<ExecutionConfig> executionConfig = new SerializedValue<>(new ExecutionConfig());
final SerializedValue<JobInformation> serializedJobInformation = new SerializedValue<>(new JobInformation(jobID, jobName, executionConfig, jobConfiguration, requiredJars, requiredClasspaths));
final SerializedValue<TaskInformation> serializedJobVertexInformation = new SerializedValue<>(new TaskInformation(vertexID, taskName, currentNumberOfSubtasks, numberOfKeyGroups, invokableClass.getName(), taskConfiguration));
final int targetSlotNumber = 47;
final TaskStateHandles taskStateHandles = new TaskStateHandles();
final TaskDeploymentDescriptor orig = new TaskDeploymentDescriptor(serializedJobInformation, serializedJobVertexInformation, execId, allocationId, indexInSubtaskGroup, attemptNumber, targetSlotNumber, taskStateHandles, producedResults, inputGates);
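// round-trip the descriptor through Java serialization and verify that the copy is a distinct but equal object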
final TaskDeploymentDescriptor copy = CommonTestUtils.createCopySerializable(orig);
assertFalse(orig.getSerializedJobInformation() == copy.getSerializedJobInformation());
assertFalse(orig.getSerializedTaskInformation() == copy.getSerializedTaskInformation());
assertFalse(orig.getExecutionAttemptId() == copy.getExecutionAttemptId());
assertFalse(orig.getTaskStateHandles() == copy.getTaskStateHandles());
assertFalse(orig.getProducedPartitions() == copy.getProducedPartitions());
assertFalse(orig.getInputGates() == copy.getInputGates());
assertEquals(orig.getSerializedJobInformation(), copy.getSerializedJobInformation());
assertEquals(orig.getSerializedTaskInformation(), copy.getSerializedTaskInformation());
assertEquals(orig.getExecutionAttemptId(), copy.getExecutionAttemptId());
assertEquals(orig.getAllocationId(), copy.getAllocationId());
assertEquals(orig.getSubtaskIndex(), copy.getSubtaskIndex());
assertEquals(orig.getAttemptNumber(), copy.getAttemptNumber());
assertEquals(orig.getTargetSlotNumber(), copy.getTargetSlotNumber());
assertEquals(orig.getTaskStateHandles(), copy.getTaskStateHandles());
assertEquals(orig.getProducedPartitions(), copy.getProducedPartitions());
assertEquals(orig.getInputGates(), copy.getInputGates());
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
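All of the examples above create AllocationIDs through the no-argument constructor, which generates a random identifier. As a minimal, self-contained sketch of that usage (not taken from the Flink sources; it only relies on the constructor and the value-based equality shown in the snippets above):

import org.apache.flink.runtime.clusterframework.types.AllocationID;

public class AllocationIdExample {
    public static void main(String[] args) {
        // the default constructor generates a statistically unique, random ID
        AllocationID first = new AllocationID();
        AllocationID second = new AllocationID();
        // two freshly generated IDs are distinct; equality compares the ID's value
        System.out.println(first + " equals " + second + ": " + first.equals(second));
    }
}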