use of org.apache.flink.runtime.metrics.groups.TaskMetricGroup in project flink by apache.
the class AsyncWaitOperatorTest method testAsyncTimeout.
@Test
public void testAsyncTimeout() throws Exception {
final long timeout = 10L;
final AsyncWaitOperator<Integer, Integer> operator = new AsyncWaitOperator<>(new LazyAsyncFunction(), timeout, 2, AsyncDataStream.OutputMode.ORDERED);
final Environment mockEnvironment = mock(Environment.class);
final Configuration taskConfiguration = new Configuration();
final ExecutionConfig executionConfig = new ExecutionConfig();
final TaskMetricGroup metricGroup = new UnregisteredTaskMetricsGroup();
final TaskManagerRuntimeInfo taskManagerRuntimeInfo = new TestingTaskManagerRuntimeInfo();
final TaskInfo taskInfo = new TaskInfo("foobarTask", 1, 0, 1, 1);
when(mockEnvironment.getTaskConfiguration()).thenReturn(taskConfiguration);
when(mockEnvironment.getExecutionConfig()).thenReturn(executionConfig);
when(mockEnvironment.getMetricGroup()).thenReturn(metricGroup);
when(mockEnvironment.getTaskManagerInfo()).thenReturn(taskManagerRuntimeInfo);
when(mockEnvironment.getTaskInfo()).thenReturn(taskInfo);
when(mockEnvironment.getUserClassLoader()).thenReturn(AsyncWaitOperatorTest.class.getClassLoader());
final OneInputStreamOperatorTestHarness<Integer, Integer> testHarness = new OneInputStreamOperatorTestHarness<>(operator, IntSerializer.INSTANCE, mockEnvironment);
final long initialTime = 0L;
final ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
testHarness.open();
testHarness.setProcessingTime(initialTime);
synchronized (testHarness.getCheckpointLock()) {
testHarness.processElement(new StreamRecord<>(1, initialTime));
testHarness.setProcessingTime(initialTime + 5L);
testHarness.processElement(new StreamRecord<>(2, initialTime + 5L));
}
// trigger the timeout of the first stream record
testHarness.setProcessingTime(initialTime + timeout + 1L);
// allow the second async stream record to be processed
LazyAsyncFunction.countDown();
// wait until all async collectors in the buffer have been emitted out.
synchronized (testHarness.getCheckpointLock()) {
testHarness.close();
}
expectedOutput.add(new StreamRecord<>(2, initialTime + 5L));
TestHarnessUtil.assertOutputEquals("Output with watermark was not correct.", expectedOutput, testHarness.getOutput());
ArgumentCaptor<Throwable> argumentCaptor = ArgumentCaptor.forClass(Throwable.class);
verify(mockEnvironment).failExternally(argumentCaptor.capture());
Throwable failureCause = argumentCaptor.getValue();
Assert.assertNotNull(failureCause.getCause());
Assert.assertTrue(failureCause.getCause() instanceof ExecutionException);
Assert.assertNotNull(failureCause.getCause().getCause());
Assert.assertTrue(failureCause.getCause().getCause() instanceof TimeoutException);
}
use of org.apache.flink.runtime.metrics.groups.TaskMetricGroup in project flink by apache.
the class TaskExecutor method submitTask.
// ----------------------------------------------------------------------
// Task lifecycle RPCs
// ----------------------------------------------------------------------
@Override
public CompletableFuture<Acknowledge> submitTask(TaskDeploymentDescriptor tdd, JobMasterId jobMasterId, Time timeout) {
try {
final JobID jobId = tdd.getJobId();
final ExecutionAttemptID executionAttemptID = tdd.getExecutionAttemptId();
final JobTable.Connection jobManagerConnection = jobTable.getConnection(jobId).orElseThrow(() -> {
final String message = "Could not submit task because there is no JobManager " + "associated for the job " + jobId + '.';
log.debug(message);
return new TaskSubmissionException(message);
});
if (!Objects.equals(jobManagerConnection.getJobMasterId(), jobMasterId)) {
final String message = "Rejecting the task submission because the job manager leader id " + jobMasterId + " does not match the expected job manager leader id " + jobManagerConnection.getJobMasterId() + '.';
log.debug(message);
throw new TaskSubmissionException(message);
}
if (!taskSlotTable.tryMarkSlotActive(jobId, tdd.getAllocationId())) {
final String message = "No task slot allocated for job ID " + jobId + " and allocation ID " + tdd.getAllocationId() + '.';
log.debug(message);
throw new TaskSubmissionException(message);
}
// re-integrate offloaded data:
try {
tdd.loadBigData(taskExecutorBlobService.getPermanentBlobService());
} catch (IOException | ClassNotFoundException e) {
throw new TaskSubmissionException("Could not re-integrate offloaded TaskDeploymentDescriptor data.", e);
}
// deserialize the pre-serialized information
final JobInformation jobInformation;
final TaskInformation taskInformation;
try {
jobInformation = tdd.getSerializedJobInformation().deserializeValue(getClass().getClassLoader());
taskInformation = tdd.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
} catch (IOException | ClassNotFoundException e) {
throw new TaskSubmissionException("Could not deserialize the job or task information.", e);
}
if (!jobId.equals(jobInformation.getJobId())) {
throw new TaskSubmissionException("Inconsistent job ID information inside TaskDeploymentDescriptor (" + tdd.getJobId() + " vs. " + jobInformation.getJobId() + ")");
}
TaskManagerJobMetricGroup jobGroup = taskManagerMetricGroup.addJob(jobInformation.getJobId(), jobInformation.getJobName());
// note that a pre-existing job group can NOT be closed concurrently - this is done by
// the same TM thread in removeJobMetricsGroup
TaskMetricGroup taskMetricGroup = jobGroup.addTask(taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskInformation.getTaskName(), tdd.getSubtaskIndex(), tdd.getAttemptNumber());
InputSplitProvider inputSplitProvider = new RpcInputSplitProvider(jobManagerConnection.getJobManagerGateway(), taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskManagerConfiguration.getRpcTimeout());
final TaskOperatorEventGateway taskOperatorEventGateway = new RpcTaskOperatorEventGateway(jobManagerConnection.getJobManagerGateway(), executionAttemptID, (t) -> runAsync(() -> failTask(executionAttemptID, t)));
TaskManagerActions taskManagerActions = jobManagerConnection.getTaskManagerActions();
CheckpointResponder checkpointResponder = jobManagerConnection.getCheckpointResponder();
GlobalAggregateManager aggregateManager = jobManagerConnection.getGlobalAggregateManager();
LibraryCacheManager.ClassLoaderHandle classLoaderHandle = jobManagerConnection.getClassLoaderHandle();
ResultPartitionConsumableNotifier resultPartitionConsumableNotifier = jobManagerConnection.getResultPartitionConsumableNotifier();
PartitionProducerStateChecker partitionStateChecker = jobManagerConnection.getPartitionStateChecker();
final TaskLocalStateStore localStateStore = localStateStoresManager.localStateStoreForSubtask(jobId, tdd.getAllocationId(), taskInformation.getJobVertexId(), tdd.getSubtaskIndex());
// TODO: Pass config value from user program and do overriding here.
final StateChangelogStorage<?> changelogStorage;
try {
changelogStorage = changelogStoragesManager.stateChangelogStorageForJob(jobId, taskManagerConfiguration.getConfiguration(), jobGroup);
} catch (IOException e) {
throw new TaskSubmissionException(e);
}
final JobManagerTaskRestore taskRestore = tdd.getTaskRestore();
final TaskStateManager taskStateManager = new TaskStateManagerImpl(jobId, tdd.getExecutionAttemptId(), localStateStore, changelogStorage, taskRestore, checkpointResponder);
MemoryManager memoryManager;
try {
memoryManager = taskSlotTable.getTaskMemoryManager(tdd.getAllocationId());
} catch (SlotNotFoundException e) {
throw new TaskSubmissionException("Could not submit task.", e);
}
Task task = new Task(jobInformation, taskInformation, tdd.getExecutionAttemptId(), tdd.getAllocationId(), tdd.getSubtaskIndex(), tdd.getAttemptNumber(), tdd.getProducedPartitions(), tdd.getInputGates(), memoryManager, taskExecutorServices.getIOManager(), taskExecutorServices.getShuffleEnvironment(), taskExecutorServices.getKvStateService(), taskExecutorServices.getBroadcastVariableManager(), taskExecutorServices.getTaskEventDispatcher(), externalResourceInfoProvider, taskStateManager, taskManagerActions, inputSplitProvider, checkpointResponder, taskOperatorEventGateway, aggregateManager, classLoaderHandle, fileCache, taskManagerConfiguration, taskMetricGroup, resultPartitionConsumableNotifier, partitionStateChecker, getRpcService().getScheduledExecutor());
taskMetricGroup.gauge(MetricNames.IS_BACK_PRESSURED, task::isBackPressured);
log.info("Received task {} ({}), deploy into slot with allocation id {}.", task.getTaskInfo().getTaskNameWithSubtasks(), tdd.getExecutionAttemptId(), tdd.getAllocationId());
boolean taskAdded;
try {
taskAdded = taskSlotTable.addTask(task);
} catch (SlotNotFoundException | SlotNotActiveException e) {
throw new TaskSubmissionException("Could not submit task.", e);
}
if (taskAdded) {
task.startTaskThread();
setupResultPartitionBookkeeping(tdd.getJobId(), tdd.getProducedPartitions(), task.getTerminationFuture());
return CompletableFuture.completedFuture(Acknowledge.get());
} else {
final String message = "TaskManager already contains a task for id " + task.getExecutionId() + '.';
log.debug(message);
throw new TaskSubmissionException(message);
}
} catch (TaskSubmissionException e) {
return FutureUtils.completedExceptionally(e);
}
}
use of org.apache.flink.runtime.metrics.groups.TaskMetricGroup in project flink by apache.
the class NettyShuffleEnvironmentTest method testRegisteringDebloatingMetrics.
@Test
@SuppressWarnings("unchecked")
public void testRegisteringDebloatingMetrics() throws IOException {
Map<String, Metric> metrics = new ConcurrentHashMap<>();
final TaskMetricGroup taskMetricGroup = createTaskMetricGroup(metrics);
final Configuration config = new Configuration();
config.set(TaskManagerOptions.BUFFER_DEBLOAT_ENABLED, true);
final NettyShuffleEnvironment shuffleEnvironment = new NettyShuffleEnvironmentBuilder().setDebloatConfig(BufferDebloatConfiguration.fromConfiguration(config)).build();
shuffleEnvironment.createInputGates(shuffleEnvironment.createShuffleIOOwnerContext("test", new ExecutionAttemptID(), taskMetricGroup), (dsid, id, consumer) -> {
}, Arrays.asList(new InputGateDeploymentDescriptor(new IntermediateDataSetID(), ResultPartitionType.PIPELINED, 0, new ShuffleDescriptor[] { new NettyShuffleDescriptorBuilder().buildRemote() }), new InputGateDeploymentDescriptor(new IntermediateDataSetID(), ResultPartitionType.PIPELINED, 1, new ShuffleDescriptor[] { new NettyShuffleDescriptorBuilder().buildRemote() })));
for (int i = 0; i < 2; i++) {
assertEquals(TaskManagerOptions.MEMORY_SEGMENT_SIZE.defaultValue().getBytes(), (long) ((Gauge<Integer>) getDebloatingMetric(metrics, i, MetricNames.DEBLOATED_BUFFER_SIZE)).getValue());
assertEquals(0L, (long) ((Gauge<Long>) getDebloatingMetric(metrics, i, MetricNames.ESTIMATED_TIME_TO_CONSUME_BUFFERS)).getValue());
}
}
use of org.apache.flink.runtime.metrics.groups.TaskMetricGroup in project flink by apache.
the class MultipleInputStreamTaskTest method testCheckpointBarrierMetrics.
/**
* Tests the checkpoint related metrics are registered into {@link TaskIOMetricGroup} correctly
* while generating the {@link TwoInputStreamTask}.
*/
@Test
public void testCheckpointBarrierMetrics() throws Exception {
final Map<String, Metric> metrics = new ConcurrentHashMap<>();
final TaskMetricGroup taskMetricGroup = StreamTaskTestHarness.createTaskMetricGroup(metrics);
try (StreamTaskMailboxTestHarness<String> testHarness = new StreamTaskMailboxTestHarnessBuilder<>(MultipleInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO).addInput(BasicTypeInfo.STRING_TYPE_INFO, 2).addInput(BasicTypeInfo.INT_TYPE_INFO, 2).addInput(BasicTypeInfo.DOUBLE_TYPE_INFO, 2).setupOutputForSingletonOperatorChain(new MapToStringMultipleInputOperatorFactory(3)).setTaskMetricGroup(taskMetricGroup).build()) {
assertThat(metrics, IsMapContaining.hasKey(MetricNames.CHECKPOINT_ALIGNMENT_TIME));
assertThat(metrics, IsMapContaining.hasKey(MetricNames.CHECKPOINT_START_DELAY_TIME));
testHarness.endInput();
testHarness.waitForTaskCompletion();
}
}
use of org.apache.flink.runtime.metrics.groups.TaskMetricGroup in project flink by apache.
the class MultipleInputStreamTaskTest method testMetrics.
/**
* With chained sources, task's and main operator's number of input records are two different
* things. The first one should take into account only records comming in from the network,
* ignoring records produced inside the task itself (like via a chained source). Main operator
* should on the other hand report all records from all of the inputs (regardless if it's a
* network or chained input).
*/
@Test
public void testMetrics() throws Exception {
HashMap<String, OperatorMetricGroup> operatorMetrics = new HashMap<>();
TaskMetricGroup taskMetricGroup = new UnregisteredMetricGroups.UnregisteredTaskMetricGroup() {
@Override
public InternalOperatorMetricGroup getOrAddOperator(OperatorID operatorID, String name) {
InternalOperatorMetricGroup operatorMetricGroup = super.getOrAddOperator(operatorID, name);
operatorMetrics.put(name, operatorMetricGroup);
return operatorMetricGroup;
}
};
String mainOperatorName = "MainOperator";
try (StreamTaskMailboxTestHarness<String> testHarness = new StreamTaskMailboxTestHarnessBuilder<>(MultipleInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO).modifyExecutionConfig(applyObjectReuse(objectReuse)).addInput(BasicTypeInfo.STRING_TYPE_INFO).addSourceInput(new SourceOperatorFactory<>(new LifeCycleTrackingMockSource(Boundedness.BOUNDED, 1), WatermarkStrategy.noWatermarks()), BasicTypeInfo.INT_TYPE_INFO).addInput(BasicTypeInfo.STRING_TYPE_INFO).setupOperatorChain(new MapToStringMultipleInputOperatorFactory(3)).name(mainOperatorName).chain(new OneInputStreamTaskTest.DuplicatingOperator(), BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig())).chain(new OneInputStreamTaskTest.DuplicatingOperator(), BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig())).chain(new OneInputStreamTaskTest.DuplicatingOperator(), BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig())).finish().setTaskMetricGroup(taskMetricGroup).build()) {
assertTrue(operatorMetrics.containsKey(mainOperatorName));
OperatorMetricGroup mainOperatorMetrics = operatorMetrics.get(mainOperatorName);
Counter numRecordsInCounter = taskMetricGroup.getIOMetricGroup().getNumRecordsInCounter();
Counter numRecordsOutCounter = taskMetricGroup.getIOMetricGroup().getNumRecordsOutCounter();
int numRecords1 = 5;
int numRecords2 = 3;
int numRecords3 = 2;
// end prematurely
for (int x = 0; x < numRecords2; x++) {
addSourceRecords(testHarness, 1, 42);
}
for (int x = 0; x < numRecords1; x++) {
testHarness.processElement(new StreamRecord<>("hello"), 0, 0);
}
for (int x = 0; x < numRecords3; x++) {
testHarness.processElement(new StreamRecord<>("hello"), 1, 0);
}
int networkRecordsIn = numRecords1 + numRecords3;
int mainOperatorRecordsIn = networkRecordsIn + numRecords2;
int totalRecordsOut = mainOperatorRecordsIn * 2 * 2 * // there are three operators duplicating the records
2;
assertEquals(mainOperatorRecordsIn, mainOperatorMetrics.getIOMetricGroup().getNumRecordsInCounter().getCount());
assertEquals(networkRecordsIn, numRecordsInCounter.getCount());
assertEquals(totalRecordsOut, numRecordsOutCounter.getCount());
testHarness.waitForTaskCompletion();
}
}
Aggregations