use of org.apache.flink.metrics.reporter.MetricReporter in project flink by apache.
the class ExecutionGraphMetricsTest method testExecutionGraphRestartTimeMetric.
/**
 * Tests that the restarting time metric of the {@link ExecutionGraph} reports restarting times
 * correctly.
 *
 * <p>The test drives a single-vertex job through the states RUNNING, RESTARTING, RUNNING,
 * RESTARTING and finally FAILED, and checks the gauge registered under
 * {@link ExecutionGraph#RESTARTING_TIME_METRIC_NAME} along the way:
 * <ul>
 *   <li>it reports 0 before the first failure,</li>
 *   <li>it does not decrease (and becomes positive) while the job is RESTARTING,</li>
 *   <li>it stays constant once the job is RUNNING again or has FAILED.</li>
 * </ul>
 *
 * @throws JobException declared by the execution graph calls used in this test
 * @throws IOException declared by the execution graph calls used in this test
 * @throws InterruptedException if the test thread is interrupted while sleeping
 */
@Test
public void testExecutionGraphRestartTimeMetric() throws JobException, IOException, InterruptedException {
    final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
    // created inside the try block; kept here so that it can be shut down in the finally block
    MetricRegistry metricRegistry = null;
    try {
        // setup execution graph with mocked scheduling logic
        int parallelism = 1;
        JobVertex jobVertex = new JobVertex("TestVertex");
        jobVertex.setParallelism(parallelism);
        jobVertex.setInvokableClass(NoOpInvokable.class);
        JobGraph jobGraph = new JobGraph("Test Job", jobVertex);

        // register a single TestingReporter so that the gauge can be looked up by name
        Configuration config = new Configuration();
        config.setString(ConfigConstants.METRICS_REPORTERS_LIST, "test");
        config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, TestingReporter.class.getName());
        Configuration jobConfig = new Configuration();
        Time timeout = Time.seconds(10L);
        metricRegistry = new MetricRegistry(MetricRegistryConfiguration.fromConfiguration(config));
        assertTrue(metricRegistry.getReporters().size() == 1);
        MetricReporter reporter = metricRegistry.getReporters().get(0);
        assertTrue(reporter instanceof TestingReporter);
        TestingReporter testingReporter = (TestingReporter) reporter;
        MetricGroup metricGroup = new JobManagerMetricGroup(metricRegistry, "localhost");

        // mocked scheduler which always hands out the same, already completed slot future
        Scheduler scheduler = mock(Scheduler.class);
        ResourceID taskManagerId = ResourceID.generate();
        TaskManagerLocation taskManagerLocation = mock(TaskManagerLocation.class);
        when(taskManagerLocation.getResourceID()).thenReturn(taskManagerId);
        when(taskManagerLocation.getHostname()).thenReturn("localhost");
        TaskManagerGateway taskManagerGateway = mock(TaskManagerGateway.class);
        // NOTE(review): this Instance mock is not handed to anything below in this method —
        // presumably a leftover; confirm before removing
        Instance instance = mock(Instance.class);
        when(instance.getTaskManagerLocation()).thenReturn(taskManagerLocation);
        when(instance.getTaskManagerID()).thenReturn(taskManagerId);
        when(instance.getTaskManagerGateway()).thenReturn(taskManagerGateway);
        Slot rootSlot = mock(Slot.class);
        AllocatedSlot mockAllocatedSlot = mock(AllocatedSlot.class);
        when(mockAllocatedSlot.getSlotAllocationId()).thenReturn(new AllocationID());
        SimpleSlot simpleSlot = mock(SimpleSlot.class);
        when(simpleSlot.isAlive()).thenReturn(true);
        when(simpleSlot.getTaskManagerLocation()).thenReturn(taskManagerLocation);
        when(simpleSlot.getTaskManagerID()).thenReturn(taskManagerId);
        when(simpleSlot.getTaskManagerGateway()).thenReturn(taskManagerGateway);
        when(simpleSlot.setExecutedVertex(Matchers.any(Execution.class))).thenReturn(true);
        when(simpleSlot.getRoot()).thenReturn(rootSlot);
        when(simpleSlot.getAllocatedSlot()).thenReturn(mockAllocatedSlot);
        FlinkCompletableFuture<SimpleSlot> future = new FlinkCompletableFuture<>();
        future.complete(simpleSlot);
        when(scheduler.allocateSlot(any(ScheduledUnit.class), anyBoolean())).thenReturn(future);
        when(rootSlot.getSlotNumber()).thenReturn(0);
        when(taskManagerGateway.submitTask(any(TaskDeploymentDescriptor.class), any(Time.class))).thenReturn(FlinkCompletableFuture.completed(Acknowledge.get()));

        // the restart is triggered explicitly further down via the testing restart strategy
        TestingRestartStrategy testingRestartStrategy = new TestingRestartStrategy();
        ExecutionGraph executionGraph = new ExecutionGraph(executor, executor, jobGraph.getJobID(), jobGraph.getName(), jobConfig, new SerializedValue<ExecutionConfig>(null), timeout, testingRestartStrategy, Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), scheduler, getClass().getClassLoader(), metricGroup);

        // get restarting time metric
        Metric metric = testingReporter.getMetric(ExecutionGraph.RESTARTING_TIME_METRIC_NAME);
        assertNotNull(metric);
        assertTrue(metric instanceof Gauge);
        @SuppressWarnings("unchecked") Gauge<Long> restartingTime = (Gauge<Long>) metric;

        // check that the restarting time is 0 since it's the initial start
        assertTrue(0L == restartingTime.getValue());
        executionGraph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());

        // start execution
        executionGraph.scheduleForExecution();
        assertTrue(0L == restartingTime.getValue());
        List<ExecutionAttemptID> executionIDs = new ArrayList<>();
        for (ExecutionVertex executionVertex : executionGraph.getAllExecutionVertices()) {
            executionIDs.add(executionVertex.getCurrentExecutionAttempt().getAttemptId());
        }

        // tell execution graph that the tasks are in state running --> job status switches to state running
        for (ExecutionAttemptID executionID : executionIDs) {
            executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.RUNNING));
        }
        assertEquals(JobStatus.RUNNING, executionGraph.getState());
        assertTrue(0L == restartingTime.getValue());

        // fail the job so that it goes into state restarting
        for (ExecutionAttemptID executionID : executionIDs) {
            executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.FAILED, new Exception()));
        }
        assertEquals(JobStatus.RESTARTING, executionGraph.getState());
        long firstRestartingTimestamp = executionGraph.getStatusTimestamp(JobStatus.RESTARTING);

        // wait some time so that the restarting time gauge shows a value different from 0
        Thread.sleep(50);
        long previousRestartingTime = restartingTime.getValue();

        // check that the restarting time is monotonically increasing
        for (int i = 0; i < 10; i++) {
            long currentRestartingTime = restartingTime.getValue();
            assertTrue(currentRestartingTime >= previousRestartingTime);
            previousRestartingTime = currentRestartingTime;
        }

        // check that we have measured some restarting time
        assertTrue(previousRestartingTime > 0);

        // restart job
        testingRestartStrategy.restartExecutionGraph();

        // collect the attempt ids again after the restart before reporting RUNNING for them
        executionIDs.clear();
        for (ExecutionVertex executionVertex : executionGraph.getAllExecutionVertices()) {
            executionIDs.add(executionVertex.getCurrentExecutionAttempt().getAttemptId());
        }
        for (ExecutionAttemptID executionID : executionIDs) {
            executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.RUNNING));
        }
        assertEquals(JobStatus.RUNNING, executionGraph.getState());
        assertTrue(firstRestartingTimestamp != 0);
        previousRestartingTime = restartingTime.getValue();

        // check that the restarting time does not increase after we've reached the running state
        for (int i = 0; i < 10; i++) {
            long currentRestartingTime = restartingTime.getValue();
            assertTrue(currentRestartingTime == previousRestartingTime);
            previousRestartingTime = currentRestartingTime;
        }

        // fail job again
        for (ExecutionAttemptID executionID : executionIDs) {
            executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.FAILED, new Exception()));
        }
        assertEquals(JobStatus.RESTARTING, executionGraph.getState());
        long secondRestartingTimestamp = executionGraph.getStatusTimestamp(JobStatus.RESTARTING);
        assertTrue(firstRestartingTimestamp != secondRestartingTimestamp);
        Thread.sleep(50);
        previousRestartingTime = restartingTime.getValue();

        // check that the restarting time is increasing again
        for (int i = 0; i < 10; i++) {
            long currentRestartingTime = restartingTime.getValue();
            assertTrue(currentRestartingTime >= previousRestartingTime);
            previousRestartingTime = currentRestartingTime;
        }
        assertTrue(previousRestartingTime > 0);

        // now lets fail the job while it is in restarting and see whether the restarting time then stops to increase
        // for this to work, we have to use a SuppressRestartException
        executionGraph.fail(new SuppressRestartsException(new Exception()));
        assertEquals(JobStatus.FAILED, executionGraph.getState());
        previousRestartingTime = restartingTime.getValue();
        for (int i = 0; i < 10; i++) {
            long currentRestartingTime = restartingTime.getValue();
            assertTrue(currentRestartingTime == previousRestartingTime);
            previousRestartingTime = currentRestartingTime;
        }
    } finally {
        try {
            // shut down the registry (if it was created) so that its reporter does not outlive the test
            if (metricRegistry != null) {
                metricRegistry.shutdown();
            }
        } finally {
            // always stop the executor, even if the registry shutdown fails
            executor.shutdownNow();
        }
    }
}
use of org.apache.flink.metrics.reporter.MetricReporter in project flink by apache.
the class ReporterSetup method setupReporters.
/**
 * Creates a {@link ReporterSetup} for every named reporter configuration whose reporter could be
 * loaded.
 *
 * <p>A failure while handling one reporter is logged and does not prevent the remaining
 * reporters from being set up.
 *
 * @param reporterFactories factories passed on to {@code loadReporter}
 * @param reporterConfigurations pairs of reporter name ({@code f0}) and reporter configuration
 *     ({@code f1})
 * @return the setups of all reporters that were loaded successfully, in iteration order
 */
private static List<ReporterSetup> setupReporters(Map<String, MetricReporterFactory> reporterFactories, List<Tuple2<String, Configuration>> reporterConfigurations) {
    final List<ReporterSetup> result = new ArrayList<>(reporterConfigurations.size());

    for (Tuple2<String, Configuration> namedConfiguration : reporterConfigurations) {
        final String name = namedConfiguration.f0;
        final Configuration configuration = namedConfiguration.f1;

        try {
            final Optional<MetricReporter> loadedReporter =
                    loadReporter(name, configuration, reporterFactories);

            // massage user variables keys into scope format for parity to variable exclusion
            final Map<String, String> scopedVariables =
                    configuration.get(ADDITIONAL_VARIABLES).entrySet().stream()
                            .collect(
                                    Collectors.toMap(
                                            variable -> ScopeFormat.asVariable(variable.getKey()),
                                            Entry::getValue));

            if (loadedReporter.isPresent()) {
                final MetricConfig reporterProperties = new MetricConfig();
                configuration.addAllToProperties(reporterProperties);
                result.add(
                        createReporterSetup(
                                name, reporterProperties, loadedReporter.get(), scopedVariables));
            }
        } catch (Throwable t) {
            // best effort: a broken reporter must not prevent the other reporters from starting
            LOG.error("Could not instantiate metrics reporter {}. Metrics might not be exposed/reported.", name, t);
        }
    }

    return result;
}
use of org.apache.flink.metrics.reporter.MetricReporter in project flink by apache.
the class JMXReporterTest method testPortConflictHandling.
/**
 * Verifies that multiple JMXReporters can be started on the same machine and register metrics
 * at the MBeanServer.
 *
 * <p>Both reporters are configured with the same port range. Each one registers its own gauge,
 * and both gauges must afterwards be readable through the platform MBeanServer.
 *
 * @throws Exception if the attribute/mbean could not be found or the test is broken
 */
@Test
void testPortConflictHandling() throws Exception {
    final MetricReporter rep1 = new JMXReporter("9020-9035");
    final MetricReporter rep2 = new JMXReporter("9020-9035");
    final Gauge<Integer> g1 = () -> 1;
    final Gauge<Integer> g2 = () -> 2;
    try {
        rep1.notifyOfAddedMetric(g1, "rep1", metricGroup);
        rep2.notifyOfAddedMetric(g2, "rep2", metricGroup);

        MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer();
        ObjectName objectName1 = new ObjectName(JMX_DOMAIN_PREFIX + "taskmanager.rep1", JMXReporter.generateJmxTable(metricGroup.getAllVariables()));
        ObjectName objectName2 = new ObjectName(JMX_DOMAIN_PREFIX + "taskmanager.rep2", JMXReporter.generateJmxTable(metricGroup.getAllVariables()));

        assertThat(mBeanServer.getAttribute(objectName1, "Value")).isEqualTo(1);
        assertThat(mBeanServer.getAttribute(objectName2, "Value")).isEqualTo(2);
    } finally {
        // each gauge is removed through the reporter it was added to (g2 was added via rep2),
        // also when an assertion above failed
        rep1.notifyOfRemovedMetric(g1, "rep1", null);
        rep2.notifyOfRemovedMetric(g2, "rep2", null);
        // release whatever the reporters acquired for the configured port range
        rep1.close();
        rep2.close();
    }
}
use of org.apache.flink.metrics.reporter.MetricReporter in project flink by apache.
the class ReporterSetupTest method testReporterSetupSupplier.
/**
 * Checks that a reporter configured only through its class name results in exactly one
 * {@link ReporterSetup} whose reporter is an instance of the configured class.
 */
@Test
public void testReporterSetupSupplier() throws Exception {
    final String reporterClassKey =
            ConfigConstants.METRICS_REPORTER_PREFIX + "reporter1." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX;

    final Configuration configuration = new Configuration();
    configuration.setString(reporterClassKey, TestReporter1.class.getName());

    final List<ReporterSetup> setups = ReporterSetup.fromConfiguration(configuration, null);
    Assert.assertEquals(1, setups.size());

    final ReporterSetup onlySetup = setups.get(0);
    final MetricReporter configuredReporter = onlySetup.getReporter();
    Assert.assertThat(configuredReporter, instanceOf(TestReporter1.class));
}
use of org.apache.flink.metrics.reporter.MetricReporter in project flink by apache.
the class MetricRegistryTest method testConfigurableDelimiterForReportersInGroup.
/**
 * Configures four {@code TestReporter8} instances with different scope delimiter settings,
 * registers and unregisters one counter on a task manager metric group, and expects all four
 * reporters to have counted a correct delimiter for both the registration and the
 * unregistration.
 */
@Test
public void testConfigurableDelimiterForReportersInGroup() {
    final String reporterPrefix = ConfigConstants.METRICS_REPORTER_PREFIX;
    final String classSuffix = ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX;
    final String delimiterSuffix = ConfigConstants.METRICS_REPORTER_SCOPE_DELIMITER;
    final String reporterClassName = TestReporter8.class.getName();

    final Configuration configuration = new Configuration();
    configuration.setString(ConfigConstants.METRICS_REPORTERS_LIST, "test1,test2,test3,test4");

    // test1 and test2 use valid single-character delimiters
    configuration.setString(reporterPrefix + "test1." + delimiterSuffix, "_");
    configuration.setString(reporterPrefix + "test1." + classSuffix, reporterClassName);
    configuration.setString(reporterPrefix + "test2." + delimiterSuffix, "-");
    configuration.setString(reporterPrefix + "test2." + classSuffix, reporterClassName);
    // test3 configures 'AA', test4 configures no delimiter at all
    configuration.setString(reporterPrefix + "test3." + delimiterSuffix, "AA");
    configuration.setString(reporterPrefix + "test3." + classSuffix, reporterClassName);
    configuration.setString(reporterPrefix + "test4." + classSuffix, reporterClassName);
    configuration.setString(ConfigConstants.METRICS_SCOPE_NAMING_TM, "A.B");

    final MetricRegistry metricRegistry =
            new MetricRegistry(MetricRegistryConfiguration.fromConfiguration(configuration));
    final List<MetricReporter> configuredReporters = metricRegistry.getReporters();

    final TestReporter8 firstReporter = (TestReporter8) configuredReporters.get(0);
    final TestReporter8 secondReporter = (TestReporter8) configuredReporters.get(1);
    final TestReporter8 thirdReporter = (TestReporter8) configuredReporters.get(2);
    final TestReporter8 fourthReporter = (TestReporter8) configuredReporters.get(3);

    //test1 reporter
    firstReporter.expectedDelimiter = '_';
    //test2 reporter
    secondReporter.expectedDelimiter = '-';
    //test3 reporter, because 'AA' - not correct delimiter
    thirdReporter.expectedDelimiter = GLOBAL_DEFAULT_DELIMITER;
    //for test4 reporter use global delimiter
    fourthReporter.expectedDelimiter = GLOBAL_DEFAULT_DELIMITER;

    // register one counter, then close the group again before shutting the registry down
    final TaskManagerMetricGroup taskManagerGroup = new TaskManagerMetricGroup(metricRegistry, "host", "id");
    taskManagerGroup.counter("C");
    taskManagerGroup.close();
    metricRegistry.shutdown();

    assertEquals(4, TestReporter8.numCorrectDelimitersForRegister);
    assertEquals(4, TestReporter8.numCorrectDelimitersForUnregister);
}
Aggregations