Search in sources :

Example 1 with MetricReporter

use of org.apache.flink.metrics.reporter.MetricReporter in project flink by apache.

the class ExecutionGraphMetricsTest method testExecutionGraphRestartTimeMetric.

/**
	 * This test tests that the restarting time metric correctly displays restarting times.
	 */
@Test
public void testExecutionGraphRestartTimeMetric() throws JobException, IOException, InterruptedException {
    final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
    try {
        // setup execution graph with mocked scheduling logic
        int parallelism = 1;
        JobVertex jobVertex = new JobVertex("TestVertex");
        jobVertex.setParallelism(parallelism);
        jobVertex.setInvokableClass(NoOpInvokable.class);
        JobGraph jobGraph = new JobGraph("Test Job", jobVertex);
        Configuration config = new Configuration();
        config.setString(ConfigConstants.METRICS_REPORTERS_LIST, "test");
        config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, TestingReporter.class.getName());
        Configuration jobConfig = new Configuration();
        Time timeout = Time.seconds(10L);
        MetricRegistry metricRegistry = new MetricRegistry(MetricRegistryConfiguration.fromConfiguration(config));
        assertTrue(metricRegistry.getReporters().size() == 1);
        MetricReporter reporter = metricRegistry.getReporters().get(0);
        assertTrue(reporter instanceof TestingReporter);
        TestingReporter testingReporter = (TestingReporter) reporter;
        MetricGroup metricGroup = new JobManagerMetricGroup(metricRegistry, "localhost");
        Scheduler scheduler = mock(Scheduler.class);
        ResourceID taskManagerId = ResourceID.generate();
        TaskManagerLocation taskManagerLocation = mock(TaskManagerLocation.class);
        when(taskManagerLocation.getResourceID()).thenReturn(taskManagerId);
        when(taskManagerLocation.getHostname()).thenReturn("localhost");
        TaskManagerGateway taskManagerGateway = mock(TaskManagerGateway.class);
        Instance instance = mock(Instance.class);
        when(instance.getTaskManagerLocation()).thenReturn(taskManagerLocation);
        when(instance.getTaskManagerID()).thenReturn(taskManagerId);
        when(instance.getTaskManagerGateway()).thenReturn(taskManagerGateway);
        Slot rootSlot = mock(Slot.class);
        AllocatedSlot mockAllocatedSlot = mock(AllocatedSlot.class);
        when(mockAllocatedSlot.getSlotAllocationId()).thenReturn(new AllocationID());
        SimpleSlot simpleSlot = mock(SimpleSlot.class);
        when(simpleSlot.isAlive()).thenReturn(true);
        when(simpleSlot.getTaskManagerLocation()).thenReturn(taskManagerLocation);
        when(simpleSlot.getTaskManagerID()).thenReturn(taskManagerId);
        when(simpleSlot.getTaskManagerGateway()).thenReturn(taskManagerGateway);
        when(simpleSlot.setExecutedVertex(Matchers.any(Execution.class))).thenReturn(true);
        when(simpleSlot.getRoot()).thenReturn(rootSlot);
        when(simpleSlot.getAllocatedSlot()).thenReturn(mockAllocatedSlot);
        FlinkCompletableFuture<SimpleSlot> future = new FlinkCompletableFuture<>();
        future.complete(simpleSlot);
        when(scheduler.allocateSlot(any(ScheduledUnit.class), anyBoolean())).thenReturn(future);
        when(rootSlot.getSlotNumber()).thenReturn(0);
        when(taskManagerGateway.submitTask(any(TaskDeploymentDescriptor.class), any(Time.class))).thenReturn(FlinkCompletableFuture.completed(Acknowledge.get()));
        TestingRestartStrategy testingRestartStrategy = new TestingRestartStrategy();
        ExecutionGraph executionGraph = new ExecutionGraph(executor, executor, jobGraph.getJobID(), jobGraph.getName(), jobConfig, new SerializedValue<ExecutionConfig>(null), timeout, testingRestartStrategy, Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), scheduler, getClass().getClassLoader(), metricGroup);
        // get restarting time metric
        Metric metric = testingReporter.getMetric(ExecutionGraph.RESTARTING_TIME_METRIC_NAME);
        assertNotNull(metric);
        assertTrue(metric instanceof Gauge);
        @SuppressWarnings("unchecked") Gauge<Long> restartingTime = (Gauge<Long>) metric;
        // check that the restarting time is 0 since it's the initial start
        assertTrue(0L == restartingTime.getValue());
        executionGraph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());
        // start execution
        executionGraph.scheduleForExecution();
        assertTrue(0L == restartingTime.getValue());
        List<ExecutionAttemptID> executionIDs = new ArrayList<>();
        for (ExecutionVertex executionVertex : executionGraph.getAllExecutionVertices()) {
            executionIDs.add(executionVertex.getCurrentExecutionAttempt().getAttemptId());
        }
        // tell execution graph that the tasks are in state running --> job status switches to state running
        for (ExecutionAttemptID executionID : executionIDs) {
            executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.RUNNING));
        }
        assertEquals(JobStatus.RUNNING, executionGraph.getState());
        assertTrue(0L == restartingTime.getValue());
        // fail the job so that it goes into state restarting
        for (ExecutionAttemptID executionID : executionIDs) {
            executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.FAILED, new Exception()));
        }
        assertEquals(JobStatus.RESTARTING, executionGraph.getState());
        long firstRestartingTimestamp = executionGraph.getStatusTimestamp(JobStatus.RESTARTING);
        // wait some time so that the restarting time gauge shows a value different from 0
        Thread.sleep(50);
        long previousRestartingTime = restartingTime.getValue();
        // check that the restarting time is monotonically increasing
        for (int i = 0; i < 10; i++) {
            long currentRestartingTime = restartingTime.getValue();
            assertTrue(currentRestartingTime >= previousRestartingTime);
            previousRestartingTime = currentRestartingTime;
        }
        // check that we have measured some restarting time
        assertTrue(previousRestartingTime > 0);
        // restart job
        testingRestartStrategy.restartExecutionGraph();
        executionIDs.clear();
        for (ExecutionVertex executionVertex : executionGraph.getAllExecutionVertices()) {
            executionIDs.add(executionVertex.getCurrentExecutionAttempt().getAttemptId());
        }
        for (ExecutionAttemptID executionID : executionIDs) {
            executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.RUNNING));
        }
        assertEquals(JobStatus.RUNNING, executionGraph.getState());
        assertTrue(firstRestartingTimestamp != 0);
        previousRestartingTime = restartingTime.getValue();
        // check that the restarting time does not increase after we've reached the running state
        for (int i = 0; i < 10; i++) {
            long currentRestartingTime = restartingTime.getValue();
            assertTrue(currentRestartingTime == previousRestartingTime);
            previousRestartingTime = currentRestartingTime;
        }
        // fail job again
        for (ExecutionAttemptID executionID : executionIDs) {
            executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), executionID, ExecutionState.FAILED, new Exception()));
        }
        assertEquals(JobStatus.RESTARTING, executionGraph.getState());
        long secondRestartingTimestamp = executionGraph.getStatusTimestamp(JobStatus.RESTARTING);
        assertTrue(firstRestartingTimestamp != secondRestartingTimestamp);
        Thread.sleep(50);
        previousRestartingTime = restartingTime.getValue();
        // check that the restarting time is increasing again
        for (int i = 0; i < 10; i++) {
            long currentRestartingTime = restartingTime.getValue();
            assertTrue(currentRestartingTime >= previousRestartingTime);
            previousRestartingTime = currentRestartingTime;
        }
        assertTrue(previousRestartingTime > 0);
        // now lets fail the job while it is in restarting and see whether the restarting time then stops to increase
        // for this to work, we have to use a SuppressRestartException
        executionGraph.fail(new SuppressRestartsException(new Exception()));
        assertEquals(JobStatus.FAILED, executionGraph.getState());
        previousRestartingTime = restartingTime.getValue();
        for (int i = 0; i < 10; i++) {
            long currentRestartingTime = restartingTime.getValue();
            assertTrue(currentRestartingTime == previousRestartingTime);
            previousRestartingTime = currentRestartingTime;
        }
    } finally {
        executor.shutdownNow();
    }
}
Also used : JobManagerMetricGroup(org.apache.flink.runtime.metrics.groups.JobManagerMetricGroup) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Configuration(org.apache.flink.configuration.Configuration) Instance(org.apache.flink.runtime.instance.Instance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) MetricGroup(org.apache.flink.metrics.MetricGroup) JobManagerMetricGroup(org.apache.flink.runtime.metrics.groups.JobManagerMetricGroup) TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) ArrayList(java.util.ArrayList) Time(org.apache.flink.api.common.time.Time) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) Gauge(org.apache.flink.metrics.Gauge) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) ScheduledUnit(org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit) MetricReporter(org.apache.flink.metrics.reporter.MetricReporter) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) JobException(org.apache.flink.runtime.JobException) IOException(java.io.IOException) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) Slot(org.apache.flink.runtime.instance.Slot) AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) Metric(org.apache.flink.metrics.Metric) Test(org.junit.Test)

Example 2 with MetricReporter

use of org.apache.flink.metrics.reporter.MetricReporter in project flink by apache.

the class ReporterSetup method setupReporters.

private static List<ReporterSetup> setupReporters(Map<String, MetricReporterFactory> reporterFactories, List<Tuple2<String, Configuration>> reporterConfigurations) {
    List<ReporterSetup> reporterSetups = new ArrayList<>(reporterConfigurations.size());
    for (Tuple2<String, Configuration> reporterConfiguration : reporterConfigurations) {
        String reporterName = reporterConfiguration.f0;
        Configuration reporterConfig = reporterConfiguration.f1;
        try {
            Optional<MetricReporter> metricReporterOptional = loadReporter(reporterName, reporterConfig, reporterFactories);
            // massage user variables keys into scope format for parity to variable exclusion
            Map<String, String> additionalVariables = reporterConfig.get(ADDITIONAL_VARIABLES).entrySet().stream().collect(Collectors.toMap(e -> ScopeFormat.asVariable(e.getKey()), Entry::getValue));
            metricReporterOptional.ifPresent(reporter -> {
                MetricConfig metricConfig = new MetricConfig();
                reporterConfig.addAllToProperties(metricConfig);
                reporterSetups.add(createReporterSetup(reporterName, metricConfig, reporter, additionalVariables));
            });
        } catch (Throwable t) {
            LOG.error("Could not instantiate metrics reporter {}. Metrics might not be exposed/reported.", reporterName, t);
        }
    }
    return reporterSetups;
}
Also used : ConfigOptions(org.apache.flink.configuration.ConfigOptions) MetricConfig(org.apache.flink.metrics.MetricConfig) Tuple2(org.apache.flink.api.java.tuple.Tuple2) DelegatingConfiguration(org.apache.flink.configuration.DelegatingConfiguration) LoggerFactory(org.slf4j.LoggerFactory) InterceptInstantiationViaReflection(org.apache.flink.metrics.reporter.InterceptInstantiationViaReflection) HashMap(java.util.HashMap) MetricReporter(org.apache.flink.metrics.reporter.MetricReporter) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Matcher(java.util.regex.Matcher) InstantiateViaFactory(org.apache.flink.metrics.reporter.InstantiateViaFactory) ScopeFormat(org.apache.flink.runtime.metrics.scope.ScopeFormat) Map(java.util.Map) ConfigConstants(org.apache.flink.configuration.ConfigConstants) ServiceConfigurationError(java.util.ServiceConfigurationError) ConfigOption(org.apache.flink.configuration.ConfigOption) Nullable(javax.annotation.Nullable) MetricReporterFactory(org.apache.flink.metrics.reporter.MetricReporterFactory) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) Configuration(org.apache.flink.configuration.Configuration) Set(java.util.Set) ServiceLoader(java.util.ServiceLoader) MetricOptions(org.apache.flink.configuration.MetricOptions) Collectors(java.util.stream.Collectors) File(java.io.File) Iterators(org.apache.flink.shaded.guava30.com.google.common.collect.Iterators) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) List(java.util.List) PluginManager(org.apache.flink.core.plugin.PluginManager) Entry(java.util.Map.Entry) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) Collections(java.util.Collections) MetricConfig(org.apache.flink.metrics.MetricConfig) DelegatingConfiguration(org.apache.flink.configuration.DelegatingConfiguration) Configuration(org.apache.flink.configuration.Configuration) ArrayList(java.util.ArrayList) MetricReporter(org.apache.flink.metrics.reporter.MetricReporter)

Example 3 with MetricReporter

use of org.apache.flink.metrics.reporter.MetricReporter in project flink by apache.

the class JMXReporterTest method testPortConflictHandling.

/**
 * Verifies that multiple JMXReporters can be started on the same machine and register metrics
 * at the MBeanServer.
 *
 * @throws Exception if the attribute/mbean could not be found or the test is broken
 */
@Test
void testPortConflictHandling() throws Exception {
    final MetricReporter rep1 = new JMXReporter("9020-9035");
    final MetricReporter rep2 = new JMXReporter("9020-9035");
    Gauge<Integer> g1 = () -> 1;
    Gauge<Integer> g2 = () -> 2;
    rep1.notifyOfAddedMetric(g1, "rep1", metricGroup);
    rep2.notifyOfAddedMetric(g2, "rep2", metricGroup);
    MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer();
    ObjectName objectName1 = new ObjectName(JMX_DOMAIN_PREFIX + "taskmanager.rep1", JMXReporter.generateJmxTable(metricGroup.getAllVariables()));
    ObjectName objectName2 = new ObjectName(JMX_DOMAIN_PREFIX + "taskmanager.rep2", JMXReporter.generateJmxTable(metricGroup.getAllVariables()));
    assertThat(mBeanServer.getAttribute(objectName1, "Value")).isEqualTo(1);
    assertThat(mBeanServer.getAttribute(objectName2, "Value")).isEqualTo(2);
    rep1.notifyOfRemovedMetric(g1, "rep1", null);
    rep1.notifyOfRemovedMetric(g2, "rep2", null);
}
Also used : MetricReporter(org.apache.flink.metrics.reporter.MetricReporter) MBeanServer(javax.management.MBeanServer) ObjectName(javax.management.ObjectName) Test(org.junit.jupiter.api.Test)

Example 4 with MetricReporter

use of org.apache.flink.metrics.reporter.MetricReporter in project flink by apache.

the class ReporterSetupTest method testReporterSetupSupplier.

@Test
public void testReporterSetupSupplier() throws Exception {
    final Configuration config = new Configuration();
    config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "reporter1." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, TestReporter1.class.getName());
    final List<ReporterSetup> reporterSetups = ReporterSetup.fromConfiguration(config, null);
    Assert.assertEquals(1, reporterSetups.size());
    final ReporterSetup reporterSetup = reporterSetups.get(0);
    final MetricReporter metricReporter = reporterSetup.getReporter();
    Assert.assertThat(metricReporter, instanceOf(TestReporter1.class));
}
Also used : Configuration(org.apache.flink.configuration.Configuration) MetricReporter(org.apache.flink.metrics.reporter.MetricReporter) Test(org.junit.Test)

Example 5 with MetricReporter

use of org.apache.flink.metrics.reporter.MetricReporter in project flink by apache.

the class MetricRegistryTest method testConfigurableDelimiterForReportersInGroup.

@Test
public void testConfigurableDelimiterForReportersInGroup() {
    Configuration config = new Configuration();
    config.setString(ConfigConstants.METRICS_REPORTERS_LIST, "test1,test2,test3,test4");
    config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test1." + ConfigConstants.METRICS_REPORTER_SCOPE_DELIMITER, "_");
    config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test1." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, TestReporter8.class.getName());
    config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test2." + ConfigConstants.METRICS_REPORTER_SCOPE_DELIMITER, "-");
    config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test2." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, TestReporter8.class.getName());
    config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test3." + ConfigConstants.METRICS_REPORTER_SCOPE_DELIMITER, "AA");
    config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test3." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, TestReporter8.class.getName());
    config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test4." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, TestReporter8.class.getName());
    config.setString(ConfigConstants.METRICS_SCOPE_NAMING_TM, "A.B");
    MetricRegistry registry = new MetricRegistry(MetricRegistryConfiguration.fromConfiguration(config));
    List<MetricReporter> reporters = registry.getReporters();
    //test1  reporter
    ((TestReporter8) reporters.get(0)).expectedDelimiter = '_';
    //test2 reporter
    ((TestReporter8) reporters.get(1)).expectedDelimiter = '-';
    //test3 reporter, because 'AA' - not correct delimiter
    ((TestReporter8) reporters.get(2)).expectedDelimiter = GLOBAL_DEFAULT_DELIMITER;
    //for test4 reporter use global delimiter
    ((TestReporter8) reporters.get(3)).expectedDelimiter = GLOBAL_DEFAULT_DELIMITER;
    TaskManagerMetricGroup group = new TaskManagerMetricGroup(registry, "host", "id");
    group.counter("C");
    group.close();
    registry.shutdown();
    assertEquals(4, TestReporter8.numCorrectDelimitersForRegister);
    assertEquals(4, TestReporter8.numCorrectDelimitersForUnregister);
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) MetricReporter(org.apache.flink.metrics.reporter.MetricReporter) Test(org.junit.Test)

Aggregations

MetricReporter (org.apache.flink.metrics.reporter.MetricReporter)7 Configuration (org.apache.flink.configuration.Configuration)4 Test (org.junit.Test)3 ArrayList (java.util.ArrayList)2 View (org.apache.flink.metrics.View)2 FrontMetricGroup (org.apache.flink.runtime.metrics.groups.FrontMetricGroup)2 File (java.io.File)1 IOException (java.io.IOException)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 Optional (java.util.Optional)1 ServiceConfigurationError (java.util.ServiceConfigurationError)1 ServiceLoader (java.util.ServiceLoader)1 Set (java.util.Set)1 TreeSet (java.util.TreeSet)1