Use of org.apache.flink.runtime.executiongraph.metrics.DownTimeGauge in the Apache Flink project.
From the class SchedulerBase, method registerJobMetrics.
/**
 * Registers the job-level metrics on the given metric group.
 *
 * <p>Registered, in this order: the down-time and up-time gauges built from {@code
 * jobStatusProvider}, the restart gauge under both {@code MetricNames.NUM_RESTARTS} and {@code
 * MetricNames.FULL_RESTARTS}, the job status metrics, and finally the deployment time metrics.
 * The newly created job status metrics instance is also handed to {@code
 * jobStatusListenerRegistrar}.
 *
 * @param metrics group on which every metric is registered
 * @param jobStatusProvider provider passed to the up-time and down-time gauges
 * @param numberOfRestarts gauge published under both restart metric names
 * @param deploymentTimeMetrics deployment metrics asked to register themselves on {@code metrics}
 * @param jobStatusListenerRegistrar receives the job status metrics instance created here
 * @param initializationTimestamp timestamp passed to the job status metrics constructor
 * @param jobStatusMetricsSettings settings passed to the job status metrics constructor
 */
public static void registerJobMetrics(
        MetricGroup metrics,
        JobStatusProvider jobStatusProvider,
        Gauge<Long> numberOfRestarts,
        DeploymentStateTimeMetrics deploymentTimeMetrics,
        Consumer<JobStatusListener> jobStatusListenerRegistrar,
        long initializationTimestamp,
        MetricOptions.JobStatusMetricsSettings jobStatusMetricsSettings) {
    // availability gauges, both driven by the same job status provider
    final DownTimeGauge downTime = new DownTimeGauge(jobStatusProvider);
    metrics.gauge(DownTimeGauge.METRIC_NAME, downTime);
    final UpTimeGauge upTime = new UpTimeGauge(jobStatusProvider);
    metrics.gauge(UpTimeGauge.METRIC_NAME, upTime);

    // the same restart gauge is exposed under two metric names
    metrics.gauge(MetricNames.NUM_RESTARTS, numberOfRestarts);
    metrics.gauge(MetricNames.FULL_RESTARTS, numberOfRestarts);

    // the status metrics are registered first and only then passed to the registrar
    final JobStatusMetrics statusMetrics =
            new JobStatusMetrics(initializationTimestamp, jobStatusMetricsSettings);
    statusMetrics.registerMetrics(metrics);
    jobStatusListenerRegistrar.accept(statusMetrics);

    deploymentTimeMetrics.registerMetrics(metrics);
}
Use of org.apache.flink.runtime.executiongraph.metrics.DownTimeGauge in the Apache Flink project.
From the class AdaptiveSchedulerTest, method testStatusMetrics.
/**
 * Checks the up-time, down-time and restarting-time gauges captured from the metric registry:
 * first after the initial task submission, then again after more slots have been offered.
 */
@Test
public void testStatusMetrics() throws Exception {
    final CompletableFuture<UpTimeGauge> upTimeGaugeFuture = new CompletableFuture<>();
    final CompletableFuture<DownTimeGauge> downTimeGaugeFuture = new CompletableFuture<>();
    // the restarting-time metric stands in for the generic status time metrics
    final CompletableFuture<Gauge<Long>> restartingTimeGaugeFuture = new CompletableFuture<>();

    // complete the matching future whenever one of the gauges of interest is registered
    final MetricRegistry registry =
            TestingMetricRegistry.builder()
                    .setRegisterConsumer(
                            (metric, name, group) -> {
                                if (name.equals(UpTimeGauge.METRIC_NAME)) {
                                    upTimeGaugeFuture.complete((UpTimeGauge) metric);
                                } else if (name.equals(DownTimeGauge.METRIC_NAME)) {
                                    downTimeGaugeFuture.complete((DownTimeGauge) metric);
                                } else if (name.equals("restartingTimeTotal")) {
                                    restartingTimeGaugeFuture.complete((Gauge<Long>) metric);
                                }
                            })
                    .build();

    final JobGraph graph = createJobGraph();
    final DefaultDeclarativeSlotPool slotPool = createDeclarativeSlotPool(graph.getJobID());

    final Configuration config = new Configuration();
    config.set(JobManagerOptions.MIN_PARALLELISM_INCREASE, 1);
    config.set(JobManagerOptions.RESOURCE_WAIT_TIMEOUT, Duration.ofMillis(10L));
    config.set(
            MetricOptions.JOB_STATUS_METRICS,
            Arrays.asList(MetricOptions.JobStatusMetrics.TOTAL_TIME));

    final AdaptiveScheduler scheduler =
            new AdaptiveSchedulerBuilder(graph, singleThreadMainThreadExecutor)
                    .setJobMasterConfiguration(config)
                    .setJobManagerJobMetricGroup(
                            JobManagerMetricGroup.createJobManagerMetricGroup(
                                            registry, "localhost")
                                    .addJob(new JobID(), "jobName"))
                    .setDeclarativeSlotPool(slotPool)
                    .build();

    final UpTimeGauge upTime = upTimeGaugeFuture.get();
    final DownTimeGauge downTime = downTimeGaugeFuture.get();
    final Gauge<Long> restartingTime = restartingTimeGaugeFuture.get();

    final SubmissionBufferingTaskManagerGateway gateway =
            new SubmissionBufferingTaskManagerGateway(1 + PARALLELISM);
    gateway.setCancelConsumer(createCancelConsumer(scheduler));

    // offers slots for one resource of the UNKNOWN profile; only ever run on the main thread
    // executor, so the offers are created and submitted there
    final Runnable offerOneResource =
            () ->
                    offerSlots(
                            slotPool,
                            createSlotOffersForResourceRequirements(
                                    ResourceCounter.withResource(ResourceProfile.UNKNOWN, 1)),
                            gateway);

    singleThreadMainThreadExecutor.execute(
            () -> {
                scheduler.startScheduling();
                offerOneResource.run();
            });

    // block until the first task has been submitted
    gateway.waitForSubmissions(1, Duration.ofSeconds(5));

    // give the clock a moment so that the up-time is strictly positive
    Thread.sleep(10L);

    assertThat(upTime.getValue()).isGreaterThan(0L);
    assertThat(downTime.getValue()).isEqualTo(0L);
    assertThat(restartingTime.getValue()).isEqualTo(0L);

    // offering more slots causes a restart in order to scale up
    singleThreadMainThreadExecutor.execute(offerOneResource);

    // block until the second round of task submissions has arrived
    gateway.waitForSubmissions(2, Duration.ofSeconds(5));

    // give the clock a moment so that the up-time is strictly positive
    Thread.sleep(10L);

    assertThat(upTime.getValue()).isGreaterThan(0L);
    assertThat(downTime.getValue()).isEqualTo(0L);
    // a very quick restart may leave the restarting time at zero
    assertThat(restartingTime.getValue()).isGreaterThanOrEqualTo(0L);
}
Aggregations