use of org.apache.beam.sdk.testutils.metrics.MetricsReader in project beam by apache.
the class KafkaIOIT method readMetrics.
/**
 * Collects the write time, read time and combined run time of the test as named results.
 *
 * <p>Each individual time is the difference between the end-time and start-time metric of the
 * corresponding pipeline, divided by 1e3 (presumably milliseconds to seconds - confirm against
 * {@code MetricsReader}). The run time is the sum of the write and read values.
 *
 * @param writeResult result of the pipeline that performed the write
 * @param readResult result of the pipeline that performed the read
 * @return a set holding the read time, write time and run time results
 */
private Set<NamedTestResult> readMetrics(PipelineResult writeResult, PipelineResult readResult) {
  // Turns the start/end time pair of a single metric into an elapsed-time result.
  BiFunction<MetricsReader, String, NamedTestResult> elapsedTime =
      (metrics, name) -> {
        long startedAt = metrics.getStartTimeMetric(name);
        long finishedAt = metrics.getEndTimeMetric(name);
        double elapsed = (finishedAt - startedAt) / 1e3;
        return NamedTestResult.create(TEST_ID, TIMESTAMP, name, elapsed);
      };

  MetricsReader writeSideMetrics = new MetricsReader(writeResult, NAMESPACE);
  NamedTestResult writeTime = elapsedTime.apply(writeSideMetrics, WRITE_TIME_METRIC_NAME);

  MetricsReader readSideMetrics = new MetricsReader(readResult, NAMESPACE);
  NamedTestResult readTime = elapsedTime.apply(readSideMetrics, READ_TIME_METRIC_NAME);

  double combined = writeTime.getValue() + readTime.getValue();
  NamedTestResult runTime =
      NamedTestResult.create(TEST_ID, TIMESTAMP, RUN_TIME_METRIC_NAME, combined);

  return ImmutableSet.of(readTime, writeTime, runTime);
}
use of org.apache.beam.sdk.testutils.metrics.MetricsReader in project beam by apache.
the class BigQueryIOIT method extractAndPublishTime.
/**
 * Reads a single time metric from the given pipeline result and publishes it to InfluxDB.
 *
 * @param pipelineResult result of the pipeline whose metric is read
 * @param writeTimeMetricName name of the metric handed to {@code getMetricSupplier}
 */
private void extractAndPublishTime(PipelineResult pipelineResult, String writeTimeMetricName) {
  final NamedTestResult measuredTime =
      getMetricSupplier(writeTimeMetricName)
          .apply(new MetricsReader(pipelineResult, NAMESPACE));

  // The publisher takes a collection, so wrap the single result.
  IOITMetrics.publishToInflux(
      TEST_ID, TEST_TIMESTAMP, Collections.singletonList(measuredTime), settings);
}
use of org.apache.beam.sdk.testutils.metrics.MetricsReader in project beam by apache.
the class LoadTest method readMetrics.
/**
 * Reads the runtime and total byte count of a load test run and wraps them as named results.
 *
 * @param timestamp timestamp attached (via {@code toString()}) to both results
 * @param result pipeline result the metrics are read from
 * @param testId identifier attached to both results
 * @return a two-element list: the runtime result followed by the total-bytes result
 */
private List<NamedTestResult> readMetrics(Timestamp timestamp, PipelineResult result, String testId) {
  MetricsReader metrics = new MetricsReader(result, metricsNamespace);
  String timestampText = timestamp.toString();

  // Runtime is the span between the "runtime" end and start metrics, divided by 1000.
  String runtimeMetricName = buildMetric("runtime_sec");
  long runtimeEnd = metrics.getEndTimeMetric("runtime");
  long runtimeStart = metrics.getStartTimeMetric("runtime");
  NamedTestResult runtime =
      NamedTestResult.create(
          testId, timestampText, runtimeMetricName, (runtimeEnd - runtimeStart) / 1000D);

  String bytesMetricName = buildMetric("total_bytes_count");
  long byteCount = metrics.getCounterMetric("totalBytes.count");
  NamedTestResult totalBytes =
      NamedTestResult.create(testId, timestampText, bytesMetricName, byteCount);

  return Arrays.asList(runtime, totalBytes);
}
use of org.apache.beam.sdk.testutils.metrics.MetricsReader in project beam by apache.
the class NexmarkLauncher method monitor.
/**
 * Monitor the performance and progress of a running job. Return final performance if it was
 * measured.
 *
 * <p>The main job is polled in a loop, sleeping {@code PERF_DELAY} between iterations, until its
 * state is DONE, CANCELLED, FAILED or UPDATED. The job is cancelled when the configured running
 * time has elapsed, or (for streaming jobs) when fatal errors are reported, the query appears to
 * have finished and cancel-after-finish is enabled, or the query has been quiet for longer than
 * {@code STUCK_TERMINATE_DELAY}. Performance is only sampled when {@code configuration.debug} is
 * set. If a publisher job exists it is cancelled and awaited for up to five minutes before
 * returning.
 *
 * @param query the query whose name and event/result monitors are used for sampling
 * @return the last sampled performance with the collected errors and snapshots attached, or
 *     {@code null} if job monitoring is disabled or no performance was sampled
 * @throws RuntimeException if cancelling the main job or the publisher job raises an
 *     {@link IOException}
 */
@Nullable
private NexmarkPerf monitor(NexmarkQuery query) {
  if (!options.getMonitorJobs()) {
    return null;
  }

  if (configuration.debug) {
    NexmarkUtils.console("Waiting for main pipeline to 'finish'");
  } else {
    NexmarkUtils.console("--debug=false, so job will not self-cancel");
  }

  PipelineResult job = mainResult;
  PipelineResult publisherJob = publisherResult;
  List<NexmarkPerf.ProgressSnapshot> snapshots = new ArrayList<>();
  long startMsSinceEpoch = System.currentTimeMillis();
  // -1 means "no deadline"; otherwise the wall-clock time at which the job is cancelled.
  long endMsSinceEpoch = -1;
  if (options.getRunningTimeMinutes() != null) {
    endMsSinceEpoch =
        startMsSinceEpoch
            + Duration.standardMinutes(options.getRunningTimeMinutes()).getMillis()
            - Duration.standardSeconds(configuration.preloadSeconds).getMillis();
  }
  long lastActivityMsSinceEpoch = -1;
  NexmarkPerf perf = null;
  boolean waitingForShutdown = false;
  boolean cancelJob = false;
  boolean publisherCancelled = false;
  List<String> errors = new ArrayList<>();

  while (true) {
    long now = System.currentTimeMillis();
    if (endMsSinceEpoch >= 0 && now > endMsSinceEpoch && !waitingForShutdown) {
      NexmarkUtils.console("Reached end of test, cancelling job");
      try {
        cancelJob = true;
        job.cancel();
      } catch (IOException e) {
        throw new RuntimeException("Unable to cancel main job: ", e);
      }
      if (publisherResult != null) {
        try {
          publisherJob.cancel();
        } catch (IOException e) {
          throw new RuntimeException("Unable to cancel publisher job: ", e);
        }
        publisherCancelled = true;
      }
      waitingForShutdown = true;
    }

    PipelineResult.State state = job.getState();
    NexmarkUtils.console(
        "%s %s%s", state, queryName, waitingForShutdown ? " (waiting for shutdown)" : "");

    // Performance is only sampled in debug mode; otherwise currPerf (and hence perf) stays null.
    NexmarkPerf currPerf;
    if (configuration.debug) {
      currPerf =
          currentPerf(
              startMsSinceEpoch, now, job, snapshots, query.eventMonitor, query.resultMonitor);
    } else {
      currPerf = null;
    }

    if (perf == null || perf.anyActivity(currPerf)) {
      lastActivityMsSinceEpoch = now;
    }

    if (options.isStreaming() && !waitingForShutdown) {
      Duration quietFor = new Duration(lastActivityMsSinceEpoch, now);
      long fatalCount = new MetricsReader(job, query.getName()).getCounterMetric("fatal");
      // A reading of -1 is treated as "no fatal errors".
      if (fatalCount == -1) {
        fatalCount = 0;
      }

      if (fatalCount > 0) {
        NexmarkUtils.console("ERROR: job has fatal errors, cancelling.");
        errors.add(String.format("Pipeline reported %s fatal errors", fatalCount));
        waitingForShutdown = true;
        cancelJob = true;
      } else if (configuration.debug
          && configuration.numEvents > 0
          && currPerf.numEvents >= configuration.numEvents
          && currPerf.numResults >= 0
          && quietFor.isLongerThan(DONE_DELAY)) {
        NexmarkUtils.console("streaming query appears to have finished waiting for completion.");
        waitingForShutdown = true;
        if (options.getCancelStreamingJobAfterFinish()) {
          cancelJob = true;
        }
      } else if (quietFor.isLongerThan(STUCK_TERMINATE_DELAY)) {
        NexmarkUtils.console(
            "ERROR: streaming query appears to have been stuck for %d minutes, cancelling job.",
            quietFor.getStandardMinutes());
        errors.add(
            String.format(
                "Cancelling streaming job since it appeared stuck for %d min.",
                quietFor.getStandardMinutes()));
        waitingForShutdown = true;
        cancelJob = true;
      } else if (quietFor.isLongerThan(STUCK_WARNING_DELAY)) {
        NexmarkUtils.console(
            "WARNING: streaming query appears to have been stuck for %d min.",
            quietFor.getStandardMinutes());
      }

      if (cancelJob) {
        try {
          job.cancel();
        } catch (IOException e) {
          throw new RuntimeException("Unable to cancel main job: ", e);
        }
      }
    }

    perf = currPerf;

    boolean running = true;
    switch (state) {
      case UNKNOWN:
      case UNRECOGNIZED:
      case STOPPED:
      case RUNNING:
        // Keep going.
        break;
      case DONE:
        // All done.
        running = false;
        break;
      case CANCELLED:
        running = false;
        if (!cancelJob) {
          errors.add("Job was unexpectedly cancelled");
        }
        break;
      case FAILED:
        // Abnormal termination. Reported separately from UPDATED so the recorded error names
        // the actual terminal state.
        running = false;
        errors.add("Job failed");
        break;
      case UPDATED:
        // Abnormal termination.
        running = false;
        errors.add("Job was unexpectedly updated");
        break;
    }

    if (!running) {
      break;
    }

    if (lastActivityMsSinceEpoch == now) {
      NexmarkUtils.console("new perf %s", perf);
    } else {
      NexmarkUtils.console("no activity");
    }

    try {
      Thread.sleep(PERF_DELAY.getMillis());
    } catch (InterruptedException e) {
      // Keep monitoring with the interrupt status cleared; re-asserting it here would make
      // every following sleep throw immediately and turn this loop into a busy spin.
      Thread.interrupted();
      NexmarkUtils.console("Interrupted: pipeline is still running");
    }
  }

  if (perf != null) {
    perf.errors = errors;
    perf.snapshots = snapshots;
  } else {
    // Nothing was sampled (configuration.debug is off), so there is no NexmarkPerf to attach
    // the errors to. Print them rather than dereferencing null or dropping them silently.
    for (String error : errors) {
      NexmarkUtils.console("ERROR: %s", error);
    }
  }

  if (publisherResult != null) {
    NexmarkUtils.console("Shutting down publisher pipeline.");
    try {
      if (!publisherCancelled) {
        publisherJob.cancel();
      }
      publisherJob.waitUntilFinish(Duration.standardMinutes(5));
    } catch (IOException e) {
      throw new RuntimeException("Unable to cancel publisher job: ", e);
    }
  }

  return perf;
}
use of org.apache.beam.sdk.testutils.metrics.MetricsReader in project beam by apache.
the class NexmarkLauncher method currentPerf.
/**
 * Return the current performance given {@code eventMonitor} and {@code resultMonitor}.
 *
 * <p>Counter and start/end time metrics are read from {@code result} for both monitors. Metric
 * readings below zero are treated as unavailable and leave the corresponding {@link NexmarkPerf}
 * field untouched. A progress snapshot for this sample is appended to {@code snapshots} before
 * {@code captureSteadyState} is invoked.
 *
 * @param startMsSinceEpoch time at which monitoring started
 * @param now time of this sample
 * @param result pipeline result the metrics are read from
 * @param snapshots list that receives the snapshot taken for this sample
 * @param eventMonitor monitor whose name and prefix locate the event metrics
 * @param resultMonitor monitor whose name and prefix locate the result metrics
 * @return a freshly populated performance record
 */
private NexmarkPerf currentPerf(long startMsSinceEpoch, long now, PipelineResult result, List<NexmarkPerf.ProgressSnapshot> snapshots, Monitor<?> eventMonitor, Monitor<?> resultMonitor) {
  NexmarkPerf current = new NexmarkPerf();

  // Event-side metrics.
  MetricsReader eventReader = new MetricsReader(result, eventMonitor.name);
  String eventTimeKey = eventMonitor.prefix + ".processingTime";
  long eventCount = eventReader.getCounterMetric(eventMonitor.prefix + ".elements");
  long eventByteCount = eventReader.getCounterMetric(eventMonitor.prefix + ".bytes");
  long firstEventMs = eventReader.getStartTimeMetric(eventTimeKey);
  long lastEventMs = eventReader.getEndTimeMetric(eventTimeKey);

  // Result-side metrics.
  MetricsReader resultReader = new MetricsReader(result, resultMonitor.name);
  String resultTimeKey = resultMonitor.prefix + ".processingTime";
  String eventTimestampKey = resultMonitor.prefix + ".eventTimestamp";
  long resultCount = resultReader.getCounterMetric(resultMonitor.prefix + ".elements");
  long resultByteCount = resultReader.getCounterMetric(resultMonitor.prefix + ".bytes");
  long firstResultMs = resultReader.getStartTimeMetric(resultTimeKey);
  long lastResultMs = resultReader.getEndTimeMetric(resultTimeKey);
  long firstTimestampMs = resultReader.getStartTimeMetric(eventTimestampKey);
  long lastTimestampMs = resultReader.getEndTimeMetric(eventTimestampKey);

  // Events can still be generated after the last result was emitted (e.g. Query 2, which only
  // yields results for a small prefix of the event stream), and during startup there may be no
  // result at all. Taking the max of the two end times covers both: whichever one is available
  // wins, and when both are available the later one is used.
  long lastActivityMs =
      (lastEventMs >= 0 || lastResultMs >= 0) ? Math.max(lastEventMs, lastResultMs) : -1;

  if (lastActivityMs >= 0 && firstEventMs >= 0 && lastActivityMs >= firstEventMs) {
    current.runtimeSec = (lastActivityMs - firstEventMs) / 1000.0;
  }

  if (eventCount >= 0) {
    current.numEvents = eventCount;
  }
  if (resultCount >= 0) {
    current.numResults = resultCount;
  }

  // Every rate below needs a positive runtime to divide by.
  if (current.runtimeSec > 0.0) {
    if (eventCount >= 0) {
      // For streaming this may later be replaced with a 'steady-state' value calculated from
      // the progress snapshots.
      current.eventsPerSec = eventCount / current.runtimeSec;
    }
    if (eventByteCount >= 0) {
      current.eventBytesPerSec = eventByteCount / current.runtimeSec;
    }
    if (resultCount >= 0) {
      current.resultsPerSec = resultCount / current.runtimeSec;
    }
    if (resultByteCount >= 0) {
      current.resultBytesPerSec = resultByteCount / current.runtimeSec;
    }
    if (firstTimestampMs >= 0 && lastTimestampMs >= 0) {
      double eventTimeSpanSec = (lastTimestampMs - firstTimestampMs) / 1000.0;
      current.timeDilation = eventTimeSpanSec / current.runtimeSec;
    }
  }

  if (firstEventMs >= 0) {
    current.startupDelaySec = (firstEventMs - startMsSinceEpoch) / 1000.0;
  }
  if (firstResultMs >= 0 && firstEventMs >= 0 && firstResultMs >= firstEventMs) {
    current.processingDelaySec = (firstResultMs - firstEventMs) / 1000.0;
  }
  if (lastResultMs >= 0) {
    // Fill in the shutdown delay assuming the job has now finished.
    current.shutdownDelaySec = (now - lastResultMs) / 1000.0;
  }

  // Record where this sample stands; the raw counts are stored even when they are negative.
  NexmarkPerf.ProgressSnapshot sample = new NexmarkPerf.ProgressSnapshot();
  sample.secSinceStart = (now - startMsSinceEpoch) / 1000.0;
  sample.runtimeSec = current.runtimeSec;
  sample.numEvents = eventCount;
  sample.numResults = resultCount;
  snapshots.add(sample);

  captureSteadyState(current, snapshots);
  return current;
}
Aggregations