Use of org.apache.beam.runners.flink.metrics.FlinkMetricContainer in project beam by apache.
From the class DoFnOperator, method open():
@Override
public void open() throws Exception {
  // WindowDoFnOperator needs to use state and timers to construct its DoFn,
  // so it must wait until StateInternals and TimerInternals are ready.
  // This will be called after initializeState().
  this.doFn = getDoFn();
  FlinkPipelineOptions options = serializedOptions.get().as(FlinkPipelineOptions.class);
  doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, options);

  StepContext stepContext = new FlinkStepContext();
  doFnRunner =
      DoFnRunners.simpleRunner(
          options, doFn, sideInputReader, outputManager, mainOutputTag, additionalOutputTags,
          stepContext, getInputCoder(), outputCoders, windowingStrategy, doFnSchemaInformation,
          sideInputMapping);

  if (requiresStableInput) {
    // Put this in front of the root FnRunner, before any additional wrappers.
    doFnRunner =
        bufferingDoFnRunner =
            BufferingDoFnRunner.create(
                doFnRunner, "stable-input-buffer", windowedInputCoder,
                windowingStrategy.getWindowFn().windowCoder(), getOperatorStateBackend(),
                getKeyedStateBackend(), options.getNumConcurrentCheckpoints(), serializedOptions);
  }
  doFnRunner = createWrappingDoFnRunner(doFnRunner, stepContext);
  earlyBindStateIfNeeded();

  if (!options.getDisableMetrics()) {
    flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);
    String checkpointMetricNamespace = options.getReportCheckpointDuration();
    if (checkpointMetricNamespace != null) {
      MetricName checkpointMetric =
          MetricName.named(checkpointMetricNamespace, "checkpoint_duration");
      checkpointStats =
          new CheckpointStats(
              () ->
                  flinkMetricContainer
                      .getMetricsContainer(stepName)
                      .getDistribution(checkpointMetric));
    }
  }

  elementCount = 0L;
  lastFinishBundleTime = getProcessingTimeService().getCurrentProcessingTime();

  // Schedule a timer that periodically checks whether the bundle must be finished due to timeout.
  long bundleCheckPeriod = Math.max(maxBundleTimeMills / 2, 1);
  checkFinishBundleTimer =
      getProcessingTimeService()
          .scheduleAtFixedRate(
              timestamp -> checkInvokeFinishBundleByTime(), bundleCheckPeriod, bundleCheckPeriod);

  if (doFn instanceof SplittableParDoViaKeyedWorkItems.ProcessFn) {
    pushbackDoFnRunner =
        new ProcessFnRunner<>((DoFnRunner) doFnRunner, sideInputs, sideInputHandler);
  } else {
    pushbackDoFnRunner =
        SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
  }

  bundleFinalizer = new InMemoryBundleFinalizer();
  pendingFinalizations = new LinkedHashMap<>();
}
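
The pattern to notice here is layered decoration: each wrapper (the stable-input buffer, the metrics updater, the pushback runner) implements the same runner contract and delegates to the runner beneath it, so concerns compose in the order they are installed. A minimal sketch of that pattern follows; Runner and MetricsRunner are illustrative stand-ins, not Beam's actual DoFnRunner API.

// Hypothetical types illustrating the wrapper layering used in open() above.
interface Runner<T> {
  void startBundle();
  void processElement(T element);
  void finishBundle();
}

final class MetricsRunner<T> implements Runner<T> {
  private final Runner<T> delegate;
  private final Runnable flushMetrics; // e.g. a hook that publishes to a FlinkMetricContainer

  MetricsRunner(Runner<T> delegate, Runnable flushMetrics) {
    this.delegate = delegate;
    this.flushMetrics = flushMetrics;
  }

  @Override public void startBundle() { delegate.startBundle(); }

  @Override public void processElement(T element) { delegate.processElement(element); }

  @Override public void finishBundle() {
    delegate.finishBundle();
    flushMetrics.run(); // publish accumulated metrics at the bundle boundary
  }
}

Because every wrapper honors the same contract, the composition order in open() decides which concern sees an element first; the snippet deliberately installs the stable-input buffer closest to the root runner.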
Use of org.apache.beam.runners.flink.metrics.FlinkMetricContainer in project beam by apache.
From the class UnboundedSourceWrapper, method open():
/**
 * Initialize and restore state before starting execution of the source.
 */
@Override
public void open(Configuration parameters) throws Exception {
  FileSystems.setDefaultPipelineOptions(serializedOptions.get());
  runtimeContext = (StreamingRuntimeContext) getRuntimeContext();
  metricContainer = new FlinkMetricContainer(runtimeContext);

  // Figure out which split sources we're responsible for.
  int subtaskIndex = runtimeContext.getIndexOfThisSubtask();
  int numSubtasks = runtimeContext.getNumberOfParallelSubtasks();

  localSplitSources = new ArrayList<>();
  localReaders = new ArrayList<>();
  pendingCheckpoints = new LinkedHashMap<>();

  if (isRestored) {
    // Restore the splitSources from the checkpoint to ensure consistent ordering.
    for (KV<? extends UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> restored :
        stateForCheckpoint.get()) {
      localSplitSources.add(restored.getKey());
      localReaders.add(
          restored.getKey().createReader(serializedOptions.get(), restored.getValue()));
    }
  } else {
    // Initialize localReaders and localSources from scratch.
    for (int i = 0; i < splitSources.size(); i++) {
      if (i % numSubtasks == subtaskIndex) {
        UnboundedSource<OutputT, CheckpointMarkT> source = splitSources.get(i);
        UnboundedSource.UnboundedReader<OutputT> reader =
            source.createReader(serializedOptions.get(), null);
        localSplitSources.add(source);
        localReaders.add(reader);
      }
    }
  }

  LOG.info(
      "Unbounded Flink Source {}/{} is reading from sources: {}",
      subtaskIndex + 1, numSubtasks, localSplitSources);
}
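
When the job starts without restored state, splits are assigned round-robin by subtask index via i % numSubtasks == subtaskIndex. A self-contained sketch of that assignment rule (class and method names are hypothetical):

import java.util.ArrayList;
import java.util.List;

final class SplitAssignment {
  /** Returns the indices of the splits a given subtask should read. */
  static List<Integer> assignedSplits(int numSplits, int numSubtasks, int subtaskIndex) {
    List<Integer> mine = new ArrayList<>();
    for (int i = 0; i < numSplits; i++) {
      if (i % numSubtasks == subtaskIndex) {
        mine.add(i);
      }
    }
    return mine;
  }

  public static void main(String[] args) {
    // Five splits over two subtasks: subtask 0 reads splits 0, 2 and 4; subtask 1 reads 1 and 3.
    System.out.println(assignedSplits(5, 2, 0)); // [0, 2, 4]
    System.out.println(assignedSplits(5, 2, 1)); // [1, 3]
  }
}

On restore, the assignment is taken from checkpointed state instead of being recomputed, so each subtask resumes exactly the splits (and checkpoint marks) it was reading before.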
Use of org.apache.beam.runners.flink.metrics.FlinkMetricContainer in project beam by apache.
From the class FlinkDoFnFunction, method open():
@Override
public void open(Configuration parameters) {
  // Note that SerializablePipelineOptions already initializes FileSystems in its
  // readObject() deserialization method. However, this is a hack, and we want to
  // initialize the options properly where they are needed.
  PipelineOptions options = serializedOptions.get();
  FileSystems.setDefaultPipelineOptions(options);
  doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, options);
  metricContainer = new FlinkMetricContainer(getRuntimeContext());

  // Set up the DoFnRunner.
  final RuntimeContext runtimeContext = getRuntimeContext();
  final DoFnRunners.OutputManager outputManager;
  if (outputMap.size() == 1) {
    outputManager = new DoFnOutputManager();
  } else {
    // The DoFn has additional outputs.
    outputManager = new MultiDoFnOutputManager(outputMap);
  }

  final List<TupleTag<?>> additionalOutputTags = Lists.newArrayList(outputMap.keySet());
  DoFnRunner<InputT, OutputT> doFnRunner =
      DoFnRunners.simpleRunner(
          options, doFn, new FlinkSideInputReader(sideInputs, runtimeContext), outputManager,
          mainOutputTag, additionalOutputTags, new FlinkNoOpStepContext(), inputCoder,
          outputCoderMap, windowingStrategy, doFnSchemaInformation, sideInputMapping);

  if (!serializedOptions.get().as(FlinkPipelineOptions.class).getDisableMetrics()) {
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, metricContainer);
  }

  this.collectorAware = (CollectorAware) outputManager;
  this.doFnRunner = doFnRunner;
}
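
The metrics wrapper is only applied when getDisableMetrics() returns false. For context, a boolean toggle like this is declared on a PipelineOptions sub-interface roughly as below; MyFlinkOptions is a hypothetical name, but @Description and @Default.Boolean are Beam's real options annotations.

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;

// Hypothetical options interface showing how a flag such as disableMetrics is declared.
public interface MyFlinkOptions extends PipelineOptions {
  @Description("Disables Beam metric reporting to the Flink runtime when set to true.")
  @Default.Boolean(false)
  Boolean getDisableMetrics();

  void setDisableMetrics(Boolean disableMetrics);
}

At runtime, serializedOptions.get().as(FlinkPipelineOptions.class) surfaces the flag, exactly as the snippet does.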
Use of org.apache.beam.runners.flink.metrics.FlinkMetricContainer in project beam by apache.
From the class FlinkExecutableStageFunction, method open():
@Override
public void open(Configuration parameters) {
  FlinkPipelineOptions options = pipelineOptions.get().as(FlinkPipelineOptions.class);
  // Register standard file systems.
  FileSystems.setDefaultPipelineOptions(options);

  executableStage = ExecutableStage.fromPayload(stagePayload);
  runtimeContext = getRuntimeContext();
  metricContainer = new FlinkMetricContainer(runtimeContext);
  // TODO: Wire this into the distributed cache and make it pluggable.
  stageContext = contextFactory.get(jobInfo);
  stageBundleFactory = stageContext.getStageBundleFactory(executableStage);

  // NOTE: It's safe to reuse the state handler between partitions because each partition
  // uses the same backing runtime context and broadcast variables. We use checkState below
  // to catch errors caused by backward-incompatible Flink changes.
  stateRequestHandler =
      getStateRequestHandler(
          executableStage, stageBundleFactory.getProcessBundleDescriptor(), runtimeContext);

  progressHandler =
      new BundleProgressHandler() {
        @Override
        public void onProgress(ProcessBundleProgressResponse progress) {
          metricContainer.updateMetrics(stepName, progress.getMonitoringInfosList());
        }

        @Override
        public void onCompleted(ProcessBundleResponse response) {
          metricContainer.updateMetrics(stepName, response.getMonitoringInfosList());
        }
      };

  // TODO(BEAM-11021): Support bundle finalization in portable batch.
  finalizationHandler =
      bundleId -> {
        throw new UnsupportedOperationException(
            "Portable Flink runner doesn't support bundle finalization in batch mode. "
                + "For more details, please refer to "
                + "https://issues.apache.org/jira/browse/BEAM-11021.");
      };

  bundleCheckpointHandler = getBundleCheckpointHandler(executableStage);
}
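
The anonymous BundleProgressHandler is the bridge between the portability protocol's monitoring infos and Flink's metric system. As a sketch of the same contract, here is a stand-in handler that only logs; the request/response types match the portability API used above, but LoggingProgressHandler itself is hypothetical and import paths may differ across Beam versions.

import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse;
import org.apache.beam.runners.fnexecution.control.BundleProgressHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Hypothetical handler: logs monitoring-info counts instead of forwarding them
// to a FlinkMetricContainer.
final class LoggingProgressHandler implements BundleProgressHandler {
  private static final Logger LOG = LoggerFactory.getLogger(LoggingProgressHandler.class);

  @Override
  public void onProgress(ProcessBundleProgressResponse progress) {
    LOG.debug("Bundle in progress: {} monitoring infos", progress.getMonitoringInfosCount());
  }

  @Override
  public void onCompleted(ProcessBundleResponse response) {
    LOG.debug("Bundle completed: {} monitoring infos", response.getMonitoringInfosCount());
  }
}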
Use of org.apache.beam.runners.flink.metrics.FlinkMetricContainer in project beam by apache.
From the class SourceInputFormat, method open():
@Override
public void open(SourceInputSplit<T> sourceInputSplit) throws IOException {
  metricContainer = new FlinkMetricContainer(getRuntimeContext());
  // The invoker wraps reader calls so that Beam metrics are reported through Flink.
  readerInvoker =
      new ReaderInvocationUtil<>(stepName, serializedOptions.get(), metricContainer);
  reader = ((BoundedSource<T>) sourceInputSplit.getSource()).createReader(options);
  // invokeStart() opens the reader and returns true if a first element is available.
  inputAvailable = readerInvoker.invokeStart(reader);
}
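
invokeStart() both opens the reader and reports whether a first record is available; the rest of Flink's InputFormat contract then drains the reader via invokeAdvance(). A hedged sketch of how the surrounding class typically consumes these fields (simplified, with error handling omitted):

@Override
public boolean reachedEnd() {
  return !inputAvailable;
}

@Override
public T nextRecord(T reuse) throws IOException {
  if (inputAvailable) {
    final T current = reader.getCurrent();
    // Advance eagerly so reachedEnd() reflects the reader state for the next call.
    inputAvailable = readerInvoker.invokeAdvance(reader);
    return current;
  }
  return null;
}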