use of org.apache.beam.runners.core.OutputWindowedValue in project beam by apache.
the class BatchGroupAlsoByWindowAndCombineFn method processElement.
/**
 * Combines all values of a single key per window and emits one combined result per window.
 *
 * <p>Each incoming value is added to the accumulator of every window it belongs to. After each
 * value the window function is asked to merge windows, and every live window whose maximum
 * timestamp lies before the current value's timestamp is closed via {@code closeWindow}. Once the
 * input is exhausted, all remaining live windows are closed.
 *
 * <p>NOTE(review): closing windows as soon as a later timestamp is seen presumes the values arrive
 * in non-decreasing timestamp order; confirm against the caller.
 *
 * @param element the key together with its windowed values
 * @param options pipeline options, handed to the context-aware combine runner
 * @param stepContext not referenced by this method
 * @param sideInputReader side input access, handed to the context-aware combine runner
 * @param output receiver passed to {@code closeWindow} for every closed window
 * @throws Exception propagated from window merging, combining, or closing a window
 */
@Override
public void processElement(KV<K, Iterable<WindowedValue<InputT>>> element, PipelineOptions options, StepContext stepContext, SideInputReader sideInputReader, OutputWindowedValue<KV<K, OutputT>> output) throws Exception {
  // Pick the runner that matches the concrete kind of combine function.
  final PerKeyCombineFnRunner<K, InputT, AccumT, OutputT> perKeyCombineFnRunner;
  if (perKeyCombineFn instanceof CombineFn) {
    perKeyCombineFnRunner = new KeyedCombineFnRunner((CombineFn<InputT, AccumT, OutputT>) perKeyCombineFn);
  } else {
    perKeyCombineFnRunner = new KeyedCombineFnWithContextRunner(options, (CombineFnWithContext<InputT, AccumT, OutputT>) perKeyCombineFn, sideInputReader);
  }
  final K key = element.getKey();
  Iterator<WindowedValue<InputT>> iterator = element.getValue().iterator();
  // Live windows ordered by their maximum timestamp, so the earliest-ending window is polled first.
  // Long.compare avoids the overflow hazard of comparing by subtraction.
  final PriorityQueue<W> liveWindows = new PriorityQueue<>(11, (w1, w2) -> Long.compare(w1.maxTimestamp().getMillis(), w2.maxTimestamp().getMillis()));
  final Map<W, AccumT> accumulators = Maps.newHashMap();
  final Map<W, Instant> accumulatorOutputTimestamps = Maps.newHashMap();
  WindowFn<Object, W>.MergeContext mergeContext = windowingStrategy.getWindowFn().new MergeContext() {
    @Override
    public Collection<W> windows() {
      return liveWindows;
    }

    @Override
    public void merge(Collection<W> toBeMerged, W mergeResult) throws Exception {
      // Pull the per-window state of every merged window out of the bookkeeping maps.
      List<AccumT> accumsToBeMerged = Lists.newArrayListWithCapacity(toBeMerged.size());
      List<Instant> timestampsToBeMerged = Lists.newArrayListWithCapacity(toBeMerged.size());
      for (W window : toBeMerged) {
        accumsToBeMerged.add(accumulators.remove(window));
        timestampsToBeMerged.add(accumulatorOutputTimestamps.remove(window));
      }
      liveWindows.removeAll(toBeMerged);
      // Register the merged window with the combined output timestamp and accumulator.
      Instant mergedOutputTimestamp = windowingStrategy.getTimestampCombiner().merge(mergeResult, timestampsToBeMerged);
      accumulatorOutputTimestamps.put(mergeResult, mergedOutputTimestamp);
      liveWindows.add(mergeResult);
      AccumT accum = perKeyCombineFnRunner.mergeAccumulators(accumsToBeMerged, mergeResult);
      accumulators.put(mergeResult, accum);
    }
  };
  while (iterator.hasNext()) {
    WindowedValue<InputT> e = iterator.next();
    @SuppressWarnings("unchecked") Collection<W> windows = (Collection<W>) e.getWindows();
    for (W window : windows) {
      // Track the output timestamp of the window, combining it with any previously recorded one.
      Instant outputTime = windowingStrategy.getTimestampCombiner().assign(window, e.getTimestamp());
      Instant accumulatorOutputTime = accumulatorOutputTimestamps.get(window);
      if (accumulatorOutputTime == null) {
        accumulatorOutputTimestamps.put(window, outputTime);
      } else {
        accumulatorOutputTimestamps.put(window, windowingStrategy.getTimestampCombiner().combine(outputTime, accumulatorOutputTime));
      }
      AccumT accum = accumulators.get(window);
      checkState((accumulatorOutputTime == null && accum == null) || (accumulatorOutputTime != null && accum != null), "accumulator and accumulatorOutputTime should both be null or both be non-null");
      if (accum == null) {
        // First value seen for this window: start a fresh accumulator and mark the window live.
        accum = perKeyCombineFnRunner.createAccumulator(window);
        liveWindows.add(window);
      }
      accum = perKeyCombineFnRunner.addInput(accum, e.getValue(), window);
      accumulators.put(window, accum);
    }
    windowingStrategy.getWindowFn().mergeWindows(mergeContext);
    // Close every live window that ends before the timestamp of the current value.
    while (!liveWindows.isEmpty() && liveWindows.peek().maxTimestamp().isBefore(e.getTimestamp())) {
      closeWindow(perKeyCombineFnRunner, key, liveWindows.poll(), accumulators, accumulatorOutputTimestamps, output);
    }
  }
  // All input has been consumed and windows were merged after each value, so the remaining live
  // windows only need to be closed. We don't need to retry merging.
  while (!liveWindows.isEmpty()) {
    closeWindow(perKeyCombineFnRunner, key, liveWindows.poll(), accumulators, accumulatorOutputTimestamps, output);
  }
}
use of org.apache.beam.runners.core.OutputWindowedValue in project beam by apache.
the class SplittableParDoProcessKeyedElementsOp method open.
/**
 * Prepares this operator for execution: restores the pipeline options from the config, builds the
 * state and timer internals factories, configures the splittable process function and creates the
 * runner stored in {@code fnRunner}.
 *
 * @param config configuration holding the serialized pipeline options under
 *     {@code "beamPipelineOptions"}
 * @param context context whose task context is used to look up the Beam store
 * @param timerRegistry scheduler handed to the timer internals factory
 * @param emitter emitter from which the output manager is created
 */
@Override
public void open(Config config, Context context, Scheduler<KeyedTimerData<byte[]>> timerRegistry, OpEmitter<RawUnionValue> emitter) {
  this.pipelineOptions =
      Base64Serializer.deserializeUnchecked(
              config.get("beamPipelineOptions"), SerializablePipelineOptions.class)
          .get()
          .as(SamzaPipelineOptions.class);

  final SamzaStoreStateInternals.Factory<?> nonKeyedStateFactory =
      SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(
          transformId, context.getTaskContext(), pipelineOptions);
  final DoFnRunners.OutputManager emitterOutput = outputManagerFactory.create(emitter);

  this.stateInternalsFactory =
      new SamzaStoreStateInternals.Factory<>(
          transformId,
          Collections.singletonMap(
              SamzaStoreStateInternals.BEAM_STORE,
              SamzaStoreStateInternals.getBeamStore(context.getTaskContext())),
          ByteArrayCoder.of(),
          pipelineOptions.getStoreBatchGetSize());
  this.timerInternalsFactory =
      SamzaTimerInternalsFactory.createTimerInternalFactory(
          ByteArrayCoder.of(),
          timerRegistry,
          TIMER_STATE_ID,
          nonKeyedStateFactory,
          windowingStrategy,
          isBounded,
          pipelineOptions);
  final KeyedInternals<byte[]> internals =
      new KeyedInternals<>(stateInternalsFactory, timerInternalsFactory);

  SplittableParDoViaKeyedWorkItems.ProcessFn<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT> fn =
      processElements.newProcessFn(processElements.getFn());
  DoFnInvokers.tryInvokeSetupFor(fn, pipelineOptions);
  fn.setStateInternalsFactory(stateInternalsFactory);
  fn.setTimerInternalsFactory(timerInternalsFactory);
  fn.setSideInputReader(NullSideInputReader.empty());

  // Both overloads forward the value, wrapped as a WindowedValue, to the emitter-backed manager.
  final OutputWindowedValue<OutputT> windowedOutput =
      new OutputWindowedValue<OutputT>() {
        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
          emitterOutput.output(mainOutputTag, WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
          emitterOutput.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
      };
  fn.setProcessElementInvoker(
      new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
          processElements.getFn(),
          pipelineOptions,
          windowedOutput,
          NullSideInputReader.empty(),
          Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()),
          10000,
          Duration.standardSeconds(10),
          () -> {
            throw new UnsupportedOperationException("BundleFinalizer unsupported in Samza");
          }));

  // Step context that exposes the keyed state and timer internals to the runner.
  final StepContext keyedStepContext =
      new StepContext() {
        @Override
        public StateInternals stateInternals() {
          return internals.stateInternals();
        }

        @Override
        public TimerInternals timerInternals() {
          return internals.timerInternals();
        }
      };
  this.fnRunner =
      DoFnRunners.simpleRunner(
          pipelineOptions,
          fn,
          NullSideInputReader.of(Collections.emptyList()),
          emitterOutput,
          mainOutputTag,
          Collections.emptyList(),
          keyedStepContext,
          null,
          Collections.emptyMap(),
          windowingStrategy,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
}
use of org.apache.beam.runners.core.OutputWindowedValue in project beam by apache.
the class SplittableProcessElementsEvaluatorFactory method createEvaluator.
/**
 * Builds the evaluator for a {@code ProcessElements} transform applied to the given bundle.
 *
 * <p>A {@code ProcessFn} instance is obtained through a {@code DoFnLifecycleManager}, a ParDo
 * evaluator is created for it, and the function is wired with state/timer factories (which return
 * the step context's internals for every key) and with an output-and-time-bounded process element
 * invoker whose output goes to the ParDo evaluator's output manager.
 *
 * @param application the applied transform being evaluated
 * @param inputBundle the input bundle, whose key selects the execution context
 * @return the ParDo evaluator wrapped so that it is tied to the lifecycle manager
 * @throws Exception propagated from the calls made while building the evaluator
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> createEvaluator(AppliedPTransform<PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, TrackerT>> application, CommittedBundle<InputT> inputBundle) throws Exception {
  final ProcessElements<InputT, OutputT, RestrictionT, TrackerT> transform = application.getTransform();

  // Register a freshly created fn with the lifecycle manager, then work with the managed instance.
  ProcessFn<InputT, OutputT, RestrictionT, TrackerT> prototypeFn = transform.newProcessFn(transform.getFn());
  DoFnLifecycleManager fnManager = DoFnLifecycleManager.of(prototypeFn);
  final ProcessFn<InputT, OutputT, RestrictionT, TrackerT> managedFn =
      ((ProcessFn<InputT, OutputT, RestrictionT, TrackerT>) fnManager.<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>get());

  String stepName = evaluationContext.getStepName(application);
  final DirectExecutionContext.DirectStepContext stepContext =
      evaluationContext.getExecutionContext(application, inputBundle.getKey()).getStepContext(stepName);

  final ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> parDoEvaluator =
      delegateFactory.createParDoEvaluator(
          application,
          inputBundle.getKey(),
          transform.getSideInputs(),
          transform.getMainOutputTag(),
          transform.getAdditionalOutputTags().getAll(),
          stepContext,
          managedFn,
          fnManager);

  // State and timers come from the step context, independent of the key that is asked for.
  managedFn.setStateInternalsFactory(
      new StateInternalsFactory<String>() {
        @SuppressWarnings({ "unchecked", "rawtypes" })
        @Override
        public StateInternals stateInternalsForKey(String key) {
          return (StateInternals) stepContext.stateInternals();
        }
      });
  managedFn.setTimerInternalsFactory(
      new TimerInternalsFactory<String>() {
        @Override
        public TimerInternals timerInternalsForKey(String key) {
          return stepContext.timerInternals();
        }
      });

  final OutputManager evaluatorOutput = parDoEvaluator.getOutputManager();
  OutputWindowedValue<OutputT> windowedOutput =
      new OutputWindowedValue<OutputT>() {
        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
          // Main output is simply the tagged overload with the transform's main output tag.
          outputWindowedValue(transform.getMainOutputTag(), output, timestamp, windows, pane);
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
          evaluatorOutput.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
      };

  // NOTE(review): the original inline comment on the executor argument was cut off
  // ("... DirectRunner."); restore its full text from upstream if it matters.
  managedFn.setProcessElementInvoker(
      new OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT>(
          transform.getFn(),
          evaluationContext.getPipelineOptions(),
          windowedOutput,
          evaluationContext.createSideInputReader(transform.getSideInputs()),
          Executors.newSingleThreadScheduledExecutor(
              new ThreadFactoryBuilder()
                  .setThreadFactory(MoreExecutors.platformThreadFactory())
                  .setDaemon(true)
                  .setNameFormat("direct-splittable-process-element-checkpoint-executor")
                  .build()),
          10000,
          Duration.standardSeconds(10)));

  return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(parDoEvaluator, fnManager);
}
use of org.apache.beam.runners.core.OutputWindowedValue in project beam by apache.
the class ApexParDoOperator method setup.
/**
 * Sets up the operator: selects the side input reader, maps additional output tags to their
 * ports, builds the (optionally stateful) DoFn runner wrapped with side-input pushback, invokes
 * the DoFn's setup, and, when the DoFn is a {@code ProcessFn}, wires its state/timer factories
 * and process element invoker.
 *
 * @param context the operator context (not referenced by this method)
 */
@Override
public void setup(OperatorContext context) {
  this.traceTuples = ApexStreamTuple.Logging.isDebugEnabled(pipelineOptions.get(), this);

  // Start with the null reader; switch to a real handler only when side inputs exist.
  SideInputReader reader = NullSideInputReader.of(sideInputs);
  if (!sideInputs.isEmpty()) {
    sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
    reader = sideInputHandler;
  }

  // The n-th additional output tag is served by the n-th additional output port.
  for (int portIndex = 0; portIndex < additionalOutputTags.size(); portIndex++) {
    @SuppressWarnings("unchecked") DefaultOutputPort<ApexStreamTuple<?>> tagPort = (DefaultOutputPort<ApexStreamTuple<?>>) additionalOutputPorts[portIndex];
    additionalOutputPortMapping.put(additionalOutputTags.get(portIndex), tagPort);
  }

  // Step context that hands out the operator's current-key state and timer internals.
  NoOpStepContext keyedStepContext =
      new NoOpStepContext() {
        @Override
        public StateInternals stateInternals() {
          return currentKeyStateInternals;
        }

        @Override
        public TimerInternals timerInternals() {
          return currentKeyTimerInternals;
        }
      };

  DoFnRunner<InputT, OutputT> runner =
      DoFnRunners.simpleRunner(
          pipelineOptions.get(),
          doFn,
          reader,
          this,
          mainOutputTag,
          additionalOutputTags,
          keyedStepContext,
          windowingStrategy);
  doFnInvoker = DoFnInvokers.invokerFor(doFn);
  doFnInvoker.invokeSetup();

  if (this.currentKeyStateInternals != null) {
    // Keyed state is present: wrap the runner so state is cleaned up via timers.
    StatefulDoFnRunner.CleanupTimer timerCleanup =
        new StatefulDoFnRunner.TimeInternalsCleanupTimer(keyedStepContext.timerInternals(), windowingStrategy);
    @SuppressWarnings({ "rawtypes" }) Coder coderForWindows = windowingStrategy.getWindowFn().windowCoder();
    @SuppressWarnings({ "unchecked" }) StatefulDoFnRunner.StateCleaner<?> cleaner =
        new StatefulDoFnRunner.StateInternalsStateCleaner<>(doFn, keyedStepContext.stateInternals(), coderForWindows);
    runner = DoFnRunners.defaultStatefulDoFnRunner(doFn, runner, windowingStrategy, timerCleanup, cleaner);
  }
  pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(runner, sideInputs, sideInputHandler);

  if (!(doFn instanceof ProcessFn)) {
    return;
  }

  // Splittable DoFn: supply state/timer factories and the bounded process element invoker.
  @SuppressWarnings("unchecked") StateInternalsFactory<String> keyedStateFactory = (StateInternalsFactory<String>) this.currentKeyStateInternals.getFactory();
  @SuppressWarnings({ "rawtypes", "unchecked" }) ProcessFn<InputT, OutputT, Object, RestrictionTracker<Object>> processFn = (ProcessFn) doFn;
  processFn.setStateInternalsFactory(keyedStateFactory);

  TimerInternalsFactory<String> keyedTimerFactory =
      new TimerInternalsFactory<String>() {
        @Override
        public TimerInternals timerInternalsForKey(String key) {
          return currentKeyTimerInternals;
        }
      };
  processFn.setTimerInternalsFactory(keyedTimerFactory);

  // Values produced by the invoker are routed through this operator's own output method.
  OutputWindowedValue<OutputT> operatorOutput =
      new OutputWindowedValue<OutputT>() {
        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
          output(mainOutputTag, WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
          output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
      };
  processFn.setProcessElementInvoker(
      new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
          doFn,
          pipelineOptions.get(),
          operatorOutput,
          reader,
          Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()),
          10000,
          Duration.standardSeconds(10)));
}
use of org.apache.beam.runners.core.OutputWindowedValue in project beam by apache.
the class SplittableProcessElementsEvaluatorFactory method createEvaluator.
/**
 * Builds the evaluator for a {@code ProcessElements} transform applied to the given bundle.
 *
 * <p>The evaluator is created by the delegate factory; the {@code ProcessFn} behind its ParDo
 * evaluator is then wired with state/timer factories (which return the step context's internals
 * for every key), a side input reader, and an output-and-time-bounded process element invoker
 * whose output goes to the ParDo evaluator's output manager.
 *
 * @param application the applied transform being evaluated
 * @param inputBundle the input bundle supplying the input collection and key
 * @return the evaluator created by the delegate factory, after its process fn has been configured
 * @throws Exception propagated from the calls made while building the evaluator
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> createEvaluator(AppliedPTransform<PCollection<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT>> application, CommittedBundle<InputT> inputBundle) throws Exception {
  final ProcessElements<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT> transform = application.getTransform();

  final DoFnLifecycleManagerRemovingTransformEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> wrappedEvaluator =
      delegateFactory.createEvaluator(
          (AppliedPTransform) application,
          (PCollection<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>) inputBundle.getPCollection(),
          inputBundle.getKey(),
          transform.getSideInputs(),
          transform.getMainOutputTag(),
          transform.getAdditionalOutputTags().getAll(),
          DoFnSchemaInformation.create(),
          transform.getSideInputMapping());
  final ParDoEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> parDo = wrappedEvaluator.getParDoEvaluator();

  // The runner inside the ParDo evaluator is expected to be a ProcessFnRunner; unwrap its fn.
  final ProcessFn<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT> fn =
      (ProcessFn<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT>) ProcessFnRunner.class.cast(parDo.getFnRunner()).getFn();

  // State and timers come from the step context, independent of the key that is asked for.
  final DirectExecutionContext.DirectStepContext context = parDo.getStepContext();
  fn.setStateInternalsFactory(key -> context.stateInternals());
  fn.setTimerInternalsFactory(key -> context.timerInternals());

  final OutputManager evaluatorOutput = parDo.getOutputManager();
  OutputWindowedValue<OutputT> windowedOutput =
      new OutputWindowedValue<OutputT>() {
        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
          // Main output is simply the tagged overload with the transform's main output tag.
          outputWindowedValue(transform.getMainOutputTag(), output, timestamp, windows, pane);
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
          evaluatorOutput.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
      };

  SideInputReader sideInputs = evaluationContext.createSideInputReader(transform.getSideInputs());
  fn.setSideInputReader(sideInputs);

  // NOTE(review): the original inline comment on the two numeric limits was cut off
  // ("... splittable DoFn's in that respect."); presumably 100 and one second are deliberately
  // small bounds for the invoker. Confirm against upstream.
  fn.setProcessElementInvoker(
      new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
          transform.getFn(),
          options,
          windowedOutput,
          sideInputs,
          ses,
          100,
          Duration.standardSeconds(1),
          context::bundleFinalizer));

  return wrappedEvaluator;
}
Aggregations