use of org.apache.beam.sdk.transforms.windowing.PaneInfo in project beam by apache.
the class ReduceFnRunner method onTrigger.
/**
* Run the {@link ReduceFn#onTrigger} method and produce any necessary output.
*
* @return output watermark hold added, or {@literal null} if none.
*/
@Nullable
private Instant onTrigger(final ReduceFn<K, InputT, OutputT, W>.Context<K, InputT, OutputT, W> directContext, ReduceFn<K, InputT, OutputT, W>.Context<K, InputT, OutputT, W> renamedContext, boolean isFinished, boolean isEndOfWindow) throws Exception {
Instant inputWM = timerInternals.currentInputWatermarkTime();
// Calculate the pane info.
final PaneInfo pane = paneInfoTracker.getNextPaneInfo(directContext, isFinished).read();
// Extract the window hold, and as a side effect clear it.
final WatermarkHold.OldAndNewHolds pair = watermarkHold.extractAndRelease(renamedContext, isFinished).read();
// TODO: This isn't accurate if the elements are late. See BEAM-2262
final Instant outputTimestamp = pair.oldHold;
@Nullable Instant newHold = pair.newHold;
final boolean isEmpty = nonEmptyPanes.isEmpty(renamedContext.state()).read();
if (newHold != null) {
// We can't be finished yet.
checkState(!isFinished, "new hold at %s but finished %s", newHold, directContext.window());
// The hold cannot be behind the input watermark.
checkState(!newHold.isBefore(inputWM), "new hold %s is before input watermark %s", newHold, inputWM);
if (newHold.isAfter(directContext.window().maxTimestamp())) {
// The hold must be for garbage collection, which can't have happened yet.
checkState(newHold.isEqual(LateDataUtils.garbageCollectionTime(directContext.window(), windowingStrategy)), "new hold %s should be at garbage collection for window %s plus %s", newHold, directContext.window(), windowingStrategy.getAllowedLateness());
} else {
// The hold must be for the end-of-window, which can't have happened yet.
checkState(newHold.isEqual(directContext.window().maxTimestamp()), "new hold %s should be at end of window %s", newHold, directContext.window());
checkState(!isEndOfWindow, "new hold at %s for %s but this is the watermark trigger", newHold, directContext.window());
}
}
// Only emit a pane if it has data or empty panes are observable.
if (needToEmit(isEmpty, isFinished, pane.getTiming())) {
// Run reduceFn.onTrigger method.
final List<W> windows = Collections.singletonList(directContext.window());
ReduceFn<K, InputT, OutputT, W>.OnTriggerContext<K, InputT, OutputT, W> renamedTriggerContext = contextFactory.forTrigger(directContext.window(), pane, StateStyle.RENAMED, new OnTriggerCallbacks<OutputT>() {
@Override
public void output(OutputT toOutput) {
// We're going to output panes, so commit the (now used) PaneInfo.
// TODO: This is unnecessary if the trigger isFinished since the saved
// state will be immediately deleted.
paneInfoTracker.storeCurrentPaneInfo(directContext, pane);
// Output the actual value.
outputter.outputWindowedValue(KV.of(key, toOutput), outputTimestamp, windows, pane);
}
});
reduceFn.onTrigger(renamedTriggerContext);
}
return newHold;
}
use of org.apache.beam.sdk.transforms.windowing.PaneInfo in project beam by apache.
the class WindowedValueTest method testExplodeWindowsManyWindowsMultipleWindowedValues.
@Test
public void testExplodeWindowsManyWindowsMultipleWindowedValues() {
Instant now = Instant.now();
BoundedWindow centerWindow = new IntervalWindow(now.minus(1000L), now.plus(1000L));
BoundedWindow pastWindow = new IntervalWindow(now.minus(1500L), now.plus(500L));
BoundedWindow futureWindow = new IntervalWindow(now.minus(500L), now.plus(1500L));
BoundedWindow futureFutureWindow = new IntervalWindow(now, now.plus(2000L));
PaneInfo pane = PaneInfo.createPane(false, false, Timing.ON_TIME, 3L, 0L);
WindowedValue<String> value = WindowedValue.of("foo", now, ImmutableList.of(pastWindow, centerWindow, futureWindow, futureFutureWindow), pane);
assertThat(value.explodeWindows(), containsInAnyOrder(WindowedValue.of("foo", now, futureFutureWindow, pane), WindowedValue.of("foo", now, futureWindow, pane), WindowedValue.of("foo", now, centerWindow, pane), WindowedValue.of("foo", now, pastWindow, pane)));
}
use of org.apache.beam.sdk.transforms.windowing.PaneInfo in project beam by apache.
the class DefaultFilenamePolicy method windowedFilename.
@Override
public ResourceId windowedFilename(ResourceId outputDirectory, WindowedContext context, String extension) {
boolean shardTemplateProvidedByUser = !this.shardTemplate.equals(DEFAULT_SHARD_TEMPLATE);
if (shardTemplateProvidedByUser) {
boolean isWindowed = isWindowedTemplate(this.shardTemplate);
if (!isWindowed) {
LOG.info("Template you provided {} does not have enough information to create" + "meaningful windowed file names. Consider using P and W in your template", this.shardTemplate);
}
}
final PaneInfo paneInfo = context.getPaneInfo();
String paneStr = paneInfoToString(paneInfo);
String windowStr = windowToString(context.getWindow());
String templateToUse = shardTemplate;
if (!shardTemplateProvidedByUser) {
LOG.info("User did not provide shard template. For creating windowed file names " + "default template {} will be used", DEFAULT_WINDOWED_SHARD_TEMPLATE);
templateToUse = DEFAULT_WINDOWED_SHARD_TEMPLATE;
}
String filename = constructName(prefix.get(), templateToUse, suffix, context.getShardNumber(), context.getNumShards(), paneStr, windowStr) + extension;
return outputDirectory.resolve(filename, StandardResolveOptions.RESOLVE_FILE);
}
use of org.apache.beam.sdk.transforms.windowing.PaneInfo in project beam by apache.
the class ApexParDoOperator method setup.
@Override
public void setup(OperatorContext context) {
this.traceTuples = ApexStreamTuple.Logging.isDebugEnabled(pipelineOptions.get(), this);
SideInputReader sideInputReader = NullSideInputReader.of(sideInputs);
if (!sideInputs.isEmpty()) {
sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
sideInputReader = sideInputHandler;
}
for (int i = 0; i < additionalOutputTags.size(); i++) {
@SuppressWarnings("unchecked") DefaultOutputPort<ApexStreamTuple<?>> port = (DefaultOutputPort<ApexStreamTuple<?>>) additionalOutputPorts[i];
additionalOutputPortMapping.put(additionalOutputTags.get(i), port);
}
NoOpStepContext stepContext = new NoOpStepContext() {
@Override
public StateInternals stateInternals() {
return currentKeyStateInternals;
}
@Override
public TimerInternals timerInternals() {
return currentKeyTimerInternals;
}
};
DoFnRunner<InputT, OutputT> doFnRunner = DoFnRunners.simpleRunner(pipelineOptions.get(), doFn, sideInputReader, this, mainOutputTag, additionalOutputTags, stepContext, windowingStrategy);
doFnInvoker = DoFnInvokers.invokerFor(doFn);
doFnInvoker.invokeSetup();
if (this.currentKeyStateInternals != null) {
StatefulDoFnRunner.CleanupTimer cleanupTimer = new StatefulDoFnRunner.TimeInternalsCleanupTimer(stepContext.timerInternals(), windowingStrategy);
@SuppressWarnings({ "rawtypes" }) Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
@SuppressWarnings({ "unchecked" }) StatefulDoFnRunner.StateCleaner<?> stateCleaner = new StatefulDoFnRunner.StateInternalsStateCleaner<>(doFn, stepContext.stateInternals(), windowCoder);
doFnRunner = DoFnRunners.defaultStatefulDoFnRunner(doFn, doFnRunner, windowingStrategy, cleanupTimer, stateCleaner);
}
pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
if (doFn instanceof ProcessFn) {
@SuppressWarnings("unchecked") StateInternalsFactory<String> stateInternalsFactory = (StateInternalsFactory<String>) this.currentKeyStateInternals.getFactory();
@SuppressWarnings({ "rawtypes", "unchecked" }) ProcessFn<InputT, OutputT, Object, RestrictionTracker<Object>> splittableDoFn = (ProcessFn) doFn;
splittableDoFn.setStateInternalsFactory(stateInternalsFactory);
TimerInternalsFactory<String> timerInternalsFactory = new TimerInternalsFactory<String>() {
@Override
public TimerInternals timerInternalsForKey(String key) {
return currentKeyTimerInternals;
}
};
splittableDoFn.setTimerInternalsFactory(timerInternalsFactory);
splittableDoFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>(doFn, pipelineOptions.get(), new OutputWindowedValue<OutputT>() {
@Override
public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
output(mainOutputTag, WindowedValue.of(output, timestamp, windows, pane));
}
@Override
public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
output(tag, WindowedValue.of(output, timestamp, windows, pane));
}
}, sideInputReader, Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()), 10000, Duration.standardSeconds(10)));
}
}
use of org.apache.beam.sdk.transforms.windowing.PaneInfo in project beam by apache.
the class ApexGroupByKeyOperatorTest method testGlobalWindowMinTimestamp.
@Test
public void testGlobalWindowMinTimestamp() throws Exception {
ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
options.setRunner(TestApexRunner.class);
Pipeline pipeline = Pipeline.create(options);
WindowingStrategy<?, ?> ws = WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(10)));
PCollection<KV<String, Integer>> input = PCollection.createPrimitiveOutputInternal(pipeline, ws, IsBounded.BOUNDED);
input.setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));
ApexGroupByKeyOperator<String, Integer> operator = new ApexGroupByKeyOperator<>(options, input, new ApexStateInternals.ApexStateBackend());
operator.setup(null);
operator.beginWindow(1);
Assert.assertNotNull("Serialization", operator = KryoCloneUtils.cloneObject(operator));
final List<Object> results = Lists.newArrayList();
Sink<Object> sink = new Sink<Object>() {
@Override
public void put(Object tuple) {
results.add(tuple);
}
@Override
public int getCount(boolean reset) {
return 0;
}
};
operator.output.setSink(sink);
operator.setup(null);
operator.beginWindow(1);
Instant windowStart = BoundedWindow.TIMESTAMP_MIN_VALUE;
BoundedWindow window = new IntervalWindow(windowStart, windowStart.plus(10000));
PaneInfo paneInfo = PaneInfo.NO_FIRING;
WindowedValue<KV<String, Integer>> wv1 = WindowedValue.of(KV.of("foo", 1), windowStart, window, paneInfo);
operator.input.process(ApexStreamTuple.DataTuple.of(wv1));
WindowedValue<KV<String, Integer>> wv2 = WindowedValue.of(KV.of("foo", 1), windowStart, window, paneInfo);
operator.input.process(ApexStreamTuple.DataTuple.of(wv2));
ApexStreamTuple<WindowedValue<KV<String, Integer>>> watermark = ApexStreamTuple.WatermarkTuple.of(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());
Assert.assertEquals("number outputs", 0, results.size());
operator.input.process(watermark);
Assert.assertEquals("number outputs", 2, results.size());
@SuppressWarnings({ "unchecked", "rawtypes" }) ApexStreamTuple.DataTuple<WindowedValue<KV<String, Iterable<Integer>>>> dataTuple = (ApexStreamTuple.DataTuple) results.get(0);
List<Integer> counts = Lists.newArrayList(1, 1);
Assert.assertEquals("iterable", KV.of("foo", counts), dataTuple.getValue().getValue());
Assert.assertEquals("expected watermark", watermark, results.get(1));
}
Aggregations