Use of org.apache.beam.runners.apex.translation.utils.ApexStreamTuple in the Apache Beam project.
The setup method of the ApexParDoOperator class.
/**
 * Prepares this operator for processing.
 *
 * <p>In order, this method: reads the tuple-tracing flag, selects the side input reader,
 * maps every additional output tag to its output port, builds the {@link DoFnRunner} chain
 * (simple runner, optionally wrapped by a stateful runner, then wrapped by a pushback
 * side-input runner), invokes the DoFn's setup hook, and finally, when the DoFn is a
 * {@code ProcessFn}, supplies it with state/timer factories and a process-element invoker.
 *
 * @param context the operator context handed in by the caller; it is not read by this method
 */
@Override
public void setup(OperatorContext context) {
  this.traceTuples = ApexStreamTuple.Logging.isDebugEnabled(pipelineOptions.get(), this);

  // Start with the null reader; replace it with a real handler only when side inputs exist.
  SideInputReader sideInputReader = NullSideInputReader.of(sideInputs);
  if (!sideInputs.isEmpty()) {
    sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
    sideInputReader = sideInputHandler;
  }

  // The n-th additional output tag is paired with the n-th additional output port.
  for (int tagIndex = 0; tagIndex < additionalOutputTags.size(); tagIndex++) {
    @SuppressWarnings("unchecked")
    DefaultOutputPort<ApexStreamTuple<?>> outputPort =
        (DefaultOutputPort<ApexStreamTuple<?>>) additionalOutputPorts[tagIndex];
    additionalOutputPortMapping.put(additionalOutputTags.get(tagIndex), outputPort);
  }

  // Step context that hands out whatever state/timer internals this operator currently holds.
  NoOpStepContext keyedStepContext =
      new NoOpStepContext() {
        @Override
        public StateInternals stateInternals() {
          return currentKeyStateInternals;
        }

        @Override
        public TimerInternals timerInternals() {
          return currentKeyTimerInternals;
        }
      };

  DoFnRunner<InputT, OutputT> runner =
      DoFnRunners.simpleRunner(
          pipelineOptions.get(),
          doFn,
          sideInputReader,
          this,
          mainOutputTag,
          additionalOutputTags,
          keyedStepContext,
          windowingStrategy);

  doFnInvoker = DoFnInvokers.invokerFor(doFn);
  doFnInvoker.invokeSetup();

  // Keyed state is present: wrap the runner so that state is cleaned up via timers.
  if (this.currentKeyStateInternals != null) {
    StatefulDoFnRunner.CleanupTimer cleanupTimer =
        new StatefulDoFnRunner.TimeInternalsCleanupTimer(
            keyedStepContext.timerInternals(), windowingStrategy);
    @SuppressWarnings({ "rawtypes" })
    Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
    @SuppressWarnings({ "unchecked" })
    StatefulDoFnRunner.StateCleaner<?> stateCleaner =
        new StatefulDoFnRunner.StateInternalsStateCleaner<>(
            doFn, keyedStepContext.stateInternals(), windowCoder);
    runner =
        DoFnRunners.defaultStatefulDoFnRunner(
            doFn, runner, windowingStrategy, cleanupTimer, stateCleaner);
  }

  pushbackDoFnRunner =
      SimplePushbackSideInputDoFnRunner.create(runner, sideInputs, sideInputHandler);

  if (!(doFn instanceof ProcessFn)) {
    return;
  }

  // Splittable DoFn wiring.
  // NOTE(review): this path dereferences currentKeyStateInternals without a null check;
  // presumably it is always initialized for ProcessFn elsewhere - confirm.
  @SuppressWarnings("unchecked")
  StateInternalsFactory<String> stateInternalsFactory =
      (StateInternalsFactory<String>) this.currentKeyStateInternals.getFactory();
  @SuppressWarnings({ "rawtypes", "unchecked" })
  ProcessFn<InputT, OutputT, Object, RestrictionTracker<Object>> processFn = (ProcessFn) doFn;
  processFn.setStateInternalsFactory(stateInternalsFactory);

  // Every key is served by the same timer internals held by this operator.
  TimerInternalsFactory<String> timerInternalsFactory =
      new TimerInternalsFactory<String>() {
        @Override
        public TimerInternals timerInternalsForKey(String key) {
          return currentKeyTimerInternals;
        }
      };
  processFn.setTimerInternalsFactory(timerInternalsFactory);

  // Forwards values produced by the splittable invoker to this operator's output method.
  OutputWindowedValue<OutputT> windowedOutput =
      new OutputWindowedValue<OutputT>() {
        @Override
        public void outputWindowedValue(
            OutputT output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          output(mainOutputTag, WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(
            TupleTag<AdditionalOutputT> tag,
            AdditionalOutputT output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
      };

  // NOTE(review): the executor created below is not stored by this method, so nothing here
  // can shut it down later - confirm whether that is handled elsewhere.
  processFn.setProcessElementInvoker(
      new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
          doFn,
          pipelineOptions.get(),
          windowedOutput,
          sideInputReader,
          Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()),
          10000,
          Duration.standardSeconds(10)));
}
Use of org.apache.beam.runners.apex.translation.utils.ApexStreamTuple in the Apache Beam project.
The testGlobalWindowMinTimestamp method of the ApexGroupByKeyOperatorTest class.
/**
 * Feeds two identical {@code ("foo", 1)} elements, timestamped at
 * {@code BoundedWindow.TIMESTAMP_MIN_VALUE}, into a Kryo-cloned group-by-key operator and
 * checks that nothing is emitted until a watermark at {@code TIMESTAMP_MAX_VALUE} arrives.
 * After the watermark, exactly two tuples are expected: the grouped data tuple followed by
 * the watermark itself.
 */
@Test
public void testGlobalWindowMinTimestamp() throws Exception {
  ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
  options.setRunner(TestApexRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  WindowingStrategy<?, ?> windowing =
      WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(10)));
  PCollection<KV<String, Integer>> input =
      PCollection.createPrimitiveOutputInternal(pipeline, windowing, IsBounded.BOUNDED);
  input.setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));

  ApexGroupByKeyOperator<String, Integer> operator =
      new ApexGroupByKeyOperator<>(options, input, new ApexStateInternals.ApexStateBackend());
  operator.setup(null);
  operator.beginWindow(1);

  // Continue the test with the Kryo clone rather than the original instance.
  operator = KryoCloneUtils.cloneObject(operator);
  Assert.assertNotNull("Serialization", operator);

  // Capture everything the operator emits on its output port.
  final List<Object> emitted = Lists.newArrayList();
  Sink<Object> capturingSink =
      new Sink<Object>() {
        @Override
        public void put(Object tuple) {
          emitted.add(tuple);
        }

        @Override
        public int getCount(boolean reset) {
          return 0;
        }
      };
  operator.output.setSink(capturingSink);

  operator.setup(null);
  operator.beginWindow(1);

  Instant windowStart = BoundedWindow.TIMESTAMP_MIN_VALUE;
  BoundedWindow window = new IntervalWindow(windowStart, windowStart.plus(10000));
  PaneInfo paneInfo = PaneInfo.NO_FIRING;

  // Submit the same keyed element twice.
  for (int sent = 0; sent < 2; sent++) {
    WindowedValue<KV<String, Integer>> element =
        WindowedValue.of(KV.of("foo", 1), windowStart, window, paneInfo);
    operator.input.process(ApexStreamTuple.DataTuple.of(element));
  }

  ApexStreamTuple<WindowedValue<KV<String, Integer>>> watermark =
      ApexStreamTuple.WatermarkTuple.of(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());

  // Nothing may be emitted before the watermark is processed.
  Assert.assertEquals("number outputs", 0, emitted.size());
  operator.input.process(watermark);
  Assert.assertEquals("number outputs", 2, emitted.size());

  @SuppressWarnings({ "unchecked", "rawtypes" })
  ApexStreamTuple.DataTuple<WindowedValue<KV<String, Iterable<Integer>>>> grouped =
      (ApexStreamTuple.DataTuple) emitted.get(0);
  List<Integer> expectedValues = Lists.newArrayList(1, 1);
  Assert.assertEquals("iterable", KV.of("foo", expectedValues), grouped.getValue().getValue());
  Assert.assertEquals("expected watermark", watermark, emitted.get(1));
}
Aggregations