use of org.apache.flink.api.java.functions.KeySelector in project beam by apache.
the class DoFnOperator method initializeState.
@Override
public void initializeState(StateInitializationContext context) throws Exception {
super.initializeState(context);
ListStateDescriptor<WindowedValue<InputT>> pushedBackStateDescriptor = new ListStateDescriptor<>("pushed-back-elements", new CoderTypeSerializer<>(windowedInputCoder, serializedOptions));
if (keySelector != null) {
pushedBackElementsHandler = KeyedPushedBackElementsHandler.create(keySelector, getKeyedStateBackend(), pushedBackStateDescriptor);
} else {
ListState<WindowedValue<InputT>> listState = getOperatorStateBackend().getListState(pushedBackStateDescriptor);
pushedBackElementsHandler = NonKeyedPushedBackElementsHandler.create(listState);
}
currentInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis();
currentSideInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis();
currentOutputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis();
sideInputReader = NullSideInputReader.of(sideInputs);
if (!sideInputs.isEmpty()) {
FlinkBroadcastStateInternals sideInputStateInternals = new FlinkBroadcastStateInternals<>(getContainingTask().getIndexInSubtaskGroup(), getOperatorStateBackend(), serializedOptions);
sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
sideInputReader = sideInputHandler;
Stream<WindowedValue<InputT>> pushedBack = pushedBackElementsHandler.getElements();
long min = pushedBack.map(v -> v.getTimestamp().getMillis()).reduce(Long.MAX_VALUE, Math::min);
pushedBackWatermark = min;
} else {
pushedBackWatermark = Long.MAX_VALUE;
}
// StatefulPardo or WindowDoFn
if (keyCoder != null) {
keyedStateInternals = new FlinkStateInternals<>((KeyedStateBackend) getKeyedStateBackend(), keyCoder, serializedOptions);
if (timerService == null) {
timerService = getInternalTimerService("beam-timer", new CoderTypeSerializer<>(timerCoder, serializedOptions), this);
}
timerInternals = new FlinkTimerInternals();
timeServiceManagerCompat = getTimeServiceManagerCompat();
}
outputManager = outputManagerFactory.create(output, getLockToAcquireForStateAccessDuringBundles(), getOperatorStateBackend());
}
use of org.apache.flink.api.java.functions.KeySelector in project beam by apache.
the class DoFnOperatorTest method testWatermarkUpdateAfterWatermarkHoldRelease.
@Test
public void testWatermarkUpdateAfterWatermarkHoldRelease() throws Exception {
Coder<WindowedValue<KV<String, String>>> coder = WindowedValue.getValueOnlyCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
TupleTag<KV<String, String>> outputTag = new TupleTag<>("main-output");
List<Long> emittedWatermarkHolds = new ArrayList<>();
KeySelector<WindowedValue<KV<String, String>>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue().getKey(), StringUtf8Coder.of());
DoFnOperator<KV<String, String>, KV<String, String>> doFnOperator = new DoFnOperator<KV<String, String>, KV<String, String>>(new IdentityDoFn<>(), "stepName", coder, Collections.emptyMap(), outputTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())), WindowingStrategy.globalDefault(), new HashMap<>(), /* side-input mapping */
Collections.emptyList(), /* side inputs */
FlinkPipelineOptions.defaults(), StringUtf8Coder.of(), keySelector, DoFnSchemaInformation.create(), Collections.emptyMap()) {
@Override
protected DoFnRunner<KV<String, String>, KV<String, String>> createWrappingDoFnRunner(DoFnRunner<KV<String, String>, KV<String, String>> wrappedRunner, StepContext stepContext) {
StateNamespace namespace = StateNamespaces.window(GlobalWindow.Coder.INSTANCE, GlobalWindow.INSTANCE);
StateTag<WatermarkHoldState> holdTag = StateTags.watermarkStateInternal("hold", TimestampCombiner.LATEST);
WatermarkHoldState holdState = stepContext.stateInternals().state(namespace, holdTag);
TimerInternals timerInternals = stepContext.timerInternals();
return new DoFnRunner<KV<String, String>, KV<String, String>>() {
@Override
public void startBundle() {
wrappedRunner.startBundle();
}
@Override
public void processElement(WindowedValue<KV<String, String>> elem) {
wrappedRunner.processElement(elem);
holdState.add(elem.getTimestamp());
timerInternals.setTimer(namespace, "timer", "family", elem.getTimestamp().plus(Duration.millis(1)), elem.getTimestamp().plus(Duration.millis(1)), TimeDomain.EVENT_TIME);
timerInternals.setTimer(namespace, "cleanup", "", GlobalWindow.INSTANCE.maxTimestamp(), GlobalWindow.INSTANCE.maxTimestamp(), TimeDomain.EVENT_TIME);
}
@Override
public <KeyT> void onTimer(String timerId, String timerFamilyId, KeyT key, BoundedWindow window, Instant timestamp, Instant outputTimestamp, TimeDomain timeDomain) {
if ("cleanup".equals(timerId)) {
holdState.clear();
} else {
holdState.add(outputTimestamp);
}
}
@Override
public void finishBundle() {
wrappedRunner.finishBundle();
}
@Override
public <KeyT> void onWindowExpiration(BoundedWindow window, Instant timestamp, KeyT key) {
wrappedRunner.onWindowExpiration(window, timestamp, key);
}
@Override
public DoFn<KV<String, String>, KV<String, String>> getFn() {
return doFn;
}
};
}
@Override
void emitWatermarkIfHoldChanged(long currentWatermarkHold) {
emittedWatermarkHolds.add(keyedStateInternals.minWatermarkHoldMs());
}
};
OneInputStreamOperatorTestHarness<WindowedValue<KV<String, String>>, WindowedValue<KV<String, String>>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
testHarness.setup();
Instant now = Instant.now();
testHarness.open();
// process first element, set hold to `now', setup timer for `now + 1'
testHarness.processElement(new StreamRecord<>(WindowedValue.timestampedValueInGlobalWindow(KV.of("Key", "Hello"), now)));
assertThat(emittedWatermarkHolds, is(equalTo(Collections.singletonList(now.getMillis()))));
// fire timer, change hold to `now + 2'
testHarness.processWatermark(now.getMillis() + 2);
assertThat(emittedWatermarkHolds, is(equalTo(Arrays.asList(now.getMillis(), now.getMillis() + 1))));
// process second element, verify we emitted changed hold
testHarness.processElement(new StreamRecord<>(WindowedValue.timestampedValueInGlobalWindow(KV.of("Key", "Hello"), now.plus(Duration.millis(2)))));
assertThat(emittedWatermarkHolds, is(equalTo(Arrays.asList(now.getMillis(), now.getMillis() + 1, now.getMillis() + 2))));
testHarness.processWatermark(GlobalWindow.INSTANCE.maxTimestamp().plus(Duration.millis(1)).getMillis());
testHarness.processWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());
testHarness.close();
}
use of org.apache.flink.api.java.functions.KeySelector in project beam by apache.
the class DoFnOperatorTest method testTimersRestore.
@Test
public void testTimersRestore() throws Exception {
final Instant timerTimestamp = new Instant(1000);
final String outputMessage = "Timer fired";
WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10_000)));
DoFn<Integer, String> fn = new DoFn<Integer, String>() {
private static final String EVENT_TIMER_ID = "eventTimer";
@TimerId(EVENT_TIMER_ID)
private final TimerSpec eventTimer = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@ProcessElement
public void processElement(ProcessContext context, @TimerId(EVENT_TIMER_ID) Timer timer) {
timer.set(timerTimestamp);
}
@OnTimer(EVENT_TIMER_ID)
public void onEventTime(OnTimerContext context) {
assertEquals("Timer timestamp must match set timestamp.", timerTimestamp, context.timestamp());
context.outputWithTimestamp(outputMessage, context.timestamp());
}
};
VarIntCoder keyCoder = VarIntCoder.of();
WindowedValue.FullWindowedValueCoder<Integer> inputCoder = WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
WindowedValue.FullWindowedValueCoder<String> outputCoder = WindowedValue.getFullCoder(StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());
TupleTag<String> outputTag = new TupleTag<>("main-output");
final CoderTypeSerializer<WindowedValue<String>> outputSerializer = new CoderTypeSerializer<>(outputCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
CoderTypeInformation<ByteBuffer> keyCoderInfo = new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults());
KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);
OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness = createTestHarness(windowingStrategy, fn, inputCoder, outputCoder, keyCoder, outputTag, keyCoderInfo, keySelector);
testHarness.setup(outputSerializer);
testHarness.open();
testHarness.processWatermark(0);
IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10_000));
// this should register a timer
testHarness.processElement(new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
// snapshot and restore
final OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
testHarness.close();
testHarness = createTestHarness(windowingStrategy, fn, inputCoder, outputCoder, VarIntCoder.of(), outputTag, keyCoderInfo, keySelector);
testHarness.setup(outputSerializer);
testHarness.initializeState(snapshot);
testHarness.open();
// this must fire the timer
testHarness.processWatermark(timerTimestamp.getMillis() + 1);
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(outputMessage, timerTimestamp, window1, PaneInfo.NO_FIRING)));
testHarness.close();
}
use of org.apache.flink.api.java.functions.KeySelector in project beam by apache.
the class DoFnOperatorTest method keyedParDoSideInputCheckpointing.
@Test
public void keyedParDoSideInputCheckpointing() throws Exception {
sideInputCheckpointing(() -> {
StringUtf8Coder keyCoder = StringUtf8Coder.of();
Coder<WindowedValue<String>> coder = WindowedValue.getFullCoder(keyCoder, IntervalWindow.getCoder());
TupleTag<String> outputTag = new TupleTag<>("main-output");
KeySelector<WindowedValue<String>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);
ImmutableMap<Integer, PCollectionView<?>> sideInputMapping = ImmutableMap.<Integer, PCollectionView<?>>builder().put(1, view1).put(2, view2).build();
DoFnOperator<String, String> doFnOperator = new DoFnOperator<>(new IdentityDoFn<>(), "stepName", coder, Collections.emptyMap(), outputTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())), WindowingStrategy.of(FixedWindows.of(Duration.millis(100))), sideInputMapping, /* side-input mapping */
ImmutableList.of(view1, view2), /* side inputs */
FlinkPipelineOptions.defaults(), keyCoder, keySelector, DoFnSchemaInformation.create(), Collections.emptyMap());
return new KeyedTwoInputStreamOperatorTestHarness<>(doFnOperator, keySelector, // we use a dummy key for the second input since it is considered to be broadcast
null, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
});
}
use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
the class AvroPojoTest method testWithAvroGenericSer.
@Test
public void testWithAvroGenericSer() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().enableForceAvro();
Path in = new Path(inFile.getAbsoluteFile().toURI());
AvroInputFormat<User> users = new AvroInputFormat<User>(in, User.class);
DataSet<User> usersDS = env.createInput(users);
DataSet<Tuple2<String, Integer>> res = usersDS.groupBy(new KeySelector<User, String>() {
@Override
public String getKey(User value) throws Exception {
return String.valueOf(value.getName());
}
}).reduceGroup(new GroupReduceFunction<User, Tuple2<String, Integer>>() {
@Override
public void reduce(Iterable<User> values, Collector<Tuple2<String, Integer>> out) throws Exception {
for (User u : values) {
out.collect(new Tuple2<String, Integer>(u.getName().toString(), 1));
}
}
});
res.writeAsText(resultPath);
env.execute("Avro Key selection");
expected = "(Charlie,1)\n(Alyssa,1)\n";
}
Aggregations