Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
The class DistinctCompilationTest, method testDistinctWithSelectorFunctionKey.
@Test
public void testDistinctWithSelectorFunctionKey() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<String, Double>> data = env
                .readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source")
                .setParallelism(6);

        data.distinct(new KeySelector<Tuple2<String, Double>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Double> value) {
                        return value.f0;
                    }
                })
                .name("reducer")
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
                .name("sink");

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");

        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

        // get the key extractor (ahead of the combiner) and the key projector (ahead of the sink)
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
        SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();

        // check wiring
        assertEquals(sourceNode, keyExtractor.getInput().getSource());
        assertEquals(keyProjector, sinkNode.getInput().getSource());

        // check that the reducer and the combiner use matching strategies
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());

        // check the keys
        assertEquals(new FieldList(0), reduceNode.getKeys(0));
        assertEquals(new FieldList(0), combineNode.getKeys(0));
        assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());

        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, keyExtractor.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, keyProjector.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
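For context on what the plan checks above verify: distinct with a KeySelector is compiled into a key-extract, combine, reduce, project chain. A minimal standalone sketch of the same user-facing pattern (class name and data are illustrative, assuming a local Flink batch environment):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;

public class DistinctBySelectorSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<String, Double>> data =
                env.fromElements(Tuple2.of("a", 1.0), Tuple2.of("a", 2.0), Tuple2.of("b", 3.0));
        // Deduplicate by the String field only; one arbitrary record per key survives.
        // The optimizer wraps this in the key-extracting and value-projecting mappers
        // that the test above locates around the combine/reduce pair.
        data.distinct(new KeySelector<Tuple2<String, Double>, String>() {
            @Override
            public String getKey(Tuple2<String, Double> value) {
                return value.f0;
            }
        }).print();
    }
}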
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
The class AvroTypeExtractionTest, method testWithAvroGenericSer.
@Test
public void testWithAvroGenericSer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableForceAvro();

    Path in = new Path(inFile.getAbsoluteFile().toURI());
    AvroInputFormat<User> users = new AvroInputFormat<>(in, User.class);
    DataSet<User> usersDS = env.createInput(users);

    DataSet<Tuple2<String, Integer>> res = usersDS
            .groupBy((KeySelector<User, String>) value -> String.valueOf(value.getName()))
            .reduceGroup(
                    (GroupReduceFunction<User, Tuple2<String, Integer>>)
                            (values, out) -> {
                                for (User u : values) {
                                    out.collect(new Tuple2<>(u.getName().toString(), 1));
                                }
                            })
            .returns(Types.TUPLE(Types.STRING, Types.INT));

    res.writeAsText(resultPath);
    env.execute("Avro Key selection");

    expected = "(Charlie,1)\n(Alyssa,1)\n";
}
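The (KeySelector<User, String>) cast is what selects the key-selector overload of groupBy, and returns(...) supplies the output type that erasure hides on the reduce lambda. A minimal sketch of the same pattern on a plain POJO (the class names GroupByKeySelectorSketch and Person are hypothetical stand-ins for the generated Avro User class):

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;

public class GroupByKeySelectorSketch {
    // Hypothetical POJO standing in for the generated Avro User class.
    public static class Person {
        public String name;
        public Person() {}
        public Person(String name) { this.name = name; }
    }

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Person> people = env.fromElements(new Person("Alyssa"), new Person("Charlie"));
        people.groupBy((KeySelector<Person, String>) p -> p.name)
                .reduceGroup(
                        (GroupReduceFunction<Person, Tuple2<String, Integer>>)
                                (values, out) -> {
                                    String key = null;
                                    int count = 0;
                                    for (Person p : values) {
                                        key = p.name;
                                        count++;
                                    }
                                    out.collect(Tuple2.of(key, count));
                                })
                // returns(...) is required because the lambda's Tuple2 output type is erased.
                .returns(Types.TUPLE(Types.STRING, Types.INT))
                .print();
    }
}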
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
The class UnionTranslationTest, method translateUnion3SortedGroup.
@Test
public void translateUnion3SortedGroup() {
    try {
        final int parallelism = 4;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

        DataSet<Tuple3<Double, StringValue, LongValue>> dataset1 = getSourceDataSet(env, 2);
        DataSet<Tuple3<Double, StringValue, LongValue>> dataset2 = getSourceDataSet(env, 3);
        DataSet<Tuple3<Double, StringValue, LongValue>> dataset3 = getSourceDataSet(env, -1);

        dataset1.union(dataset2)
                .union(dataset3)
                .groupBy((KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "")
                .sortGroup(
                        (KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "",
                        Order.ASCENDING)
                .reduceGroup(
                        (GroupReduceFunction<Tuple3<Double, StringValue, LongValue>, String>)
                                (values, out) -> {})
                .returns(String.class)
                .output(new DiscardingOutputFormat<>());

        Plan p = env.createProgramPlan();

        // The plan should look like the following one.
        //
        // DataSet1(2) - MapOperator(2)--+
        //                               |- Union(-1) --+
        // DataSet2(3) - MapOperator(3)--+              |- Union(-1) - SingleInputOperator - Sink
        //                                              |
        // DataSet3(-1) - MapOperator(-1)---------------+
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
        Union secondUnionOperator = (Union) ((SingleInputOperator) sink.getInput()).getInput();

        // The first input of the second union should be the first union.
        Union firstUnionOperator = (Union) secondUnionOperator.getFirstInput();

        // A key mapper should be added to the second input stream of the second union.
        assertTrue(secondUnionOperator.getSecondInput() instanceof MapOperatorBase<?, ?, ?>);

        // Key mappers should be added to both input streams of the first union.
        assertTrue(firstUnionOperator.getFirstInput() instanceof MapOperatorBase<?, ?, ?>);
        assertTrue(firstUnionOperator.getSecondInput() instanceof MapOperatorBase<?, ?, ?>);

        // The parallelism of each key mapper should equal that of its input.
        assertEquals(2, firstUnionOperator.getFirstInput().getParallelism());
        assertEquals(3, firstUnionOperator.getSecondInput().getParallelism());
        assertEquals(-1, secondUnionOperator.getSecondInput().getParallelism());

        // The unions should always have the default parallelism.
        assertEquals(ExecutionConfig.PARALLELISM_DEFAULT, secondUnionOperator.getParallelism());
        assertEquals(ExecutionConfig.PARALLELISM_DEFAULT, firstUnionOperator.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
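The getSourceDataSet helper lives elsewhere in UnionTranslationTest and is not shown on this page. A minimal version consistent with how the test uses it, a one-element Tuple3 source with a caller-chosen parallelism (the element values here are an assumption), could look like this:

// Sketch of the helper referenced above; -1 is ExecutionConfig.PARALLELISM_DEFAULT.
private static DataSet<Tuple3<Double, StringValue, LongValue>> getSourceDataSet(
        ExecutionEnvironment env, int parallelism) {
    return env.fromElements(new Tuple3<>(0.0, new StringValue("a"), new LongValue(1L)))
            .setParallelism(parallelism);
}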
Use of org.apache.flink.api.java.functions.KeySelector in project beam by apache.
The class DoFnOperatorTest, method testLateDroppingForStatefulFn.
@Test
public void testLateDroppingForStatefulFn() throws Exception {
    WindowingStrategy<Object, IntervalWindow> windowingStrategy =
            WindowingStrategy.of(FixedWindows.of(Duration.millis(10)));

    DoFn<Integer, String> fn =
            new DoFn<Integer, String>() {

                @StateId("state")
                private final StateSpec<ValueState<String>> stateSpec =
                        StateSpecs.value(StringUtf8Coder.of());

                @ProcessElement
                public void processElement(ProcessContext context) {
                    context.output(context.element().toString());
                }
            };

    VarIntCoder keyCoder = VarIntCoder.of();
    Coder<WindowedValue<Integer>> inputCoder =
            WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
    Coder<WindowedValue<String>> outputCoder =
            WindowedValue.getFullCoder(
                    StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());

    KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector =
            e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);

    TupleTag<String> outputTag = new TupleTag<>("main-output");

    DoFnOperator<Integer, String> doFnOperator =
            new DoFnOperator<>(
                    fn,
                    "stepName",
                    inputCoder,
                    Collections.emptyMap(),
                    outputTag,
                    Collections.emptyList(),
                    new DoFnOperator.MultiOutputOutputManagerFactory<>(
                            outputTag,
                            outputCoder,
                            new SerializablePipelineOptions(FlinkPipelineOptions.defaults())),
                    windowingStrategy,
                    new HashMap<>(), /* side-input mapping */
                    Collections.emptyList(), /* side inputs */
                    FlinkPipelineOptions.defaults(),
                    keyCoder, /* key coder */
                    keySelector,
                    DoFnSchemaInformation.create(),
                    Collections.emptyMap());

    OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness =
            new KeyedOneInputStreamOperatorTestHarness<>(
                    doFnOperator,
                    keySelector,
                    new CoderTypeInformation<>(
                            FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));

    testHarness.open();
    testHarness.processWatermark(0);

    IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));

    // this should not be late
    testHarness.processElement(
            new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(
            stripStreamRecordFromWindowedValue(testHarness.getOutput()),
            contains(WindowedValue.of("13", new Instant(0), window1, PaneInfo.NO_FIRING)));
    testHarness.getOutput().clear();

    testHarness.processWatermark(9);

    // this should still not be considered late
    testHarness.processElement(
            new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(
            stripStreamRecordFromWindowedValue(testHarness.getOutput()),
            contains(WindowedValue.of("17", new Instant(0), window1, PaneInfo.NO_FIRING)));
    testHarness.getOutput().clear();

    testHarness.processWatermark(10);

    // this should now be considered late and the element should be dropped
    testHarness.processElement(
            new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());

    testHarness.close();
}
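The dropping decision above comes down to the window's maximum timestamp relative to the current input watermark. A simplified sketch of that check, using the same Beam classes as the test (illustrative only; Beam's real logic also factors in allowed lateness, which defaults to zero here):

IntervalWindow window = new IntervalWindow(new Instant(0), Duration.millis(10));
// IntervalWindow.maxTimestamp() is the window end minus 1 ms, i.e. 9 for [0, 10).
boolean lateAtWatermark9 = window.maxTimestamp().isBefore(new Instant(9));   // false: element kept
boolean lateAtWatermark10 = window.maxTimestamp().isBefore(new Instant(10)); // true: element dropped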
Use of org.apache.flink.api.java.functions.KeySelector in project beam by apache.
The class DoFnOperatorTest, method testWatermarkContract.
/**
* This test specifically verifies that we correctly map Flink watermarks to Beam watermarks. In
* Beam, a watermark {@code T} guarantees there will not be elements with a timestamp {@code < T}
* in the future. In Flink, a watermark {@code T} guarantees there will not be elements with a
* timestamp {@code <= T} in the future. We have to make sure to take this into account when
* firing timers.
*
* <p>This does not test the timer API in general or processing-time timers because there are
* generic tests for this in {@code ParDoTest}.
*/
@Test
public void testWatermarkContract() throws Exception {
    final Instant timerTimestamp = new Instant(1000);
    final Instant timerOutputTimestamp = timerTimestamp.minus(Duration.millis(1));
    final String eventTimeMessage = "Event timer fired: ";
    final String processingTimeMessage = "Processing timer fired";

    WindowingStrategy<Object, IntervalWindow> windowingStrategy =
            WindowingStrategy.of(FixedWindows.of(Duration.millis(10_000)));

    final String eventTimerId = "eventTimer";
    final String eventTimerId2 = "eventTimer2";
    final String processingTimerId = "processingTimer";

    DoFn<Integer, String> fn =
            new DoFn<Integer, String>() {

                @TimerId(eventTimerId)
                private final TimerSpec eventTimer = TimerSpecs.timer(TimeDomain.EVENT_TIME);

                @TimerId(eventTimerId2)
                private final TimerSpec eventTimer2 = TimerSpecs.timer(TimeDomain.EVENT_TIME);

                @TimerId(processingTimerId)
                private final TimerSpec processingTimer =
                        TimerSpecs.timer(TimeDomain.PROCESSING_TIME);

                @ProcessElement
                public void processElement(
                        ProcessContext context,
                        @TimerId(eventTimerId) Timer eventTimer,
                        @TimerId(eventTimerId2) Timer eventTimerWithOutputTimestamp,
                        @TimerId(processingTimerId) Timer processingTimer) {
                    eventTimer.set(timerTimestamp);
                    eventTimerWithOutputTimestamp
                            .withOutputTimestamp(timerOutputTimestamp)
                            .set(timerTimestamp);
                    processingTimer.offset(Duration.millis(timerTimestamp.getMillis())).setRelative();
                }

                @OnTimer(eventTimerId)
                public void onEventTime(OnTimerContext context) {
                    assertEquals(
                            "Timer timestamp must match set timestamp.",
                            timerTimestamp,
                            context.timestamp());
                    context.outputWithTimestamp(eventTimeMessage + eventTimerId, context.timestamp());
                }

                @OnTimer(eventTimerId2)
                public void onEventTime2(OnTimerContext context) {
                    assertEquals(
                            "Timer timestamp must match set timestamp.",
                            timerTimestamp,
                            context.fireTimestamp());
                    context.output(eventTimeMessage + eventTimerId2);
                }

                @OnTimer(processingTimerId)
                public void onProcessingTime(OnTimerContext context) {
                    // See SimpleDoFnRunner#onTimer
                    assertEquals(
                            "Timer timestamp must match current input watermark",
                            timerTimestamp.plus(Duration.millis(1)),
                            context.timestamp());
                    context.outputWithTimestamp(processingTimeMessage, context.timestamp());
                }
            };

    VarIntCoder keyCoder = VarIntCoder.of();
    WindowedValue.FullWindowedValueCoder<Integer> inputCoder =
            WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
    WindowedValue.FullWindowedValueCoder<String> outputCoder =
            WindowedValue.getFullCoder(
                    StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());

    KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector =
            e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);

    TupleTag<String> outputTag = new TupleTag<>("main-output");

    DoFnOperator<Integer, String> doFnOperator =
            new DoFnOperator<>(
                    fn,
                    "stepName",
                    inputCoder,
                    Collections.emptyMap(),
                    outputTag,
                    Collections.emptyList(),
                    new DoFnOperator.MultiOutputOutputManagerFactory<>(
                            outputTag,
                            outputCoder,
                            new SerializablePipelineOptions(FlinkPipelineOptions.defaults())),
                    windowingStrategy,
                    new HashMap<>(), /* side-input mapping */
                    Collections.emptyList(), /* side inputs */
                    FlinkPipelineOptions.defaults(),
                    keyCoder, /* key coder */
                    keySelector,
                    DoFnSchemaInformation.create(),
                    Collections.emptyMap());

    OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness =
            new KeyedOneInputStreamOperatorTestHarness<>(
                    doFnOperator,
                    keySelector,
                    new CoderTypeInformation<>(
                            FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));

    testHarness.setup(
            new CoderTypeSerializer<>(
                    outputCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())));
    testHarness.open();

    testHarness.processWatermark(0);
    testHarness.setProcessingTime(0);

    IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10_000));

    // this should register the three timers above
    testHarness.processElement(
            new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());

    // this does not yet fire the timers (in vanilla Flink it would)
    testHarness.processWatermark(timerTimestamp.getMillis());
    testHarness.setProcessingTime(timerTimestamp.getMillis());
    assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
    assertThat(
            doFnOperator.keyedStateInternals.minWatermarkHoldMs(),
            is(timerOutputTimestamp.getMillis()));

    // this must fire the event timers
    testHarness.processWatermark(timerTimestamp.getMillis() + 1);
    assertThat(
            stripStreamRecordFromWindowedValue(testHarness.getOutput()),
            containsInAnyOrder(
                    WindowedValue.of(
                            eventTimeMessage + eventTimerId,
                            timerTimestamp,
                            window1,
                            PaneInfo.NO_FIRING),
                    WindowedValue.of(
                            eventTimeMessage + eventTimerId2,
                            timerTimestamp.minus(Duration.millis(1)),
                            window1,
                            PaneInfo.NO_FIRING)));
    testHarness.getOutput().clear();

    // this must fire the processing timer
    testHarness.setProcessingTime(timerTimestamp.getMillis() + 1);
    assertThat(
            stripStreamRecordFromWindowedValue(testHarness.getOutput()),
            contains(
                    WindowedValue.of(
                            // See SimpleDoFnRunner#onTimer
                            processingTimeMessage,
                            timerTimestamp.plus(Duration.millis(1)),
                            window1,
                            PaneInfo.NO_FIRING)));
    testHarness.close();
}
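The off-by-one that the javadoc above describes, spelled out as illustrative arithmetic mirroring the assertions (this is not the runner's actual code):

long timerMillis = 1000L; // timerTimestamp above
// Vanilla Flink may fire a timer at T once its watermark reaches T, because a
// Flink watermark T already rules out further elements with timestamp <= T.
boolean vanillaFlinkWouldFire = 1000L >= timerMillis;  // true
// A Beam watermark T only rules out elements with timestamp < T, so the Beam
// runner holds the timer until the watermark strictly exceeds its timestamp:
boolean beamFiresAt1000 = 1000L >= timerMillis + 1;    // false, as asserted above
boolean beamFiresAt1001 = 1001L >= timerMillis + 1;    // true, as asserted above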