Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.
From the class SortingBoundedInputITCase, method testOneInputOperator.
@Test
public void testOneInputOperator() {
long numberOfRecords = 1_000_000;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Configuration config = new Configuration();
config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
env.configure(config, this.getClass().getClassLoader());
DataStreamSource<Tuple2<Integer, byte[]>> elements = env.fromParallelCollection(
        new InputGenerator(numberOfRecords),
        new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO));
SingleOutputStreamOperator<Long> counts = elements
        .keyBy(element -> element.f0)
        .transform("Asserting operator", BasicTypeInfo.LONG_TYPE_INFO, new AssertingOperator());
long sum = CollectionUtil.iteratorToList(DataStreamUtils.collect(counts)).stream()
        .mapToLong(l -> l)
        .sum();
assertThat(sum, equalTo(numberOfRecords));
}
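The test relies on two helpers that this excerpt omits: InputGenerator, which must extend SplittableIterator<Tuple2<Integer, byte[]>> to be accepted by fromParallelCollection, and AssertingOperator. Below is a minimal sketch of the latter, assuming (as the summed Long output suggests) that it checks the BATCH-mode guarantee that all records of a key arrive consecutively and emits its record count at end of input; it is not the verbatim operator from the Flink sources.
private static class AssertingOperator extends AbstractStreamOperator<Long>
        implements OneInputStreamOperator<Tuple2<Integer, byte[]>, Long>, BoundedOneInput {
    // keys whose consecutive run of records has already been observed
    private final Set<Integer> seenKeys = new HashSet<>();
    private Integer currentKey = null;
    private long seenRecords = 0;

    @Override
    public void processElement(StreamRecord<Tuple2<Integer, byte[]>> element) {
        seenRecords++;
        Integer incomingKey = element.getValue().f0;
        if (!Objects.equals(incomingKey, currentKey)) {
            // a key may open a consecutive run only once; seeing it again
            // later means the sorted-input guarantee was violated
            if (!seenKeys.add(incomingKey)) {
                Assert.fail("Received an out-of-order key: " + incomingKey);
            }
            currentKey = incomingKey;
        }
    }

    @Override
    public void endInput() {
        // emit the per-subtask record count; the test sums these counts
        output.collect(new StreamRecord<>(seenRecords));
    }
}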
Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.
From the class SortingBoundedInputITCase, method testBatchExecutionWithTimersOneInput.
@Test
public void testBatchExecutionWithTimersOneInput() {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// set parallelism to 1 to have consistent order of results
env.setParallelism(1);
Configuration config = new Configuration();
config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
env.configure(config, this.getClass().getClassLoader());
WatermarkStrategy<Tuple2<Integer, Integer>> watermarkStrategy = WatermarkStrategy
        .forGenerator(ctx -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP)
        .withTimestampAssigner((r, previousTimestamp) -> r.f1);
SingleOutputStreamOperator<Tuple2<Integer, Integer>> elements = env.fromElements(
        Tuple2.of(1, 3),
        Tuple2.of(1, 1),
        Tuple2.of(2, 1),
        Tuple2.of(1, 4),
        // late element
        Tuple2.of(2, 3),
        // late element
        Tuple2.of(1, 2),
        Tuple2.of(1, 13),
        Tuple2.of(1, 11),
        Tuple2.of(2, 14),
        // late element
        Tuple2.of(1, 11))
        .assignTimestampsAndWatermarks(watermarkStrategy);
OutputTag<Integer> lateElements = new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);
SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums = elements
        .map(element -> element.f0)
        .keyBy(element -> element)
        .process(new KeyedProcessFunction<Integer, Integer, Tuple3<Long, Integer, Integer>>() {
private MapState<Long, Integer> countState;
private ValueState<Long> previousTimestampState;
@Override
public void open(Configuration parameters) {
countState = getRuntimeContext().getMapState(new MapStateDescriptor<>("sum", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO));
previousTimestampState = getRuntimeContext().getState(new ValueStateDescriptor<>("previousTimestamp", BasicTypeInfo.LONG_TYPE_INFO));
}
@Override
public void processElement(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
Long elementTimestamp = ctx.timestamp();
long nextTen = ((elementTimestamp + 10) / 10) * 10;
ctx.timerService().registerEventTimeTimer(nextTen);
if (elementTimestamp < ctx.timerService().currentWatermark()) {
ctx.output(lateElements, value);
} else {
Long previousTimestamp = Optional.ofNullable(previousTimestampState.value()).orElse(0L);
assertThat(elementTimestamp, greaterThanOrEqualTo(previousTimestamp));
previousTimestampState.update(elementTimestamp);
Integer currentCount = Optional.ofNullable(countState.get(nextTen)).orElse(0);
countState.put(nextTen, currentCount + 1);
}
}
@Override
public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
out.collect(Tuple3.of(timestamp, ctx.getCurrentKey(), countState.get(timestamp)));
countState.remove(timestamp);
// this would go into an infinite loop if the runtime did not
// quiesce the timer service at the end of input.
ctx.timerService().registerEventTimeTimer(timestamp + 1);
}
});
DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
List<Integer> lateRecordsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
List<Tuple3<Long, Integer, Integer>> sumsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));
assertTrue(lateRecordsCollected.isEmpty());
assertThat(sumsCollected, equalTo(Arrays.asList(
        Tuple3.of(10L, 1, 4),
        Tuple3.of(20L, 1, 3),
        Tuple3.of(10L, 2, 2),
        Tuple3.of(20L, 2, 1))));
}
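GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP is a constant of the test class (shared with the two-input variant below) that this excerpt omits. Judging from the // late element comments, a plausible sketch is a generator that emits a watermark immediately after the elements with timestamps 4 and 14, so that the subsequent smaller timestamps would be late under STREAMING execution; the exact constant in the Flink sources may differ.
private static final WatermarkGenerator<Tuple2<Integer, Integer>> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP =
        new WatermarkGenerator<Tuple2<Integer, Integer>>() {
            @Override
            public void onEvent(Tuple2<Integer, Integer> event, long eventTimestamp, WatermarkOutput output) {
                // advance the watermark past timestamps 4 and 14, making the
                // elements that follow with smaller timestamps late
                if (eventTimestamp == 4 || eventTimestamp == 14) {
                    output.emitWatermark(new Watermark(eventTimestamp));
                }
            }

            @Override
            public void onPeriodicEmit(WatermarkOutput output) {
                // watermarks are emitted per event only
            }
        };
In BATCH mode these watermarks are effectively irrelevant: the runtime sorts each key's input and only advances event time at the end of input, which is why the test can assert that lateRecordsCollected is empty even though some elements would be late under STREAMING execution.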
Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.
From the class SortingBoundedInputITCase, method testBatchExecutionWithTimersTwoInput. This mirrors the one-input test above but feeds the same elements into both inputs of a connected stream, so every expected count doubles.
@Test
public void testBatchExecutionWithTimersTwoInput() {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// set parallelism to 1 to have consistent order of results
env.setParallelism(1);
Configuration config = new Configuration();
config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
env.configure(config, this.getClass().getClassLoader());
WatermarkStrategy<Tuple2<Integer, Integer>> watermarkStrategy = WatermarkStrategy
        .forGenerator(ctx -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP)
        .withTimestampAssigner((r, previousTimestamp) -> r.f1);
SingleOutputStreamOperator<Integer> elements1 = env.fromElements(
        Tuple2.of(1, 3),
        Tuple2.of(1, 1),
        Tuple2.of(2, 1),
        Tuple2.of(1, 4),
        // late element
        Tuple2.of(2, 3),
        // late element
        Tuple2.of(1, 2),
        Tuple2.of(1, 13),
        Tuple2.of(1, 11),
        Tuple2.of(2, 14),
        // late element
        Tuple2.of(1, 11))
        .assignTimestampsAndWatermarks(watermarkStrategy)
        .map(element -> element.f0);
SingleOutputStreamOperator<Integer> elements2 = env.fromElements(
        Tuple2.of(1, 3),
        Tuple2.of(1, 1),
        Tuple2.of(2, 1),
        Tuple2.of(1, 4),
        // late element
        Tuple2.of(2, 3),
        // late element
        Tuple2.of(1, 2),
        Tuple2.of(1, 13),
        Tuple2.of(1, 11),
        Tuple2.of(2, 14),
        // late element
        Tuple2.of(1, 11))
        .assignTimestampsAndWatermarks(watermarkStrategy)
        .map(element -> element.f0);
OutputTag<Integer> lateElements = new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);
SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums = elements1
        .connect(elements2)
        .keyBy(element -> element, element -> element)
        .process(new KeyedCoProcessFunction<Integer, Integer, Integer, Tuple3<Long, Integer, Integer>>() {
private MapState<Long, Integer> countState;
private ValueState<Long> previousTimestampState;
@Override
public void open(Configuration parameters) {
countState = getRuntimeContext().getMapState(new MapStateDescriptor<>("sum", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO));
previousTimestampState = getRuntimeContext().getState(new ValueStateDescriptor<>("previousTimestamp", BasicTypeInfo.LONG_TYPE_INFO));
}
@Override
public void processElement1(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
processElement(value, ctx);
}
@Override
public void processElement2(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
processElement(value, ctx);
}
private void processElement(Integer value, Context ctx) throws Exception {
Long elementTimestamp = ctx.timestamp();
long nextTen = ((elementTimestamp + 10) / 10) * 10;
ctx.timerService().registerEventTimeTimer(nextTen);
if (elementTimestamp < ctx.timerService().currentWatermark()) {
ctx.output(lateElements, value);
} else {
Long previousTimestamp = Optional.ofNullable(previousTimestampState.value()).orElse(0L);
assertThat(elementTimestamp, greaterThanOrEqualTo(previousTimestamp));
previousTimestampState.update(elementTimestamp);
Integer currentCount = Optional.ofNullable(countState.get(nextTen)).orElse(0);
countState.put(nextTen, currentCount + 1);
}
}
@Override
public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
out.collect(Tuple3.of(timestamp, ctx.getCurrentKey(), countState.get(timestamp)));
countState.remove(timestamp);
// this would go into an infinite loop if the runtime did not
// quiesce the timer service at the end of input.
ctx.timerService().registerEventTimeTimer(timestamp + 1);
}
});
DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
List<Integer> lateRecordsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
List<Tuple3<Long, Integer, Integer>> sumsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));
assertTrue(lateRecordsCollected.isEmpty());
assertThat(sumsCollected, equalTo(Arrays.asList(
        Tuple3.of(10L, 1, 8),
        Tuple3.of(20L, 1, 6),
        Tuple3.of(10L, 2, 4),
        Tuple3.of(20L, 2, 2))));
}
Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.
From the class StreamOperatorChainingTest, method testMultiChainingWithSplit.
/**
* Verify that multi-chaining works with object reuse enabled.
*/
private void testMultiChainingWithSplit(StreamExecutionEnvironment env) throws Exception {
// set parallelism to 2 to avoid chaining with the source in case
// only one processor is available.
env.setParallelism(2);
// the actual elements will not be used
DataStream<Integer> input = env.fromElements(1, 2, 3);
sink1Results = new ArrayList<>();
sink2Results = new ArrayList<>();
sink3Results = new ArrayList<>();
input = input.map(value -> value);
OutputTag<Integer> oneOutput = new OutputTag<Integer>("one") {};
OutputTag<Integer> otherOutput = new OutputTag<Integer>("other") {};
SingleOutputStreamOperator<Object> split = input.process(new ProcessFunction<Integer, Object>() {
private static final long serialVersionUID = 1L;
@Override
public void processElement(Integer value, Context ctx, Collector<Object> out) throws Exception {
if (value.equals(1)) {
ctx.output(oneOutput, value);
} else {
ctx.output(otherOutput, value);
}
}
});
split.getSideOutput(oneOutput).map(value -> "First 1: " + value).addSink(new SinkFunction<String>() {
@Override
public void invoke(String value, Context ctx) throws Exception {
sink1Results.add(value);
}
});
split.getSideOutput(oneOutput).map(value -> "First 2: " + value).addSink(new SinkFunction<String>() {
@Override
public void invoke(String value, Context ctx) throws Exception {
sink2Results.add(value);
}
});
split.getSideOutput(otherOutput).map(value -> "Second: " + value).addSink(new SinkFunction<String>() {
@Override
public void invoke(String value, Context ctx) throws Exception {
sink3Results.add(value);
}
});
// we build our own StreamTask and OperatorChain
JobGraph jobGraph = env.getStreamGraph().getJobGraph();
Assert.assertTrue(jobGraph.getVerticesSortedTopologicallyFromSources().size() == 2);
JobVertex chainedVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
Configuration configuration = chainedVertex.getConfiguration();
StreamConfig streamConfig = new StreamConfig(configuration);
StreamMap<Integer, Integer> headOperator =
        streamConfig.getStreamOperator(Thread.currentThread().getContextClassLoader());
try (MockEnvironment environment = createMockEnvironment(chainedVertex.getName())) {
StreamTask<Integer, StreamMap<Integer, Integer>> mockTask = createMockTask(streamConfig, environment);
OperatorChain<Integer, StreamMap<Integer, Integer>> operatorChain = createOperatorChain(streamConfig, environment, mockTask);
headOperator.setup(mockTask, streamConfig, operatorChain.getMainOperatorOutput());
operatorChain.initializeStateAndOpenOperators(null);
headOperator.processElement(new StreamRecord<>(1));
headOperator.processElement(new StreamRecord<>(2));
headOperator.processElement(new StreamRecord<>(3));
assertThat(sink1Results, contains("First 1: 1"));
assertThat(sink2Results, contains("First 2: 1"));
assertThat(sink3Results, contains("Second: 2", "Second: 3"));
}
}
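createMockEnvironment, createMockTask and createOperatorChain are private helpers of StreamOperatorChainingTest that the excerpt leaves out. As an illustrative sketch of the first one, using Flink's test-utility MockEnvironmentBuilder (the minimal configuration shown here is an assumption; the real helper may set further options such as memory and buffer sizes):
private MockEnvironment createMockEnvironment(String taskName) {
    // a minimal environment is enough for driving the chained operators by hand
    return new MockEnvironmentBuilder().setTaskName(taskName).build();
}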
Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.
From the class DataStreamJavaITCase, method testTableStreamConversionBatch.
@Test
public void testTableStreamConversionBatch() throws Exception {
env.setRuntimeMode(RuntimeExecutionMode.BATCH);
DataStreamSource<Row> streamSource = env.fromElements(
        Row.of("Alice"), Row.of("alice"), Row.of("lily"), Row.of("Bob"), Row.of("lily"), Row.of("lily"));
StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env);
Table sourceTable = tableEnvironment.fromDataStream(streamSource).as("word");
tableEnvironment.createTemporaryView("tmp_table", sourceTable);
Table resultTable = tableEnvironment.sqlQuery("select UPPER(word) as word from tmp_table");
SingleOutputStreamOperator<Tuple2<String, Integer>> resultStream = tableEnvironment
        .toDataStream(resultTable)
        .map(row -> (String) row.getField("word"))
        .returns(TypeInformation.of(String.class))
        .map(s -> new Tuple2<>(s, 1))
        .returns(TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}))
        .keyBy(tuple -> tuple.f0)
        .sum(1);
testResult(resultStream, new Tuple2<>("ALICE", 2), new Tuple2<>("BOB", 1), new Tuple2<>("LILY", 3));
}
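testResult is a private helper of DataStreamJavaITCase that this excerpt omits. A minimal sketch, assuming it collects the stream with executeAndCollect and compares against the expected records in any order (a batch keyed aggregation gives no ordering guarantee across keys):
@SafeVarargs
private static <T> void testResult(DataStream<T> resultStream, T... expected) throws Exception {
    // collect the bounded result and release cluster resources afterwards
    try (CloseableIterator<T> iterator = resultStream.executeAndCollect()) {
        List<T> results = CollectionUtil.iteratorToList(iterator);
        assertThat(results, containsInAnyOrder(expected));
    }
}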