Use of org.apache.flink.util.Collector in project flink by apache.
From the class GroupReduceCompilationTest, method testGroupedReduceWithFieldPositionKeyNonCombinable.
@Test
public void testGroupedReduceWithFieldPositionKeyNonCombinable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source")
                .setParallelism(6);
        data.groupBy(1)
            .reduceGroup(new RichGroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {
                @Override
                public void reduce(Iterable<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {}
            })
            .name("reducer")
            .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
            .name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // check wiring
        assertEquals(sourceNode, reduceNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that the reducer uses the sort-based strategy
        // (no combiner is inserted for a non-combinable function)
        assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reduceNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(1), reduceNode.getKeys(0));
        assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
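The test above pins the non-combinable case, where the plan contains only a SORTED_GROUP_REDUCE driver. For contrast, a minimal sketch of the combinable counterpart: when the function also implements GroupCombineFunction, the optimizer can schedule a local combine step ahead of the shuffle. The class name and summing logic below are illustrative, not part of the test.

// Sketch (assumption: standard DataSet API conventions): a combinable group
// reduce. Implementing GroupCombineFunction lets the optimizer pre-aggregate
// locally before partitioning the data by key.
public static class CombinableSum
        extends RichGroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>
        implements GroupCombineFunction<Tuple2<String, Double>, Tuple2<String, Double>> {

    @Override
    public void reduce(Iterable<Tuple2<String, Double>> values,
            Collector<Tuple2<String, Double>> out) {
        emitSum(values, out);
    }

    @Override
    public void combine(Iterable<Tuple2<String, Double>> values,
            Collector<Tuple2<String, Double>> out) {
        // partial sums can safely be summed again, so combine == reduce here
        emitSum(values, out);
    }

    private static void emitSum(Iterable<Tuple2<String, Double>> values,
            Collector<Tuple2<String, Double>> out) {
        String key = null;
        double sum = 0.0;
        for (Tuple2<String, Double> t : values) {
            key = t.f0;
            sum += t.f1;
        }
        out.collect(Tuple2.of(key, sum));
    }
}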
Use of org.apache.flink.util.Collector in project flink by apache.
From the class GroupReduceCompilationTest, method testAllGroupReduceNoCombiner.
@Test
public void testAllGroupReduceNoCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
        data.reduceGroup(new RichGroupReduceFunction<Double, Double>() {
                @Override
                public void reduce(Iterable<Double> values, Collector<Double> out) {}
            })
            .name("reducer")
            .output(new DiscardingOutputFormat<Double>())
            .name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes; the all-reduce has no combiner when the
        // parallelism of the input is one
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // check wiring
        assertEquals(sourceNode, reduceNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that reduce has the right strategy
        assertEquals(DriverStrategy.ALL_GROUP_REDUCE, reduceNode.getDriverStrategy());
        // check parallelism
        assertEquals(1, sourceNode.getParallelism());
        assertEquals(1, reduceNode.getParallelism());
        assertEquals(1, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
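Both test functions above leave reduce() empty because only the plan shape is asserted. For completeness, here is a minimal sketch of a reduceGroup that actually emits through the Collector; the averaging logic is illustrative, not part of the test.

// Sketch: an all-group reduce over the DataSet<Double> from the test above.
// With no grouping key, all elements arrive in a single call to reduce().
data.reduceGroup(new GroupReduceFunction<Double, Double>() {
    @Override
    public void reduce(Iterable<Double> values, Collector<Double> out) {
        double sum = 0.0;
        long count = 0;
        for (Double v : values) {
            sum += v;
            count++;
        }
        if (count > 0) {
            // a Collector may emit zero, one, or many records; here exactly one
            out.collect(sum / count);
        }
    }
});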
Use of org.apache.flink.util.Collector in project flink by apache.
From the class DataStreamAllroundTestProgram, method main.
public static void main(String[] args) throws Exception {
    final ParameterTool pt = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    setupEnvironment(env, pt);

    // add a keyed stateful map operator, which uses Kryo for state serialization
    DataStream<Event> eventStream = env.addSource(createEventSource(pt))
            .name(EVENT_SOURCE.getName())
            .uid(EVENT_SOURCE.getUid())
            .assignTimestampsAndWatermarks(createTimestampExtractor(pt))
            .keyBy(Event::getKey)
            .map(createArtificialKeyedStateMapper(
                    // map function simply forwards the inputs
                    (MapFunction<Event, Event>) in -> in,
                    // ComplexPayload state object
                    (Event event, ComplexPayload lastState) -> {
                        if (lastState != null
                                && !lastState.getStrPayload().equals(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName())
                                && lastState.getInnerPayLoad().getSequenceNumber() == (event.getSequenceNumber() - 1)) {
                            throwIncorrectRestoredStateException(
                                    (event.getSequenceNumber() - 1),
                                    KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName(),
                                    lastState.getStrPayload());
                        }
                        return new ComplexPayload(event, KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName());
                    },
                    Arrays.asList(
                            new KryoSerializer<>(ComplexPayload.class, env.getConfig()), // KryoSerializer
                            new StatefulComplexPayloadSerializer()), // custom serializer
                    Collections.singletonList(ComplexPayload.class))) // KryoSerializer via type
            .returns(Event.class)
            .name(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName())
            .uid(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getUid());

    // add a keyed stateful map operator, which uses Avro for state serialization
    eventStream = eventStream
            .keyBy(Event::getKey)
            .map(createArtificialKeyedStateMapper(
                    // map function simply forwards the inputs
                    (MapFunction<Event, Event>) in -> in,
                    // ComplexPayloadAvro state object
                    (Event event, ComplexPayloadAvro lastState) -> {
                        if (lastState != null
                                && !lastState.getStrPayload().equals(KEYED_STATE_OPER_WITH_AVRO_SER.getName())
                                && lastState.getInnerPayLoad().getSequenceNumber() == (event.getSequenceNumber() - 1)) {
                            throwIncorrectRestoredStateException(
                                    (event.getSequenceNumber() - 1),
                                    KEYED_STATE_OPER_WITH_AVRO_SER.getName(),
                                    lastState.getStrPayload());
                        }
                        ComplexPayloadAvro payload = new ComplexPayloadAvro();
                        payload.setEventTime(event.getEventTime());
                        payload.setInnerPayLoad(new InnerPayLoadAvro(event.getSequenceNumber()));
                        payload.setStrPayload(KEYED_STATE_OPER_WITH_AVRO_SER.getName());
                        payload.setStringList(Arrays.asList(String.valueOf(event.getKey()), event.getPayload()));
                        return payload;
                    },
                    Collections.singletonList(new AvroSerializer<>(ComplexPayloadAvro.class)), // custom AvroSerializer
                    Collections.singletonList(ComplexPayloadAvro.class))) // AvroSerializer via type
            .returns(Event.class)
            .name(KEYED_STATE_OPER_WITH_AVRO_SER.getName())
            .uid(KEYED_STATE_OPER_WITH_AVRO_SER.getUid());

    DataStream<Event> eventStream2 = eventStream
            .map(createArtificialOperatorStateMapper((MapFunction<Event, Event>) in -> in))
            .returns(Event.class)
            .name(OPERATOR_STATE_OPER.getName())
            .uid(OPERATOR_STATE_OPER.getUid());

    // apply a tumbling window that simply passes forward window elements;
    // this allows the job to cover timers state
    @SuppressWarnings("Convert2Lambda")
    DataStream<Event> eventStream3 = applyTumblingWindows(eventStream2.keyBy(Event::getKey), pt)
            .apply(new WindowFunction<Event, Event, Integer, TimeWindow>() {
                @Override
                public void apply(Integer integer, TimeWindow window, Iterable<Event> input, Collector<Event> out) {
                    for (Event e : input) {
                        out.collect(e);
                    }
                }
            })
            .name(TIME_WINDOW_OPER.getName())
            .uid(TIME_WINDOW_OPER.getUid());

    eventStream3 = DataStreamAllroundTestJobFactory.verifyCustomStatefulTypeSerializer(eventStream3);

    if (isSimulateFailures(pt)) {
        eventStream3 = eventStream3
                .map(createFailureMapper(pt))
                .setParallelism(1)
                .name(FAILURE_MAPPER_NAME.getName())
                .uid(FAILURE_MAPPER_NAME.getUid());
    }

    eventStream3.keyBy(Event::getKey)
            .flatMap(createSemanticsCheckMapper(pt))
            .name(SEMANTICS_CHECK_MAPPER.getName())
            .uid(SEMANTICS_CHECK_MAPPER.getUid())
            .addSink(new PrintSinkFunction<>())
            .name(SEMANTICS_CHECK_PRINT_SINK.getName())
            .uid(SEMANTICS_CHECK_PRINT_SINK.getUid());

    // Check sliding window aggregations: output all elements assigned to a window,
    // and later check that each event was emitted slide_factor number of times
    DataStream<Tuple2<Integer, List<Event>>> eventStream4 = eventStream2
            .keyBy(Event::getKey)
            .window(createSlidingWindow(pt))
            .apply(new WindowFunction<Event, Tuple2<Integer, List<Event>>, Integer, TimeWindow>() {
                private static final long serialVersionUID = 3166250579972849440L;

                @Override
                public void apply(Integer key, TimeWindow window, Iterable<Event> input,
                        Collector<Tuple2<Integer, List<Event>>> out) {
                    out.collect(Tuple2.of(key,
                            StreamSupport.stream(input.spliterator(), false).collect(Collectors.toList())));
                }
            })
            .name(SLIDING_WINDOW_AGG.getName())
            .uid(SLIDING_WINDOW_AGG.getUid());

    eventStream4.keyBy(events -> events.f0)
            .flatMap(createSlidingWindowCheckMapper(pt))
            .name(SLIDING_WINDOW_CHECK_MAPPER.getName())
            .uid(SLIDING_WINDOW_CHECK_MAPPER.getUid())
            .addSink(new PrintSinkFunction<>())
            .name(SLIDING_WINDOW_CHECK_PRINT_SINK.getName())
            .uid(SLIDING_WINDOW_CHECK_PRINT_SINK.getUid());

    env.execute("General purpose test job");
}
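Besides WindowFunction.apply, the semantics-check steps above are wired in via flatMap, whose FlatMapFunction is the other common home of Collector in the DataStream API. A minimal sketch; the even-sequence-number filter is invented for illustration and is not part of the test job.

// Sketch: a flatMap that uses the Collector to emit a variable number of
// records per input; here, zero or one record depending on a predicate.
DataStream<Event> evenEvents = eventStream.flatMap(new FlatMapFunction<Event, Event>() {
    @Override
    public void flatMap(Event event, Collector<Event> out) {
        if (event.getSequenceNumber() % 2 == 0) {
            out.collect(event); // forward only events with an even sequence number
        }
    }
});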
Use of org.apache.flink.util.Collector in project flink by apache.
From the class GroupReduceOperatorTest, method testGroupReduceCollectionWithRuntimeContext.
@Test
public void testGroupReduceCollectionWithRuntimeContext() {
    try {
        final String taskName = "Test Task";
        final AtomicBoolean opened = new AtomicBoolean();
        final AtomicBoolean closed = new AtomicBoolean();

        final RichGroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>> reducer =
                new RichGroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>() {

            @Override
            public void reduce(Iterable<Tuple2<String, Integer>> values,
                    Collector<Tuple2<String, Integer>> out) throws Exception {
                // sum the Integer fields, reusing the first tuple as the result object
                Iterator<Tuple2<String, Integer>> input = values.iterator();
                Tuple2<String, Integer> result = input.next();
                int sum = result.f1;
                while (input.hasNext()) {
                    Tuple2<String, Integer> next = input.next();
                    sum += next.f1;
                }
                result.f1 = sum;
                out.collect(result);
            }

            @Override
            public void open(Configuration parameters) throws Exception {
                opened.set(true);
                RuntimeContext ctx = getRuntimeContext();
                assertEquals(0, ctx.getIndexOfThisSubtask());
                assertEquals(1, ctx.getNumberOfParallelSubtasks());
                assertEquals(taskName, ctx.getTaskName());
            }

            @Override
            public void close() throws Exception {
                closed.set(true);
            }
        };

        GroupReduceOperatorBase<Tuple2<String, Integer>, Tuple2<String, Integer>,
                GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>> op =
                new GroupReduceOperatorBase<>(
                        reducer,
                        new UnaryOperatorInformation<>(STRING_INT_TUPLE, STRING_INT_TUPLE),
                        new int[] { 0 },
                        "TestReducer");

        List<Tuple2<String, Integer>> input = new ArrayList<>(asList(
                new Tuple2<>("foo", 1),
                new Tuple2<>("foo", 3),
                new Tuple2<>("bar", 2),
                new Tuple2<>("bar", 4)));

        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
        ExecutionConfig executionConfig = new ExecutionConfig();

        // run once with object reuse disabled ...
        executionConfig.disableObjectReuse();
        List<Tuple2<String, Integer>> resultMutableSafe = op.executeOnCollections(
                input,
                new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<>(), new HashMap<>(),
                        UnregisteredMetricsGroup.createOperatorMetricGroup()),
                executionConfig);

        // ... and once with object reuse enabled
        executionConfig.enableObjectReuse();
        List<Tuple2<String, Integer>> resultRegular = op.executeOnCollections(
                input,
                new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<>(), new HashMap<>(),
                        UnregisteredMetricsGroup.createOperatorMetricGroup()),
                executionConfig);

        Set<Tuple2<String, Integer>> resultSetMutableSafe = new HashSet<>(resultMutableSafe);
        Set<Tuple2<String, Integer>> resultSetRegular = new HashSet<>(resultRegular);
        Set<Tuple2<String, Integer>> expectedResult = new HashSet<>(asList(
                new Tuple2<>("foo", 4),
                new Tuple2<>("bar", 6)));

        assertEquals(expectedResult, resultSetMutableSafe);
        assertEquals(expectedResult, resultSetRegular);
        assertTrue(opened.get());
        assertTrue(closed.get());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
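Note that the reducer mutates the first input tuple and re-emits it, which is exactly why the test runs once with object reuse disabled and once enabled, confirming both modes produce the same result. When in doubt, a copy-based variant sidesteps input mutation entirely; a minimal sketch (the variable name and summing logic are illustrative):

// Sketch: an object-reuse-safe reducer that builds a fresh output tuple
// instead of mutating and re-emitting an input record.
GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>> safeReducer =
        new GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>() {
            @Override
            public void reduce(Iterable<Tuple2<String, Integer>> values,
                    Collector<Tuple2<String, Integer>> out) {
                String key = null;
                int sum = 0;
                for (Tuple2<String, Integer> t : values) {
                    key = t.f0;
                    sum += t.f1;
                }
                out.collect(new Tuple2<>(key, sum)); // fresh object, no input mutation
            }
        };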
Use of org.apache.flink.util.Collector in project beam by apache.
From the class SortingFlinkCombineRunner, method combine.
@Override
public void combine(
        FlinkCombiner<K, InputT, AccumT, OutputT> flinkCombiner,
        WindowingStrategy<Object, W> windowingStrategy,
        SideInputReader sideInputReader,
        PipelineOptions options,
        Iterable<WindowedValue<KV<K, InputT>>> elements,
        Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {

    @SuppressWarnings("unchecked")
    TimestampCombiner timestampCombiner = (TimestampCombiner) windowingStrategy.getTimestampCombiner();

    // Collect all elements so that we can sort them; this has to fit into memory.
    // That seems imprudent, but it is correct for now.
    List<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
    for (WindowedValue<KV<K, InputT>> inputValue : elements) {
        for (WindowedValue<KV<K, InputT>> exploded : inputValue.explodeWindows()) {
            sortedInput.add(exploded);
        }
    }
    sortedInput.sort(Comparator.comparing(o -> Iterables.getOnlyElement(o.getWindows()).maxTimestamp()));

    if (windowingStrategy.needsMerge()) {
        // Merge windows in an extra pre-processing step; we can't do it as we go,
        // since the window of early elements would not be correct when calling the CombineFn.
        mergeWindow(sortedInput);
    }

    // iterate over the elements, which are sorted by window timestamp
    final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();

    // create the accumulator using the first element's key
    WindowedValue<KV<K, InputT>> currentValue = iterator.next();
    K key = currentValue.getValue().getKey();
    W currentWindow = (W) Iterables.getOnlyElement(currentValue.getWindows());
    InputT firstValue = currentValue.getValue().getValue();
    AccumT accumulator = flinkCombiner.firstInput(key, firstValue, options, sideInputReader, currentValue.getWindows());

    // we use this to keep track of the timestamps assigned by the TimestampCombiner
    Instant windowTimestamp = timestampCombiner.assign(currentWindow, currentValue.getTimestamp());

    while (iterator.hasNext()) {
        WindowedValue<KV<K, InputT>> nextValue = iterator.next();
        W nextWindow = (W) Iterables.getOnlyElement(nextValue.getWindows());
        if (currentWindow.equals(nextWindow)) {
            // same window: continue accumulating
            InputT value = nextValue.getValue().getValue();
            accumulator = flinkCombiner.addInput(key, accumulator, value, options, sideInputReader, currentValue.getWindows());
            windowTimestamp = timestampCombiner.combine(
                    windowTimestamp, timestampCombiner.assign(currentWindow, nextValue.getTimestamp()));
        } else {
            // new window: emit the value that we currently have, then start over
            out.collect(WindowedValue.of(
                    KV.of(key, flinkCombiner.extractOutput(key, accumulator, options, sideInputReader, currentValue.getWindows())),
                    windowTimestamp, currentWindow, PaneInfo.NO_FIRING));
            currentWindow = nextWindow;
            currentValue = nextValue;
            InputT value = nextValue.getValue().getValue();
            accumulator = flinkCombiner.firstInput(key, value, options, sideInputReader, currentValue.getWindows());
            windowTimestamp = timestampCombiner.assign(currentWindow, nextValue.getTimestamp());
        }
    }

    // emit the final accumulator
    out.collect(WindowedValue.of(
            KV.of(key, flinkCombiner.extractOutput(key, accumulator, options, sideInputReader, currentValue.getWindows())),
            windowTimestamp, currentWindow, PaneInfo.NO_FIRING));
}
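The loop above is the standard emit-on-boundary pattern over input pre-sorted by group key (here, the window): accumulate while the key repeats, flush at each boundary, and flush once more at the end. A stripped-down sketch of the same pattern with plain types; the method name and summing logic are illustrative, and like the runner it assumes at least one input element.

// Sketch: emit-on-boundary aggregation over input pre-sorted by its String key.
// One output per contiguous run of equal keys, mirroring the window handling above.
static void sumByKey(List<Map.Entry<String, Integer>> sorted,
        Collector<Map.Entry<String, Integer>> out) {
    Iterator<Map.Entry<String, Integer>> it = sorted.iterator();
    Map.Entry<String, Integer> first = it.next(); // assumes a non-empty input
    String currentKey = first.getKey();
    int sum = first.getValue();
    while (it.hasNext()) {
        Map.Entry<String, Integer> next = it.next();
        if (currentKey.equals(next.getKey())) {
            sum += next.getValue(); // same group: keep accumulating
        } else {
            out.collect(Map.entry(currentKey, sum)); // boundary: flush the finished group
            currentKey = next.getKey();
            sum = next.getValue();
        }
    }
    out.collect(Map.entry(currentKey, sum)); // flush the final group
}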