Use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.
In the class StreamingJobGraphGeneratorTest, the method testResourcesForIteration.
/**
 * Verifies that resources are merged correctly for chained operators (covers the
 * middle-chaining and iteration cases) when generating the job graph.
 */
@Test
public void testResourcesForIteration() throws Exception {
    ResourceSpec resource1 = ResourceSpec.newBuilder(0.1, 100).build();
    ResourceSpec resource2 = ResourceSpec.newBuilder(0.2, 200).build();
    ResourceSpec resource3 = ResourceSpec.newBuilder(0.3, 300).build();
    ResourceSpec resource4 = ResourceSpec.newBuilder(0.4, 400).build();
    ResourceSpec resource5 = ResourceSpec.newBuilder(0.5, 500).build();

    Method opMethod = getSetResourcesMethodAndSetAccessible(SingleOutputStreamOperator.class);
    Method sinkMethod = getSetResourcesMethodAndSetAccessible(DataStreamSink.class);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.addSource(new ParallelSourceFunction<Integer>() {
        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
        }

        @Override
        public void cancel() {
        }
    }).name("test_source");
    opMethod.invoke(source, resource1);

    IterativeStream<Integer> iteration = source.iterate(3000);
    opMethod.invoke(iteration, resource2);

    DataStream<Integer> flatMap = iteration.flatMap(new FlatMapFunction<Integer, Integer>() {
        @Override
        public void flatMap(Integer value, Collector<Integer> out) throws Exception {
            out.collect(value);
        }
    }).name("test_flatMap");
    opMethod.invoke(flatMap, resource3);

    // CHAIN(flatMap -> Filter)
    DataStream<Integer> increment = flatMap.filter(new FilterFunction<Integer>() {
        @Override
        public boolean filter(Integer value) throws Exception {
            return false;
        }
    }).name("test_filter");
    opMethod.invoke(increment, resource4);

    DataStreamSink<Integer> sink = iteration.closeWith(increment).addSink(new SinkFunction<Integer>() {
        @Override
        public void invoke(Integer value) throws Exception {
        }
    }).disableChaining().name("test_sink");
    sinkMethod.invoke(sink, resource5);

    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

    for (JobVertex jobVertex : jobGraph.getVertices()) {
        if (jobVertex.getName().contains("test_source")) {
            assertTrue(jobVertex.getMinResources().equals(resource1));
        } else if (jobVertex.getName().contains("Iteration_Source")) {
            assertTrue(jobVertex.getPreferredResources().equals(resource2));
        } else if (jobVertex.getName().contains("test_flatMap")) {
            assertTrue(jobVertex.getMinResources().equals(resource3.merge(resource4)));
        } else if (jobVertex.getName().contains("Iteration_Tail")) {
            assertTrue(jobVertex.getPreferredResources().equals(ResourceSpec.DEFAULT));
        } else if (jobVertex.getName().contains("test_sink")) {
            assertTrue(jobVertex.getMinResources().equals(resource5));
        }
    }
}
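The key assertion above is that the chained flatMap and filter operators land in one vertex whose resources equal resource3.merge(resource4). As a minimal standalone sketch of that merge semantics (the class name and printout are illustrative, not part of the test):

import org.apache.flink.api.common.operators.ResourceSpec;

// Illustrative sketch, not part of the Flink test: merge() combines the specs
// of operators that end up in the same chained vertex, summing CPU and memory.
public class ResourceSpecMergeSketch {
    public static void main(String[] args) {
        ResourceSpec flatMapSpec = ResourceSpec.newBuilder(0.3, 300).build(); // resource3
        ResourceSpec filterSpec = ResourceSpec.newBuilder(0.4, 400).build();  // resource4

        // This is the spec the test expects on the "test_flatMap" vertex.
        ResourceSpec chained = flatMapSpec.merge(filterSpec);
        System.out.println(chained);
    }
}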
Use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.
In the class ChainTaskTest, the method testBatchTaskOutputInCloseMethod.
@Test
public void testBatchTaskOutputInCloseMethod() {
    final int numChainedTasks = 10;
    final int keyCnt = 100;
    final int valCnt = 10;
    try {
        initEnvironment(MEMORY_MANAGER_SIZE, NETWORK_BUFFER_SIZE);
        addInput(new UniformRecordGenerator(keyCnt, valCnt, false), 0);
        addOutput(outList);
        registerTask(FlatMapDriver.class, MockMapStub.class);

        for (int i = 0; i < numChainedTasks; i++) {
            final TaskConfig taskConfig = new TaskConfig(new Configuration());
            taskConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
            taskConfig.setOutputSerializer(serFact);
            taskConfig.setStubWrapper(new UserCodeClassWrapper<>(MockDuplicateLastValueMapFunction.class));
            getTaskConfig().addChainedTask(ChainedFlatMapDriver.class, taskConfig, "chained-" + i);
        }

        final BatchTask<FlatMapFunction<Record, Record>, Record> testTask = new BatchTask<>(mockEnv);
        testTask.invoke();

        Assert.assertEquals(keyCnt * valCnt + numChainedTasks, outList.size());
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
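The MockDuplicateLastValueMapFunction referenced above is not shown on this page. The assertion keyCnt * valCnt + numChainedTasks only works out if each of the ten chained copies emits exactly one extra record when the task shuts down; a plausible sketch of such a function (the class name, field names, and the cached-collector trick are assumptions, not the actual Flink test code) is:

import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.types.Record;
import org.apache.flink.util.Collector;

// Hypothetical stand-in: forwards every record and re-emits the last one when
// the task closes, so each chained copy adds exactly one record to the output.
public class DuplicateLastValueOnClose extends RichFlatMapFunction<Record, Record> {

    private Record lastValue;            // last record seen by flatMap
    private Collector<Record> collector; // cached so close() can still emit

    @Override
    public void flatMap(Record value, Collector<Record> out) {
        this.lastValue = value;
        this.collector = out;
        out.collect(value);
    }

    @Override
    public void close() {
        if (lastValue != null) {
            collector.collect(lastValue);
        }
    }
}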
Use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.
In the class ChainedOperatorsMetricTest, the method testOperatorIOMetricReuse.
@Test
public void testOperatorIOMetricReuse() throws Exception {
    // environment
    initEnvironment(MEMORY_MANAGER_SIZE, NETWORK_BUFFER_SIZE);
    this.mockEnv = new MockEnvironmentBuilder()
            .setTaskName(HEAD_OPERATOR_NAME)
            .setManagedMemorySize(MEMORY_MANAGER_SIZE)
            .setInputSplitProvider(this.inputSplitProvider)
            .setBufferSize(NETWORK_BUFFER_SIZE)
            .setMetricGroup(
                    TaskManagerMetricGroup.createTaskManagerMetricGroup(
                                    NoOpMetricRegistry.INSTANCE, "host", ResourceID.generate())
                            .addJob(new JobID(), "jobName")
                            .addTask(new JobVertexID(), new ExecutionAttemptID(), "task", 0, 0))
            .build();

    final int keyCnt = 100;
    final int valCnt = 20;
    final int numRecords = keyCnt * valCnt;

    addInput(new UniformRecordGenerator(keyCnt, valCnt, false), 0);
    addOutput(this.outList);

    // the chained operator
    addChainedOperator();

    // creates the head operator and assembles the chain
    registerTask(FlatMapDriver.class, DuplicatingFlatMapFunction.class);

    final BatchTask<FlatMapFunction<Record, Record>, Record> testTask = new BatchTask<>(this.mockEnv);
    testTask.invoke();
    Assert.assertEquals(numRecords * 2 * 2, this.outList.size());

    final TaskMetricGroup taskMetricGroup = mockEnv.getMetricGroup();

    // verify task-level metrics
    {
        final TaskIOMetricGroup ioMetricGroup = taskMetricGroup.getIOMetricGroup();
        final Counter numRecordsInCounter = ioMetricGroup.getNumRecordsInCounter();
        final Counter numRecordsOutCounter = ioMetricGroup.getNumRecordsOutCounter();
        Assert.assertEquals(numRecords, numRecordsInCounter.getCount());
        Assert.assertEquals(numRecords * 2 * 2, numRecordsOutCounter.getCount());
    }

    // verify head operator metrics
    {
        // this only returns the existing group and doesn't create a new one
        final OperatorMetricGroup operatorMetricGroup1 = taskMetricGroup.getOrAddOperator(HEAD_OPERATOR_NAME);
        final OperatorIOMetricGroup ioMetricGroup = operatorMetricGroup1.getIOMetricGroup();
        final Counter numRecordsInCounter = ioMetricGroup.getNumRecordsInCounter();
        final Counter numRecordsOutCounter = ioMetricGroup.getNumRecordsOutCounter();
        Assert.assertEquals(numRecords, numRecordsInCounter.getCount());
        Assert.assertEquals(numRecords * 2, numRecordsOutCounter.getCount());
    }

    // verify chained operator metrics
    {
        // this only returns the existing group and doesn't create a new one
        final InternalOperatorMetricGroup operatorMetricGroup1 = taskMetricGroup.getOrAddOperator(CHAINED_OPERATOR_NAME);
        final InternalOperatorIOMetricGroup ioMetricGroup = operatorMetricGroup1.getIOMetricGroup();
        final Counter numRecordsInCounter = ioMetricGroup.getNumRecordsInCounter();
        final Counter numRecordsOutCounter = ioMetricGroup.getNumRecordsOutCounter();
        Assert.assertEquals(numRecords * 2, numRecordsInCounter.getCount());
        Assert.assertEquals(numRecords * 2 * 2, numRecordsOutCounter.getCount());
    }
}
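The expected counts follow from DuplicatingFlatMapFunction emitting every record twice: the head operator turns numRecords into numRecords * 2, and the chained copy doubles that again to numRecords * 2 * 2. A hypothetical equivalent of that function (the class name here is assumed, not the Flink test class) could look like:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.types.Record;
import org.apache.flink.util.Collector;

// Hypothetical equivalent of DuplicatingFlatMapFunction: every input record is
// emitted twice, so each operator in the chain doubles the record count.
public class DuplicatingFlatMap implements FlatMapFunction<Record, Record> {
    @Override
    public void flatMap(Record value, Collector<Record> out) {
        out.collect(value);
        out.collect(value);
    }
}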
Use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.
In the class CommonExecLookupJoin, the method createSyncLookupJoin.
private StreamOperatorFactory<RowData> createSyncLookupJoin(
        RelOptTable temporalTable,
        ExecNodeConfig config,
        Map<Integer, LookupJoinUtil.LookupKey> allLookupKeys,
        TableFunction<?> syncLookupFunction,
        RelBuilder relBuilder,
        RowType inputRowType,
        RowType tableSourceRowType,
        RowType resultRowType,
        boolean isLeftOuterJoin,
        boolean isObjectReuseEnabled) {
    DataTypeFactory dataTypeFactory =
            ShortcutUtils.unwrapContext(relBuilder).getCatalogManager().getDataTypeFactory();
    int[] orderedLookupKeys = LookupJoinUtil.getOrderedLookupKeys(allLookupKeys.keySet());
    GeneratedFunction<FlatMapFunction<RowData, RowData>> generatedFetcher =
            LookupJoinCodeGenerator.generateSyncLookupFunction(
                    config.getTableConfig(),
                    dataTypeFactory,
                    inputRowType,
                    tableSourceRowType,
                    resultRowType,
                    allLookupKeys,
                    orderedLookupKeys,
                    syncLookupFunction,
                    StringUtils.join(temporalTable.getQualifiedName(), "."),
                    isObjectReuseEnabled);
    RowType rightRowType =
            Optional.ofNullable(temporalTableOutputType)
                    .map(FlinkTypeFactory::toLogicalRowType)
                    .orElse(tableSourceRowType);
    CodeGeneratorContext ctx = new CodeGeneratorContext(config.getTableConfig());
    GeneratedCollector<TableFunctionCollector<RowData>> generatedCollector =
            LookupJoinCodeGenerator.generateCollector(
                    ctx,
                    inputRowType,
                    rightRowType,
                    resultRowType,
                    JavaScalaConversionUtil.toScala(Optional.ofNullable(joinCondition)),
                    JavaScalaConversionUtil.toScala(Optional.empty()),
                    true);
    ProcessFunction<RowData, RowData> processFunc;
    if (existCalcOnTemporalTable) {
        // a projection or filter after table source scan
        GeneratedFunction<FlatMapFunction<RowData, RowData>> generatedCalc =
                LookupJoinCodeGenerator.generateCalcMapFunction(
                        config.getTableConfig(),
                        JavaScalaConversionUtil.toScala(projectionOnTemporalTable),
                        filterOnTemporalTable,
                        temporalTableOutputType,
                        tableSourceRowType);
        processFunc =
                new LookupJoinWithCalcRunner(
                        generatedFetcher,
                        generatedCalc,
                        generatedCollector,
                        isLeftOuterJoin,
                        rightRowType.getFieldCount());
    } else {
        // right type is the same as table source row type, because no calc after temporal table
        processFunc =
                new LookupJoinRunner(
                        generatedFetcher, generatedCollector, isLeftOuterJoin, rightRowType.getFieldCount());
    }
    return SimpleOperatorFactory.of(new ProcessOperator<>(processFunc));
}
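generatedFetcher is code-generated at planning time, so its body never appears in the planner sources. To make the FlatMapFunction<RowData, RowData> shape concrete, here is a hand-written sketch of what a sync fetcher does conceptually; the HypotheticalSyncLookup interface and the single int key are simplifications introduced for this sketch, and a real generated fetcher also handles key extraction for arbitrary orderedLookupKeys, type conversion, and object reuse:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.table.data.RowData;
import org.apache.flink.util.Collector;

// Conceptual sketch of a generated sync fetcher: read the lookup key(s) from
// the probe row, query the lookup table, and collect each matched row.
public class SketchedLookupFetcher implements FlatMapFunction<RowData, RowData> {

    private final HypotheticalSyncLookup lookup; // stands in for the TableFunction-backed fetch

    public SketchedLookupFetcher(HypotheticalSyncLookup lookup) {
        this.lookup = lookup;
    }

    @Override
    public void flatMap(RowData probeRow, Collector<RowData> out) throws Exception {
        // orderedLookupKeys in the method above determines which fields are read here
        int key = probeRow.getInt(0);
        for (RowData matched : lookup.get(key)) {
            out.collect(matched);
        }
    }

    /** Hypothetical lookup abstraction used only for this sketch. */
    public interface HypotheticalSyncLookup {
        Iterable<RowData> get(int key);
    }
}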
Use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.
In the class SideOutputITCase, the method testAllWindowLateArrivingEvents.
/**
 * Tests the side-output stream of late-arriving events for an all-window operator.
 */
@Test
public void testAllWindowLateArrivingEvents() throws Exception {
    TestListResultSink<String> sideOutputResultSink = new TestListResultSink<>();

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    DataStream<Integer> dataStream = see.fromCollection(elements);

    OutputTag<Integer> lateDataTag = new OutputTag<Integer>("late") {};

    SingleOutputStreamOperator<Integer> windowOperator =
            dataStream
                    .assignTimestampsAndWatermarks(new TestWatermarkAssigner())
                    .windowAll(SlidingEventTimeWindows.of(Time.milliseconds(1), Time.milliseconds(1)))
                    .sideOutputLateData(lateDataTag)
                    .apply(new AllWindowFunction<Integer, Integer, TimeWindow>() {
                        private static final long serialVersionUID = 1L;

                        @Override
                        public void apply(TimeWindow window, Iterable<Integer> values, Collector<Integer> out) throws Exception {
                            for (Integer val : values) {
                                out.collect(val);
                            }
                        }
                    });

    windowOperator.getSideOutput(lateDataTag).flatMap(new FlatMapFunction<Integer, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(Integer value, Collector<String> out) throws Exception {
            out.collect("late-" + value);
        }
    }).addSink(sideOutputResultSink);

    see.execute();
    assertEquals(Arrays.asList("late-3", "late-4"), sideOutputResultSink.getSortedResult());
}
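The same getSideOutput pattern works for any tagged side output, not just the late data emitted by a window operator. A minimal, runnable sketch under assumed names (SideOutputSketch, the "odd" tag, and the even/odd routing are all illustrative):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

// Sketch of the side-output pattern the test exercises: route some records to
// a tagged side output, then consume that side output with a FlatMapFunction.
public class SideOutputSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Anonymous subclass so the element type survives erasure, as in the test.
        final OutputTag<Integer> oddTag = new OutputTag<Integer>("odd") {};

        SingleOutputStreamOperator<Integer> evens =
                env.fromElements(1, 2, 3, 4, 5)
                        .process(new ProcessFunction<Integer, Integer>() {
                            @Override
                            public void processElement(Integer value, Context ctx, Collector<Integer> out) {
                                if (value % 2 == 0) {
                                    out.collect(value);
                                } else {
                                    ctx.output(oddTag, value); // diverted to the side output
                                }
                            }
                        });

        // Side outputs are only reachable through the operator that produced them.
        DataStream<String> odds = evens.getSideOutput(oddTag)
                .flatMap(new FlatMapFunction<Integer, String>() {
                    @Override
                    public void flatMap(Integer value, Collector<String> out) {
                        out.collect("odd-" + value);
                    }
                });

        odds.print();
        env.execute("side-output-sketch");
    }
}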