Search in sources:

Example 11 with FlatMapFunction

use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.

the class StreamingJobGraphGeneratorTest method testResourcesForIteration.

/**
 * Checks that resource specs of chained operators are merged as expected when the job graph is
 * generated, covering both chaining in the middle of a pipeline and the iteration head/tail
 * vertices.
 */
@Test
public void testResourcesForIteration() throws Exception {
    ResourceSpec sourceSpec = ResourceSpec.newBuilder(0.1, 100).build();
    ResourceSpec iterationSpec = ResourceSpec.newBuilder(0.2, 200).build();
    ResourceSpec flatMapSpec = ResourceSpec.newBuilder(0.3, 300).build();
    ResourceSpec filterSpec = ResourceSpec.newBuilder(0.4, 400).build();
    ResourceSpec sinkSpec = ResourceSpec.newBuilder(0.5, 500).build();

    // The resource setters are reached reflectively through the test helper.
    Method setOperatorResources =
            getSetResourcesMethodAndSetAccessible(SingleOutputStreamOperator.class);
    Method setSinkResources = getSetResourcesMethodAndSetAccessible(DataStreamSink.class);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Source that never emits anything; only its place in the graph matters here.
    ParallelSourceFunction<Integer> noOpSource =
            new ParallelSourceFunction<Integer>() {

                @Override
                public void run(SourceContext<Integer> ctx) throws Exception {
                }

                @Override
                public void cancel() {
                }
            };
    DataStream<Integer> source = env.addSource(noOpSource).name("test_source");
    setOperatorResources.invoke(source, sourceSpec);

    IterativeStream<Integer> iteration = source.iterate(3000);
    setOperatorResources.invoke(iteration, iterationSpec);

    DataStream<Integer> flatMapped =
            iteration
                    .flatMap(
                            new FlatMapFunction<Integer, Integer>() {

                                @Override
                                public void flatMap(Integer value, Collector<Integer> out)
                                        throws Exception {
                                    out.collect(value);
                                }
                            })
                    .name("test_flatMap");
    setOperatorResources.invoke(flatMapped, flatMapSpec);

    // CHAIN(flatMap -> Filter)
    DataStream<Integer> filtered =
            flatMapped
                    .filter(
                            new FilterFunction<Integer>() {

                                @Override
                                public boolean filter(Integer value) throws Exception {
                                    return false;
                                }
                            })
                    .name("test_filter");
    setOperatorResources.invoke(filtered, filterSpec);

    // Close the loop with the filter output and attach an unchained sink to it.
    DataStreamSink<Integer> sink =
            iteration
                    .closeWith(filtered)
                    .addSink(
                            new SinkFunction<Integer>() {

                                @Override
                                public void invoke(Integer value) throws Exception {
                                }
                            })
                    .disableChaining()
                    .name("test_sink");
    setSinkResources.invoke(sink, sinkSpec);

    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

    for (JobVertex vertex : jobGraph.getVertices()) {
        final String vertexName = vertex.getName();
        if (vertexName.contains("test_source")) {
            assertTrue(vertex.getMinResources().equals(sourceSpec));
        } else if (vertexName.contains("Iteration_Source")) {
            assertTrue(vertex.getPreferredResources().equals(iterationSpec));
        } else if (vertexName.contains("test_flatMap")) {
            // flatMap and filter share one vertex, so their specs are expected to be merged.
            assertTrue(vertex.getMinResources().equals(flatMapSpec.merge(filterSpec)));
        } else if (vertexName.contains("Iteration_Tail")) {
            assertTrue(vertex.getPreferredResources().equals(ResourceSpec.DEFAULT));
        } else if (vertexName.contains("test_sink")) {
            assertTrue(vertex.getMinResources().equals(sinkSpec));
        }
    }
}
Also used : FilterFunction(org.apache.flink.api.common.functions.FilterFunction) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) Method(java.lang.reflect.Method) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Collector(org.apache.flink.util.Collector) ParallelSourceFunction(org.apache.flink.streaming.api.functions.source.ParallelSourceFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 12 with FlatMapFunction

use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.

the class ChainTaskTest method testBatchTaskOutputInCloseMethod.

/**
 * Runs a {@link FlatMapDriver} head task followed by a chain of {@link ChainedFlatMapDriver}
 * tasks and checks the total number of records that reach the output list.
 *
 * <p>The expected size is the number of generated input records plus one extra record per
 * chained task (presumably the record each {@code MockDuplicateLastValueMapFunction} emits on
 * close - confirm against that stub).
 *
 * @throws Exception if setting up or invoking the task fails; the exception is propagated so
 *     that the test report carries the original cause and stack trace
 */
@Test
public void testBatchTaskOutputInCloseMethod() throws Exception {
    final int numChainedTasks = 10;
    final int keyCnt = 100;
    final int valCnt = 10;

    initEnvironment(MEMORY_MANAGER_SIZE, NETWORK_BUFFER_SIZE);
    addInput(new UniformRecordGenerator(keyCnt, valCnt, false), 0);
    addOutput(outList);
    registerTask(FlatMapDriver.class, MockMapStub.class);

    // Append the chained flat-map tasks, each forwarding to the next one.
    for (int i = 0; i < numChainedTasks; i++) {
        final TaskConfig taskConfig = new TaskConfig(new Configuration());
        taskConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
        taskConfig.setOutputSerializer(serFact);
        taskConfig.setStubWrapper(new UserCodeClassWrapper<>(MockDuplicateLastValueMapFunction.class));
        getTaskConfig().addChainedTask(ChainedFlatMapDriver.class, taskConfig, "chained-" + i);
    }

    final BatchTask<FlatMapFunction<Record, Record>, Record> testTask = new BatchTask<>(mockEnv);
    testTask.invoke();

    Assert.assertEquals(keyCnt * valCnt + numChainedTasks, outList.size());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) BatchTask(org.apache.flink.runtime.operators.BatchTask) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) Record(org.apache.flink.types.Record) UniformRecordGenerator(org.apache.flink.runtime.operators.testutils.UniformRecordGenerator) IOException(java.io.IOException) DataSourceTaskTest(org.apache.flink.runtime.operators.DataSourceTaskTest) Test(org.junit.Test)

Example 13 with FlatMapFunction

use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.

the class ChainedOperatorsMetricTest method testOperatorIOMetricReuse.

/**
 * Runs a head flat-map operator with one chained operator and checks the record-in/record-out
 * counters on the task, on the head operator and on the chained operator.
 */
@Test
public void testOperatorIOMetricReuse() throws Exception {
    // environment
    initEnvironment(MEMORY_MANAGER_SIZE, NETWORK_BUFFER_SIZE);
    this.mockEnv =
            new MockEnvironmentBuilder()
                    .setTaskName(HEAD_OPERATOR_NAME)
                    .setManagedMemorySize(MEMORY_MANAGER_SIZE)
                    .setInputSplitProvider(this.inputSplitProvider)
                    .setBufferSize(NETWORK_BUFFER_SIZE)
                    .setMetricGroup(
                            TaskManagerMetricGroup.createTaskManagerMetricGroup(
                                            NoOpMetricRegistry.INSTANCE,
                                            "host",
                                            ResourceID.generate())
                                    .addJob(new JobID(), "jobName")
                                    .addTask(
                                            new JobVertexID(),
                                            new ExecutionAttemptID(),
                                            "task",
                                            0,
                                            0))
                    .build();

    final int keyCnt = 100;
    final int valCnt = 20;
    // Expected counts: the input size, the size after the head operator, and the size after
    // the chained operator (each stage's output is asserted to be twice its input).
    final int recordsIn = keyCnt * valCnt;
    final int recordsAfterHead = recordsIn * 2;
    final int recordsAfterChain = recordsIn * 2 * 2;

    addInput(new UniformRecordGenerator(keyCnt, valCnt, false), 0);
    addOutput(this.outList);

    // the chained operator
    addChainedOperator();

    // creates the head operator and assembles the chain
    registerTask(FlatMapDriver.class, DuplicatingFlatMapFunction.class);
    final BatchTask<FlatMapFunction<Record, Record>, Record> testTask =
            new BatchTask<>(this.mockEnv);
    testTask.invoke();

    Assert.assertEquals(recordsAfterChain, this.outList.size());

    final TaskMetricGroup taskMetricGroup = mockEnv.getMetricGroup();

    // verify task-level metrics
    final TaskIOMetricGroup taskIo = taskMetricGroup.getIOMetricGroup();
    final Counter taskRecordsIn = taskIo.getNumRecordsInCounter();
    final Counter taskRecordsOut = taskIo.getNumRecordsOutCounter();
    Assert.assertEquals(recordsIn, taskRecordsIn.getCount());
    Assert.assertEquals(recordsAfterChain, taskRecordsOut.getCount());

    // verify head operator metrics
    // this only returns the existing group and doesn't create a new one
    final OperatorMetricGroup headGroup = taskMetricGroup.getOrAddOperator(HEAD_OPERATOR_NAME);
    final OperatorIOMetricGroup headIo = headGroup.getIOMetricGroup();
    final Counter headRecordsIn = headIo.getNumRecordsInCounter();
    final Counter headRecordsOut = headIo.getNumRecordsOutCounter();
    Assert.assertEquals(recordsIn, headRecordsIn.getCount());
    Assert.assertEquals(recordsAfterHead, headRecordsOut.getCount());

    // verify chained operator metrics
    // this only returns the existing group and doesn't create a new one
    final InternalOperatorMetricGroup chainedGroup =
            taskMetricGroup.getOrAddOperator(CHAINED_OPERATOR_NAME);
    final InternalOperatorIOMetricGroup chainedIo = chainedGroup.getIOMetricGroup();
    final Counter chainedRecordsIn = chainedIo.getNumRecordsInCounter();
    final Counter chainedRecordsOut = chainedIo.getNumRecordsOutCounter();
    Assert.assertEquals(recordsAfterHead, chainedRecordsIn.getCount());
    Assert.assertEquals(recordsAfterChain, chainedRecordsOut.getCount());
}
Also used : MockEnvironmentBuilder(org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) BatchTask(org.apache.flink.runtime.operators.BatchTask) TaskMetricGroup(org.apache.flink.runtime.metrics.groups.TaskMetricGroup) OperatorIOMetricGroup(org.apache.flink.metrics.groups.OperatorIOMetricGroup) InternalOperatorIOMetricGroup(org.apache.flink.runtime.metrics.groups.InternalOperatorIOMetricGroup) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) InternalOperatorIOMetricGroup(org.apache.flink.runtime.metrics.groups.InternalOperatorIOMetricGroup) Counter(org.apache.flink.metrics.Counter) InternalOperatorMetricGroup(org.apache.flink.runtime.metrics.groups.InternalOperatorMetricGroup) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) TaskIOMetricGroup(org.apache.flink.runtime.metrics.groups.TaskIOMetricGroup) Record(org.apache.flink.types.Record) UniformRecordGenerator(org.apache.flink.runtime.operators.testutils.UniformRecordGenerator) InternalOperatorMetricGroup(org.apache.flink.runtime.metrics.groups.InternalOperatorMetricGroup) OperatorMetricGroup(org.apache.flink.metrics.groups.OperatorMetricGroup) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 14 with FlatMapFunction

use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.

the class CommonExecLookupJoin method createSyncLookupJoin.

/**
 * Builds the operator factory for a synchronous lookup join.
 *
 * <p>A fetcher function and a result collector are code-generated, then wrapped in either a
 * {@link LookupJoinRunner} or, when a calc sits on top of the temporal table, a {@link
 * LookupJoinWithCalcRunner} that additionally receives the generated calc function. The runner
 * is returned inside a {@link ProcessOperator}.
 *
 * @param temporalTable table whose qualified name is passed to the fetcher generator
 * @param config node configuration supplying the table config for code generation
 * @param allLookupKeys lookup keys by index; the key set determines the ordered lookup keys
 * @param syncLookupFunction table function used by the generated fetcher
 * @param relBuilder builder from which the data type factory is unwrapped
 * @param inputRowType row type of the join input
 * @param tableSourceRowType row type produced by the table source
 * @param resultRowType row type of the join result
 * @param isLeftOuterJoin forwarded to the join runner
 * @param isObjectReuseEnabled forwarded to the fetcher generator
 * @return operator factory wrapping the configured lookup join runner
 */
private StreamOperatorFactory<RowData> createSyncLookupJoin(
        RelOptTable temporalTable,
        ExecNodeConfig config,
        Map<Integer, LookupJoinUtil.LookupKey> allLookupKeys,
        TableFunction<?> syncLookupFunction,
        RelBuilder relBuilder,
        RowType inputRowType,
        RowType tableSourceRowType,
        RowType resultRowType,
        boolean isLeftOuterJoin,
        boolean isObjectReuseEnabled) {
    final DataTypeFactory typeFactory =
            ShortcutUtils.unwrapContext(relBuilder).getCatalogManager().getDataTypeFactory();
    final int[] lookupKeyOrder = LookupJoinUtil.getOrderedLookupKeys(allLookupKeys.keySet());

    final GeneratedFunction<FlatMapFunction<RowData, RowData>> fetcher =
            LookupJoinCodeGenerator.generateSyncLookupFunction(
                    config.getTableConfig(),
                    typeFactory,
                    inputRowType,
                    tableSourceRowType,
                    resultRowType,
                    allLookupKeys,
                    lookupKeyOrder,
                    syncLookupFunction,
                    StringUtils.join(temporalTable.getQualifiedName(), "."),
                    isObjectReuseEnabled);

    // Use the temporal table's output type when one is set, otherwise the source row type.
    final RowType rightRowType =
            Optional.ofNullable(temporalTableOutputType)
                    .map(FlinkTypeFactory::toLogicalRowType)
                    .orElse(tableSourceRowType);

    final CodeGeneratorContext codeGenCtx = new CodeGeneratorContext(config.getTableConfig());
    final GeneratedCollector<TableFunctionCollector<RowData>> collector =
            LookupJoinCodeGenerator.generateCollector(
                    codeGenCtx,
                    inputRowType,
                    rightRowType,
                    resultRowType,
                    JavaScalaConversionUtil.toScala(Optional.ofNullable(joinCondition)),
                    JavaScalaConversionUtil.toScala(Optional.empty()),
                    true);

    final ProcessFunction<RowData, RowData> joinRunner;
    if (!existCalcOnTemporalTable) {
        // right type is the same as table source row type, because no calc after temporal table
        joinRunner =
                new LookupJoinRunner(
                        fetcher, collector, isLeftOuterJoin, rightRowType.getFieldCount());
    } else {
        // a projection or filter after table source scan
        final GeneratedFunction<FlatMapFunction<RowData, RowData>> calc =
                LookupJoinCodeGenerator.generateCalcMapFunction(
                        config.getTableConfig(),
                        JavaScalaConversionUtil.toScala(projectionOnTemporalTable),
                        filterOnTemporalTable,
                        temporalTableOutputType,
                        tableSourceRowType);
        joinRunner =
                new LookupJoinWithCalcRunner(
                        fetcher, calc, collector, isLeftOuterJoin, rightRowType.getFieldCount());
    }
    return SimpleOperatorFactory.of(new ProcessOperator<>(joinRunner));
}
Also used : CodeGeneratorContext(org.apache.flink.table.planner.codegen.CodeGeneratorContext) RowType(org.apache.flink.table.types.logical.RowType) DataTypeFactory(org.apache.flink.table.catalog.DataTypeFactory) TableFunctionCollector(org.apache.flink.table.runtime.collector.TableFunctionCollector) AsyncLookupJoinRunner(org.apache.flink.table.runtime.operators.join.lookup.AsyncLookupJoinRunner) LookupJoinRunner(org.apache.flink.table.runtime.operators.join.lookup.LookupJoinRunner) RowData(org.apache.flink.table.data.RowData) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) LookupJoinWithCalcRunner(org.apache.flink.table.runtime.operators.join.lookup.LookupJoinWithCalcRunner) AsyncLookupJoinWithCalcRunner(org.apache.flink.table.runtime.operators.join.lookup.AsyncLookupJoinWithCalcRunner)

Example 15 with FlatMapFunction

use of org.apache.flink.api.common.functions.FlatMapFunction in project flink by apache.

the class SideOutputITCase method testAllWindowLateArrivingEvents.

/**
 * Verifies that elements arriving too late for an all-window are emitted through the late-data
 * side output.
 *
 * <p>The late elements are read from the side output tagged {@code "late"}, prefixed with
 * {@code "late-"} and collected in a result sink, whose sorted content is compared with the
 * expected list.
 *
 * @throws Exception if building or executing the job fails
 */
@Test
public void testAllWindowLateArrivingEvents() throws Exception {
    TestListResultSink<String> sideOutputResultSink = new TestListResultSink<>();
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    DataStream<Integer> dataStream = see.fromCollection(elements);
    // Anonymous subclass so that the tag keeps its generic element type.
    OutputTag<Integer> lateDataTag = new OutputTag<Integer>("late") {
    };
    // Main window output simply forwards every element of the window.
    SingleOutputStreamOperator<Integer> windowOperator = dataStream.assignTimestampsAndWatermarks(new TestWatermarkAssigner()).windowAll(SlidingEventTimeWindows.of(Time.milliseconds(1), Time.milliseconds(1))).sideOutputLateData(lateDataTag).apply(new AllWindowFunction<Integer, Integer, TimeWindow>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void apply(TimeWindow window, Iterable<Integer> values, Collector<Integer> out) throws Exception {
            for (Integer val : values) {
                out.collect(val);
            }
        }
    });
    // Route the late elements into the result sink, marked with a "late-" prefix.
    windowOperator.getSideOutput(lateDataTag).flatMap(new FlatMapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(Integer value, Collector<String> out) throws Exception {
            out.collect("late-" + value);
        }
    }).addSink(sideOutputResultSink);
    see.execute();
    // JUnit's contract is (expected, actual); the order matters for the failure message.
    assertEquals(Arrays.asList("late-3", "late-4"), sideOutputResultSink.getSortedResult());
}
Also used : TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) ExpectedException(org.junit.rules.ExpectedException) TestListResultSink(org.apache.flink.test.streaming.runtime.util.TestListResultSink) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Collector(org.apache.flink.util.Collector) OutputTag(org.apache.flink.util.OutputTag) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Aggregations

FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction): 15; StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 9; Collector (org.apache.flink.util.Collector): 9; Test (org.junit.Test): 7; IOException (java.io.IOException): 4; RichFlatMapFunction (org.apache.flink.api.common.functions.RichFlatMapFunction): 4; ArrayList (java.util.ArrayList): 3; HashMap (java.util.HashMap): 3; Map (java.util.Map): 3; DataStream (org.apache.flink.streaming.api.datastream.DataStream): 3; Collections (java.util.Collections): 2; List (java.util.List): 2; Properties (java.util.Properties): 2; MapFunction (org.apache.flink.api.common.functions.MapFunction): 2; TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 2; ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 2; KeySelector (org.apache.flink.api.java.functions.KeySelector): 2; Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 2; Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 2; ParameterTool (org.apache.flink.api.java.utils.ParameterTool): 2