
Example 6 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class SelfConnectionITCase method differentDataStreamDifferentChain.

/**
 * We connect two different data streams in different chains to a CoMap.
 * (This is not actually self-connect.)
 */
@Test
public void differentDataStreamDifferentChain() {
    TestListResultSink<String> resultSink = new TestListResultSink<String>();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);
    DataStream<Integer> src = env.fromElements(1, 3, 5).disableChaining();
    DataStream<String> stringMap = src.flatMap(new FlatMapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(Integer value, Collector<String> out) throws Exception {
            out.collect("x " + value);
        }
    }).keyBy(new KeySelector<String, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Integer getKey(String value) throws Exception {
            return value.length();
        }
    });
    DataStream<Long> longMap = src.map(new MapFunction<Integer, Long>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Long map(Integer value) throws Exception {
            return (long) (value + 1);
        }
    }).keyBy(new KeySelector<Long, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Integer getKey(Long value) throws Exception {
            return value.intValue();
        }
    });
    stringMap.connect(longMap).map(new CoMapFunction<String, Long, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map1(String value) {
            return value;
        }

        @Override
        public String map2(Long value) {
            return value.toString();
        }
    }).addSink(resultSink);
    try {
        env.execute();
    } catch (Exception e) {
        // fail fast with the cause instead of silently swallowing the execution error
        fail(e.getMessage());
    }
    List<String> expected = Arrays.asList("x 1", "x 3", "x 5", "2", "4", "6");
    List<String> result = resultSink.getResult();
    Collections.sort(expected);
    Collections.sort(result);
    assertEquals(expected, result);
}
Also used : CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) TestListResultSink(org.apache.flink.test.streaming.runtime.util.TestListResultSink) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)
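
A side note on the anonymous classes above: on Flink versions that support the Java 8 lambda API, the single-abstract-method interfaces used here (MapFunction, FlatMapFunction, KeySelector) can be written as lambdas. A minimal sketch, assuming the Types helper from org.apache.flink.api.common.typeinfo is available; the returns(...) hints compensate for the generic type information that lambdas lose to erasure:

DataStream<Integer> src = env.fromElements(1, 3, 5).disableChaining();

// FlatMapFunction as a lambda; the Collector's type parameter is erased,
// so Flink needs an explicit output-type hint.
DataStream<String> stringMap = src
        .flatMap((Integer value, Collector<String> out) -> out.collect("x " + value))
        .returns(Types.STRING)
        .keyBy(value -> value.length());

// MapFunction and KeySelector as lambdas.
DataStream<Long> longMap = src
        .map(value -> (long) (value + 1))
        .returns(Types.LONG)
        .keyBy(Long::intValue);

CoMapFunction declares two abstract methods (map1 and map2), so the connect step still requires a concrete or anonymous class.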

Example 7 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class DataStreamTest method operatorTest.

@Test
public void operatorTest() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Long> src = env.generateSequence(0, 0);
    MapFunction<Long, Integer> mapFunction = new MapFunction<Long, Integer>() {

        @Override
        public Integer map(Long value) throws Exception {
            return null;
        }
    };
    DataStream<Integer> map = src.map(mapFunction);
    map.addSink(new DiscardingSink<Integer>());
    assertEquals(mapFunction, getFunctionForDataStream(map));
    FlatMapFunction<Long, Integer> flatMapFunction = new FlatMapFunction<Long, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(Long value, Collector<Integer> out) throws Exception {
        }
    };
    DataStream<Integer> flatMap = src.flatMap(flatMapFunction);
    flatMap.addSink(new DiscardingSink<Integer>());
    assertEquals(flatMapFunction, getFunctionForDataStream(flatMap));
    FilterFunction<Integer> filterFunction = new FilterFunction<Integer>() {

        @Override
        public boolean filter(Integer value) throws Exception {
            return false;
        }
    };
    DataStream<Integer> unionFilter = map.union(flatMap).filter(filterFunction);
    unionFilter.addSink(new DiscardingSink<Integer>());
    assertEquals(filterFunction, getFunctionForDataStream(unionFilter));
    try {
        env.getStreamGraph().getStreamEdges(map.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
    try {
        env.getStreamGraph().getStreamEdges(flatMap.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
    OutputSelector<Integer> outputSelector = new OutputSelector<Integer>() {

        @Override
        public Iterable<String> select(Integer value) {
            return null;
        }
    };
    SplitStream<Integer> split = unionFilter.split(outputSelector);
    split.select("dummy").addSink(new DiscardingSink<Integer>());
    List<OutputSelector<?>> outputSelectors = env.getStreamGraph().getStreamNode(unionFilter.getId()).getOutputSelectors();
    assertEquals(1, outputSelectors.size());
    assertEquals(outputSelector, outputSelectors.get(0));
    DataStream<Integer> select = split.select("a");
    DataStreamSink<Integer> sink = select.print();
    StreamEdge splitEdge = env.getStreamGraph().getStreamEdges(unionFilter.getId(), sink.getTransformation().getId()).get(0);
    assertEquals("a", splitEdge.getSelectedNames().get(0));
    ConnectedStreams<Integer, Integer> connect = map.connect(flatMap);
    CoMapFunction<Integer, Integer, String> coMapper = new CoMapFunction<Integer, Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map1(Integer value) {
            return null;
        }

        @Override
        public String map2(Integer value) {
            return null;
        }
    };
    DataStream<String> coMap = connect.map(coMapper);
    coMap.addSink(new DiscardingSink<String>());
    assertEquals(coMapper, getFunctionForDataStream(coMap));
    try {
        env.getStreamGraph().getStreamEdges(map.getId(), coMap.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
    try {
        env.getStreamGraph().getStreamEdges(flatMap.getId(), coMap.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
}
Also used : FilterFunction(org.apache.flink.api.common.functions.FilterFunction) CoFlatMapFunction(org.apache.flink.streaming.api.functions.co.CoFlatMapFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Collector(org.apache.flink.util.Collector) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) OutputSelector(org.apache.flink.streaming.api.collector.selector.OutputSelector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)
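
The test exercises two combinators that are easy to conflate: union requires both inputs to have the same element type and yields one merged DataStream, while connect keeps the two input types distinct and routes each side to its own method of a CoMapFunction. A minimal sketch with hypothetical streams ints1, ints2, and longs (not part of the test):

DataStream<Integer> merged = ints1.union(ints2); // both sides are DataStream<Integer>

ConnectedStreams<Integer, Long> connected = ints1.connect(longs);
DataStream<String> out = connected.map(new CoMapFunction<Integer, Long, String>() {

    @Override
    public String map1(Integer value) {
        return "int: " + value; // elements from ints1
    }

    @Override
    public String map2(Long value) {
        return "long: " + value; // elements from longs
    }
});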

Example 8 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class StreamOperatorChainingTest method testMultiChainingWithSplit.

/**
 * Verify that multi-chaining works with object reuse enabled.
 */
private void testMultiChainingWithSplit(StreamExecutionEnvironment env) throws Exception {
    // the actual elements will not be used
    DataStream<Integer> input = env.fromElements(1, 2, 3);
    sink1Results = new ArrayList<>();
    sink2Results = new ArrayList<>();
    sink3Results = new ArrayList<>();
    input = input.map(new MapFunction<Integer, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Integer map(Integer value) throws Exception {
            return value;
        }
    });
    SplitStream<Integer> split = input.split(new OutputSelector<Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Iterable<String> select(Integer value) {
            if (value.equals(1)) {
                return Collections.singletonList("one");
            } else {
                return Collections.singletonList("other");
            }
        }
    });
    split.select("one").map(new MapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Integer value) throws Exception {
            return "First 1: " + value;
        }
    }).addSink(new SinkFunction<String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void invoke(String value) throws Exception {
            sink1Results.add(value);
        }
    });
    split.select("one").map(new MapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Integer value) throws Exception {
            return "First 2: " + value;
        }
    }).addSink(new SinkFunction<String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void invoke(String value) throws Exception {
            sink2Results.add(value);
        }
    });
    split.select("other").map(new MapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Integer value) throws Exception {
            return "Second: " + value;
        }
    }).addSink(new SinkFunction<String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void invoke(String value) throws Exception {
            sink3Results.add(value);
        }
    });
    // we build our own StreamTask and OperatorChain
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    Assert.assertEquals(2, jobGraph.getVerticesSortedTopologicallyFromSources().size());
    JobVertex chainedVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
    Configuration configuration = chainedVertex.getConfiguration();
    StreamConfig streamConfig = new StreamConfig(configuration);
    StreamMap<Integer, Integer> headOperator = streamConfig.getStreamOperator(Thread.currentThread().getContextClassLoader());
    StreamTask<Integer, StreamMap<Integer, Integer>> mockTask = createMockTask(streamConfig, chainedVertex.getName());
    OperatorChain<Integer, StreamMap<Integer, Integer>> operatorChain = new OperatorChain<>(mockTask);
    headOperator.setup(mockTask, streamConfig, operatorChain.getChainEntryPoint());
    for (StreamOperator<?> operator : operatorChain.getAllOperators()) {
        if (operator != null) {
            operator.open();
        }
    }
    headOperator.processElement(new StreamRecord<>(1));
    headOperator.processElement(new StreamRecord<>(2));
    headOperator.processElement(new StreamRecord<>(3));
    assertThat(sink1Results, contains("First 1: 1"));
    assertThat(sink2Results, contains("First 2: 1"));
    assertThat(sink3Results, contains("Second: 2", "Second: 3"));
}
Also used : Configuration(org.apache.flink.configuration.Configuration) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) MapFunction(org.apache.flink.api.common.functions.MapFunction) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) OperatorChain(org.apache.flink.streaming.runtime.tasks.OperatorChain) StreamMap(org.apache.flink.streaming.api.operators.StreamMap)
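
The split()/select() pattern used above was deprecated in later Flink releases in favor of side outputs. A sketch of the same one-vs-other routing with a ProcessFunction and an OutputTag, assuming such a Flink version; the names oneTag, mainStream, and input are illustrative:

// The anonymous subclass ({}) preserves the tag's generic type information.
final OutputTag<Integer> oneTag = new OutputTag<Integer>("one") {};

SingleOutputStreamOperator<Integer> mainStream = input
        .process(new ProcessFunction<Integer, Integer>() {

            @Override
            public void processElement(Integer value, Context ctx, Collector<Integer> out) {
                if (value.equals(1)) {
                    ctx.output(oneTag, value); // routed to the "one" side output
                } else {
                    out.collect(value); // the main output plays the "other" role
                }
            }
        });

DataStream<Integer> one = mainStream.getSideOutput(oneTag);
DataStream<Integer> other = mainStream;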

Example 9 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class GroupingSetsITCase method compareSql.

private void compareSql(String query1, String query2) throws Exception {
    // Function to map row to string
    MapFunction<Row, String> mapFunction = new MapFunction<Row, String>() {

        @Override
        public String map(Row value) throws Exception {
            return value == null ? "null" : value.toString();
        }
    };
    // Execute first query and store results
    Table resultTable1 = tableEnv.sql(query1);
    DataSet<Row> resultDataSet1 = tableEnv.toDataSet(resultTable1, Row.class);
    List<String> results1 = resultDataSet1.map(mapFunction).collect();
    // Execute second query and store results
    Table resultTable2 = tableEnv.sql(query2);
    DataSet<Row> resultDataSet2 = tableEnv.toDataSet(resultTable2, Row.class);
    List<String> results2 = resultDataSet2.map(mapFunction).collect();
    // Compare results; the map above turns null rows into the string "null",
    // but keep the comparator null-safe on both sides anyway
    TestBaseUtils.compareResultCollections(results1, results2, new Comparator<String>() {

        @Override
        public int compare(String o1, String o2) {
            if (o1 == null) {
                return o2 == null ? 0 : -1;
            }
            return o2 == null ? 1 : o1.compareTo(o2);
        }
    });
}
Also used : Table(org.apache.flink.table.api.Table) Row(org.apache.flink.types.Row) MapFunction(org.apache.flink.api.common.functions.MapFunction)
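
For context, GroupingSetsITCase uses compareSql to check that a GROUPING SETS query returns the same rows as its expanded UNION ALL form. A hypothetical invocation (table name, column names, and the CAST types are illustrative, not taken from the test):

compareSql(
    "SELECT a, b, COUNT(*) FROM t GROUP BY GROUPING SETS ((a), (b))",
    "SELECT a, CAST(NULL AS INT) AS b, COUNT(*) FROM t GROUP BY a " +
    "UNION ALL " +
    "SELECT CAST(NULL AS INT) AS a, b, COUNT(*) FROM t GROUP BY b");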

Example 10 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class SavepointITCase method testCanRestoreWithModifiedStatelessOperators.

/**
 * FLINK-5985
 *
 * This test ensures we can restore from a savepoint under modifications to
 * the job graph that only concern stateless operators.
 */
@Test
public void testCanRestoreWithModifiedStatelessOperators() throws Exception {
    // Config
    int numTaskManagers = 2;
    int numSlotsPerTaskManager = 2;
    int parallelism = 2;
    // Test deadline
    final Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();
    final File tmpDir = CommonTestUtils.createTempDirectory();
    final File savepointDir = new File(tmpDir, "savepoints");
    TestingCluster flink = null;
    String savepointPath;
    try {
        // Flink configuration
        final Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTaskManager);
        config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointDir.toURI().toString());
        LOG.info("Flink configuration: " + config + ".");
        // Start Flink
        flink = new TestingCluster(config);
        LOG.info("Starting Flink cluster.");
        flink.start(true);
        // Retrieve the job manager
        LOG.info("Retrieving JobManager.");
        ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("JobManager: " + jobManager + ".");
        final StatefulCounter statefulCounter = new StatefulCounter();
        StatefulCounter.resetForTest(parallelism);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        env.addSource(new InfiniteTestSource()).shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return 4 * value;
            }
        }).shuffle().map(statefulCounter).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return 2 * value;
            }
        }).addSink(new DiscardingSink<Integer>());
        JobGraph originalJobGraph = env.getStreamGraph().getJobGraph();
        JobSubmissionResult submissionResult = flink.submitJobDetached(originalJobGraph);
        JobID jobID = submissionResult.getJobID();
        // wait for the Tasks to be ready
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        Future<Object> savepointPathFuture = jobManager.ask(new TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
        savepointPath = ((TriggerSavepointSuccess) Await.result(savepointPathFuture, deadline.timeLeft())).savepointPath();
        Future<Object> savepointFuture = jobManager.ask(new RequestSavepoint(savepointPath), deadline.timeLeft());
        ((ResponseSavepoint) Await.result(savepointFuture, deadline.timeLeft())).savepoint();
        LOG.info("Retrieved savepoint: " + savepointPath + ".");
        // Shut down the Flink cluster (thereby canceling the job)
        LOG.info("Shutting down Flink cluster.");
        flink.shutdown();
        flink.awaitTermination();
    } finally {
        if (flink != null) {
            flink.shutdown();
            flink.awaitTermination();
        }
    }
    try {
        LOG.info("Restarting Flink cluster.");
        flink.start(true);
        // Retrieve the job manager
        LOG.info("Retrieving JobManager.");
        ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("JobManager: " + jobManager + ".");
        // Reset static test helpers
        StatefulCounter.resetForTest(parallelism);
        // Gather all task deployment descriptors
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        // generate a modified job graph that adds a stateless op
        env.addSource(new InfiniteTestSource()).shuffle().map(new StatefulCounter()).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return value;
            }
        }).addSink(new DiscardingSink<Integer>());
        JobGraph modifiedJobGraph = env.getStreamGraph().getJobGraph();
        // Set the savepoint path
        modifiedJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        LOG.info("Resubmitting job " + modifiedJobGraph.getJobID() + " with " + "savepoint path " + savepointPath + " in detached mode.");
        // Submit the job
        flink.submitJobDetached(modifiedJobGraph);
        // Await state is restored
        StatefulCounter.getRestoreLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        // Await some progress after restore
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
    } finally {
        flink.shutdown();
        flink.awaitTermination();
    }
}
Also used : RequestSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestSavepoint) Configuration(org.apache.flink.configuration.Configuration) MapFunction(org.apache.flink.api.common.functions.MapFunction) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) ResponseSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ResponseSavepoint) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) DisposeSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.DisposeSavepoint) FileNotFoundException(java.io.FileNotFoundException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) JobSubmissionResult(org.apache.flink.api.common.JobSubmissionResult) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) File(java.io.File) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
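
The restore works because the stateful operator carries the same uid("statefulCounter") in both job versions: savepoint state is matched to operators by these IDs, so the surrounding stateless operators can be added, removed, or modified. A minimal sketch of that practice, with placeholder source and sink:

DataStream<Integer> counted = env
        .addSource(source) // placeholder source
        .map(new StatefulCounter()) // the only stateful operator
        .uid("statefulCounter"); // stable ID; must match across job versions

// Stateless operators before or after this point may change freely
// between taking the savepoint and restoring from it.
counted.addSink(sink); // placeholder sink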

Aggregations

MapFunction (org.apache.flink.api.common.functions.MapFunction): 48 uses
Test (org.junit.Test): 31 uses
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 29 uses
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 19 uses
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 19 uses
Configuration (org.apache.flink.configuration.Configuration): 10 uses
FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction): 9 uses
Plan (org.apache.flink.api.common.Plan): 8 uses
RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction): 8 uses
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 8 uses
RichFlatMapFunction (org.apache.flink.api.common.functions.RichFlatMapFunction): 7 uses
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 7 uses
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 6 uses
Edge (org.apache.flink.graph.Edge): 6 uses
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 6 uses
NullValue (org.apache.flink.types.NullValue): 6 uses
FilterFunction (org.apache.flink.api.common.functions.FilterFunction): 5 uses
FieldList (org.apache.flink.api.common.operators.util.FieldList): 5 uses
DataSet (org.apache.flink.api.java.DataSet): 5 uses
Tuple1 (org.apache.flink.api.java.tuple.Tuple1): 5 uses