Examples with MapFunction - org.apache.flink.api.common.functions.MapFunction

Example 36 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class StreamingJobGraphGeneratorTest method testChainStartEndSetting.

/**
 * Checks that the chain start/end flags are set correctly for a
 * {@code fromElements -> CHAIN(Map -> Print)} topology.
 */
@Test
public void testChainStartEndSetting() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Pass-through mapper: returns every element unchanged.
    MapFunction<Integer, Integer> identity = new MapFunction<Integer, Integer>() {

        @Override
        public Integer map(Integer value) throws Exception {
            return value;
        }
    };

    // fromElements -> CHAIN(Map -> Print)
    env.fromElements(1, 2, 3).map(identity).print();

    JobGraph graph = new StreamingJobGraphGenerator(env.getStreamGraph(), 1).createJobGraph();
    List<JobVertex> sortedVertices = graph.getVerticesSortedTopologicallyFromSources();
    JobVertex headVertex = sortedVertices.get(0);
    JobVertex chainVertex = sortedVertices.get(1);

    // Both ends of the edge between the two vertices must report a pipelined-bounded result.
    assertEquals(ResultPartitionType.PIPELINED_BOUNDED,
            headVertex.getProducedDataSets().get(0).getResultType());
    assertEquals(ResultPartitionType.PIPELINED_BOUNDED,
            chainVertex.getInputs().get(0).getSource().getResultType());

    StreamConfig headConfig = new StreamConfig(headVertex.getConfiguration());
    StreamConfig mapperConfig = new StreamConfig(chainVertex.getConfiguration());

    ClassLoader loader = getClass().getClassLoader();
    Map<Integer, StreamConfig> chained = mapperConfig.getTransitiveChainedTaskConfigs(loader);
    // The first chained entry is treated as the print operator's config
    // (presumably the only entry for this topology).
    StreamConfig printerConfig = chained.values().iterator().next();

    // The source forms a chain of its own: it is both start and end.
    assertTrue(headConfig.isChainStart());
    assertTrue(headConfig.isChainEnd());

    // The map operator opens the second chain but does not close it ...
    assertTrue(mapperConfig.isChainStart());
    assertFalse(mapperConfig.isChainEnd());

    // ... while print closes that chain without opening it.
    assertFalse(printerConfig.isChainStart());
    assertTrue(printerConfig.isChainEnd());
}

Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) MapFunction(org.apache.flink.api.common.functions.MapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Test(org.junit.Test)

Example 37 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class ReplicatingDataSourceITCase method testReplicatedSourceToCross.

@Test
public void testReplicatedSourceToCross() throws Exception {
    // Test replicated source going into cross
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // First input: the number sequence wrapped in a replicating input format.
    ReplicatingInputFormat<Long, GenericInputSplit> replicatedSequence =
            new ReplicatingInputFormat<Long, GenericInputSplit>(
                    new ParallelIteratorInputFormat<Long>(new NumberSequenceIterator(0L, 1000L)));
    DataSet<Tuple1<Long>> source1 = env
            .createInput(replicatedSequence, BasicTypeInfo.LONG_TYPE_INFO)
            .map(new ToTuple());

    // Second input: the same range, produced by a regular sequence source.
    DataSet<Tuple1<Long>> source2 = env.generateSequence(0L, 1000L).map(new ToTuple());

    // Keeps only those crossed pairs whose two sides carry the same number.
    FilterFunction<Tuple2<Tuple1<Long>, Tuple1<Long>>> sameValue =
            new FilterFunction<Tuple2<Tuple1<Long>, Tuple1<Long>>>() {

        @Override
        public boolean filter(Tuple2<Tuple1<Long>, Tuple1<Long>> value) throws Exception {
            return value.f0.f0.equals(value.f1.f0);
        }
    };

    // Projects a matching pair onto its left element.
    MapFunction<Tuple2<Tuple1<Long>, Tuple1<Long>>, Tuple1<Long>> takeLeft =
            new MapFunction<Tuple2<Tuple1<Long>, Tuple1<Long>>, Tuple1<Long>>() {

        @Override
        public Tuple1<Long> map(Tuple2<Tuple1<Long>, Tuple1<Long>> value) throws Exception {
            return value.f0;
        }
    };

    DataSet<Tuple1<Long>> pairs = source1.cross(source2)
            .filter(sameValue)
            .map(takeLeft)
            .sum(0);

    List<Tuple1<Long>> result = pairs.collect();

    // The summed left elements of all matching pairs are expected to total 500500.
    String expectedResult = "(500500)";
    compareResultAsText(result, expectedResult);
}

Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) MapFunction(org.apache.flink.api.common.functions.MapFunction) NumberSequenceIterator(org.apache.flink.util.NumberSequenceIterator) ReplicatingInputFormat(org.apache.flink.api.common.io.ReplicatingInputFormat) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 38 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class StreamOperatorChainingTest method testMultiChaining.

/**
 * Verifies multi-chaining: one head map operator feeding two chained
 * map/sink branches, each of which must receive every element.
 */
private void testMultiChaining(StreamExecutionEnvironment env) throws Exception {
    sink1Results = new ArrayList<>();
    sink2Results = new ArrayList<>();

    // the actual elements will not be used
    DataStream<Integer> source = env.fromElements(1, 2, 3);

    // Head of the chain: forwards every element unchanged.
    MapFunction<Integer, Integer> identity = new MapFunction<Integer, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Integer map(Integer value) throws Exception {
            return value;
        }
    };
    DataStream<Integer> input = source.map(identity);

    // First branch: label the element and record it in sink1Results.
    MapFunction<Integer, String> firstLabeler = new MapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Integer value) throws Exception {
            return "First: " + value;
        }
    };
    SinkFunction<String> firstCollector = new SinkFunction<String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void invoke(String value) throws Exception {
            sink1Results.add(value);
        }
    };
    input.map(firstLabeler).addSink(firstCollector);

    // Second branch: same shape, recording into sink2Results.
    MapFunction<Integer, String> secondLabeler = new MapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Integer value) throws Exception {
            return "Second: " + value;
        }
    };
    SinkFunction<String> secondCollector = new SinkFunction<String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void invoke(String value) throws Exception {
            sink2Results.add(value);
        }
    };
    input.map(secondLabeler).addSink(secondCollector);

    // we build our own StreamTask and OperatorChain
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    // Exactly two job vertices are expected; the second one holds the chain under test.
    Assert.assertTrue(jobGraph.getVerticesSortedTopologicallyFromSources().size() == 2);
    JobVertex chainedVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);

    StreamConfig streamConfig = new StreamConfig(chainedVertex.getConfiguration());
    StreamMap<Integer, Integer> headOperator =
            streamConfig.getStreamOperator(Thread.currentThread().getContextClassLoader());

    StreamTask<Integer, StreamMap<Integer, Integer>> mockTask =
            createMockTask(streamConfig, chainedVertex.getName());
    OperatorChain<Integer, StreamMap<Integer, Integer>> operatorChain = new OperatorChain<>(mockTask);

    headOperator.setup(mockTask, streamConfig, operatorChain.getChainEntryPoint());

    // Open every operator of the chain, skipping empty slots.
    for (StreamOperator<?> operator : operatorChain.getAllOperators()) {
        if (operator == null) {
            continue;
        }
        operator.open();
    }

    // Push 1, 2, 3 through the head operator by hand.
    for (int element = 1; element <= 3; element++) {
        headOperator.processElement(new StreamRecord<>(element));
    }

    assertThat(sink1Results, contains("First: 1", "First: 2", "First: 3"));
    assertThat(sink2Results, contains("Second: 1", "Second: 2", "Second: 3"));
}

Also used : Configuration(org.apache.flink.configuration.Configuration) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) MapFunction(org.apache.flink.api.common.functions.MapFunction) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) OperatorChain(org.apache.flink.streaming.runtime.tasks.OperatorChain) StreamMap(org.apache.flink.streaming.api.operators.StreamMap)

Example 39 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class SavepointITCase method testSavepointForJobWithIteration.

/**
 * Runs a streaming job containing an iteration, triggers a savepoint once all
 * snapshot latches have fired, and then resubmits the job restored from that
 * savepoint, waiting for all restore latches.
 *
 * <p>The latches and counters are reset at the start; they are presumably
 * triggered by the job's operators, which are not part of this method.
 *
 * <p>The manually created {@link TemporaryFolder} is deleted in a
 * {@code finally} block so that checkpoint and savepoint directories do not
 * accumulate on disk across test runs.
 */
@Test
public void testSavepointForJobWithIteration() throws Exception {
    // Reset the shared per-subtask synchronization state.
    for (int i = 0; i < ITER_TEST_PARALLELISM; ++i) {
        ITER_TEST_SNAPSHOT_WAIT[i] = new OneShotLatch();
        ITER_TEST_RESTORE_WAIT[i] = new OneShotLatch();
        ITER_TEST_CHECKPOINT_VERIFY[i] = 0;
    }
    // The folder is not managed as a JUnit rule here, so it has to be
    // created and deleted explicitly.
    TemporaryFolder folder = new TemporaryFolder();
    folder.create();
    try {
        // Temporary directory for file state backend
        final File tmpDir = folder.newFolder();
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        final IntegerStreamSource source = new IntegerStreamSource();
        IterativeStream<Integer> iteration = env.addSource(source).flatMap(new RichFlatMapFunction<Integer, Integer>() {

            private static final long serialVersionUID = 1L;

            @Override
            public void flatMap(Integer in, Collector<Integer> clctr) throws Exception {
                clctr.collect(in);
            }
        }).setParallelism(ITER_TEST_PARALLELISM).keyBy(new KeySelector<Integer, Object>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Object getKey(Integer value) throws Exception {
                return value;
            }
        }).flatMap(new DuplicateFilter()).setParallelism(ITER_TEST_PARALLELISM).iterate();
        // The iteration body is an identity map whose output is fed back.
        DataStream<Integer> iterationBody = iteration.map(new MapFunction<Integer, Integer>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Integer map(Integer value) throws Exception {
                return value;
            }
        }).setParallelism(ITER_TEST_PARALLELISM);
        iteration.closeWith(iterationBody);
        StreamGraph streamGraph = env.getStreamGraph();
        streamGraph.setJobName("Test");
        JobGraph jobGraph = streamGraph.getJobGraph();
        Configuration config = new Configuration();
        config.addAll(jobGraph.getJobConfiguration());
        config.setLong(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1L);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 2 * jobGraph.getMaximumParallelism());
        final File checkpointDir = new File(tmpDir, "checkpoints");
        final File savepointDir = new File(tmpDir, "savepoints");
        if (!checkpointDir.mkdir() || !savepointDir.mkdirs()) {
            fail("Test setup failed: failed to create temporary directories.");
        }
        // File system state backend writing below the temporary directory.
        config.setString(CoreOptions.STATE_BACKEND, "filesystem");
        config.setString(FsStateBackendFactory.CHECKPOINT_DIRECTORY_URI_CONF_KEY, checkpointDir.toURI().toString());
        config.setString(FsStateBackendFactory.MEMORY_THRESHOLD_CONF_KEY, "0");
        config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointDir.toURI().toString());
        TestingCluster cluster = new TestingCluster(config, false);
        String savepointPath = null;
        try {
            cluster.start();
            cluster.submitJobDetached(jobGraph);
            // Wait for every snapshot latch before taking the savepoint.
            for (OneShotLatch latch : ITER_TEST_SNAPSHOT_WAIT) {
                latch.await();
            }
            savepointPath = cluster.triggerSavepoint(jobGraph.getJobID());
            source.cancel();
            // Build a fresh job graph and resubmit it, restoring from the savepoint.
            jobGraph = streamGraph.getJobGraph();
            jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
            cluster.submitJobDetached(jobGraph);
            for (OneShotLatch latch : ITER_TEST_RESTORE_WAIT) {
                latch.await();
            }
            source.cancel();
        } finally {
            if (null != savepointPath) {
                cluster.disposeSavepoint(savepointPath);
            }
            cluster.stop();
            cluster.awaitTermination();
        }
    } finally {
        // Remove the temporary directory tree even if the test failed.
        folder.delete();
    }
}

Also used : Configuration(org.apache.flink.configuration.Configuration) KeySelector(org.apache.flink.api.java.functions.KeySelector) MapFunction(org.apache.flink.api.common.functions.MapFunction) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) ResponseSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ResponseSavepoint) RequestSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestSavepoint) DisposeSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.DisposeSavepoint) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) TemporaryFolder(org.junit.rules.TemporaryFolder) Collector(org.apache.flink.util.Collector) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) File(java.io.File) Test(org.junit.Test)

Example 40 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class UserCodeType method main.

/**
 * Submits a small batch job to a remote cluster: integers are wrapped into
 * {@code CustomType}, rebalanced, unwrapped again and discarded.
 *
 * @param args {@code args[0]} jar file, {@code args[1]} host, {@code args[2]} port
 */
public static void main(String[] args) throws Exception {
    // Arguments are read in positional order: jar file, host, port.
    final String jarPath = args[0];
    final String hostName = args[1];
    final int portNumber = Integer.parseInt(args[2]);

    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment(hostName, portNumber, jarPath);
    env.getConfig().disableSysoutLogging();

    // Wraps each integer into the user-defined type.
    MapFunction<Integer, CustomType> wrap = new MapFunction<Integer, CustomType>() {

        private static final long serialVersionUID = -5878758010124912128L;

        @Override
        public CustomType map(Integer integer) throws Exception {
            return new CustomType(integer);
        }
    };
    DataSet<Integer> numbers = env.fromElements(1, 2, 3, 4, 5);
    DataSet<CustomType> wrapped = numbers.map(wrap).rebalance();

    // Extracts the integer from the user-defined type again.
    MapFunction<CustomType, Integer> unwrap = new MapFunction<CustomType, Integer>() {

        private static final long serialVersionUID = -7950126399899584991L;

        @Override
        public Integer map(CustomType value) throws Exception {
            return value.value;
        }
    };
    DataSet<Integer> unwrapped = wrapped.map(unwrap);

    // The output is discarded.
    unwrapped.output(new DiscardingOutputFormat<Integer>());
    env.execute();
}

Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) MapFunction(org.apache.flink.api.common.functions.MapFunction)

Aggregations

MapFunction (org.apache.flink.api.common.functions.MapFunction): 48, Test (org.junit.Test): 31, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 29, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 19, StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 19, Configuration (org.apache.flink.configuration.Configuration): 10, FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction): 9, Plan (org.apache.flink.api.common.Plan): 8, RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction): 8, OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 8, RichFlatMapFunction (org.apache.flink.api.common.functions.RichFlatMapFunction): 7, JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 7, DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 6, Edge (org.apache.flink.graph.Edge): 6, SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 6, NullValue (org.apache.flink.types.NullValue): 6, FilterFunction (org.apache.flink.api.common.functions.FilterFunction): 5, FieldList (org.apache.flink.api.common.operators.util.FieldList): 5, DataSet (org.apache.flink.api.java.DataSet): 5, Tuple1 (org.apache.flink.api.java.tuple.Tuple1): 5