Example 1 with MapFunction

Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

From the class ConnectedComponentsTest, method getConnectedComponentsPlan:

private static Plan getConnectedComponentsPlan(int parallelism, int iterations, boolean solutionSetFirst) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    DataSet<Tuple2<Long, Long>> verticesWithId = env.generateSequence(0, 1000).name("Vertices").map(new MapFunction<Long, Tuple2<Long, Long>>() {

        @Override
        public Tuple2<Long, Long> map(Long value) {
            return new Tuple2<Long, Long>(value, value);
        }
    }).name("Assign Vertex Ids");
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithId.iterateDelta(verticesWithId, iterations, 0).name("Connected Components Iteration");
    @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(0L, 0L)).name("Edges");
    DataSet<Tuple2<Long, Long>> minCandidateId = iteration.getWorkset().join(edges).where(0).equalTo(0).projectSecond(1).<Tuple2<Long, Long>>projectFirst(1).name("Join Candidate Id With Neighbor").groupBy(0).min(1).name("Find Minimum Candidate Id");
    DataSet<Tuple2<Long, Long>> updateComponentId;
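    // Both branches compute the same update; the flag only switches which join
    // input is the solution set, so both plan variants get exercised.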
    if (solutionSetFirst) {
        updateComponentId = iteration.getSolutionSet().join(minCandidateId).where(0).equalTo(0).with(new FlatJoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

            @Override
            public void join(Tuple2<Long, Long> current, Tuple2<Long, Long> candidate, Collector<Tuple2<Long, Long>> out) {
                if (candidate.f1 < current.f1) {
                    out.collect(candidate);
                }
            }
        }).withForwardedFieldsFirst("0").withForwardedFieldsSecond("0").name("Update Component Id");
    } else {
        updateComponentId = minCandidateId.join(iteration.getSolutionSet()).where(0).equalTo(0).with(new FlatJoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

            @Override
            public void join(Tuple2<Long, Long> candidate, Tuple2<Long, Long> current, Collector<Tuple2<Long, Long>> out) {
                if (candidate.f1 < current.f1) {
                    out.collect(candidate);
                }
            }
        }).withForwardedFieldsFirst("0").withForwardedFieldsSecond("0").name("Update Component Id");
    }
    iteration.closeWith(updateComponentId, updateComponentId).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("Result");
    return env.createProgramPlan();
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) MapFunction(org.apache.flink.api.common.functions.MapFunction) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat)
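
As a side note, on Java 8+ the anonymous MapFunction above can usually be replaced by a lambda. A minimal sketch, assuming a returns() type hint is supplied (the generic parameters of Tuple2 are erased, so they cannot be inferred from the lambda itself):

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

private static DataSet<Tuple2<Long, Long>> assignVertexIds(ExecutionEnvironment env) {
    // Lambda standing in for MapFunction<Long, Tuple2<Long, Long>>; the returns()
    // hint restores the tuple type information lost to type erasure.
    return env.generateSequence(0, 1000)
            .map(value -> new Tuple2<>(value, value))
            .returns(Types.TUPLE(Types.LONG, Types.LONG))
            .name("Assign Vertex Ids");
}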

Example 2 with MapFunction

Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

From the class JobGraphGeneratorTest, method testResourcesForChainedOperators:

/**
 * Verifies that the resources are merged correctly for chained operators
 * when generating the job graph.
 */
@Test
public void testResourcesForChainedOperators() throws Exception {
    ResourceSpec resource1 = new ResourceSpec(0.1, 100);
    ResourceSpec resource2 = new ResourceSpec(0.2, 200);
    ResourceSpec resource3 = new ResourceSpec(0.3, 300);
    ResourceSpec resource4 = new ResourceSpec(0.4, 400);
    ResourceSpec resource5 = new ResourceSpec(0.5, 500);
    ResourceSpec resource6 = new ResourceSpec(0.6, 600);
    ResourceSpec resource7 = new ResourceSpec(0.7, 700);
    Method opMethod = Operator.class.getDeclaredMethod("setResources", ResourceSpec.class);
    opMethod.setAccessible(true);
    Method sinkMethod = DataSink.class.getDeclaredMethod("setResources", ResourceSpec.class);
    sinkMethod.setAccessible(true);
    MapFunction<Long, Long> mapFunction = new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return value;
        }
    };
    FilterFunction<Long> filterFunction = new FilterFunction<Long>() {

        @Override
        public boolean filter(Long value) throws Exception {
            return false;
        }
    };
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Long> input = env.fromElements(1L, 2L, 3L);
    opMethod.invoke(input, resource1);
    DataSet<Long> map1 = input.map(mapFunction);
    opMethod.invoke(map1, resource2);
    // CHAIN(Source -> Map -> Filter)
    DataSet<Long> filter1 = map1.filter(filterFunction);
    opMethod.invoke(filter1, resource3);
    IterativeDataSet<Long> startOfIteration = filter1.iterate(10);
    opMethod.invoke(startOfIteration, resource4);
    DataSet<Long> map2 = startOfIteration.map(mapFunction);
    opMethod.invoke(map2, resource5);
    // CHAIN(Map -> Filter)
    DataSet<Long> feedback = map2.filter(filterFunction);
    opMethod.invoke(feedback, resource6);
    DataSink<Long> sink = startOfIteration.closeWith(feedback).output(new DiscardingOutputFormat<Long>());
    sinkMethod.invoke(sink, resource7);
    Plan plan = env.createProgramPlan();
    Optimizer pc = new Optimizer(new Configuration());
    OptimizedPlan op = pc.compile(plan);
    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(op);
    JobVertex sourceMapFilterVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(0);
    JobVertex iterationHeadVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
    JobVertex feedbackVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(2);
    JobVertex sinkVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(3);
    JobVertex iterationSyncVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(4);
    assertTrue(sourceMapFilterVertex.getMinResources().equals(resource1.merge(resource2).merge(resource3)));
    assertTrue(iterationHeadVertex.getPreferredResources().equals(resource4));
    assertTrue(feedbackVertex.getMinResources().equals(resource5.merge(resource6)));
    assertTrue(sinkVertex.getPreferredResources().equals(resource7));
    assertTrue(iterationSyncVertex.getMinResources().equals(resource4));
}
Also used : FilterFunction(org.apache.flink.api.common.functions.FilterFunction) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) Optimizer(org.apache.flink.optimizer.Optimizer) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) Method(java.lang.reflect.Method) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Test(org.junit.Test)
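
The resource assertions above rely on ResourceSpec.merge combining the specs of all operators that end up chained into a single job vertex. A minimal sketch of the arithmetic being asserted, under the assumption that merge() sums each dimension (CPU cores and heap memory):

// Sketch only: assumes merge() adds up CPU cores and heap memory per dimension.
ResourceSpec source = new ResourceSpec(0.1, 100);
ResourceSpec map = new ResourceSpec(0.2, 200);
ResourceSpec filter = new ResourceSpec(0.3, 300);

// CHAIN(Source -> Map -> Filter) becomes one vertex, so its minimum resources
// should equal the merged spec: roughly 0.6 CPU cores and 600 MB of heap.
ResourceSpec chained = source.merge(map).merge(filter);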

Example 3 with MapFunction

Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

From the class PartitionOperatorTest, method testRangePartitionOperatorPreservesFields2:

@Test
public void testRangePartitionOperatorPreservesFields2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> data = env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));
        PartitionOperator<Tuple2<Long, Long>> rangePartitioned = data.partitionByRange(1);
        rangePartitioned.groupBy(1).reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        data.groupBy(0).aggregate(Aggregations.SUM, 1).map(new MapFunction<Tuple2<Long, Long>, Long>() {

            @Override
            public Long map(Tuple2<Long, Long> value) throws Exception {
                return value.f1;
            }
        }).output(new DiscardingOutputFormat<Long>());
        rangePartitioned.filter(new FilterFunction<Tuple2<Long, Long>>() {

            @Override
            public boolean filter(Tuple2<Long, Long> value) throws Exception {
                return value.f0 % 2 == 0;
            }
        }).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
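        // Walk the chosen plan upstream from the sink, one input at a time.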
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode partitionNode = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode partitionIDRemover = (SingleInputPlanNode) partitionNode.getInput().getSource();
        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionIDRemover.getInput().getShipStrategy());
        SourcePlanNode sourcePlanNode = op.getDataSources().iterator().next();
        List<Channel> sourceOutgoingChannels = sourcePlanNode.getOutgoingChannels();
        assertEquals(3, sourceOutgoingChannels.size());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(1).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(2).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(1).getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, sourceOutgoingChannels.get(2).getDataExchangeMode());
        List<Channel> partitionOutputChannels = partitionNode.getOutgoingChannels();
        assertEquals(2, partitionOutputChannels.size());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(1).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(1).getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Channel(org.apache.flink.optimizer.plan.Channel) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) IdentityGroupReducerCombinable(org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Test(org.junit.Test)
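
Note that compileNoStats is a helper inherited from the optimizer test base class and is not part of this snippet. A rough sketch of what it presumably does, assuming the plan is simply compiled without data statistics:

// Assumed helper; the real implementation lives in the shared test base class.
private OptimizedPlan compileNoStats(Plan p) {
    return new Optimizer(new Configuration()).compile(p);
}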

Example 4 with MapFunction

Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

From the class FastFailuresITCase, method testThis:

@Test
public void testThis() {
    Configuration config = new Configuration();
    config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 2);
    config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 2);
    LocalFlinkMiniCluster cluster = new LocalFlinkMiniCluster(config, false);
    cluster.start();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
    env.getConfig().disableSysoutLogging();
    env.setParallelism(4);
    env.enableCheckpointing(1000);
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(210, 0));
    DataStream<Tuple2<Integer, Integer>> input = env.addSource(new RichSourceFunction<Tuple2<Integer, Integer>>() {

        @Override
        public void open(Configuration parameters) {
            if (FAILURES_SO_FAR.incrementAndGet() <= NUM_FAILURES) {
                throw new RuntimeException("fail");
            }
        }

        @Override
        public void run(SourceContext<Tuple2<Integer, Integer>> ctx) {
        }

        @Override
        public void cancel() {
        }
    });
    input.keyBy(0).map(new MapFunction<Tuple2<Integer, Integer>, Integer>() {

        @Override
        public Integer map(Tuple2<Integer, Integer> value) {
            return value.f0;
        }
    }).addSink(new SinkFunction<Integer>() {

        @Override
        public void invoke(Integer value) {
        }
    });
    try {
        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) MapFunction(org.apache.flink.api.common.functions.MapFunction) LocalFlinkMiniCluster(org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)
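
The source function above references FAILURES_SO_FAR and NUM_FAILURES, static members of the enclosing test class that are not shown in this snippet. A plausible sketch (the concrete value of NUM_FAILURES is an assumption; it merely has to stay below the 210 restarts configured in the restart strategy):

// Assumed declarations; only the AtomicInteger import is confirmed above.
private static final AtomicInteger FAILURES_SO_FAR = new AtomicInteger();
// Assumption: any value below the 210 configured restart attempts works here.
private static final int NUM_FAILURES = 200;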

Example 5 with MapFunction

Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

From the class StreamingOperatorsITCase, method testGroupedFoldOperation:

/**
 * Tests the proper functioning of the streaming fold operator. For this purpose, a stream
 * of Tuple2<Integer, Integer> is created. The stream is grouped by the first tuple
 * field, and each group is folded by summing up the second tuple field.
 *
 * This test relies on the hash function used by {@link DataStream#keyBy}, which is
 * assumed to be {@link MathUtils#murmurHash}.
 */
@Test
public void testGroupedFoldOperation() throws Exception {
    int numElements = 10;
    final int numKeys = 2;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple2<Integer, Integer>> sourceStream = env.addSource(new TupleSource(numElements, numKeys));
    SplitStream<Tuple2<Integer, Integer>> splittedResult = sourceStream.keyBy(0).fold(0, new FoldFunction<Tuple2<Integer, Integer>, Integer>() {

        private static final long serialVersionUID = 4875723041825726082L;

        @Override
        public Integer fold(Integer accumulator, Tuple2<Integer, Integer> value) throws Exception {
            return accumulator + value.f1;
        }
    }).map(new RichMapFunction<Integer, Tuple2<Integer, Integer>>() {

        private static final long serialVersionUID = 8538355101606319744L;

        int key = -1;

        @Override
        public Tuple2<Integer, Integer> map(Integer value) throws Exception {
            if (key == -1) {
                key = MathUtils.murmurHash(value) % numKeys;
            }
            return new Tuple2<>(key, value);
        }
    }).split(new OutputSelector<Tuple2<Integer, Integer>>() {

        private static final long serialVersionUID = -8439325199163362470L;

        @Override
        public Iterable<String> select(Tuple2<Integer, Integer> value) {
            List<String> output = new ArrayList<>();
            output.add(value.f0 + "");
            return output;
        }
    });
    final MemorySinkFunction sinkFunction1 = new MemorySinkFunction(0);
    final List<Integer> actualResult1 = new ArrayList<>();
    MemorySinkFunction.registerCollection(0, actualResult1);
    splittedResult.select("0").map(new MapFunction<Tuple2<Integer, Integer>, Integer>() {

        private static final long serialVersionUID = 2114608668010092995L;

        @Override
        public Integer map(Tuple2<Integer, Integer> value) throws Exception {
            return value.f1;
        }
    }).addSink(sinkFunction1);
    final MemorySinkFunction sinkFunction2 = new MemorySinkFunction(1);
    final List<Integer> actualResult2 = new ArrayList<>();
    MemorySinkFunction.registerCollection(1, actualResult2);
    splittedResult.select("1").map(new MapFunction<Tuple2<Integer, Integer>, Integer>() {

        private static final long serialVersionUID = 5631104389744681308L;

        @Override
        public Integer map(Tuple2<Integer, Integer> value) throws Exception {
            return value.f1;
        }
    }).addSink(sinkFunction2);
    Collection<Integer> expected1 = new ArrayList<>(10);
    Collection<Integer> expected2 = new ArrayList<>(10);
    int counter1 = 0;
    int counter2 = 0;
    for (int i = 0; i < numElements; i++) {
        if (MathUtils.murmurHash(i) % numKeys == 0) {
            counter1 += i;
            expected1.add(counter1);
        } else {
            counter2 += i;
            expected2.add(counter2);
        }
    }
    env.execute();
    Collections.sort(actualResult1);
    Collections.sort(actualResult2);
    Assert.assertEquals(expected1, actualResult1);
    Assert.assertEquals(expected2, actualResult2);
    MemorySinkFunction.clear();
}
Also used : MapFunction(org.apache.flink.api.common.functions.MapFunction) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
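
MemorySinkFunction is a test helper that is not part of this snippet. A minimal sketch, assuming it appends every record to a statically registered collection selected by the id passed to its constructor (field names and synchronization details are guesses):

import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

private static class MemorySinkFunction implements SinkFunction<Integer> {

    // One registry per JVM, keyed by the id handed to the constructor.
    private static final Map<Integer, Collection<Integer>> COLLECTIONS = new ConcurrentHashMap<>();

    private final int key;

    MemorySinkFunction(int key) {
        this.key = key;
    }

    static void registerCollection(int key, Collection<Integer> collection) {
        COLLECTIONS.put(key, collection);
    }

    static void clear() {
        COLLECTIONS.clear();
    }

    @Override
    public void invoke(Integer value) throws Exception {
        Collection<Integer> collection = COLLECTIONS.get(key);
        // Parallel sink instances may call invoke concurrently.
        synchronized (collection) {
            collection.add(value);
        }
    }
}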

Aggregations

MapFunction (org.apache.flink.api.common.functions.MapFunction): 48
Test (org.junit.Test): 31
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 29
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 19
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 19
Configuration (org.apache.flink.configuration.Configuration): 10
FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction): 9
Plan (org.apache.flink.api.common.Plan): 8
RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction): 8
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 8
RichFlatMapFunction (org.apache.flink.api.common.functions.RichFlatMapFunction): 7
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 7
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 6
Edge (org.apache.flink.graph.Edge): 6
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 6
NullValue (org.apache.flink.types.NullValue): 6
FilterFunction (org.apache.flink.api.common.functions.FilterFunction): 5
FieldList (org.apache.flink.api.common.operators.util.FieldList): 5
DataSet (org.apache.flink.api.java.DataSet): 5
Tuple1 (org.apache.flink.api.java.tuple.Tuple1): 5