Search in sources :

Example 31 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class MapOperatorTest method testMapWithRuntimeContext.

/**
 * Runs a {@link RichMapFunction} through {@code MapOperatorBase.executeOnCollections} twice,
 * first with object reuse disabled and then with it enabled.
 *
 * <p>Verifies that both runs parse the input strings into the expected integers, that the
 * runtime context seen in {@code open()} reports the configured task name, subtask index 0
 * and parallelism 1, and that {@code open()} and {@code close()} were both invoked.
 */
@Test
public void testMapWithRuntimeContext() {
    try {
        final String taskName = "Test Task";
        final AtomicBoolean opened = new AtomicBoolean();
        final AtomicBoolean closed = new AtomicBoolean();

        final MapFunction<String, Integer> parser = new RichMapFunction<String, Integer>() {

            @Override
            public void open(Configuration parameters) throws Exception {
                opened.set(true);
                RuntimeContext ctx = getRuntimeContext();
                assertEquals(0, ctx.getIndexOfThisSubtask());
                assertEquals(1, ctx.getNumberOfParallelSubtasks());
                assertEquals(taskName, ctx.getTaskName());
            }

            @Override
            public Integer map(String value) {
                return Integer.parseInt(value);
            }

            @Override
            public void close() throws Exception {
                closed.set(true);
            }
        };

        MapOperatorBase<String, Integer, MapFunction<String, Integer>> op =
                new MapOperatorBase<String, Integer, MapFunction<String, Integer>>(
                        parser,
                        new UnaryOperatorInformation<String, Integer>(
                                BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
                        taskName);

        List<String> input = new ArrayList<String>(asList("1", "2", "3", "4", "5", "6"));
        final HashMap<String, Accumulator<?, ?>> accumulatorMap = new HashMap<String, Accumulator<?, ?>>();
        final HashMap<String, Future<Path>> cpTasks = new HashMap<>();
        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
        ExecutionConfig executionConfig = new ExecutionConfig();

        // First run: object reuse disabled.
        executionConfig.disableObjectReuse();
        List<Integer> resultMutableSafe = op.executeOnCollections(
                input,
                new RuntimeUDFContext(taskInfo, null, executionConfig, cpTasks, accumulatorMap, new UnregisteredMetricsGroup()),
                executionConfig);

        // Second run: same operator and input, object reuse enabled.
        executionConfig.enableObjectReuse();
        List<Integer> resultRegular = op.executeOnCollections(
                input,
                new RuntimeUDFContext(taskInfo, null, executionConfig, cpTasks, accumulatorMap, new UnregisteredMetricsGroup()),
                executionConfig);

        assertEquals(asList(1, 2, 3, 4, 5, 6), resultMutableSafe);
        assertEquals(asList(1, 2, 3, 4, 5, 6), resultRegular);
        assertTrue(opened.get());
        assertTrue(closed.get());
    } catch (Exception e) {
        // Attach the original exception as the cause so the test report shows its type and
        // stack trace; a bare fail(e.getMessage()) loses both and may carry a null message.
        throw new AssertionError("Unexpected exception in testMapWithRuntimeContext: " + e, e);
    }
}
Also used : Accumulator(org.apache.flink.api.common.accumulators.Accumulator) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) MapFunction(org.apache.flink.api.common.functions.MapFunction) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) TaskInfo(org.apache.flink.api.common.TaskInfo) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) RuntimeUDFContext(org.apache.flink.api.common.functions.util.RuntimeUDFContext) Future(java.util.concurrent.Future) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) Test(org.junit.Test)

Example 32 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class MusicProfiles method main.

/**
 * Entry point of the music-profiles example.
 *
 * <p>Builds a user-song graph from the play-count triplets (after removing mismatched songs),
 * outputs each user's top track, derives a user-user similarity graph from commonly played
 * songs, and runs label propagation on it to output a community label per user.
 *
 * @param args command-line arguments, interpreted by {@code parseParameters}
 * @throws Exception if building or executing the program fails
 */
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Read the user-song-play triplets.
    DataSet<Tuple3<String, String, Integer>> triplets = getUserSongTripletsData(env);

    // Read the mismatches dataset and extract the song IDs.
    DataSet<Tuple1<String>> mismatches = getMismatchesData(env).map(new ExtractMismatchSongIds());

    // Filter the mismatches out of the triplets dataset.
    DataSet<Tuple3<String, String, Integer>> validTriplets = triplets
            .coGroup(mismatches)
            .where(1)
            .equalTo(0)
            .with(new FilterOutMismatches());

    // Create a user -> song weighted bipartite graph whose edge weights are the play counts.
    Graph<String, NullValue, Integer> userSongGraph = Graph.fromTupleDataSet(validTriplets, env);

    // Get the top (most listened) track for each user.
    DataSet<Tuple2<String, String>> usersWithTopTrack = userSongGraph
            .groupReduceOnEdges(new GetTopSongPerUser(), EdgeDirection.OUT)
            .filter(new FilterSongNodes());

    if (!fileOutput) {
        usersWithTopTrack.print();
    } else {
        usersWithTopTrack.writeAsCsv(topTracksOutputPath, "\n", "\t");
    }

    // Create a user-user similarity graph based on common songs: two users that listen to
    // the same song are connected. For each song, an edge is created between each pair of
    // its in-neighbors.
    FilterFunction<Edge<String, Integer>> abovePlaycountThreshold = new FilterFunction<Edge<String, Integer>>() {

        @Override
        public boolean filter(Edge<String, Integer> edge) {
            return (edge.getValue() > playcountThreshold);
        }
    };
    DataSet<Edge<String, Integer>> frequentlyPlayedEdges = userSongGraph.getEdges().filter(abovePlaycountThreshold);
    DataSet<Edge<String, NullValue>> similarUsers = frequentlyPlayedEdges
            .groupBy(1)
            .reduceGroup(new CreateSimilarUserEdges())
            .distinct();

    // Every vertex of the similarity graph starts with the same value.
    MapFunction<String, Long> constantVertexValue = new MapFunction<String, Long>() {

        @Override
        public Long map(String value) {
            return 1L;
        }
    };
    Graph<String, Long, NullValue> similarUsersGraph = Graph
            .fromDataSet(similarUsers, constantVertexValue, env)
            .getUndirected();

    // Detect user communities using the label propagation library method:
    // initialize each vertex with a unique numeric label and run the algorithm.
    MapFunction<Tuple2<Long, String>, Tuple2<String, Long>> toVertexIdAndLabel =
            new MapFunction<Tuple2<Long, String>, Tuple2<String, Long>>() {

        @Override
        public Tuple2<String, Long> map(Tuple2<Long, String> tuple2) throws Exception {
            return new Tuple2<String, Long>(tuple2.f1, tuple2.f0);
        }
    };
    DataSet<Tuple2<String, Long>> idsWithInitialLabels = DataSetUtils
            .zipWithUniqueId(similarUsersGraph.getVertexIds())
            .map(toVertexIdAndLabel);

    // Replace each vertex value with its freshly assigned label.
    VertexJoinFunction<Long, Long> takeAssignedLabel = new VertexJoinFunction<Long, Long>() {

        @Override
        public Long vertexJoin(Long vertexValue, Long inputValue) {
            return inputValue;
        }
    };
    Graph<String, Long, NullValue> labeledGraph = similarUsersGraph.joinWithVertices(idsWithInitialLabels, takeAssignedLabel);
    DataSet<Vertex<String, Long>> verticesWithCommunity = labeledGraph.run(new LabelPropagation<String, Long, NullValue>(maxIterations));

    if (!fileOutput) {
        verticesWithCommunity.print();
    } else {
        verticesWithCommunity.writeAsCsv(communitiesOutputPath, "\n", "\t");
        // since file sinks are lazy, we trigger the execution explicitly
        env.execute();
    }
}
Also used : VertexJoinFunction(org.apache.flink.graph.VertexJoinFunction) Vertex(org.apache.flink.graph.Vertex) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) NullValue(org.apache.flink.types.NullValue) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Edge(org.apache.flink.graph.Edge)

Example 33 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class DataStreamTest method testUnion.

/**
 * Tests union functionality. This ensures that self-unions and unions of streams
 * with differing parallelism or partitioning work, by inspecting the partitioners
 * on the input edges of the resulting stream graph nodes.
 *
 * @throws Exception if building the stream graph fails
 */
@Test
public void testUnion() throws Exception {
    // Mapper without any logic; the job is never executed, only its stream graph is inspected.
    class NoOpMap implements MapFunction<Long, Long> {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    // self union
    DataStream<Long> input1 = env.generateSequence(0, 0).map(new NoOpMap());
    DataStream<Long> selfUnion = input1.union(input1).map(new NoOpMap());

    // self union where one side is broadcast
    DataStream<Long> input6 = env.generateSequence(0, 0).map(new NoOpMap());
    DataStream<Long> selfUnionDifferentPartition = input6.broadcast().union(input6).map(new NoOpMap());

    // union of inputs with parallelism 4 and 2 into an operator with parallelism 4
    DataStream<Long> input2 = env.generateSequence(0, 0).map(new NoOpMap()).setParallelism(4);
    DataStream<Long> input3 = env.generateSequence(0, 0).map(new NoOpMap()).setParallelism(2);
    DataStream<Long> unionDifferingParallelism = input2.union(input3).map(new NoOpMap()).setParallelism(4);

    // union of a broadcast input with a non-broadcast input
    DataStream<Long> input4 = env.generateSequence(0, 0).map(new NoOpMap()).setParallelism(2);
    DataStream<Long> input5 = env.generateSequence(0, 0).map(new NoOpMap()).setParallelism(4);
    DataStream<Long> unionDifferingPartitioning = input4.broadcast().union(input5).map(new NoOpMap()).setParallelism(4);

    StreamGraph streamGraph = env.getStreamGraph();

    // verify self union
    assertEquals(2, streamGraph.getStreamNode(selfUnion.getId()).getInEdges().size());
    for (StreamEdge edge : streamGraph.getStreamNode(selfUnion.getId()).getInEdges()) {
        assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
    }

    // verify self union with different partitioners
    assertEquals(2, streamGraph.getStreamNode(selfUnionDifferentPartition.getId()).getInEdges().size());
    boolean hasForward = false;
    boolean hasBroadcast = false;
    for (StreamEdge edge : streamGraph.getStreamNode(selfUnionDifferentPartition.getId()).getInEdges()) {
        if (edge.getPartitioner() instanceof ForwardPartitioner) {
            hasForward = true;
        }
        if (edge.getPartitioner() instanceof BroadcastPartitioner) {
            hasBroadcast = true;
        }
    }
    // Asserted separately so a failure tells which partitioner is missing.
    assertTrue("Expected an input edge with a ForwardPartitioner.", hasForward);
    assertTrue("Expected an input edge with a BroadcastPartitioner.", hasBroadcast);

    // verify union of streams with differing parallelism
    assertEquals(2, streamGraph.getStreamNode(unionDifferingParallelism.getId()).getInEdges().size());
    for (StreamEdge edge : streamGraph.getStreamNode(unionDifferingParallelism.getId()).getInEdges()) {
        if (edge.getSourceId() == input2.getId()) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        } else if (edge.getSourceId() == input3.getId()) {
            assertTrue(edge.getPartitioner() instanceof RebalancePartitioner);
        } else {
            fail("Wrong input edge.");
        }
    }

    // verify union of streams with differing partitionings
    assertEquals(2, streamGraph.getStreamNode(unionDifferingPartitioning.getId()).getInEdges().size());
    for (StreamEdge edge : streamGraph.getStreamNode(unionDifferingPartitioning.getId()).getInEdges()) {
        if (edge.getSourceId() == input4.getId()) {
            assertTrue(edge.getPartitioner() instanceof BroadcastPartitioner);
        } else if (edge.getSourceId() == input5.getId()) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        } else {
            fail("Wrong input edge.");
        }
    }
}
Also used : RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) CoFlatMapFunction(org.apache.flink.streaming.api.functions.co.CoFlatMapFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ExpectedException(org.junit.rules.ExpectedException) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) Test(org.junit.Test)

Example 34 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class DataStreamTest method testParallelism.

/**
 * Tests whether parallelism gets set.
 *
 * <p>Checks the parallelism recorded in the stream graph for a source, a map, a windowed
 * fold and a sink, first after the environment parallelism is set to 10, then after it is
 * changed to 7, and finally after parallelism is set explicitly on individual operators.
 */
@Test
public void testParallelism() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // The source is created before the environment parallelism is changed below.
    DataStreamSource<Tuple2<Long, Long>> src = env.fromElements(new Tuple2<>(0L, 0L));
    env.setParallelism(10);
    SingleOutputStreamOperator<Long> map = src.map(new MapFunction<Tuple2<Long, Long>, Long>() {

        @Override
        public Long map(Tuple2<Long, Long> value) throws Exception {
            return null;
        }
    }).name("MyMap");
    DataStream<Long> windowed = map.windowAll(GlobalWindows.create()).trigger(PurgingTrigger.of(CountTrigger.of(10))).fold(0L, new FoldFunction<Long, Long>() {

        @Override
        public Long fold(Long accumulator, Long value) throws Exception {
            return null;
        }
    });
    windowed.addSink(new DiscardingSink<Long>());
    DataStreamSink<Long> sink = map.addSink(new SinkFunction<Long>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void invoke(Long value) throws Exception {
        }
    });
    // Expected: source and windowAll fold report 1; map and sink report the environment's 10.
    assertEquals(1, env.getStreamGraph().getStreamNode(src.getId()).getParallelism());
    assertEquals(10, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
    assertEquals(1, env.getStreamGraph().getStreamNode(windowed.getId()).getParallelism());
    assertEquals(10, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());
    env.setParallelism(7);
    // Some parts, such as windowing rely on the fact that previous operators have a parallelism
    // set when instantiating the Discretizer. This would break if we dynamically changed
    // the parallelism of operations when changing the setting on the Execution Environment.
    // Hence the same values as above are expected for the already-created operators.
    assertEquals(1, env.getStreamGraph().getStreamNode(src.getId()).getParallelism());
    assertEquals(10, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
    assertEquals(1, env.getStreamGraph().getStreamNode(windowed.getId()).getParallelism());
    assertEquals(10, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());
    // Setting a parallelism of 3 on the fromElements source must be rejected.
    try {
        src.setParallelism(3);
        fail();
    } catch (IllegalArgumentException success) {
        // expected: the IllegalArgumentException is the passing outcome, nothing to do
    }
    // A source created now picks up the current environment parallelism (7) ...
    DataStreamSource<Long> parallelSource = env.generateSequence(0, 0);
    parallelSource.addSink(new DiscardingSink<Long>());
    assertEquals(7, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());
    // ... and explicit per-operator settings are reflected in the stream graph.
    parallelSource.setParallelism(3);
    assertEquals(3, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());
    map.setParallelism(2);
    assertEquals(2, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
    sink.setParallelism(4);
    assertEquals(4, env.getStreamGraph().getStreamNode(sink.getTransformation().getId()).getParallelism());
}
Also used : CoFlatMapFunction(org.apache.flink.streaming.api.functions.co.CoFlatMapFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ExpectedException(org.junit.rules.ExpectedException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 35 with MapFunction

use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.

the class DataStreamTest method testNaming.

/**
 * Tests {@link SingleOutputStreamOperator#name(String)} functionality.
 *
 * <p>Every operator in the topology is given a distinct name, and each name must show up
 * in the execution plan produced by the environment.
 *
 * @throws Exception if generating the execution plan fails
 */
@Test
public void testNaming() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // The two maps get distinct names so that the plan assertions below can tell them
    // apart; with a shared name a single occurrence would satisfy both checks.
    DataStream<Long> dataStream1 = env.generateSequence(0, 0).name("testSource1").map(new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).name("testMap1");
    DataStream<Long> dataStream2 = env.generateSequence(0, 0).name("testSource2").map(new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).name("testMap2");

    dataStream1.connect(dataStream2).flatMap(new CoFlatMapFunction<Long, Long, Long>() {

        @Override
        public void flatMap1(Long value, Collector<Long> out) throws Exception {
        }

        @Override
        public void flatMap2(Long value, Collector<Long> out) throws Exception {
        }
    }).name("testCoFlatMap").windowAll(GlobalWindows.create()).trigger(PurgingTrigger.of(CountTrigger.of(10))).fold(0L, new FoldFunction<Long, Long>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Long fold(Long accumulator, Long value) throws Exception {
            return null;
        }
    }).name("testWindowFold").print();

    // test functionality through the operator names in the execution plan
    String plan = env.getExecutionPlan();
    assertTrue(plan.contains("testSource1"));
    assertTrue(plan.contains("testSource2"));
    assertTrue(plan.contains("testMap1"));
    assertTrue(plan.contains("testMap2"));
    assertTrue(plan.contains("testCoFlatMap"));
    assertTrue(plan.contains("testWindowFold"));
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) CoFlatMapFunction(org.apache.flink.streaming.api.functions.co.CoFlatMapFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ExpectedException(org.junit.rules.ExpectedException) Test(org.junit.Test)

Aggregations

MapFunction (org.apache.flink.api.common.functions.MapFunction)48 Test (org.junit.Test)31 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)29 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)19 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)19 Configuration (org.apache.flink.configuration.Configuration)10 FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction)9 Plan (org.apache.flink.api.common.Plan)8 RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction)8 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)8 RichFlatMapFunction (org.apache.flink.api.common.functions.RichFlatMapFunction)7 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)7 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)6 Edge (org.apache.flink.graph.Edge)6 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)6 NullValue (org.apache.flink.types.NullValue)6 FilterFunction (org.apache.flink.api.common.functions.FilterFunction)5 FieldList (org.apache.flink.api.common.operators.util.FieldList)5 DataSet (org.apache.flink.api.java.DataSet)5 Tuple1 (org.apache.flink.api.java.tuple.Tuple1)5