Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
In the class SolutionSetDuplicatesITCase, method testProgram.
@Test
public void testProgram() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> data = env.generateSequence(0, 10).flatMap(new FlatMapFunction<Long, Tuple2<Long, Long>>() {

            @Override
            public void flatMap(Long value, Collector<Tuple2<Long, Long>> out) {
                out.collect(new Tuple2<Long, Long>(value, value));
                out.collect(new Tuple2<Long, Long>(value, value));
                out.collect(new Tuple2<Long, Long>(value, value));
            }
        }).rebalance();

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = data.iterateDelta(data, 10, 0);

        List<Integer> result = iter.closeWith(iter.getWorkset(), iter.getWorkset()).map(new MapFunction<Tuple2<Long, Long>, Integer>() {

            @Override
            public Integer map(Tuple2<Long, Long> value) {
                return value.f0.intValue();
            }
        }).collect();

        assertEquals(11, result.size());

        Collections.sort(result);
        assertEquals(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), result);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
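For comparison, the same projection of the tuple's first field can also be written as a Java 8 lambda. The sketch below is not part of the test above; it reuses the iter variable from the snippet and assumes the returns(...) type hint on Flink's DataSet operators to compensate for the type information lost to lambda erasure.

// Sketch only (not from the Flink test): lambda equivalent of the anonymous MapFunction above.
DataSet<Integer> componentIds = iter.closeWith(iter.getWorkset(), iter.getWorkset())
        .map(value -> value.f0.intValue())
        .returns(Integer.class);
List<Integer> collected = componentIds.collect();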
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
In the class ConnectedComponentsWithDeferredUpdateITCase, method testProgram.
@Override
protected void testProgram() throws Exception {
    boolean extraMapper = config.getBoolean("ExtraMapper", false);
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class).flatMap(new ConnectedComponents.UndirectEdge());
    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());
    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin()).groupBy(0).aggregate(Aggregations.MIN, 1).join(iteration.getSolutionSet()).where(0).equalTo(0).with(new UpdateComponentIdMatchNonPreserving());
    DataSet<Tuple2<Long, Long>> delta;
    if (extraMapper) {
        delta = changes.map(
                // ID Mapper
                new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {

                    @Override
                    public Tuple2<Long, Long> map(Tuple2<Long, Long> v) throws Exception {
                        return v;
                    }
                });
    } else {
        delta = changes;
    }
    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, changes);
    result.writeAsCsv(resultPath, "\n", " ");
    // execute program
    env.execute("Connected Components Example");
}
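The in-line "ID Mapper" forwards every record unchanged; it is presumably there only to exercise a plan variant in which the delta is produced by an additional operator. If reused, it could be factored into a named class. A minimal sketch, with a hypothetical name that does not appear in the Flink sources:

// Hypothetical helper, shown for illustration only: forwards every record unchanged.
public static class IdentityMapper<T> implements MapFunction<T, T> {

    @Override
    public T map(T value) {
        return value;
    }
}

With such a class, the branch above reduces to delta = extraMapper ? changes.map(new IdentityMapper<Tuple2<Long, Long>>()) : changes;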
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
In the class RollingSinkITCase, method testNonRollingStringWriter.
/**
* This tests {@link StringWriter} with
* non-rolling output.
*/
@Test
public void testNonRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/string-non-rolling-out";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    DataStream<Tuple2<Integer, String>> source = env.addSource(new TestSourceFunction(NUM_ELEMENTS)).broadcast().filter(new OddEvenFilter());
    RollingSink<String> sink = new RollingSink<String>(outPath).setBucketer(new NonRollingBucketer()).setPartPrefix("part").setPendingPrefix("").setPendingSuffix("");
    source.map(new MapFunction<Tuple2<Integer, String>, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Tuple2<Integer, String> value) throws Exception {
            return value.f1;
        }
    }).addSink(sink);
    env.execute("RollingSink String Write Test");
    FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));
    BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
    }
    inStream.close();
    inStream = dfs.open(new Path(outPath + "/part-1-0"));
    br = new BufferedReader(new InputStreamReader(inStream));
    for (int i = 1; i < NUM_ELEMENTS; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
    }
    inStream.close();
}
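The Tuple2-to-String projection above reappears verbatim in the next test. As a sketch, using the same imports as the test and a hypothetical class name that is not part of the Flink sources, it could be factored into one reusable, serializable MapFunction:

// Hypothetical reusable mapper: extracts the String payload from the (id, message) tuple.
public static class MessageExtractor implements MapFunction<Tuple2<Integer, String>, String> {

    private static final long serialVersionUID = 1L;

    @Override
    public String map(Tuple2<Integer, String> value) {
        return value.f1;
    }
}

With it, source.map(new MessageExtractor()).addSink(sink); behaves exactly like the anonymous class above.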
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
In the class RollingSinkITCase, method testUserDefinedConfiguration.
/**
 * This tests a user-defined HDFS configuration.
 *
 * @throws Exception
 */
@Test
public void testUserDefinedConfiguration() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/string-non-rolling-with-config";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    DataStream<Tuple2<Integer, String>> source = env.addSource(new TestSourceFunction(NUM_ELEMENTS)).broadcast().filter(new OddEvenFilter());
    Configuration conf = new Configuration();
    conf.set("io.file.buffer.size", "40960");
    RollingSink<String> sink = new RollingSink<String>(outPath).setFSConfig(conf).setWriter(new StreamWriterWithConfigCheck<String>("io.file.buffer.size", "40960")).setBucketer(new NonRollingBucketer()).setPartPrefix("part").setPendingPrefix("").setPendingSuffix("");
    source.map(new MapFunction<Tuple2<Integer, String>, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Tuple2<Integer, String> value) throws Exception {
            return value.f1;
        }
    }).addSink(sink);
    env.execute("RollingSink with configuration Test");
    FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));
    BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
    }
    inStream.close();
    inStream = dfs.open(new Path(outPath + "/part-1-0"));
    br = new BufferedReader(new InputStreamReader(inStream));
    for (int i = 1; i < NUM_ELEMENTS; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
    }
    inStream.close();
}
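The verification loops in both RollingSink tests open the HDFS part files and close them only on the success path, so a failing assertion would leak the stream. A minimal sketch of the same check with try-with-resources, assuming the surrounding test fields dfs, outPath and NUM_ELEMENTS:

// Sketch only: close the HDFS stream even when an assertion fails.
try (FSDataInputStream in = dfs.open(new Path(outPath + "/part-0-0"));
        BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
        Assert.assertEquals("message #" + i, reader.readLine());
    }
}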
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
In the class KafkaConsumerTestBase, method runStartFromKafkaCommitOffsets.
/**
 * This test first writes a total of 300 records to a test topic, reads the first 150 so that some offsets are
 * committed to Kafka, and then starts the consumer again to read the remaining records starting from the
 * committed offsets. The test ensures that whatever offsets were committed to Kafka, the consumer correctly
 * picks them up and starts at the correct position.
 */
public void runStartFromKafkaCommitOffsets() throws Exception {
    final int parallelism = 3;
    final int recordsInEachPartition = 300;
    final String topicName = writeSequence("testStartFromKafkaCommitOffsetsTopic", recordsInEachPartition, parallelism, 1);
    KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();
    Long o1;
    Long o2;
    Long o3;
    int attempt = 0;
    // make sure that o1, o2, o3 are not all null before proceeding
    do {
        attempt++;
        LOG.info("Attempt " + attempt + " to read records and commit some offsets to Kafka");
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
        env.getConfig().disableSysoutLogging();
        env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
        env.setParallelism(parallelism);
        // fast checkpoints to make sure we commit some offsets
        env.enableCheckpointing(20);
        env.addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), standardProps)).map(new ThrottledMapper<String>(50)).map(new MapFunction<String, Object>() {

            int count = 0;

            @Override
            public Object map(String value) throws Exception {
                count++;
                if (count == 150) {
                    throw new SuccessException();
                }
                return null;
            }
        }).addSink(new DiscardingSink<>());
        tryExecute(env, "Read some records to commit offsets to Kafka");
        o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
        o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
        o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
    } while (o1 == null && o2 == null && o3 == null && attempt < 3);
    if (o1 == null && o2 == null && o3 == null) {
        throw new RuntimeException("No offsets have been committed after 3 attempts");
    }
    LOG.info("Got final committed offsets from Kafka o1={}, o2={}, o3={}", o1, o2, o3);
    final StreamExecutionEnvironment env2 = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env2.getConfig().disableSysoutLogging();
    env2.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env2.setParallelism(parallelism);
    // whatever offsets were committed for each partition, the consumer should pick
    // them up and start from the correct position so that the remaining records are all read
    HashMap<Integer, Tuple2<Integer, Integer>> partitionsToValuesCountAndStartOffset = new HashMap<>();
    partitionsToValuesCountAndStartOffset.put(0, new Tuple2<>((o1 != null) ? (int) (recordsInEachPartition - o1) : recordsInEachPartition, (o1 != null) ? o1.intValue() : 0));
    partitionsToValuesCountAndStartOffset.put(1, new Tuple2<>((o2 != null) ? (int) (recordsInEachPartition - o2) : recordsInEachPartition, (o2 != null) ? o2.intValue() : 0));
    partitionsToValuesCountAndStartOffset.put(2, new Tuple2<>((o3 != null) ? (int) (recordsInEachPartition - o3) : recordsInEachPartition, (o3 != null) ? o3.intValue() : 0));
    readSequence(env2, StartupMode.GROUP_OFFSETS, null, standardProps, topicName, partitionsToValuesCountAndStartOffset);
    kafkaOffsetHandler.close();
    deleteTestTopic(topicName);
}
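The anonymous counting mapper above keeps per-subtask state in a plain instance field: each parallel subtask counts only the records it sees and throws SuccessException, which the test harness interprets as a successful run, once its own count reaches 150. A hypothetical named equivalent (illustration only, not part of the Flink sources), using the same imports as the test:

// Hypothetical named version of the in-line counting mapper above.
private static class SignalAfter150Mapper implements MapFunction<String, Object> {

    private static final long serialVersionUID = 1L;

    private int count = 0;

    @Override
    public Object map(String value) throws Exception {
        if (++count == 150) {
            // stop the job once this subtask has read 150 records
            throw new SuccessException();
        }
        return null;
    }
}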