Example 1 with RichMapFunction

use of org.apache.flink.api.common.functions.RichMapFunction in project flink by apache.

the class KafkaProducerTestBase method runCustomPartitioningTest.

/**
	 * 
	 * <pre>
	 *             +------> (sink) --+--> [KAFKA-1] --> (source) -> (map) --+
	 *            /                  |                                       \
	 *           /                   |                                        \
	 * (source) ----------> (sink) --+--> [KAFKA-2] --> (source) -> (map) -----+-> (sink)
	 *           \                   |                                        /
	 *            \                  |                                       /
	 *             +------> (sink) --+--> [KAFKA-3] --> (source) -> (map) --+
	 * </pre>
	 * 
	 * The mapper validates that the values come consistently from the correct Kafka partition.
	 * 
	 * The final sink validates that there are no duplicates and that all partitions are present.
	 */
public void runCustomPartitioningTest() {
    try {
        LOG.info("Starting KafkaProducerITCase.testCustomPartitioning()");
        final String topic = "customPartitioningTestTopic";
        final int parallelism = 3;
        createTestTopic(topic, parallelism, 1);
        TypeInformation<Tuple2<Long, String>> longStringInfo = TypeInfoParser.parse("Tuple2<Long, String>");
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
        env.setRestartStrategy(RestartStrategies.noRestart());
        env.getConfig().disableSysoutLogging();
        TypeInformationSerializationSchema<Tuple2<Long, String>> serSchema = new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());
        TypeInformationSerializationSchema<Tuple2<Long, String>> deserSchema = new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());
        // ------ producing topology ---------
        // source has DOP 1 to make sure it generates no duplicates
        DataStream<Tuple2<Long, String>> stream = env.addSource(new SourceFunction<Tuple2<Long, String>>() {

            private boolean running = true;

            @Override
            public void run(SourceContext<Tuple2<Long, String>> ctx) throws Exception {
                long cnt = 0;
                while (running) {
                    ctx.collect(new Tuple2<Long, String>(cnt, "kafka-" + cnt));
                    cnt++;
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        }).setParallelism(1);
        Properties props = new Properties();
        props.putAll(FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings));
        props.putAll(secureProps);
        // sink: spread the stream across the topic's partitions using the custom partitioner
        kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(serSchema), props, new CustomPartitioner(parallelism)).setParallelism(parallelism);
        // ------ consuming topology ---------
        Properties consumerProps = new Properties();
        consumerProps.putAll(standardProps);
        consumerProps.putAll(secureProps);
        FlinkKafkaConsumerBase<Tuple2<Long, String>> source = kafkaServer.getConsumer(topic, deserSchema, consumerProps);
        env.addSource(source).setParallelism(parallelism).map(new RichMapFunction<Tuple2<Long, String>, Integer>() {

            private int ourPartition = -1;

            @Override
            public Integer map(Tuple2<Long, String> value) {
                int partition = value.f0.intValue() % parallelism;
                if (ourPartition != -1) {
                    assertEquals("inconsistent partitioning", ourPartition, partition);
                } else {
                    ourPartition = partition;
                }
                return partition;
            }
        }).setParallelism(parallelism).addSink(new SinkFunction<Integer>() {

            private int[] valuesPerPartition = new int[parallelism];

            @Override
            public void invoke(Integer value) throws Exception {
                valuesPerPartition[value]++;
                boolean missing = false;
                for (int i : valuesPerPartition) {
                    if (i < 100) {
                        missing = true;
                        break;
                    }
                }
                if (!missing) {
                    throw new SuccessException();
                }
            }
        }).setParallelism(1);
        tryExecute(env, "custom partitioning test");
        deleteTestTopic(topic);
        LOG.info("Finished KafkaProducerITCase.testCustomPartitioning()");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
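
The CustomPartitioner used above is defined elsewhere in the test class and is not part of this snippet. A minimal sketch, under the assumption that it extends this Flink version's KafkaPartitioner and routes each record by key modulo the partition count (the invariant the downstream mapper asserts):

private static class CustomPartitioner extends KafkaPartitioner<Tuple2<Long, String>> implements Serializable {

    // hypothetical sketch: the number of partitions the test expects the topic to have
    private final int expectedPartitions;

    public CustomPartitioner(int expectedPartitions) {
        this.expectedPartitions = expectedPartitions;
    }

    @Override
    public int partition(Tuple2<Long, String> next, byte[] serializedKey, byte[] serializedValue, int numPartitions) {
        // the test creates the topic with exactly 'parallelism' partitions
        assertEquals(expectedPartitions, numPartitions);
        // route record (key, value) to partition (key % numPartitions),
        // which is exactly the assignment the mapper verifies
        return (int) (next.f0 % numPartitions);
    }
}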
Also used: SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) KeyedSerializationSchemaWrapper(org.apache.flink.streaming.util.serialization.KeyedSerializationSchemaWrapper) Properties(java.util.Properties) SuccessException(org.apache.flink.test.util.SuccessException) TypeInformationSerializationSchema(org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 2 with RichMapFunction

use of org.apache.flink.api.common.functions.RichMapFunction in project flink by apache.

the class HBaseWriteExample method main.

// *************************************************************************
//     PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // get input data
    DataSet<String> text = getTextDataSet(env);
    // split up the lines in pairs (2-tuples) containing: (word, 1),
    // then group by the word (tuple field 0) and sum up the counts (tuple field 1)
    DataSet<Tuple2<String, Integer>> counts = text.flatMap(new Tokenizer()).groupBy(0).sum(1);
    // emit result
    Job job = Job.getInstance();
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, outputTableName);
    // TODO is "mapred.output.dir" really useful?
    job.getConfiguration().set("mapred.output.dir", HBaseFlinkTestConstants.TMP_DIR);
    counts.map(new RichMapFunction<Tuple2<String, Integer>, Tuple2<Text, Mutation>>() {

        private transient Tuple2<Text, Mutation> reuse;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            reuse = new Tuple2<Text, Mutation>();
        }

        @Override
        public Tuple2<Text, Mutation> map(Tuple2<String, Integer> t) throws Exception {
            reuse.f0 = new Text(t.f0);
            Put put = new Put(t.f0.getBytes(ConfigConstants.DEFAULT_CHARSET));
            put.add(HBaseFlinkTestConstants.CF_SOME, HBaseFlinkTestConstants.Q_SOME, Bytes.toBytes(t.f1));
            reuse.f1 = put;
            return reuse;
        }
    }).output(new HadoopOutputFormat<Text, Mutation>(new TableOutputFormat<Text>(), job));
    // execute program
    env.execute("WordCount (HBase sink) Example");
}
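
The Tokenizer is not shown in this snippet. A minimal sketch, assuming the standard Flink WordCount tokenizer (using FlatMapFunction and Collector from org.apache.flink.util):

public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        // normalize the line and split it into words
        String[] tokens = value.toLowerCase().split("\\W+");
        for (String token : tokens) {
            if (token.length() > 0) {
                // emit the pair (word, 1)
                out.collect(new Tuple2<String, Integer>(token, 1));
            }
        }
    }
}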
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) Text(org.apache.hadoop.io.Text) Put(org.apache.hadoop.hbase.client.Put) TableOutputFormat(org.apache.hadoop.hbase.mapreduce.TableOutputFormat) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Mutation(org.apache.hadoop.hbase.client.Mutation) Job(org.apache.hadoop.mapreduce.Job)

Example 3 with RichMapFunction

use of org.apache.flink.api.common.functions.RichMapFunction in project flink by apache.

the class TaskManagerProcessFailureBatchRecoveryITCase method testTaskManagerFailure.

// --------------------------------------------------------------------------------------------
//  Test the program
// --------------------------------------------------------------------------------------------
@Override
public void testTaskManagerFailure(int jobManagerPort, final File coordinateDir) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", jobManagerPort);
    env.setParallelism(PARALLELISM);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 10000));
    env.getConfig().setExecutionMode(executionMode);
    env.getConfig().disableSysoutLogging();
    final long NUM_ELEMENTS = 100000L;
    final DataSet<Long> result = env.generateSequence(1, NUM_ELEMENTS).rebalance().map(new RichMapFunction<Long, Long>() {

        private final File proceedFile = new File(coordinateDir, PROCEED_MARKER_FILE);

        private boolean markerCreated = false;

        private boolean checkForProceedFile = true;

        @Override
        public Long map(Long value) throws Exception {
            if (!markerCreated) {
                int taskIndex = getRuntimeContext().getIndexOfThisSubtask();
                touchFile(new File(coordinateDir, READY_MARKER_FILE_PREFIX + taskIndex));
                markerCreated = true;
            }
            // check if the proceed file exists
            if (checkForProceedFile) {
                if (proceedFile.exists()) {
                    checkForProceedFile = false;
                } else {
                    // otherwise wait so that we make slow progress
                    Thread.sleep(100);
                }
            }
            return value;
        }
    }).reduce(new ReduceFunction<Long>() {

        @Override
        public Long reduce(Long value1, Long value2) {
            return value1 + value2;
        }
    });
    long sum = result.collect().get(0);
    assertEquals(NUM_ELEMENTS * (NUM_ELEMENTS + 1L) / 2L, sum);
}
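
The touchFile helper comes from the surrounding test base class and is not shown here. A plausible sketch (an assumption, using only java.io) that creates the empty marker file the mapper uses to signal readiness:

private static void touchFile(File file) throws IOException {
    // create an empty marker file if it does not exist yet
    if (!file.exists()) {
        new FileOutputStream(file).close();
    }
}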
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) File(java.io.File)

Example 4 with RichMapFunction

use of org.apache.flink.api.common.functions.RichMapFunction in project flink by apache.

the class StreamingOperatorsITCase method testGroupedFoldOperation.

/**
	 * Tests the proper functioning of the streaming fold operator. For this purpose, a stream
	 * of Tuple2<Integer, Integer> is created. The stream is grouped according to the first tuple
	 * value. Each group is folded, summing up the second tuple value.
	 *
	 * This test relies on the hash function used by {@link DataStream#keyBy}, which is
	 * assumed to be {@link MathUtils#murmurHash}.
	 */
@Test
public void testGroupedFoldOperation() throws Exception {
    int numElements = 10;
    final int numKeys = 2;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple2<Integer, Integer>> sourceStream = env.addSource(new TupleSource(numElements, numKeys));
    SplitStream<Tuple2<Integer, Integer>> splittedResult = sourceStream.keyBy(0).fold(0, new FoldFunction<Tuple2<Integer, Integer>, Integer>() {

        private static final long serialVersionUID = 4875723041825726082L;

        @Override
        public Integer fold(Integer accumulator, Tuple2<Integer, Integer> value) throws Exception {
            return accumulator + value.f1;
        }
    }).map(new RichMapFunction<Integer, Tuple2<Integer, Integer>>() {

        private static final long serialVersionUID = 8538355101606319744L;

        int key = -1;

        @Override
        public Tuple2<Integer, Integer> map(Integer value) throws Exception {
            if (key == -1) {
                key = MathUtils.murmurHash(value) % numKeys;
            }
            return new Tuple2<>(key, value);
        }
    }).split(new OutputSelector<Tuple2<Integer, Integer>>() {

        private static final long serialVersionUID = -8439325199163362470L;

        @Override
        public Iterable<String> select(Tuple2<Integer, Integer> value) {
            List<String> output = new ArrayList<>();
            output.add(value.f0 + "");
            return output;
        }
    });
    final MemorySinkFunction sinkFunction1 = new MemorySinkFunction(0);
    final List<Integer> actualResult1 = new ArrayList<>();
    MemorySinkFunction.registerCollection(0, actualResult1);
    splittedResult.select("0").map(new MapFunction<Tuple2<Integer, Integer>, Integer>() {

        private static final long serialVersionUID = 2114608668010092995L;

        @Override
        public Integer map(Tuple2<Integer, Integer> value) throws Exception {
            return value.f1;
        }
    }).addSink(sinkFunction1);
    final MemorySinkFunction sinkFunction2 = new MemorySinkFunction(1);
    final List<Integer> actualResult2 = new ArrayList<>();
    MemorySinkFunction.registerCollection(1, actualResult2);
    splittedResult.select("1").map(new MapFunction<Tuple2<Integer, Integer>, Integer>() {

        private static final long serialVersionUID = 5631104389744681308L;

        @Override
        public Integer map(Tuple2<Integer, Integer> value) throws Exception {
            return value.f1;
        }
    }).addSink(sinkFunction2);
    Collection<Integer> expected1 = new ArrayList<>(10);
    Collection<Integer> expected2 = new ArrayList<>(10);
    int counter1 = 0;
    int counter2 = 0;
    for (int i = 0; i < numElements; i++) {
        if (MathUtils.murmurHash(i) % numKeys == 0) {
            counter1 += i;
            expected1.add(counter1);
        } else {
            counter2 += i;
            expected2.add(counter2);
        }
    }
    env.execute();
    Collections.sort(actualResult1);
    Collections.sort(actualResult2);
    Assert.assertEquals(expected1, actualResult1);
    Assert.assertEquals(expected2, actualResult2);
    MemorySinkFunction.clear();
}
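
The MemorySinkFunction is defined elsewhere in the test file. A sketch of how it plausibly works, assuming a static registry keyed by an integer id so that parallel sink instances and the test thread share the same collection:

private static class MemorySinkFunction implements SinkFunction<Integer> {

    // static registry shared between the sink instances and the test thread
    private static final Map<Integer, Collection<Integer>> collections = new ConcurrentHashMap<>();

    private final int key;

    public MemorySinkFunction(int key) {
        this.key = key;
    }

    @Override
    public void invoke(Integer value) throws Exception {
        Collection<Integer> collection = collections.get(key);
        // guard against concurrent writes from parallel sink subtasks
        synchronized (collection) {
            collection.add(value);
        }
    }

    public static void registerCollection(int key, Collection<Integer> collection) {
        collections.put(key, collection);
    }

    public static void clear() {
        collections.clear();
    }
}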
Also used: MapFunction(org.apache.flink.api.common.functions.MapFunction) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 5 with RichMapFunction

use of org.apache.flink.api.common.functions.RichMapFunction in project flink by apache.

the class AsyncWaitOperatorTest method createChainedVertex.

private JobVertex createChainedVertex(boolean withLazyFunction) {
    StreamExecutionEnvironment chainEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    // the input is only used to construct a chained operator; it will not be used in the real tests.
    DataStream<Integer> input = chainEnv.fromElements(1, 2, 3);
    if (withLazyFunction) {
        input = AsyncDataStream.orderedWait(input, new LazyAsyncFunction(), TIMEOUT, TimeUnit.MILLISECONDS, 6);
    } else {
        input = AsyncDataStream.orderedWait(input, new MyAsyncFunction(), TIMEOUT, TimeUnit.MILLISECONDS, 6);
    }
    // the map function is designed to chain after the async function. It holds an Integer
    // field that is initialized in the open() method. This verifies that the operators in
    // an operator chain are opened from tail to head, so that the result from the
    // AsyncWaitOperator can be passed downstream successfully and correctly.
    // If the open order were wrong, the test would fail.
    input = input.map(new RichMapFunction<Integer, Integer>() {

        private static final long serialVersionUID = 1L;

        private Integer initialValue = null;

        @Override
        public void open(Configuration parameters) throws Exception {
            initialValue = 1;
        }

        @Override
        public Integer map(Integer value) throws Exception {
            return initialValue + value;
        }
    });
    input = AsyncDataStream.unorderedWait(input, new MyAsyncFunction(), TIMEOUT, TimeUnit.MILLISECONDS, 3);
    input.map(new MapFunction<Integer, Integer>() {

        private static final long serialVersionUID = 5162085254238405527L;

        @Override
        public Integer map(Integer value) throws Exception {
            return value;
        }
    }).startNewChain().addSink(new DiscardingSink<Integer>());
    // build the JobGraph so that we get our own operator chain to inspect
    final JobGraph jobGraph = chainEnv.getStreamGraph().getJobGraph();
    Assert.assertEquals(3, jobGraph.getVerticesSortedTopologicallyFromSources().size());
    return jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
}
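
MyAsyncFunction and LazyAsyncFunction are defined elsewhere in the test. A sketch of MyAsyncFunction, assuming the AsyncCollector-based RichAsyncFunction interface of this Flink version, where each request is completed on a shared thread pool (the doubling of the input is illustrative):

private static class MyAsyncFunction extends RichAsyncFunction<Integer, Integer> {

    private static final long serialVersionUID = 1L;

    private transient ExecutorService executorService;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        executorService = Executors.newFixedThreadPool(10);
    }

    @Override
    public void close() throws Exception {
        super.close();
        executorService.shutdown();
    }

    @Override
    public void asyncInvoke(final Integer input, final AsyncCollector<Integer> collector) throws Exception {
        executorService.submit(new Runnable() {
            @Override
            public void run() {
                // complete the request asynchronously (here: double the input)
                collector.collect(Collections.singletonList(input * 2));
            }
        });
    }
}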
Also used: Configuration(org.apache.flink.configuration.Configuration) TimeoutException(java.util.concurrent.TimeoutException) ExecutionException(java.util.concurrent.ExecutionException) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Aggregations

RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction) 15
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 6
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 6
Test (org.junit.Test) 6
Configuration (org.apache.flink.configuration.Configuration) 5
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 4
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph) 3
File (java.io.File) 2
ArrayList (java.util.ArrayList) 2
Properties (java.util.Properties) 2
TimeoutException (java.util.concurrent.TimeoutException) 2
InvalidTypesException (org.apache.flink.api.common.functions.InvalidTypesException) 2
MapFunction (org.apache.flink.api.common.functions.MapFunction) 2
SuccessException (org.apache.flink.test.util.SuccessException) 2
BigInteger (java.math.BigInteger) 1
HashMap (java.util.HashMap) 1
ExecutionException (java.util.concurrent.ExecutionException) 1
Future (java.util.concurrent.Future) 1
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean) 1
AtomicReference (java.util.concurrent.atomic.AtomicReference) 1