Use of org.apache.flink.api.common.functions.RichMapFunction in project flink by apache.
From the class KafkaProducerTestBase, method runCustomPartitioningTest:
/**
*
* <pre>
*             +------> (sink) --+--> [KAFKA-1] --> (source) -> (map) --+
*            /                  |                                       \
*           /                   |                                        \
* (source) ----------> (sink) --+--> [KAFKA-2] --> (source) -> (map) -----+-> (sink)
*           \                   |                                        /
*            \                  |                                       /
*             +------> (sink) --+--> [KAFKA-3] --> (source) -> (map) --+
* </pre>
*
* The mapper validates that the values come consistently from the correct Kafka partition.
*
* The final sink validates that there are no duplicates and that all partitions are present.
*/
public void runCustomPartitioningTest() {
try {
LOG.info("Starting KafkaProducerITCase.testCustomPartitioning()");
final String topic = "customPartitioningTestTopic";
final int parallelism = 3;
createTestTopic(topic, parallelism, 1);
TypeInformation<Tuple2<Long, String>> longStringInfo = TypeInfoParser.parse("Tuple2<Long, String>");
StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
env.setRestartStrategy(RestartStrategies.noRestart());
env.getConfig().disableSysoutLogging();
TypeInformationSerializationSchema<Tuple2<Long, String>> serSchema = new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());
TypeInformationSerializationSchema<Tuple2<Long, String>> deserSchema = new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());
// ------ producing topology ---------
// source has DOP 1 to make sure it generates no duplicates
DataStream<Tuple2<Long, String>> stream = env.addSource(new SourceFunction<Tuple2<Long, String>>() {
private boolean running = true;
@Override
public void run(SourceContext<Tuple2<Long, String>> ctx) throws Exception {
long cnt = 0;
while (running) {
ctx.collect(new Tuple2<Long, String>(cnt, "kafka-" + cnt));
cnt++;
}
}
@Override
public void cancel() {
running = false;
}
}).setParallelism(1);
Properties props = new Properties();
props.putAll(FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings));
props.putAll(secureProps);
// sink writes the stream into Kafka, using the custom partitioner to pick the target partition
kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(serSchema), props, new CustomPartitioner(parallelism)).setParallelism(parallelism);
// ------ consuming topology ---------
Properties consumerProps = new Properties();
consumerProps.putAll(standardProps);
consumerProps.putAll(secureProps);
FlinkKafkaConsumerBase<Tuple2<Long, String>> source = kafkaServer.getConsumer(topic, deserSchema, consumerProps);
env.addSource(source).setParallelism(parallelism).map(new RichMapFunction<Tuple2<Long, String>, Integer>() {
private int ourPartition = -1;
@Override
public Integer map(Tuple2<Long, String> value) {
int partition = value.f0.intValue() % parallelism;
if (ourPartition != -1) {
assertEquals("inconsistent partitioning", ourPartition, partition);
} else {
ourPartition = partition;
}
return partition;
}
}).setParallelism(parallelism).addSink(new SinkFunction<Integer>() {
private int[] valuesPerPartition = new int[parallelism];
@Override
public void invoke(Integer value) throws Exception {
valuesPerPartition[value]++;
boolean missing = false;
for (int i : valuesPerPartition) {
if (i < 100) {
missing = true;
break;
}
}
if (!missing) {
throw new SuccessException();
}
}
}).setParallelism(1);
tryExecute(env, "custom partitioning test");
deleteTestTopic(topic);
LOG.info("Finished KafkaProducerITCase.testCustomPartitioning()");
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
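The CustomPartitioner passed to produceIntoKafka above is defined elsewhere in KafkaProducerTestBase and is not shown in this snippet. A minimal sketch of such a partitioner, assuming the KafkaPartitioner API that this version of the connector accepts (the actual test class may differ in detail):

// assumed imports: org.apache.flink.streaming.connectors.kafka.partitioner.KafkaPartitioner,
// org.apache.flink.api.java.tuple.Tuple2, java.io.Serializable
public static class CustomPartitioner extends KafkaPartitioner<Tuple2<Long, String>> implements Serializable {

    private final int expectedPartitions;

    public CustomPartitioner(int expectedPartitions) {
        this.expectedPartitions = expectedPartitions;
    }

    @Override
    public int partition(Tuple2<Long, String> next, byte[] serializedKey, byte[] serializedValue, int numPartitions) {
        // route each record by its long counter so the consuming mapper
        // observes a consistent key-to-partition assignment
        return (int) (next.f0 % numPartitions);
    }
}

This matches the check in the consuming mapper above, which expects partition == value.f0 % parallelism for every record it sees.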
Use of org.apache.flink.api.common.functions.RichMapFunction in project flink by apache.
From the class HBaseWriteExample, method main:
// *************************************************************************
// PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
if (!parseParameters(args)) {
return;
}
// set up the execution environment
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// get input data
DataSet<String> text = getTextDataSet(env);
DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1)
text.flatMap(new Tokenizer()).groupBy(0).sum(1);
// emit result
Job job = Job.getInstance();
job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, outputTableName);
// TODO is "mapred.output.dir" really useful?
job.getConfiguration().set("mapred.output.dir", HBaseFlinkTestConstants.TMP_DIR);
counts.map(new RichMapFunction<Tuple2<String, Integer>, Tuple2<Text, Mutation>>() {
private transient Tuple2<Text, Mutation> reuse;
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
reuse = new Tuple2<Text, Mutation>();
}
@Override
public Tuple2<Text, Mutation> map(Tuple2<String, Integer> t) throws Exception {
reuse.f0 = new Text(t.f0);
Put put = new Put(t.f0.getBytes(ConfigConstants.DEFAULT_CHARSET));
put.add(HBaseFlinkTestConstants.CF_SOME, HBaseFlinkTestConstants.Q_SOME, Bytes.toBytes(t.f1));
reuse.f1 = put;
return reuse;
}
}).output(new HadoopOutputFormat<Text, Mutation>(new TableOutputFormat<Text>(), job));
// execute program
env.execute("WordCount (HBase sink) Example");
}
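The Tokenizer used in the flatMap above is defined elsewhere in the example and is not shown here. In Flink's WordCount-style examples it is typically a FlatMapFunction along these lines (a sketch, not necessarily the exact class from HBaseWriteExample):

// assumed imports: org.apache.flink.api.common.functions.FlatMapFunction,
// org.apache.flink.api.java.tuple.Tuple2, org.apache.flink.util.Collector
public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        // normalize the line and split it into words
        String[] tokens = value.toLowerCase().split("\\W+");
        // emit each word paired with an initial count of 1
        for (String token : tokens) {
            if (token.length() > 0) {
                out.collect(new Tuple2<String, Integer>(token, 1));
            }
        }
    }
}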
Use of org.apache.flink.api.common.functions.RichMapFunction in project flink by apache.
From the class TaskManagerProcessFailureBatchRecoveryITCase, method testTaskManagerFailure:
// --------------------------------------------------------------------------------------------
// Test the program
// --------------------------------------------------------------------------------------------
@Override
public void testTaskManagerFailure(int jobManagerPort, final File coordinateDir) throws Exception {
ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", jobManagerPort);
env.setParallelism(PARALLELISM);
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 10000));
env.getConfig().setExecutionMode(executionMode);
env.getConfig().disableSysoutLogging();
final long NUM_ELEMENTS = 100000L;
final DataSet<Long> result = env.generateSequence(1, NUM_ELEMENTS).rebalance().map(new RichMapFunction<Long, Long>() {
private final File proceedFile = new File(coordinateDir, PROCEED_MARKER_FILE);
private boolean markerCreated = false;
private boolean checkForProceedFile = true;
@Override
public Long map(Long value) throws Exception {
if (!markerCreated) {
int taskIndex = getRuntimeContext().getIndexOfThisSubtask();
touchFile(new File(coordinateDir, READY_MARKER_FILE_PREFIX + taskIndex));
markerCreated = true;
}
// check if the proceed file exists
if (checkForProceedFile) {
if (proceedFile.exists()) {
checkForProceedFile = false;
} else {
// otherwise wait so that we make slow progress
Thread.sleep(100);
}
}
return value;
}
}).reduce(new ReduceFunction<Long>() {
@Override
public Long reduce(Long value1, Long value2) {
return value1 + value2;
}
});
long sum = result.collect().get(0);
assertEquals(NUM_ELEMENTS * (NUM_ELEMENTS + 1L) / 2L, sum);
}
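The touchFile helper and the READY/PROCEED marker-file constants come from the surrounding test base class and are not part of this snippet. A minimal sketch of such a helper, assuming it only needs to create the marker file and refresh its timestamp:

// hypothetical helper; the real one lives in the abstract recovery test base class
// assumed imports: java.io.File, java.io.FileOutputStream, java.io.IOException
public static void touchFile(File file) throws IOException {
    if (!file.exists()) {
        // create an empty marker file
        new FileOutputStream(file).close();
    }
    if (!file.setLastModified(System.currentTimeMillis())) {
        throw new IOException("Could not touch the file " + file);
    }
}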
Use of org.apache.flink.api.common.functions.RichMapFunction in project flink by apache.
From the class StreamingOperatorsITCase, method testGroupedFoldOperation:
/**
* Tests the proper functioning of the streaming fold operator. For this purpose, a stream
* of Tuple2<Integer, Integer> is created. The stream is grouped according to the first tuple
* value. Each group is folded where the second tuple value is summed up.
*
* This test relies on the hash function used by {@link DataStream#keyBy}, which is
* assumed to be {@link MathUtils#murmurHash}.
*/
@Test
public void testGroupedFoldOperation() throws Exception {
int numElements = 10;
final int numKeys = 2;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<Tuple2<Integer, Integer>> sourceStream = env.addSource(new TupleSource(numElements, numKeys));
SplitStream<Tuple2<Integer, Integer>> splittedResult = sourceStream.keyBy(0).fold(0, new FoldFunction<Tuple2<Integer, Integer>, Integer>() {
private static final long serialVersionUID = 4875723041825726082L;
@Override
public Integer fold(Integer accumulator, Tuple2<Integer, Integer> value) throws Exception {
return accumulator + value.f1;
}
}).map(new RichMapFunction<Integer, Tuple2<Integer, Integer>>() {
private static final long serialVersionUID = 8538355101606319744L;
int key = -1;
@Override
public Tuple2<Integer, Integer> map(Integer value) throws Exception {
if (key == -1) {
key = MathUtils.murmurHash(value) % numKeys;
}
return new Tuple2<>(key, value);
}
}).split(new OutputSelector<Tuple2<Integer, Integer>>() {
private static final long serialVersionUID = -8439325199163362470L;
@Override
public Iterable<String> select(Tuple2<Integer, Integer> value) {
List<String> output = new ArrayList<>();
output.add(value.f0 + "");
return output;
}
});
final MemorySinkFunction sinkFunction1 = new MemorySinkFunction(0);
final List<Integer> actualResult1 = new ArrayList<>();
MemorySinkFunction.registerCollection(0, actualResult1);
splittedResult.select("0").map(new MapFunction<Tuple2<Integer, Integer>, Integer>() {
private static final long serialVersionUID = 2114608668010092995L;
@Override
public Integer map(Tuple2<Integer, Integer> value) throws Exception {
return value.f1;
}
}).addSink(sinkFunction1);
final MemorySinkFunction sinkFunction2 = new MemorySinkFunction(1);
final List<Integer> actualResult2 = new ArrayList<>();
MemorySinkFunction.registerCollection(1, actualResult2);
splittedResult.select("1").map(new MapFunction<Tuple2<Integer, Integer>, Integer>() {
private static final long serialVersionUID = 5631104389744681308L;
@Override
public Integer map(Tuple2<Integer, Integer> value) throws Exception {
return value.f1;
}
}).addSink(sinkFunction2);
Collection<Integer> expected1 = new ArrayList<>(10);
Collection<Integer> expected2 = new ArrayList<>(10);
int counter1 = 0;
int counter2 = 0;
for (int i = 0; i < numElements; i++) {
if (MathUtils.murmurHash(i) % numKeys == 0) {
counter1 += i;
expected1.add(counter1);
} else {
counter2 += i;
expected2.add(counter2);
}
}
env.execute();
Collections.sort(actualResult1);
Collections.sort(actualResult2);
Assert.assertEquals(expected1, actualResult1);
Assert.assertEquals(expected2, actualResult2);
MemorySinkFunction.clear();
}
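MemorySinkFunction and TupleSource are test helpers defined elsewhere in StreamingOperatorsITCase. Judging from the registerCollection/clear calls above, the sink most likely writes into statically registered collections; a sketch under that assumption (not the verbatim class):

// assumed imports: java.util.Collection, java.util.Map, java.util.concurrent.ConcurrentHashMap,
// org.apache.flink.streaming.api.functions.sink.SinkFunction
private static class MemorySinkFunction implements SinkFunction<Integer> {

    // one result collection per registered key, shared across parallel sink instances
    private static final Map<Integer, Collection<Integer>> collections = new ConcurrentHashMap<>();

    private final int key;

    MemorySinkFunction(int key) {
        this.key = key;
    }

    @Override
    public void invoke(Integer value) throws Exception {
        Collection<Integer> collection = collections.get(key);
        synchronized (collection) {
            collection.add(value);
        }
    }

    static void registerCollection(int key, Collection<Integer> collection) {
        collections.put(key, collection);
    }

    static void clear() {
        collections.clear();
    }
}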
Use of org.apache.flink.api.common.functions.RichMapFunction in project flink by apache.
From the class AsyncWaitOperatorTest, method createChainedVertex:
private JobVertex createChainedVertex(boolean withLazyFunction) {
StreamExecutionEnvironment chainEnv = StreamExecutionEnvironment.getExecutionEnvironment();
// the input is only used to construct a chained operator; it is not used in the actual tests.
DataStream<Integer> input = chainEnv.fromElements(1, 2, 3);
if (withLazyFunction) {
input = AsyncDataStream.orderedWait(input, new LazyAsyncFunction(), TIMEOUT, TimeUnit.MILLISECONDS, 6);
} else {
input = AsyncDataStream.orderedWait(input, new MyAsyncFunction(), TIMEOUT, TimeUnit.MILLISECONDS, 6);
}
// the map function is designed to chain after the async function. It holds an Integer field
// that is initialized in the open() method. This verifies that the operators in the chain are
// opened from tail to head, so that the result from the AsyncWaitOperator can be passed
// downstream successfully and correctly; otherwise the test fails.
input = input.map(new RichMapFunction<Integer, Integer>() {
private static final long serialVersionUID = 1L;
private Integer initialValue = null;
@Override
public void open(Configuration parameters) throws Exception {
initialValue = 1;
}
@Override
public Integer map(Integer value) throws Exception {
return initialValue + value;
}
});
input = AsyncDataStream.unorderedWait(input, new MyAsyncFunction(), TIMEOUT, TimeUnit.MILLISECONDS, 3);
input.map(new MapFunction<Integer, Integer>() {
private static final long serialVersionUID = 5162085254238405527L;
@Override
public Integer map(Integer value) throws Exception {
return value;
}
}).startNewChain().addSink(new DiscardingSink<Integer>());
// here we build our own OperatorChain
final JobGraph jobGraph = chainEnv.getStreamGraph().getJobGraph();
Assert.assertTrue(jobGraph.getVerticesSortedTopologicallyFromSources().size() == 3);
return jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
}
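MyAsyncFunction, LazyAsyncFunction and TIMEOUT are defined elsewhere in AsyncWaitOperatorTest. Assuming the AsyncCollector-based AsyncFunction API of this Flink version, a minimal async function suitable for such a chaining test could look like the following (illustrative only; the real test classes additionally manage an executor service and latches):

// assumed imports: java.util.Collections,
// org.apache.flink.streaming.api.functions.async.RichAsyncFunction,
// org.apache.flink.streaming.api.functions.async.collector.AsyncCollector
private static class MyAsyncFunction extends RichAsyncFunction<Integer, Integer> {

    private static final long serialVersionUID = 1L;

    @Override
    public void asyncInvoke(final Integer input, final AsyncCollector<Integer> collector) throws Exception {
        // complete the request immediately; a real implementation would hand the
        // work to an executor and collect the result asynchronously
        collector.collect(Collections.singletonList(input));
    }
}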