Use of org.apache.flink.util.Collector in project rocketmq-externals by apache.
Class RocketMQFlinkExample, method main:
public static void main(String[] args) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // enable checkpoint
    env.enableCheckpointing(3000);

    Properties consumerProps = new Properties();
    consumerProps.setProperty(RocketMQConfig.NAME_SERVER_ADDR, "localhost:9876");
    consumerProps.setProperty(RocketMQConfig.CONSUMER_GROUP, "c002");
    consumerProps.setProperty(RocketMQConfig.CONSUMER_TOPIC, "flink-source2");

    Properties producerProps = new Properties();
    producerProps.setProperty(RocketMQConfig.NAME_SERVER_ADDR, "localhost:9876");

    env.addSource(new RocketMQSource(new SimpleKeyValueDeserializationSchema("id", "address"), consumerProps))
        .name("rocketmq-source")
        .setParallelism(2)
        .process(new ProcessFunction<Map, Map>() {

            @Override
            public void processElement(Map in, Context ctx, Collector<Map> out) throws Exception {
                HashMap result = new HashMap();
                result.put("id", in.get("id"));
                String[] arr = in.get("address").toString().split("\\s+");
                result.put("province", arr[arr.length - 1]);
                out.collect(result);
            }
        })
        .name("upper-processor")
        .setParallelism(2)
        .addSink(new RocketMQSink(
                new SimpleKeyValueSerializationSchema("id", "province"),
                new DefaultTopicSelector("flink-sink2"),
                producerProps).withBatchFlushOnCheckpoint(true))
        .name("rocketmq-sink")
        .setParallelism(2);

    try {
        env.execute("rocketmq-flink-example");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
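All of these snippets emit records through org.apache.flink.util.Collector. For reference, the interface is small; paraphrased here (check the Flink javadoc for the exact signatures in the version you use), it looks roughly like this:

// Paraphrased for reference: a Collector hands records to the next operator.
public interface Collector<T> {

    // Emits one record downstream.
    void collect(T record);

    // Closes the collector; no further records may be emitted afterwards.
    void close();
}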
Use of org.apache.flink.util.Collector in project trace-stream-engine by nyomio.
Class ExampleFlinkJobJava, method main:
public static void main(String[] args) throws Exception {
    // the port to connect to
    final int port;
    try {
        final ParameterTool params = ParameterTool.fromArgs(args);
        port = params.getInt("port");
    } catch (Exception e) {
        System.err.println("No port specified. Please run 'SocketWindowWordCount --port <port>'");
        return;
    }

    // get the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // get input data by connecting to the socket
    DataStream<String> text = env.socketTextStream("localhost", port, "\n");

    // parse the data, group it, window it, and aggregate the counts
    DataStream<WordWithCount> windowCounts = text
        .flatMap(new FlatMapFunction<String, WordWithCount>() {

            @Override
            public void flatMap(String value, Collector<WordWithCount> out) {
                for (String word : value.split("\\s")) {
                    out.collect(new WordWithCount(word, 1L));
                }
            }
        })
        .keyBy("word")
        .timeWindow(Time.seconds(5), Time.seconds(1))
        .reduce(new ReduceFunction<WordWithCount>() {

            @Override
            public WordWithCount reduce(WordWithCount a, WordWithCount b) {
                return new WordWithCount(a.word, a.count + b.count);
            }
        });

    // print the results with a single thread, rather than in parallel
    windowCounts.print().setParallelism(1);

    env.execute("Socket Window WordCount");
}
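The job references a WordWithCount type that is not shown in this snippet. In the standard Flink SocketWindowWordCount example it is a plain POJO along these lines (a sketch inferred from the usage above, not copied from this project):

// Simple POJO holding a word and its count; the public no-arg constructor and
// public fields let Flink treat it as a POJO type and key the stream by "word".
public static class WordWithCount {
    public String word;
    public long count;

    public WordWithCount() {}

    public WordWithCount(String word, long count) {
        this.word = word;
        this.count = count;
    }

    @Override
    public String toString() {
        return word + " : " + count;
    }
}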
Use of org.apache.flink.util.Collector in project flink by apache.
Class KafkaConsumerTestBase, method readSequence:
// ------------------------------------------------------------------------
//  Reading / writing test data sets
// ------------------------------------------------------------------------

/**
 * Runs a job using the provided environment to read a sequence of records from a single Kafka
 * topic. The method lets the caller specify, per partition, the expected starting offset and
 * the total number of values to read. The job is considered successful only if all partition
 * read results match the start offset and value count criteria.
 */
protected void readSequence(
        final StreamExecutionEnvironment env,
        final StartupMode startupMode,
        final Map<KafkaTopicPartition, Long> specificStartupOffsets,
        final Long startupTimestamp,
        final Properties cc,
        final String topicName,
        final Map<Integer, Tuple2<Integer, Integer>> partitionsToValuesCountAndStartOffset) throws Exception {
    final int sourceParallelism = partitionsToValuesCountAndStartOffset.keySet().size();

    int finalCountTmp = 0;
    for (Map.Entry<Integer, Tuple2<Integer, Integer>> valuesCountAndStartOffset :
            partitionsToValuesCountAndStartOffset.entrySet()) {
        finalCountTmp += valuesCountAndStartOffset.getValue().f0;
    }
    final int finalCount = finalCountTmp;

    final TypeInformation<Tuple2<Integer, Integer>> intIntTupleType =
            TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

    final TypeInformationSerializationSchema<Tuple2<Integer, Integer>> deser =
            new TypeInformationSerializationSchema<>(intIntTupleType, env.getConfig());

    // create the consumer
    cc.putAll(secureProps);

    DataStreamSource<Tuple2<Integer, Integer>> source;
    if (useNewSource) {
        KafkaSourceBuilder<Tuple2<Integer, Integer>> sourceBuilder =
                kafkaServer.getSourceBuilder(topicName, deser, cc);
        Map<TopicPartition, Long> startOffsets = new HashMap<>();
        if (specificStartupOffsets != null) {
            specificStartupOffsets.forEach((ktp, offset) ->
                    startOffsets.put(new TopicPartition(ktp.getTopic(), ktp.getPartition()), offset));
        }
        setKafkaSourceOffset(startupMode, sourceBuilder, startOffsets, startupTimestamp);
        source = env.fromSource(sourceBuilder.build(), WatermarkStrategy.noWatermarks(), "KafkaSource");
    } else {
        FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> consumer =
                kafkaServer.getConsumer(topicName, deser, cc);
        setKafkaConsumerOffset(startupMode, consumer, specificStartupOffsets, startupTimestamp);
        source = env.addSource(consumer);
    }

    source.setParallelism(sourceParallelism)
        .map(new ThrottledMapper<>(20))
        .setParallelism(sourceParallelism)
        .flatMap(new RichFlatMapFunction<Tuple2<Integer, Integer>, Integer>() {

            private HashMap<Integer, BitSet> partitionsToValueCheck;
            private int count = 0;

            @Override
            public void open(Configuration parameters) throws Exception {
                partitionsToValueCheck = new HashMap<>();
                for (Integer partition : partitionsToValuesCountAndStartOffset.keySet()) {
                    partitionsToValueCheck.put(partition, new BitSet());
                }
            }

            @Override
            public void flatMap(Tuple2<Integer, Integer> value, Collector<Integer> out) throws Exception {
                int partition = value.f0;
                int val = value.f1;
                BitSet bitSet = partitionsToValueCheck.get(partition);
                if (bitSet == null) {
                    throw new RuntimeException("Got a record from an unknown partition");
                } else {
                    bitSet.set(val - partitionsToValuesCountAndStartOffset.get(partition).f1);
                }
                count++;
                LOG.info("Received message {}, total {} messages", value, count);
                // verify if we've seen everything
                if (count == finalCount) {
                    for (Map.Entry<Integer, BitSet> partitionsToValueCheck :
                            this.partitionsToValueCheck.entrySet()) {
                        BitSet check = partitionsToValueCheck.getValue();
                        int expectedValueCount =
                                partitionsToValuesCountAndStartOffset.get(partitionsToValueCheck.getKey()).f0;
                        if (check.cardinality() != expectedValueCount) {
                            throw new RuntimeException("Expected cardinality to be " + expectedValueCount
                                    + ", but was " + check.cardinality());
                        } else if (check.nextClearBit(0) != expectedValueCount) {
                            throw new RuntimeException("Expected next clear bit to be " + expectedValueCount
                                    + ", but was " + check.cardinality());
                        }
                    }
                    // test has passed
                    throw new SuccessException();
                }
            }
        })
        .setParallelism(1);

    tryExecute(env, "Read data from Kafka");
    LOG.info("Successfully read sequence for verification");
}
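The SuccessException thrown once every expected record has been seen is a marker: the flatMap fails the job on purpose, and the surrounding tryExecute helper interprets that particular failure as a passed test. A rough sketch of that pattern (not the actual Flink test utility) could look like this:

// Hedged sketch of the success-by-exception pattern; assumes SuccessException is the
// marker type used above, and unwraps the job failure's cause chain to look for it.
static void tryExecute(StreamExecutionEnvironment env, String jobName) throws Exception {
    try {
        env.execute(jobName);
    } catch (Exception root) {
        for (Throwable t = root; t != null; t = t.getCause()) {
            if (t instanceof SuccessException) {
                return; // every expected record was seen: treat the failure as success
            }
        }
        throw root; // any other failure is a real test failure
    }
}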
Use of org.apache.flink.util.Collector in project flink by apache.
Class TransitiveClosureNaive, method main:
public static void main(String... args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    final int maxIterations = params.getInt("iterations", 10);

    DataSet<Tuple2<Long, Long>> edges;
    if (params.has("edges")) {
        edges = env.readCsvFile(params.get("edges")).fieldDelimiter(" ").types(Long.class, Long.class);
    } else {
        System.out.println("Executing TransitiveClosureNaive example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        edges = ConnectedComponentsData.getDefaultEdgeDataSet(env);
    }

    IterativeDataSet<Tuple2<Long, Long>> paths = edges.iterate(maxIterations);

    DataSet<Tuple2<Long, Long>> nextPaths = paths
        .join(edges)
        .where(1)
        .equalTo(0)
        .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

            @Override
            public /**
             * left: Path (z,x) - x is reachable by z
             * right: Edge (x,y) - edge x-->y exists
             * out: Path (z,y) - y is reachable by z
             */
            Tuple2<Long, Long> join(Tuple2<Long, Long> left, Tuple2<Long, Long> right) throws Exception {
                return new Tuple2<Long, Long>(left.f0, right.f1);
            }
        })
        .withForwardedFieldsFirst("0")
        .withForwardedFieldsSecond("1")
        .union(paths)
        .groupBy(0, 1)
        .reduceGroup(new GroupReduceFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {

            @Override
            public void reduce(Iterable<Tuple2<Long, Long>> values, Collector<Tuple2<Long, Long>> out) throws Exception {
                out.collect(values.iterator().next());
            }
        })
        .withForwardedFields("0;1");

    DataSet<Tuple2<Long, Long>> newPaths = paths
        .coGroup(nextPaths)
        .where(0)
        .equalTo(0)
        .with(new CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

            Set<Tuple2<Long, Long>> prevSet = new HashSet<Tuple2<Long, Long>>();

            @Override
            public void coGroup(Iterable<Tuple2<Long, Long>> prevPaths, Iterable<Tuple2<Long, Long>> nextPaths,
                    Collector<Tuple2<Long, Long>> out) throws Exception {
                for (Tuple2<Long, Long> prev : prevPaths) {
                    prevSet.add(prev);
                }
                for (Tuple2<Long, Long> next : nextPaths) {
                    if (!prevSet.contains(next)) {
                        out.collect(next);
                    }
                }
            }
        })
        .withForwardedFieldsFirst("0")
        .withForwardedFieldsSecond("0");

    DataSet<Tuple2<Long, Long>> transitiveClosure = paths.closeWith(nextPaths, newPaths);

    // emit result
    if (params.has("output")) {
        transitiveClosure.writeAsCsv(params.get("output"), "\n", " ");
        // execute program explicitly, because file sinks are lazy
        env.execute("Transitive Closure Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        transitiveClosure.print();
    }
}
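The second argument to closeWith is the termination criterion: the iteration stops as soon as newPaths, the paths found in a round that were not already known, is empty, even before maxIterations is reached. A hypothetical minimal input, assuming a local environment, shows what the program computes:

// Hypothetical input sketch: with edges 1->2, 2->3 and 3->4 the transitive closure
// adds the paths (1,3), (2,4) and (1,4) to the original edge set once the iteration converges.
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
DataSet<Tuple2<Long, Long>> edges = env.fromElements(
        new Tuple2<>(1L, 2L),
        new Tuple2<>(2L, 3L),
        new Tuple2<>(3L, 4L));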
Use of org.apache.flink.util.Collector in project flink by apache.
Class InnerJoinOperatorBaseTest, method testTupleBaseJoiner:
@Test
public void testTupleBaseJoiner() {
    final FlatJoinFunction<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>> joiner =
            new FlatJoinFunction<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>>() {

                @Override
                public void join(Tuple3<String, Double, Integer> first, Tuple2<Integer, String> second,
                        Collector<Tuple2<Double, String>> out) {
                    assertEquals(first.f0, second.f1);
                    assertEquals(first.f2, second.f0);
                    out.collect(new Tuple2<>(first.f1, second.f0.toString()));
                }
            };

    final TupleTypeInfo<Tuple3<String, Double, Integer>> leftTypeInfo =
            TupleTypeInfo.getBasicTupleTypeInfo(String.class, Double.class, Integer.class);
    final TupleTypeInfo<Tuple2<Integer, String>> rightTypeInfo =
            TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, String.class);
    final TupleTypeInfo<Tuple2<Double, String>> outTypeInfo =
            TupleTypeInfo.getBasicTupleTypeInfo(Double.class, String.class);

    final int[] leftKeys = new int[] { 0, 2 };
    final int[] rightKeys = new int[] { 1, 0 };
    final String taskName = "Collection based tuple joiner";

    final BinaryOperatorInformation<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>> binaryOpInfo =
            new BinaryOperatorInformation<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>>(leftTypeInfo, rightTypeInfo, outTypeInfo);
    final InnerJoinOperatorBase<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>, FlatJoinFunction<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>>> base =
            new InnerJoinOperatorBase<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>, FlatJoinFunction<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>>>(joiner, binaryOpInfo, leftKeys, rightKeys, taskName);

    final List<Tuple3<String, Double, Integer>> inputData1 = new ArrayList<Tuple3<String, Double, Integer>>(Arrays.asList(
            new Tuple3<>("foo", 42.0, 1),
            new Tuple3<>("bar", 1.0, 2),
            new Tuple3<>("bar", 2.0, 3),
            new Tuple3<>("foobar", 3.0, 4),
            new Tuple3<>("bar", 3.0, 3)));
    final List<Tuple2<Integer, String>> inputData2 = new ArrayList<Tuple2<Integer, String>>(Arrays.asList(
            new Tuple2<>(3, "bar"), new Tuple2<>(4, "foobar"), new Tuple2<>(2, "foo")));
    final Set<Tuple2<Double, String>> expected = new HashSet<Tuple2<Double, String>>(Arrays.asList(
            new Tuple2<>(2.0, "3"), new Tuple2<>(3.0, "3"), new Tuple2<>(3.0, "4")));

    try {
        final TaskInfo taskInfo = new TaskInfo("op", 1, 0, 1, 0);
        ExecutionConfig executionConfig = new ExecutionConfig();

        executionConfig.disableObjectReuse();
        List<Tuple2<Double, String>> resultSafe = base.executeOnCollections(inputData1, inputData2,
                new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<String, Future<Path>>(),
                        new HashMap<String, Accumulator<?, ?>>(), UnregisteredMetricsGroup.createOperatorMetricGroup()),
                executionConfig);

        executionConfig.enableObjectReuse();
        List<Tuple2<Double, String>> resultRegular = base.executeOnCollections(inputData1, inputData2,
                new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<String, Future<Path>>(),
                        new HashMap<String, Accumulator<?, ?>>(), UnregisteredMetricsGroup.createOperatorMetricGroup()),
                executionConfig);

        assertEquals(expected, new HashSet<>(resultSafe));
        assertEquals(expected, new HashSet<>(resultRegular));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
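The key arrays pair left field 0 with right field 1 and left field 2 with right field 0, so for example ("bar", 2.0, 3) joins (3, "bar"). For comparison, assuming left and right are DataSets of the same tuple types as inputData1 and inputData2, the same composite-key inner join expressed through the DataSet API would look roughly like this (a sketch, not part of the test):

// Hypothetical DataSet API equivalent of the composite-key inner join exercised above:
// left field 0 must equal right field 1, and left field 2 must equal right field 0.
DataSet<Tuple2<Double, String>> joined = left
        .join(right)
        .where(0, 2)
        .equalTo(1, 0)
        .with(joiner);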