
Example 56 with Collector

Use of org.apache.flink.util.Collector in project rocketmq-externals by apache.

From class RocketMQFlinkExample, method main:

public static void main(String[] args) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // enable checkpointing every 3 seconds
    env.enableCheckpointing(3000);
    Properties consumerProps = new Properties();
    consumerProps.setProperty(RocketMQConfig.NAME_SERVER_ADDR, "localhost:9876");
    consumerProps.setProperty(RocketMQConfig.CONSUMER_GROUP, "c002");
    consumerProps.setProperty(RocketMQConfig.CONSUMER_TOPIC, "flink-source2");
    Properties producerProps = new Properties();
    producerProps.setProperty(RocketMQConfig.NAME_SERVER_ADDR, "localhost:9876");
    env.addSource(new RocketMQSource(
                    new SimpleKeyValueDeserializationSchema("id", "address"), consumerProps))
            .name("rocketmq-source")
            .setParallelism(2)
            .process(new ProcessFunction<Map, Map>() {

                @Override
                public void processElement(Map in, Context ctx, Collector<Map> out) throws Exception {
                    HashMap result = new HashMap();
                    result.put("id", in.get("id"));
                    // keep only the last whitespace-separated token of the address
                    String[] arr = in.get("address").toString().split("\\s+");
                    result.put("province", arr[arr.length - 1]);
                    out.collect(result);
                }
            })
            .name("upper-processor")
            .setParallelism(2)
            .addSink(new RocketMQSink(
                    new SimpleKeyValueSerializationSchema("id", "province"),
                    new DefaultTopicSelector("flink-sink2"), producerProps)
                    .withBatchFlushOnCheckpoint(true))
            .name("rocketmq-sink")
            .setParallelism(2);
    try {
        env.execute("rocketmq-flink-example");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Also used: HashMap(java.util.HashMap) SimpleKeyValueDeserializationSchema(org.apache.rocketmq.flink.common.serialization.SimpleKeyValueDeserializationSchema) Properties(java.util.Properties) ProcessFunction(org.apache.flink.streaming.api.functions.ProcessFunction) RocketMQSink(org.apache.rocketmq.flink.RocketMQSink) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) DefaultTopicSelector(org.apache.rocketmq.flink.common.selector.DefaultTopicSelector) RocketMQSource(org.apache.rocketmq.flink.RocketMQSource) SimpleKeyValueSerializationSchema(org.apache.rocketmq.flink.common.serialization.SimpleKeyValueSerializationSchema) Map(java.util.Map)
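
A Collector lets one input element produce zero or many output records, which a plain map() cannot; the ProcessFunction above happens to emit exactly one. A minimal standalone sketch of the one-to-zero and one-to-many cases (hypothetical class and field names, not tied to the RocketMQ schema above):

import java.util.Map;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;

// Drops records without an address and emits one output per
// whitespace-separated token; the Collector makes the variable
// number of emissions per input explicit.
public class TokenizingProcessFunction extends ProcessFunction<Map<String, String>, String> {

    @Override
    public void processElement(Map<String, String> in, Context ctx, Collector<String> out) {
        String address = in.get("address");
        if (address == null) {
            return; // emit nothing for malformed records
        }
        for (String token : address.split("\\s+")) {
            out.collect(token);
        }
    }
}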

Example 57 with Collector

Use of org.apache.flink.util.Collector in project trace-stream-engine by nyomio.

From class ExampleFlinkJobJava, method main:

public static void main(String[] args) throws Exception {
    // the port to connect to
    final int port;
    try {
        final ParameterTool params = ParameterTool.fromArgs(args);
        port = params.getInt("port");
    } catch (Exception e) {
        System.err.println("No port specified. Please run 'SocketWindowWordCount --port <port>'");
        return;
    }
    // get the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // get input data by connecting to the socket
    DataStream<String> text = env.socketTextStream("localhost", port, "\n");
    // parse the data, group it, window it, and aggregate the counts
    DataStream<WordWithCount> windowCounts = text.flatMap(new FlatMapFunction<String, WordWithCount>() {

        @Override
        public void flatMap(String value, Collector<WordWithCount> out) {
            for (String word : value.split("\\s")) {
                out.collect(new WordWithCount(word, 1L));
            }
        }
    }).keyBy("word").timeWindow(Time.seconds(5), Time.seconds(1)).reduce(new ReduceFunction<WordWithCount>() {

        @Override
        public WordWithCount reduce(WordWithCount a, WordWithCount b) {
            return new WordWithCount(a.word, a.count + b.count);
        }
    });
    // print the results with a single thread, rather than in parallel
    windowCounts.print().setParallelism(1);
    env.execute("Socket Window WordCount");
}
Also used: ParameterTool(org.apache.flink.api.java.utils.ParameterTool) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
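
The WordWithCount type used above is not shown in this snippet; in the upstream Flink SocketWindowWordCount example it is a plain POJO along the following lines (a sketch inferred from the usage above; note that keyBy("word") requires a public word field or getter):

// Data type for words with count. Flink POJOs need a public
// no-argument constructor and public fields (or getters/setters).
public static class WordWithCount {

    public String word;
    public long count;

    public WordWithCount() {}

    public WordWithCount(String word, long count) {
        this.word = word;
        this.count = count;
    }

    @Override
    public String toString() {
        return word + " : " + count;
    }
}

To try the job, start a text server first (for example, nc -l <port> on Linux or macOS), then submit the job with --port <port> and type words into the socket.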

Example 58 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

From class KafkaConsumerTestBase, method readSequence:

// ------------------------------------------------------------------------
// Reading / writing test data sets
// ------------------------------------------------------------------------
/**
 * Runs a job using the provided environment to read a sequence of records from a single Kafka
 * topic. The expected starting offset and total value count can be specified individually for
 * each partition. The job is considered successful only if the read results of all partitions
 * match the start offset and value count criteria.
 */
protected void readSequence(
        final StreamExecutionEnvironment env,
        final StartupMode startupMode,
        final Map<KafkaTopicPartition, Long> specificStartupOffsets,
        final Long startupTimestamp,
        final Properties cc,
        final String topicName,
        final Map<Integer, Tuple2<Integer, Integer>> partitionsToValuesCountAndStartOffset)
        throws Exception {
    final int sourceParallelism = partitionsToValuesCountAndStartOffset.keySet().size();
    int finalCountTmp = 0;
    for (Map.Entry<Integer, Tuple2<Integer, Integer>> valuesCountAndStartOffset : partitionsToValuesCountAndStartOffset.entrySet()) {
        finalCountTmp += valuesCountAndStartOffset.getValue().f0;
    }
    final int finalCount = finalCountTmp;
    final TypeInformation<Tuple2<Integer, Integer>> intIntTupleType = TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {
    });
    final TypeInformationSerializationSchema<Tuple2<Integer, Integer>> deser = new TypeInformationSerializationSchema<>(intIntTupleType, env.getConfig());
    // create the consumer
    cc.putAll(secureProps);
    DataStreamSource<Tuple2<Integer, Integer>> source;
    if (useNewSource) {
        KafkaSourceBuilder<Tuple2<Integer, Integer>> sourceBuilder = kafkaServer.getSourceBuilder(topicName, deser, cc);
        Map<TopicPartition, Long> startOffsets = new HashMap<>();
        if (specificStartupOffsets != null) {
            specificStartupOffsets.forEach((ktp, offset) -> startOffsets.put(new TopicPartition(ktp.getTopic(), ktp.getPartition()), offset));
        }
        setKafkaSourceOffset(startupMode, sourceBuilder, startOffsets, startupTimestamp);
        source = env.fromSource(sourceBuilder.build(), WatermarkStrategy.noWatermarks(), "KafkaSource");
    } else {
        FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> consumer = kafkaServer.getConsumer(topicName, deser, cc);
        setKafkaConsumerOffset(startupMode, consumer, specificStartupOffsets, startupTimestamp);
        source = env.addSource(consumer);
    }
    source.setParallelism(sourceParallelism)
            .map(new ThrottledMapper<>(20))
            .setParallelism(sourceParallelism)
            .flatMap(new RichFlatMapFunction<Tuple2<Integer, Integer>, Integer>() {

        private HashMap<Integer, BitSet> partitionsToValueCheck;

        private int count = 0;

        @Override
        public void open(Configuration parameters) throws Exception {
            partitionsToValueCheck = new HashMap<>();
            for (Integer partition : partitionsToValuesCountAndStartOffset.keySet()) {
                partitionsToValueCheck.put(partition, new BitSet());
            }
        }

        @Override
        public void flatMap(Tuple2<Integer, Integer> value, Collector<Integer> out) throws Exception {
            int partition = value.f0;
            int val = value.f1;
            BitSet bitSet = partitionsToValueCheck.get(partition);
            if (bitSet == null) {
                throw new RuntimeException("Got a record from an unknown partition");
            } else {
                bitSet.set(val - partitionsToValuesCountAndStartOffset.get(partition).f1);
            }
            count++;
            LOG.info("Received message {}, total {} messages", value, count);
            // verify if we've seen everything
            if (count == finalCount) {
                for (Map.Entry<Integer, BitSet> partitionsToValueCheck : this.partitionsToValueCheck.entrySet()) {
                    BitSet check = partitionsToValueCheck.getValue();
                    int expectedValueCount = partitionsToValuesCountAndStartOffset.get(partitionsToValueCheck.getKey()).f0;
                    if (check.cardinality() != expectedValueCount) {
                        throw new RuntimeException("Expected cardinality to be " + expectedValueCount + ", but was " + check.cardinality());
                    } else if (check.nextClearBit(0) != expectedValueCount) {
                        throw new RuntimeException("Expected next clear bit to be " + expectedValueCount + ", but was " + check.cardinality());
                    }
                }
                // test has passed
                throw new SuccessException();
            }
        }
    }).setParallelism(1);
    tryExecute(env, "Read data from Kafka");
    LOG.info("Successfully read sequence for verification");
}
Also used: Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) ThrottledMapper(org.apache.flink.streaming.connectors.kafka.testutils.ThrottledMapper) Collector(org.apache.flink.util.Collector) BitSet(java.util.BitSet) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) TypeInformationSerializationSchema(org.apache.flink.api.common.serialization.TypeInformationSerializationSchema) Tuple2(org.apache.flink.api.java.tuple.Tuple2) TopicPartition(org.apache.kafka.common.TopicPartition) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) SuccessException(org.apache.flink.test.util.SuccessException) Map(java.util.Map)
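
The BitSet bookkeeping in the flatMap deserves a note: if a partition is expected to deliver n values starting at value s, then bitSet.set(val - s) maps the values s..s+n-1 onto bits 0..n-1, so cardinality() == n together with nextClearBit(0) == n proves the received values are exactly the expected contiguous range. A worked miniature with made-up numbers:

import java.util.BitSet;

public class BitSetCheckSketch {

    public static void main(String[] args) {
        // expect 3 values starting at 5, i.e. exactly {5, 6, 7}
        BitSet check = new BitSet();
        for (int val : new int[] { 5, 6, 7 }) {
            check.set(val - 5); // sets bits 0, 1, 2
        }
        System.out.println(check.cardinality() == 3);   // true: three distinct values seen
        System.out.println(check.nextClearBit(0) == 3); // true: bits 0..2 are a contiguous prefix
    }
}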

Example 59 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

From class TransitiveClosureNaive, method main:

public static void main(String... args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);
    final int maxIterations = params.getInt("iterations", 10);
    DataSet<Tuple2<Long, Long>> edges;
    if (params.has("edges")) {
        edges = env.readCsvFile(params.get("edges")).fieldDelimiter(" ").types(Long.class, Long.class);
    } else {
        System.out.println("Executing TransitiveClosureNaive example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        edges = ConnectedComponentsData.getDefaultEdgeDataSet(env);
    }
    IterativeDataSet<Tuple2<Long, Long>> paths = edges.iterate(maxIterations);
    DataSet<Tuple2<Long, Long>> nextPaths = paths.join(edges).where(1).equalTo(0)
            .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

        /**
         * left: Path (z,x) - x is reachable from z
         * right: Edge (x,y) - the edge x-->y exists
         * out: Path (z,y) - y is reachable from z
         */
        @Override
        public Tuple2<Long, Long> join(Tuple2<Long, Long> left, Tuple2<Long, Long> right) throws Exception {
            return new Tuple2<Long, Long>(left.f0, right.f1);
        }
    }).withForwardedFieldsFirst("0").withForwardedFieldsSecond("1")
            .union(paths).groupBy(0, 1)
            .reduceGroup(new GroupReduceFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {

        @Override
        public void reduce(Iterable<Tuple2<Long, Long>> values, Collector<Tuple2<Long, Long>> out) throws Exception {
            out.collect(values.iterator().next());
        }
    }).withForwardedFields("0;1");
    DataSet<Tuple2<Long, Long>> newPaths = paths.coGroup(nextPaths).where(0).equalTo(0)
            .with(new CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

        Set<Tuple2<Long, Long>> prevSet = new HashSet<Tuple2<Long, Long>>();

        @Override
        public void coGroup(Iterable<Tuple2<Long, Long>> prevPaths, Iterable<Tuple2<Long, Long>> nextPaths, Collector<Tuple2<Long, Long>> out) throws Exception {
            for (Tuple2<Long, Long> prev : prevPaths) {
                prevSet.add(prev);
            }
            for (Tuple2<Long, Long> next : nextPaths) {
                if (!prevSet.contains(next)) {
                    out.collect(next);
                }
            }
        }
    }).withForwardedFieldsFirst("0").withForwardedFieldsSecond("0");
    DataSet<Tuple2<Long, Long>> transitiveClosure = paths.closeWith(nextPaths, newPaths);
    // emit result
    if (params.has("output")) {
        transitiveClosure.writeAsCsv(params.get("output"), "\n", " ");
        // execute program explicitly, because file sinks are lazy
        env.execute("Transitive Closure Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        transitiveClosure.print();
    }
}
Also used: ParameterTool(org.apache.flink.api.java.utils.ParameterTool) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) HashSet(java.util.HashSet)
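
For readers new to the DataSet iteration API: iterate(maxIterations) opens a bulk iteration, and closeWith(nextPaths, newPaths) feeds nextPaths back as the next superstep's paths while treating newPaths as the termination criterion, so the loop stops early once newPaths is empty, i.e. once no new reachability pairs are discovered. A minimal standalone sketch of that contract (a hypothetical job on unrelated data):

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;

public class CloseWithSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // start from 1..10 and allow at most 100 supersteps
        IterativeDataSet<Long> loop = env.generateSequence(1, 10).iterate(100);
        // one superstep: integer-halve every element
        DataSet<Long> next = loop.map(new MapFunction<Long, Long>() {

            @Override
            public Long map(Long x) {
                return x / 2;
            }
        });
        // termination criterion: stop as soon as no non-zero elements remain
        DataSet<Long> nonZero = next.filter(new FilterFunction<Long>() {

            @Override
            public boolean filter(Long x) {
                return x != 0;
            }
        });
        loop.closeWith(next, nonZero).print(); // all zeros after four supersteps
    }
}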

Example 60 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

From class InnerJoinOperatorBaseTest, method testTupleBaseJoiner:

@Test
public void testTupleBaseJoiner() {
    final FlatJoinFunction<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>> joiner =
            new FlatJoinFunction<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>>() {

        @Override
        public void join(Tuple3<String, Double, Integer> first, Tuple2<Integer, String> second, Collector<Tuple2<Double, String>> out) {
            assertEquals(first.f0, second.f1);
            assertEquals(first.f2, second.f0);
            out.collect(new Tuple2<>(first.f1, second.f0.toString()));
        }
    };
    final TupleTypeInfo<Tuple3<String, Double, Integer>> leftTypeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, Double.class, Integer.class);
    final TupleTypeInfo<Tuple2<Integer, String>> rightTypeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, String.class);
    final TupleTypeInfo<Tuple2<Double, String>> outTypeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Double.class, String.class);
    final int[] leftKeys = new int[] { 0, 2 };
    final int[] rightKeys = new int[] { 1, 0 };
    final String taskName = "Collection based tuple joiner";
    final BinaryOperatorInformation<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>> binaryOpInfo =
            new BinaryOperatorInformation<>(leftTypeInfo, rightTypeInfo, outTypeInfo);
    final InnerJoinOperatorBase<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>, FlatJoinFunction<Tuple3<String, Double, Integer>, Tuple2<Integer, String>, Tuple2<Double, String>>> base =
            new InnerJoinOperatorBase<>(joiner, binaryOpInfo, leftKeys, rightKeys, taskName);
    final List<Tuple3<String, Double, Integer>> inputData1 = new ArrayList<Tuple3<String, Double, Integer>>(Arrays.asList(new Tuple3<>("foo", 42.0, 1), new Tuple3<>("bar", 1.0, 2), new Tuple3<>("bar", 2.0, 3), new Tuple3<>("foobar", 3.0, 4), new Tuple3<>("bar", 3.0, 3)));
    final List<Tuple2<Integer, String>> inputData2 = new ArrayList<Tuple2<Integer, String>>(Arrays.asList(new Tuple2<>(3, "bar"), new Tuple2<>(4, "foobar"), new Tuple2<>(2, "foo")));
    final Set<Tuple2<Double, String>> expected = new HashSet<Tuple2<Double, String>>(Arrays.asList(new Tuple2<>(2.0, "3"), new Tuple2<>(3.0, "3"), new Tuple2<>(3.0, "4")));
    try {
        final TaskInfo taskInfo = new TaskInfo("op", 1, 0, 1, 0);
        ExecutionConfig executionConfig = new ExecutionConfig();
        executionConfig.disableObjectReuse();
        List<Tuple2<Double, String>> resultSafe = base.executeOnCollections(inputData1, inputData2,
                new RuntimeUDFContext(taskInfo, null, executionConfig,
                        new HashMap<String, Future<Path>>(), new HashMap<String, Accumulator<?, ?>>(),
                        UnregisteredMetricsGroup.createOperatorMetricGroup()),
                executionConfig);
        executionConfig.enableObjectReuse();
        List<Tuple2<Double, String>> resultRegular = base.executeOnCollections(inputData1, inputData2,
                new RuntimeUDFContext(taskInfo, null, executionConfig,
                        new HashMap<String, Future<Path>>(), new HashMap<String, Accumulator<?, ?>>(),
                        UnregisteredMetricsGroup.createOperatorMetricGroup()),
                executionConfig);
        assertEquals(expected, new HashSet<>(resultSafe));
        assertEquals(expected, new HashSet<>(resultRegular));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FlatJoinFunction(org.apache.flink.api.common.functions.FlatJoinFunction) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TaskInfo(org.apache.flink.api.common.TaskInfo) Collector(org.apache.flink.util.Collector) RuntimeUDFContext(org.apache.flink.api.common.functions.util.RuntimeUDFContext) BinaryOperatorInformation(org.apache.flink.api.common.operators.BinaryOperatorInformation) HashSet(java.util.HashSet) Path(org.apache.flink.core.fs.Path) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)
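
Note that the test drives the same joiner twice through executeOnCollections, once with object reuse disabled (resultSafe) and once enabled (resultRegular), verifying that the function produces identical results under both of Flink's execution modes.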

Aggregations

Collector (org.apache.flink.util.Collector): 80
Test (org.junit.Test): 60
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 33
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 32
Configuration (org.apache.flink.configuration.Configuration): 27
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 19
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 18
ArrayList (java.util.ArrayList): 16
DataStream (org.apache.flink.streaming.api.datastream.DataStream): 16
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 16
HashMap (java.util.HashMap): 14
List (java.util.List): 14
RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 14
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 12
IOException (java.io.IOException): 11
Arrays (java.util.Arrays): 11
Map (java.util.Map): 11
FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction): 11
Assert.assertTrue (org.junit.Assert.assertTrue): 11
InternalWindowFunction (org.apache.flink.streaming.runtime.operators.windowing.functions.InternalWindowFunction): 10