Search in sources :

Example 36 with ParameterTool

use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.

the class SocketWindowWordCount method main.

/**
 * Entry point of the socket window word count example.
 *
 * <p>Reads lines of text from a socket, splits them on whitespace, and prints the
 * per-word counts of each 5 second window using a single printing task.
 *
 * @param args command line arguments; {@code --port <port>} is mandatory and
 *             {@code --hostname <hostname>} falls back to {@code localhost}
 * @throws Exception if the job execution fails
 */
public static void main(String[] args) throws Exception {
    // address of the text server to read from
    final String hostname;
    final int port;
    try {
        final ParameterTool cli = ParameterTool.fromArgs(args);
        if (cli.has("hostname")) {
            hostname = cli.get("hostname");
        } else {
            hostname = "localhost";
        }
        port = cli.getInt("port");
    } catch (Exception e) {
        // any failure while reading the arguments ends up here: print the usage help and stop
        System.err.println("No port specified. Please run 'SocketWindowWordCount "
                + "--hostname <hostname> --port <port>', where hostname (localhost by default) "
                + "and port is the address of the text server");
        System.err.println("To start a simple text server, run 'netcat -l <port>' and "
                + "type the input text into the command line");
        return;
    }

    // emits one (word, 1) record per whitespace-separated token of a line
    final FlatMapFunction<String, WordWithCount> tokenizer = new FlatMapFunction<String, WordWithCount>() {

        @Override
        public void flatMap(String value, Collector<WordWithCount> out) {
            final String[] tokens = value.split("\\s");
            for (int i = 0; i < tokens.length; i++) {
                out.collect(new WordWithCount(tokens[i], 1L));
            }
        }
    };

    // merges two records of the same word by adding up their counts
    final ReduceFunction<WordWithCount> summer = new ReduceFunction<WordWithCount>() {

        @Override
        public WordWithCount reduce(WordWithCount a, WordWithCount b) {
            final long total = a.count + b.count;
            return new WordWithCount(a.word, total);
        }
    };

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // source: newline-delimited text coming from the socket
    DataStream<String> text = env.socketTextStream(hostname, port, "\n");

    // tokenize, key by word, and sum the counts inside 5 second windows
    DataStream<WordWithCount> words = text.flatMap(tokenizer);
    DataStream<WordWithCount> windowCounts = words
            .keyBy("word")
            .timeWindow(Time.seconds(5))
            .reduce(summer);

    // the sink runs with parallelism 1 so the results are printed by a single thread
    windowCounts.print().setParallelism(1);

    env.execute("Socket Window WordCount");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 37 with ParameterTool

use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.

the class TwitterExample method main.

// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Entry point of the Twitter streaming example.
 *
 * <p>Uses a {@code TwitterSource} when all four authentication parameters are given
 * and the bundled sample texts otherwise. The result is written to the
 * {@code --output} path if present, or printed to stdout.
 *
 * @param args command line arguments, see the usage string printed at start-up
 * @throws Exception if the job execution fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool cli = ParameterTool.fromArgs(args);

    // the usage line is printed unconditionally
    System.out.println("Usage: TwitterExample [--output <path>] "
            + "[--twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> --twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret>]");

    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    // expose the parameters in the web interface
    environment.getConfig().setGlobalJobParameters(cli);

    final int parallelism = cli.getInt("parallelism", 1);
    environment.setParallelism(parallelism);

    // all four credentials are needed to talk to Twitter
    final boolean credentialsGiven = cli.has(TwitterSource.CONSUMER_KEY)
            && cli.has(TwitterSource.CONSUMER_SECRET)
            && cli.has(TwitterSource.TOKEN)
            && cli.has(TwitterSource.TOKEN_SECRET);

    DataStream<String> streamSource;
    if (!credentialsGiven) {
        System.out.println("Executing TwitterStream example with default props.");
        System.out.println("Use --twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> "
                + "--twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret> specify the authentication info.");
        // fall back to the bundled sample texts
        streamSource = environment.fromElements(TwitterExampleData.TEXTS);
    } else {
        streamSource = environment.addSource(new TwitterSource(cli.getProperties()));
    }

    // tokenize, key by tuple field 0 and sum tuple field 1
    DataStream<Tuple2<String, Integer>> tokens = streamSource.flatMap(new SelectEnglishAndTokenizeFlatMap());
    DataStream<Tuple2<String, Integer>> tweets = tokens.keyBy(0).sum(1);

    // emit the result
    if (!cli.has("output")) {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        tweets.print();
    } else {
        tweets.writeAsText(cli.get("output"));
    }

    environment.execute("Twitter Streaming Example");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) TwitterSource(org.apache.flink.streaming.connectors.twitter.TwitterSource) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 38 with ParameterTool

use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.

the class WordCount method main.

// *************************************************************************
//     PROGRAM
// *************************************************************************
/**
 * Entry point of the batch WordCount example.
 *
 * <p>Reads the text from {@code --input} (or a built-in default data set), runs it
 * through {@code Tokenizer}, groups by tuple field 0 and sums tuple field 1. The
 * result is written as CSV to {@code --output} or printed to stdout.
 *
 * @param args command line arguments ({@code --input <path>}, {@code --output <path>})
 * @throws Exception if reading, writing or executing the program fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool cli = ParameterTool.fromArgs(args);

    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    // expose the parameters in the web interface
    environment.getConfig().setGlobalJobParameters(cli);

    // pick the input: a user supplied file or the bundled default text
    DataSet<String> text;
    if (!cli.has("input")) {
        System.out.println("Executing WordCount example with default input data set.");
        System.out.println("Use --input to specify file input.");
        text = WordCountData.getDefaultTextLineDataSet(environment);
    } else {
        text = environment.readTextFile(cli.get("input"));
    }

    // tokenize the lines, then group on field 0 and sum field 1
    DataSet<Tuple2<String, Integer>> tokens = text.flatMap(new Tokenizer());
    DataSet<Tuple2<String, Integer>> counts = tokens.groupBy(0).sum(1);

    // without an output path the result is only printed; execute() is not called on this path
    if (!cli.has("output")) {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        counts.print();
        return;
    }

    // with an output path: register the CSV sink, then run the program explicitly
    counts.writeAsCsv(cli.get("output"), "\n", " ");
    environment.execute("WordCount Example");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2)

Example 39 with ParameterTool

use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.

the class IterateExample method main.

// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Entry point of the streaming iteration example.
 *
 * <p>Builds an iterative stream from integer pairs (read from {@code --input} or
 * produced by {@code RandomFibonacciSource}), applies {@code Step} inside the
 * iteration, and routes the tuples to the {@code "iterate"} or {@code "output"}
 * channel through {@code MySelector}. The output is written to {@code --output}
 * or printed to stdout.
 *
 * @param args command line arguments ({@code --input <path>}, {@code --output <path>})
 * @throws Exception if the job execution fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool cli = ParameterTool.fromArgs(args);

    // buffer timeout of 1 keeps the output buffers flushing continuously (lowest latency)
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment().setBufferTimeout(1);

    // expose the parameters in the web interface
    environment.getConfig().setGlobalJobParameters(cli);

    // source of integer pairs: a text file if given, a generating source otherwise
    DataStream<Tuple2<Integer, Integer>> inputStream;
    if (!cli.has("input")) {
        System.out.println("Executing Iterate example with default input data set.");
        System.out.println("Use --input to specify file input.");
        inputStream = environment.addSource(new RandomFibonacciSource());
    } else {
        inputStream = environment.readTextFile(cli.get("input")).map(new FibonacciInputMap());
    }

    // open the iteration on the mapped input, using a 5000 ms timeout
    DataStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> seeded = inputStream.map(new InputMap());
    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> loop = seeded.iterate(5000);

    // run the step function and split its output with the output selector
    DataStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> stepped = loop.map(new Step());
    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> branches = stepped.split(new MySelector());

    // tuples on the 'iterate' channel are fed back into the loop
    loop.closeWith(branches.select("iterate"));

    // tuples on the 'output' channel form the final result
    DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers = branches.select("output").map(new OutputMap());

    // emit the results
    if (!cli.has("output")) {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        numbers.print();
    } else {
        numbers.writeAsText(cli.get("output"));
    }

    environment.execute("Streaming Iteration Example");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) Tuple5(org.apache.flink.api.java.tuple.Tuple5) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 40 with ParameterTool

use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.

the class WriteIntoKafka method main.

/**
 * Entry point of the "write into Kafka" example.
 *
 * <p>Generates the strings {@code "Element - 0"}, {@code "Element - 1"}, ... at a
 * rate of one every 500 ms and writes them to the Kafka topic given by
 * {@code --topic}. All parsed parameters are also handed to the producer as its
 * properties.
 *
 * @param args command line arguments; at least two parameters are expected
 *             ({@code --topic <topic> --bootstrap.servers <kafka brokers>})
 * @throws Exception if the job execution fails
 */
public static void main(String[] args) throws Exception {
    ParameterTool parameterTool = ParameterTool.fromArgs(args);
    if (parameterTool.getNumberOfParameters() < 2) {
        System.out.println("Missing parameters!");
        System.out.println("Usage: Kafka --topic <topic> --bootstrap.servers <kafka brokers>");
        return;
    }
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    // restart the job up to 4 times, waiting 10000 ms between attempts
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
    // very simple data generator
    DataStream<String> messageStream = env.addSource(new SourceFunction<String>() {

        private static final long serialVersionUID = 6369260445318862378L;

        // volatile: the SourceFunction contract allows cancel() to be called from a
        // different thread than the one executing run(), so the write must be
        // visible to the loop below
        private volatile boolean running = true;

        @Override
        public void run(SourceContext<String> ctx) throws Exception {
            long i = 0;
            while (this.running) {
                ctx.collect("Element - " + i++);
                Thread.sleep(500);
            }
        }

        @Override
        public void cancel() {
            // makes the loop in run() exit on its next check of the flag
            running = false;
        }
    });
    // write data into Kafka
    messageStream.addSink(new FlinkKafkaProducer08<>(parameterTool.getRequired("topic"), new SimpleStringSchema(), parameterTool.getProperties()));
    env.execute("Write into Kafka example");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Aggregations

ParameterTool (org.apache.flink.api.java.utils.ParameterTool)43 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)19 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)19 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)15 JobExecutionResult (org.apache.flink.api.common.JobExecutionResult)7 NumberFormat (java.text.NumberFormat)6 Properties (java.util.Properties)6 ProgramParametrizationException (org.apache.flink.client.program.ProgramParametrizationException)6 JDKRandomGeneratorFactory (org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory)6 LongValue (org.apache.flink.types.LongValue)6 NullValue (org.apache.flink.types.NullValue)6 Graph (org.apache.flink.graph.Graph)5 GraphCsvReader (org.apache.flink.graph.GraphCsvReader)5 LongValueToUnsignedIntValue (org.apache.flink.graph.asm.translate.translators.LongValueToUnsignedIntValue)5 RMatGraph (org.apache.flink.graph.generator.RMatGraph)5 RandomGenerableFactory (org.apache.flink.graph.generator.random.RandomGenerableFactory)5 SimpleStringSchema (org.apache.flink.streaming.util.serialization.SimpleStringSchema)5 IntValue (org.apache.flink.types.IntValue)5 StringValue (org.apache.flink.types.StringValue)4 DataSet (org.apache.flink.api.java.DataSet)3