Use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.
The main method of the class SocketWindowWordCount.
public static void main(String[] args) throws Exception {
    // the host and the port to connect to
    final String hostname;
    final int port;
    try {
        final ParameterTool params = ParameterTool.fromArgs(args);
        hostname = params.has("hostname") ? params.get("hostname") : "localhost";
        port = params.getInt("port");
    } catch (Exception e) {
        System.err.println("No port specified. Please run 'SocketWindowWordCount "
                + "--hostname <hostname> --port <port>', where hostname (localhost by default) "
                + "and port are the address of the text server");
        System.err.println("To start a simple text server, run 'netcat -l <port>' and "
                + "type the input text into the command line");
        return;
    }
    // get the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // get input data by connecting to the socket
    DataStream<String> text = env.socketTextStream(hostname, port, "\n");
    // parse the data, group it, window it, and aggregate the counts
    DataStream<WordWithCount> windowCounts = text
            .flatMap(new FlatMapFunction<String, WordWithCount>() {
                @Override
                public void flatMap(String value, Collector<WordWithCount> out) {
                    for (String word : value.split("\\s")) {
                        out.collect(new WordWithCount(word, 1L));
                    }
                }
            })
            .keyBy("word")
            .timeWindow(Time.seconds(5))
            .reduce(new ReduceFunction<WordWithCount>() {
                @Override
                public WordWithCount reduce(WordWithCount a, WordWithCount b) {
                    return new WordWithCount(a.word, a.count + b.count);
                }
            });
    // print the results with a single thread, rather than in parallel
    windowCounts.print().setParallelism(1);
    env.execute("Socket Window WordCount");
}
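The snippet refers to a WordWithCount type that is not shown above. A minimal sketch of such a POJO, assuming public fields so that keyBy("word") can address the field by name (the field names are inferred from the calls above):

    // Data type for words with count. Public fields and a public no-arg
    // constructor make this a Flink POJO, which is what lets keyBy("word")
    // reference the field by name.
    public static class WordWithCount {

        public String word;
        public long count;

        public WordWithCount() {}

        public WordWithCount(String word, long count) {
            this.word = word;
            this.count = count;
        }

        @Override
        public String toString() {
            return word + " : " + count;
        }
    }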
Use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.
The main method of the class TwitterExample.
// *************************************************************************
// PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    System.out.println("Usage: TwitterExample [--output <path>] "
            + "[--twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> "
            + "--twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret>]");
    // set up the execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);
    env.setParallelism(params.getInt("parallelism", 1));
    // get input data
    DataStream<String> streamSource;
    if (params.has(TwitterSource.CONSUMER_KEY)
            && params.has(TwitterSource.CONSUMER_SECRET)
            && params.has(TwitterSource.TOKEN)
            && params.has(TwitterSource.TOKEN_SECRET)) {
        streamSource = env.addSource(new TwitterSource(params.getProperties()));
    } else {
        System.out.println("Executing TwitterStream example with default props.");
        System.out.println("Use --twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> "
                + "--twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret> "
                + "to specify the authentication info.");
        // get default test text data
        streamSource = env.fromElements(TwitterExampleData.TEXTS);
    }
    DataStream<Tuple2<String, Integer>> tweets = streamSource
            // select English tweets and split them into (word, 1) pairs
            .flatMap(new SelectEnglishAndTokenizeFlatMap())
            // group by the word and sum the counts
            .keyBy(0)
            .sum(1);
    // emit result
    if (params.has("output")) {
        tweets.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        tweets.print();
    }
    // execute program
    env.execute("Twitter Streaming Example");
}
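SelectEnglishAndTokenizeFlatMap is referenced but not shown. A minimal sketch, assuming the tweets arrive as JSON strings and that Jackson's ObjectMapper is available for parsing; the "user.lang" and "text" field layout is an assumption about the tweet format:

    // Sketch: keep only English-language tweets and emit (token, 1) pairs.
    // Assumes each record is a tweet JSON string with "user.lang" and "text" fields.
    // Requires com.fasterxml.jackson.databind.{ObjectMapper, JsonNode}
    // and java.util.StringTokenizer.
    public static class SelectEnglishAndTokenizeFlatMap
            implements FlatMapFunction<String, Tuple2<String, Integer>> {

        private transient ObjectMapper jsonParser;

        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            if (jsonParser == null) {
                // created lazily: ObjectMapper is not serializable, so it is
                // instantiated on the worker rather than shipped with the function
                jsonParser = new ObjectMapper();
            }
            JsonNode jsonNode = jsonParser.readValue(value, JsonNode.class);
            boolean isEnglish = jsonNode.has("user")
                    && jsonNode.get("user").has("lang")
                    && jsonNode.get("user").get("lang").asText().equals("en");
            if (isEnglish && jsonNode.has("text")) {
                // tokenize the tweet text and emit each token with an initial count of 1
                StringTokenizer tokenizer = new StringTokenizer(jsonNode.get("text").asText());
                while (tokenizer.hasMoreTokens()) {
                    String token = tokenizer.nextToken().toLowerCase();
                    if (!token.isEmpty()) {
                        out.collect(new Tuple2<>(token, 1));
                    }
                }
            }
        }
    }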
Use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.
The main method of the class WordCount.
// *************************************************************************
// PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);
    // get input data
    DataSet<String> text;
    if (params.has("input")) {
        // read the text file from given input path
        text = env.readTextFile(params.get("input"));
    } else {
        // get default test text data
        System.out.println("Executing WordCount example with default input data set.");
        System.out.println("Use --input to specify file input.");
        text = WordCountData.getDefaultTextLineDataSet(env);
    }
    DataSet<Tuple2<String, Integer>> counts = text
            // split up the lines in pairs (2-tuples) containing: (word, 1)
            .flatMap(new Tokenizer())
            // group by the word (tuple field 0) and sum the counts (tuple field 1)
            .groupBy(0)
            .sum(1);
    // emit result
    if (params.has("output")) {
        counts.writeAsCsv(params.get("output"), "\n", " ");
        // execute program
        env.execute("WordCount Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        counts.print();
    }
}
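The Tokenizer used above is not shown. A minimal sketch that matches what the pipeline expects (lower-case the line, split on non-word characters, emit (word, 1) pairs):

    // Sketch of the tokenizer: splits a line into lower-cased words and
    // emits each non-empty word paired with an initial count of 1.
    public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
            // normalize and split the line
            String[] tokens = value.toLowerCase().split("\\W+");
            // emit the (word, 1) pairs
            for (String token : tokens) {
                if (token.length() > 0) {
                    out.collect(new Tuple2<>(token, 1));
                }
            }
        }
    }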
Use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.
The main method of the class IterateExample.
// *************************************************************************
// PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    // obtain the execution environment and set the buffer timeout to 1 to enable
    // continuous flushing of the output buffers (lowest latency)
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment().setBufferTimeout(1);
    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);
    // create the input stream of integer pairs
    DataStream<Tuple2<Integer, Integer>> inputStream;
    if (params.has("input")) {
        inputStream = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
    } else {
        System.out.println("Executing Iterate example with default input data set.");
        System.out.println("Use --input to specify file input.");
        inputStream = env.addSource(new RandomFibonacciSource());
    }
    // create an iterative data stream from the input with a 5 second timeout
    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it =
            inputStream.map(new InputMap()).iterate(5000);
    // apply the step function to get the next Fibonacci number,
    // increment the counter, and split the output with the output selector
    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step =
            it.map(new Step()).split(new MySelector());
    // close the iteration by selecting the tuples that were directed to the
    // 'iterate' channel in the output selector
    it.closeWith(step.select("iterate"));
    // to produce the final output, select the tuples directed to the
    // 'output' channel, then get the input pairs that have the greatest iteration counter
    // on a 1 second sliding window
    DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers = step.select("output").map(new OutputMap());
    // emit results
    if (params.has("output")) {
        numbers.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        numbers.print();
    }
    // execute the program
    env.execute("Streaming Iteration Example");
}
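MySelector, which routes tuples to the 'iterate' or 'output' channel, is not shown. A minimal sketch, assuming the Tuple5 layout is (input pair, current Fibonacci pair, step counter) and using an illustrative BOUND threshold:

    // Sketch of the output selector: keep iterating while both Fibonacci
    // values stay below a bound, otherwise send the tuple to the final output.
    // The Tuple5 layout (f0, f1 = input pair; f2, f3 = Fibonacci pair; f4 = counter)
    // and the BOUND value are assumptions for illustration.
    // Requires java.util.{List, ArrayList}.
    public static class MySelector
            implements OutputSelector<Tuple5<Integer, Integer, Integer, Integer, Integer>> {

        private static final int BOUND = 100; // illustrative threshold

        @Override
        public Iterable<String> select(Tuple5<Integer, Integer, Integer, Integer, Integer> value) {
            List<String> output = new ArrayList<>();
            if (value.f2 < BOUND && value.f3 < BOUND) {
                output.add("iterate");
            } else {
                output.add("output");
            }
            return output;
        }
    }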
Use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.
The main method of the class WriteIntoKafka.
public static void main(String[] args) throws Exception {
    ParameterTool parameterTool = ParameterTool.fromArgs(args);
    if (parameterTool.getNumberOfParameters() < 2) {
        System.out.println("Missing parameters!");
        System.out.println("Usage: Kafka --topic <topic> --bootstrap.servers <kafka brokers>");
        return;
    }
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
    // very simple data generator
    DataStream<String> messageStream = env.addSource(new SourceFunction<String>() {

        private static final long serialVersionUID = 6369260445318862378L;

        // volatile so that cancel(), called from another thread, is seen by run()
        private volatile boolean running = true;

        @Override
        public void run(SourceContext<String> ctx) throws Exception {
            long i = 0;
            while (this.running) {
                ctx.collect("Element - " + i++);
                Thread.sleep(500);
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    });
    // write data into Kafka
    messageStream.addSink(new FlinkKafkaProducer08<>(
            parameterTool.getRequired("topic"), new SimpleStringSchema(), parameterTool.getProperties()));
    env.execute("Write into Kafka example");
}
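To run the job, pass the two required parameters, for example (topic name and broker address are placeholders): --topic test --bootstrap.servers localhost:9092. Note that parameterTool.getProperties() forwards every supplied key/value pair to the Kafka producer as its configuration, so bootstrap.servers reaches the producer without any extra wiring.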