Example 21 with ParameterTool

From the class WordCount, method main.

// *************************************************************************
// *************************************************************************
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    // set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // make parameters available in the web interface
    // get input data
    DataStream<String> text;
    if (params.has("input")) {
        // read the text file from given input path
        text = env.readTextFile(params.get("input"));
    } else {
        System.out.println("Executing WordCount example with default input data set.");
        System.out.println("Use --input to specify file input.");
        // get default test text data
        text = env.fromElements(WordCountData.WORDS);
    DataStream<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1)
    text.flatMap(new Tokenizer()).keyBy(0).sum(1);
    // emit result
    if (params.has("output")) {
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
    // execute program
    env.execute("Streaming WordCount");
Example 22 with ParameterTool

From the class ManualConsumerProducerTest, method main.

// Manual test entry point: builds a Kinesis producer fed by a generated string
// stream, then a Kinesis consumer on stream "test-flink" whose records are
// validated in a flatMap. Requires the arguments --region, --accessKey and
// --secretKey (each is read with pt.getRequired).
// NOTE(review): this excerpt appears to have lost its brace-only lines and is
// cut off inside the validating flatMap; the sink wiring and the execute call
// are not visible here.
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    // source of generated events; its use as producer input is not visible in this excerpt
    DataStream<String> simpleStringStream = see.addSource(new ProduceIntoKinesis.EventsGenerator());
    // producer-side AWS settings taken from the command line
    Properties kinesisProducerConfig = new Properties();
    kinesisProducerConfig.setProperty(ProducerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisProducerConfig.setProperty(ProducerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accessKey"));
    kinesisProducerConfig.setProperty(ProducerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretKey"));
    FlinkKinesisProducer<String> kinesis = new FlinkKinesisProducer<>(new KinesisSerializationSchema<String>() {

        // encodes the element's characters with ConfigConstants.DEFAULT_CHARSET and wraps the bytes
        public ByteBuffer serialize(String element) {
            return ByteBuffer.wrap(element.getBytes(ConfigConstants.DEFAULT_CHARSET));

        // every 10th element goes into a different stream
        // (decided by whether the text before the first "-" ends with "0")
        public String getTargetStream(String element) {
            if (element.split("-")[0].endsWith("0")) {
                return "flink-test-2";
            // send to default stream
            return null;
    }, kinesisProducerConfig);
    kinesis.setCustomPartitioner(new KinesisPartitioner<String>() {

        // partition id is the last character of the element
        // NOTE(review): substring(l - 1, l) throws for an empty element — confirm elements are never empty
        public String getPartitionId(String element) {
            int l = element.length();
            return element.substring(l - 1, l);
    // consuming topology
    Properties consumerProps = new Properties();
    consumerProps.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accessKey"));
    consumerProps.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretKey"));
    consumerProps.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    DataStream<String> consuming = see.addSource(new FlinkKinesisConsumer<>("test-flink", new SimpleStringSchema(), consumerProps));
    // validate consumed records for correctness
    consuming.flatMap(new FlatMapFunction<String, String>() {

        // expects records shaped "<non-negative long>-<12 characters>"; any violation throws
        public void flatMap(String value, Collector<String> out) throws Exception {
            String[] parts = value.split("-");
            try {
                // first part must parse as a long and must not be negative
                long l = Long.parseLong(parts[0]);
                if (l < 0) {
                    throw new RuntimeException("Negative");
            } catch (NumberFormatException nfe) {
                throw new RuntimeException("First part of '" + value + "' is not a valid numeric type");
            // second part must be exactly 12 characters long
            if (parts[1].length() != 12) {
                throw new RuntimeException("Second part of '" + value + "' doesn't have 12 characters");
Example 23 with ParameterTool

From the class ManualExactlyOnceWithStreamReshardingTest, method main.

// Manual test entry point: creates a uniquely named Kinesis stream with one
// shard, configures a local Flink mini cluster, and sets up three threads:
// a manual record producer, an exactly-once validating consumer, and a thread
// that splits and then merges the stream's shards. It then waits, with a
// five-minute deadline, for producer and consumer to finish or report an error.
// Requires the arguments --accessKey, --secretKey and --region.
// NOTE(review): this excerpt is damaged. Brace-only lines are missing, several
// loop/branch bodies are empty, and the bare string fragments such as
// "Splitting shard ..."); look like logging calls whose prefix was stripped.
// Restore from the original source before compiling.
public static void main(String[] args) throws Exception {
    // NOTE(review): the string fragment trailing this statement is a remnant of a stripped call
    final ParameterTool pt = ParameterTool.fromArgs(args);"Starting exactly once with stream resharding test");
    // random suffix gives each run its own stream name
    final String streamName = "flink-test-" + UUID.randomUUID().toString();
    final String accessKey = pt.getRequired("accessKey");
    final String secretKey = pt.getRequired("secretKey");
    final String region = pt.getRequired("region");
    final Properties configProps = new Properties();
    configProps.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, accessKey);
    configProps.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, secretKey);
    configProps.setProperty(ConsumerConfigConstants.AWS_REGION, region);
    configProps.setProperty(ConsumerConfigConstants.SHARD_DISCOVERY_INTERVAL_MILLIS, "0");
    final AmazonKinesisClient client = AWSUtil.createKinesisClient(configProps);
    // the stream is first created with 1 shard
    client.createStream(streamName, 1);
    // wait until stream has been created
    DescribeStreamResult status = client.describeStream(streamName);"status {}", status);
    // poll until the stream status reads "ACTIVE"
    while (!status.getStreamDescription().getStreamStatus().equals("ACTIVE")) {
        status = client.describeStream(streamName);"Status of stream {}", status);
    // local mini cluster settings: 1 task manager, 8 slots, memory size 16, restart delay "0 s"
    final Configuration flinkConfig = new Configuration();
    flinkConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
    flinkConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 8);
    flinkConfig.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 16);
    flinkConfig.setString(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_DELAY, "0 s");
    LocalFlinkMiniCluster flink = new LocalFlinkMiniCluster(flinkConfig, false);
    final int flinkPort = flink.getLeaderRPCPort();
    try {
        // we have to use a manual generator here instead of the FlinkKinesisProducer
        // because the FlinkKinesisProducer currently has a problem where records will be resent to a shard
        // when resharding happens; this affects the consumer exactly-once validation test and will never pass
        final AtomicReference<Throwable> producerError = new AtomicReference<>();
        Runnable manualGenerate = new Runnable() {

            // puts records "<i>-<12 random letters>" into the stream in batches of 30,
            // each with a random UUID partition key, for i below TOTAL_EVENT_COUNT
            public void run() {
                AmazonKinesisClient client = AWSUtil.createKinesisClient(configProps);
                int count = 0;
                final int batchSize = 30;
                while (true) {
                    try {
                        Set<PutRecordsRequestEntry> batch = new HashSet<>();
                        for (int i = count; i < count + batchSize; i++) {
                            // NOTE(review): body of this bounds check is missing in the excerpt
                            if (i >= TOTAL_EVENT_COUNT) {
                            batch.add(new PutRecordsRequestEntry().withData(ByteBuffer.wrap(((i) + "-" + RandomStringUtils.randomAlphabetic(12)).getBytes(ConfigConstants.DEFAULT_CHARSET))).withPartitionKey(UUID.randomUUID().toString()));
                        count += batchSize;
                        PutRecordsResult result = client.putRecords(new PutRecordsRequest().withStreamName(streamName).withRecords(batch));
                        // and let this test fail
                        // (any failed record in a batch is recorded as a producer error)
                        if (result.getFailedRecordCount() > 0) {
                            producerError.set(new RuntimeException("The producer has failed records in one of the put batch attempts."));
                        // NOTE(review): termination branch and catch body are missing in the excerpt
                        if (count >= TOTAL_EVENT_COUNT) {
                    } catch (Exception e) {
        Thread producerThread = new Thread(manualGenerate);
        final AtomicReference<Throwable> consumerError = new AtomicReference<>();
        // consumer thread is built by a helper; the numeric arguments are defined by that helper
        Thread consumerThread = ExactlyOnceValidatingConsumerThread.create(TOTAL_EVENT_COUNT, 10000, 2, 500, 500, accessKey, secretKey, region, streamName, consumerError, flinkPort, flinkConfig);
        // reshard the Kinesis stream while the producer / and consumers are running
        Runnable splitShard = new Runnable() {

            public void run() {
                try {
                    // first, split shard in the middle of the hash range
          "Splitting shard ...");
                    // the new starting hash key is 2^127 - 1
                    client.splitShard(streamName, KinesisShardIdGenerator.generateFromShardOrder(0), "170141183460469231731687303715884105727");
                    // wait until the split shard operation finishes updating ...
                    DescribeStreamResult status;
                    // NOTE(review): rand is unused in the visible code; presumably fed a stripped back-off sleep — confirm
                    Random rand = new Random();
                    do {
                        status = null;
                        while (status == null) {
                            // retry until we get status
                            try {
                                status = client.describeStream(streamName);
                            } catch (LimitExceededException lee) {
                                LOG.warn("LimitExceededException while describing stream ... retrying ...");
                    } while (!status.getStreamDescription().getStreamStatus().equals("ACTIVE"));
                    // then merge again
          "Merging shards ...");
                    // merges the shards with order 1 and 2
                    client.mergeShards(streamName, KinesisShardIdGenerator.generateFromShardOrder(1), KinesisShardIdGenerator.generateFromShardOrder(2));
                } catch (InterruptedException iex) {
        Thread splitShardThread = new Thread(splitShard);
        // NOTE(review): no start() calls for the three threads are visible in this excerpt
        boolean deadlinePassed = false;
        // wait at most for five minutes
        long deadline = System.currentTimeMillis() + (1000 * 5 * 60);
        // wait until both producer and consumer finishes, or an unexpected error is thrown
        while ((consumerThread.isAlive() || producerThread.isAlive()) && (producerError.get() == null && consumerError.get() == null)) {
            if (System.currentTimeMillis() >= deadline) {
                LOG.warn("Deadline passed");
                deadlinePassed = true;
                // enough waiting
        // NOTE(review): bodies of the two isAlive() checks below are missing in the excerpt
        if (producerThread.isAlive()) {
        if (consumerThread.isAlive()) {
        // a recorded producer or consumer error is rethrown, wrapped in a RuntimeException
        if (producerError.get() != null) {
  "+++ TEST failed! +++");
            throw new RuntimeException("Producer failed", producerError.get());
        if (consumerError.get() != null) {
  "+++ TEST failed! +++");
            throw new RuntimeException("Consumer failed", consumerError.get());
        // without errors, the outcome depends only on whether the deadline was hit
        if (!deadlinePassed) {
  "+++ TEST passed! +++");
        } else {
  "+++ TEST failed! +++");
    } finally {
        // stopping flink
Example 24 with ParameterTool

From the class ManualProducerTest, method main.

// Manual test entry point: builds a FlinkKinesisProducer with a custom
// serialization schema and a custom partitioner, configured from the
// arguments --region, --accessKey and --secretKey (each read with pt.getRequired).
// NOTE(review): this excerpt appears to have lost its brace-only lines and
// ends after the partitioner; attaching the producer to the stream and
// executing the job are not visible here.
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    // source of generated events; its use as producer input is not visible in this excerpt
    DataStream<String> simpleStringStream = see.addSource(new ProduceIntoKinesis.EventsGenerator());
    // producer-side AWS settings taken from the command line
    Properties kinesisProducerConfig = new Properties();
    kinesisProducerConfig.setProperty(ProducerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisProducerConfig.setProperty(ProducerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accessKey"));
    kinesisProducerConfig.setProperty(ProducerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretKey"));
    FlinkKinesisProducer<String> kinesis = new FlinkKinesisProducer<>(new KinesisSerializationSchema<String>() {

        // encodes the element's characters with ConfigConstants.DEFAULT_CHARSET and wraps the bytes
        public ByteBuffer serialize(String element) {
            return ByteBuffer.wrap(element.getBytes(ConfigConstants.DEFAULT_CHARSET));

        // every 10th element goes into a different stream
        // (decided by whether the text before the first "-" ends with "0")
        public String getTargetStream(String element) {
            if (element.split("-")[0].endsWith("0")) {
                return "flink-test-2";
            // send to default stream
            return null;
    }, kinesisProducerConfig);
    kinesis.setCustomPartitioner(new KinesisPartitioner<String>() {

        // partition id is the last character of the element
        // NOTE(review): substring(l - 1, l) throws for an empty element — confirm elements are never empty
        public String getPartitionId(String element) {
            int l = element.length();
            return element.substring(l - 1, l);
Example 25 with ParameterTool

From the class KMeansDataGenerator, method main.

	/**
	 * Main method to generate data for the {@link KMeans} example program.
	 * <p>
	 * The generator creates two files:
	 * <ul>
	 * <li><code>&lt; output-path &gt;/points</code> for the data points
	 * <li><code>&lt; output-path &gt;/centers</code> for the cluster centers
	 * </ul>
	 * @param args
	 * <ol>
	 * <li>Int: Number of data points
	 * <li>Int: Number of cluster centers
	 * <li><b>Optional</b> String: Output path, default value is {tmp.dir}
	 * <li><b>Optional</b> Double: Standard deviation of data points
	 * <li><b>Optional</b> Double: Value range of cluster centers
	 * <li><b>Optional</b> Long: Random seed
	 * </ol>
	 * @throws IOException if writing one of the output files fails
	 */
public static void main(String[] args) throws IOException {
    // check parameter count
    if (args.length < 2) {
        System.out.println("KMeansDataGenerator -points <num> -k <num clusters> [-output <output-path>] [-stddev <relative stddev>] [-range <centroid range>] [-seed <seed>]");
    // parse parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    final int numDataPoints = params.getInt("points");
    final int k = params.getInt("k");
    final String outDir = params.get("output", System.getProperty(""));
    final double stddev = params.getDouble("stddev", RELATIVE_STDDEV);
    final double range = params.getDouble("range", DEFAULT_VALUE_RANGE);
    final long firstSeed = params.getLong("seed", DEFAULT_SEED);
    final double absoluteStdDev = stddev * range;
    final Random random = new Random(firstSeed);
    // the means around which data points are distributed
    final double[][] means = uniformRandomCenters(random, k, DIMENSIONALITY, range);
    // write the points out
    BufferedWriter pointsOut = null;
    try {
        pointsOut = new BufferedWriter(new FileWriter(new File(outDir + "/" + POINTS_FILE)));
        StringBuilder buffer = new StringBuilder();
        double[] point = new double[DIMENSIONALITY];
        int nextCentroid = 0;
        for (int i = 1; i <= numDataPoints; i++) {
            // generate a point for the current centroid
            double[] centroid = means[nextCentroid];
            for (int d = 0; d < DIMENSIONALITY; d++) {
                point[d] = (random.nextGaussian() * absoluteStdDev) + centroid[d];
            writePoint(point, buffer, pointsOut);
            nextCentroid = (nextCentroid + 1) % k;
    } finally {
        if (pointsOut != null) {
    // write the uniformly distributed centers to a file
    BufferedWriter centersOut = null;
    try {
        centersOut = new BufferedWriter(new FileWriter(new File(outDir + "/" + CENTERS_FILE)));
        StringBuilder buffer = new StringBuilder();
        double[][] centers = uniformRandomCenters(random, k, DIMENSIONALITY, range);
        for (int i = 0; i < k; i++) {
            writeCenter(i + 1, centers[i], buffer, centersOut);
    } finally {
        if (centersOut != null) {
    System.out.println("Wrote " + numDataPoints + " data points to " + outDir + "/" + POINTS_FILE);
    System.out.println("Wrote " + k + " cluster centers to " + outDir + "/" + CENTERS_FILE);
