Search in sources :

Example 36 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class KafkaProducerTestBase method runCustomPartitioningTest.

/**
 * Runs a producing and a consuming topology against a three-partition test topic to check
 * that records written through a custom partitioner are read back consistently.
 *
 * <pre>
 *             +------> (sink) --+--> [KAFKA-1] --> (source) -> (map) --+
 *            /                  |                                       \
 *           /                   |                                        \
 * (source) ----------> (sink) --+--> [KAFKA-2] --> (source) -> (map) -----+-> (sink)
 *           \                   |                                        /
 *            \                  |                                       /
 *             +------> (sink) --+--> [KAFKA-3] --> (source) -> (map) --+
 * </pre>
 *
 * <p>The mapper validates that the values come consistently from the correct Kafka partition:
 * every mapper instance remembers the partition (key modulo parallelism) of the first record
 * it sees and asserts that all later records map to the same one.
 *
 * <p>The final sink counts the records per partition and ends the job by throwing a
 * {@code SuccessException} once every partition has delivered at least 100 records.
 *
 * <p>Any exception that escapes the topology is printed and turned into a test failure.
 */
public void runCustomPartitioningTest() {
    try {
        LOG.info("Starting KafkaProducerITCase.testCustomPartitioning()");
        final String topic = "customPartitioningTestTopic";
        final int parallelism = 3;
        createTestTopic(topic, parallelism, 1);
        TypeInformation<Tuple2<Long, String>> longStringInfo = TypeInfoParser.parse("Tuple2<Long, String>");
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
        env.setRestartStrategy(RestartStrategies.noRestart());
        env.getConfig().disableSysoutLogging();
        TypeInformationSerializationSchema<Tuple2<Long, String>> serSchema = new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());
        TypeInformationSerializationSchema<Tuple2<Long, String>> deserSchema = new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());
        // ------ producing topology ---------
        // source has DOP 1 to make sure it generates no duplicates
        DataStream<Tuple2<Long, String>> stream = env.addSource(new SourceFunction<Tuple2<Long, String>>() {

            // volatile: cancel() is invoked from a different thread than run(), so the
            // write must be visible to the emitting loop for the source to terminate
            private volatile boolean running = true;

            @Override
            public void run(SourceContext<Tuple2<Long, String>> ctx) throws Exception {
                // emits (0, "kafka-0"), (1, "kafka-1"), ... until cancelled
                long cnt = 0;
                while (running) {
                    ctx.collect(new Tuple2<Long, String>(cnt, "kafka-" + cnt));
                    cnt++;
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        }).setParallelism(1);
        Properties props = new Properties();
        props.putAll(FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings));
        props.putAll(secureProps);
        // sink writes into the test topic, using the CustomPartitioner to pick the partition
        kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(serSchema), props, new CustomPartitioner(parallelism)).setParallelism(parallelism);
        // ------ consuming topology ---------
        Properties consumerProps = new Properties();
        consumerProps.putAll(standardProps);
        consumerProps.putAll(secureProps);
        FlinkKafkaConsumerBase<Tuple2<Long, String>> source = kafkaServer.getConsumer(topic, deserSchema, consumerProps);
        env.addSource(source).setParallelism(parallelism).map(new RichMapFunction<Tuple2<Long, String>, Integer>() {

            // partition of the first record seen by this mapper instance; -1 until then
            private int ourPartition = -1;

            @Override
            public Integer map(Tuple2<Long, String> value) {
                int partition = value.f0.intValue() % parallelism;
                if (ourPartition != -1) {
                    assertEquals("inconsistent partitioning", ourPartition, partition);
                } else {
                    ourPartition = partition;
                }
                return partition;
            }
        }).setParallelism(parallelism).addSink(new SinkFunction<Integer>() {

            // number of records received so far, indexed by partition
            private final int[] valuesPerPartition = new int[parallelism];

            @Override
            public void invoke(Integer value) throws Exception {
                valuesPerPartition[value]++;
                // the test is done once every partition has contributed at least 100 records
                boolean missing = false;
                for (int i : valuesPerPartition) {
                    if (i < 100) {
                        missing = true;
                        break;
                    }
                }
                if (!missing) {
                    throw new SuccessException();
                }
            }
        }).setParallelism(1);
        tryExecute(env, "custom partitioning test");
        deleteTestTopic(topic);
        LOG.info("Finished KafkaProducerITCase.testCustomPartitioning()");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) KeyedSerializationSchemaWrapper(org.apache.flink.streaming.util.serialization.KeyedSerializationSchemaWrapper) Properties(java.util.Properties) SuccessException(org.apache.flink.test.util.SuccessException) TypeInformationSerializationSchema(org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 37 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class KafkaShortRetentionTestBase method runFailOnAutoOffsetResetNone.

/**
 * Ensures that the consumer is properly failing if "auto.offset.reset" is set to "none".
 *
 * <p>The job is expected to fail with an exception whose cause chain contains one of the
 * known "no offset available" messages. Any other failure is rethrown unchanged, and a job
 * that returns without failing is reported as an {@link AssertionError}. The test topic is
 * deleted on every exit path.
 *
 * @throws Exception if the job fails for a reason other than the expected missing-offset error
 */
public void runFailOnAutoOffsetResetNone() throws Exception {
    final String topic = "auto-offset-reset-none-test";
    final int parallelism = 1;
    kafkaServer.createTestTopic(topic, parallelism, 1);
    try {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flink.getLeaderRPCPort());
        env.setParallelism(parallelism);
        // fail immediately
        env.setRestartStrategy(RestartStrategies.noRestart());
        env.getConfig().disableSysoutLogging();
        // ----------- add consumer ----------
        Properties customProps = new Properties();
        customProps.putAll(standardProps);
        customProps.putAll(secureProps);
        // test that "none" leads to an exception
        customProps.setProperty("auto.offset.reset", "none");
        FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);
        DataStreamSource<String> consuming = env.addSource(source);
        consuming.addSink(new DiscardingSink<String>());
        boolean jobFailed = false;
        try {
            env.execute("Test auto offset reset none");
        } catch (Throwable e) {
            jobFailed = true;
            // check if correct exception has been thrown; walk the cause chain null-safely
            // so that an unexpectedly shaped failure is rethrown instead of being masked
            // by a NullPointerException
            boolean expectedFailure = false;
            for (Throwable t = e; t != null && !expectedFailure; t = t.getCause()) {
                String message = t.getMessage();
                expectedFailure = message != null
                        // kafka 0.8
                        && (message.contains("Unable to find previous offset")
                        // kafka 0.9
                        || message.contains("Undefined offset with no reset policy for partition"));
            }
            if (!expectedFailure) {
                throw e;
            }
        }
        if (!jobFailed) {
            // the whole point of the test is that the job must not run successfully
            throw new AssertionError("Job finished without failing although \"auto.offset.reset\" is set to \"none\"");
        }
    } finally {
        // clean up the topic even if the test fails
        kafkaServer.deleteTestTopic(topic);
    }
}
Also used : SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Properties(java.util.Properties)

Example 38 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class ConsumeFromKinesis method main.

/**
 * Example entry point: attaches a {@code FlinkKinesisConsumer} (constructed with
 * "flink-test") to a streaming job with parallelism 1 and prints the consumed strings.
 *
 * @param args command line arguments; "region", "accesskey" and "secretkey" are read
 *             via {@code ParameterTool.getRequired}
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    // AWS settings for the consumer, all taken from the command line
    final Properties consumerConfig = new Properties();
    consumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, params.getRequired("region"));
    consumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, params.getRequired("accesskey"));
    consumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, params.getRequired("secretkey"));

    final FlinkKinesisConsumer<String> consumer =
            new FlinkKinesisConsumer<>("flink-test", new SimpleStringSchema(), consumerConfig);
    final DataStream<String> records = env.addSource(consumer);
    records.print();

    env.execute();
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Properties(java.util.Properties)

Example 39 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class ProduceIntoKinesis method main.

/**
 * Example entry point: feeds the strings produced by {@code EventsGenerator} into a
 * {@code FlinkKinesisProducer} whose default stream is "flink-test" and whose default
 * partition is "0", using a streaming job with parallelism 1.
 *
 * @param args command line arguments; "region", "accessKey" and "secretKey" are read
 *             via {@code ParameterTool.getRequired}
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    final DataStream<String> events = env.addSource(new EventsGenerator());

    // AWS settings for the producer, all taken from the command line
    final Properties producerConfig = new Properties();
    producerConfig.setProperty(ProducerConfigConstants.AWS_REGION, params.getRequired("region"));
    producerConfig.setProperty(ProducerConfigConstants.AWS_ACCESS_KEY_ID, params.getRequired("accessKey"));
    producerConfig.setProperty(ProducerConfigConstants.AWS_SECRET_ACCESS_KEY, params.getRequired("secretKey"));

    final FlinkKinesisProducer<String> producer =
            new FlinkKinesisProducer<>(new SimpleStringSchema(), producerConfig);
    producer.setFailOnError(true);
    producer.setDefaultStream("flink-test");
    producer.setDefaultPartition("0");

    events.addSink(producer);
    env.execute();
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) FlinkKinesisProducer(org.apache.flink.streaming.connectors.kinesis.FlinkKinesisProducer) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Properties(java.util.Properties)

Example 40 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class KinesisEventsGeneratorProducerThread method create.

/**
 * Builds (but does not start) a thread that runs a producing topology on a remote
 * environment at "localhost": an {@code EventsGenerator} source with parallelism 1
 * feeding a {@code FlinkKinesisProducer}.
 *
 * @param totalEventCount   value handed to the {@code EventsGenerator} constructor
 * @param parallelism       parallelism set on the execution environment
 * @param awsAccessKey      value for {@code AWSConfigConstants.AWS_ACCESS_KEY_ID}
 * @param awsSecretKey      value for {@code AWSConfigConstants.AWS_SECRET_ACCESS_KEY}
 * @param awsRegion         value for {@code AWSConfigConstants.AWS_REGION}
 * @param kinesisStreamName default stream of the producer
 * @param errorHandler      receives the exception if running the topology fails
 * @param flinkPort         port of the remote environment
 * @param flinkConfig       configuration passed to the remote environment
 * @return the not yet started thread
 */
public static Thread create(final int totalEventCount, final int parallelism, final String awsAccessKey, final String awsSecretKey, final String awsRegion, final String kinesisStreamName, final AtomicReference<Throwable> errorHandler, final int flinkPort, final Configuration flinkConfig) {
    return new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                final StreamExecutionEnvironment env =
                        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort, flinkConfig);
                env.setParallelism(parallelism);

                // start data generator
                final DataStream<String> generatedEvents = env
                        .addSource(new KinesisEventsGeneratorProducerThread.EventsGenerator(totalEventCount))
                        .setParallelism(1);

                final Properties awsProps = new Properties();
                awsProps.setProperty(AWSConfigConstants.AWS_ACCESS_KEY_ID, awsAccessKey);
                awsProps.setProperty(AWSConfigConstants.AWS_SECRET_ACCESS_KEY, awsSecretKey);
                awsProps.setProperty(AWSConfigConstants.AWS_REGION, awsRegion);

                final FlinkKinesisProducer<String> producer =
                        new FlinkKinesisProducer<>(new SimpleStringSchema(), awsProps);
                producer.setFailOnError(true);
                producer.setDefaultStream(kinesisStreamName);
                producer.setDefaultPartition("0");
                generatedEvents.addSink(producer);

                LOG.info("Starting producing topology");
                env.execute("Producing topology");
                LOG.info("Producing topo finished");
            } catch (Exception e) {
                // hand the failure over to the caller instead of losing it in this thread
                LOG.warn("Error while running producing topology", e);
                errorHandler.set(e);
            }
        }
    });
}
Also used : FlinkKinesisProducer(org.apache.flink.streaming.connectors.kinesis.FlinkKinesisProducer) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Properties(java.util.Properties)

Aggregations

StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)383 Test (org.junit.Test)286 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)192 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)81 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)75 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)48 EventTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.EventTimeTrigger)42 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)34 Properties (java.util.Properties)32 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)31 TumblingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows)30 TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)27 SuccessException (org.apache.flink.test.util.SuccessException)27 IOException (java.io.IOException)24 Configuration (org.apache.flink.configuration.Configuration)24 SlidingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows)24 KeySelector (org.apache.flink.api.java.functions.KeySelector)22 ProcessingTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.ProcessingTimeTrigger)21 ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor)20 MapFunction (org.apache.flink.api.common.functions.MapFunction)19