Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
The class KafkaProducerTestBase, method runCustomPartitioningTest.
/**
 *
 * <pre>
 *             +------> (sink) --+--> [KAFKA-1] --> (source) -> (map) --+
 *            /                  |                                       \
 *           /                   |                                        \
 * (source) ----------> (sink) --+--> [KAFKA-2] --> (source) -> (map) -----+-> (sink)
 *           \                   |                                        /
 *            \                  |                                       /
 *             +------> (sink) --+--> [KAFKA-3] --> (source) -> (map) --+
 * </pre>
 *
 * The mapper validates that the values come consistently from the correct Kafka partition.
 *
 * The final sink validates that there are no duplicates and that all partitions are present.
 */
public void runCustomPartitioningTest() {
    try {
        LOG.info("Starting KafkaProducerITCase.testCustomPartitioning()");

        final String topic = "customPartitioningTestTopic";
        final int parallelism = 3;

        createTestTopic(topic, parallelism, 1);

        TypeInformation<Tuple2<Long, String>> longStringInfo = TypeInfoParser.parse("Tuple2<Long, String>");

        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
        env.setRestartStrategy(RestartStrategies.noRestart());
        env.getConfig().disableSysoutLogging();

        TypeInformationSerializationSchema<Tuple2<Long, String>> serSchema =
                new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());
        TypeInformationSerializationSchema<Tuple2<Long, String>> deserSchema =
                new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());

        // ------ producing topology ---------

        // source has DOP 1 to make sure it generates no duplicates
        DataStream<Tuple2<Long, String>> stream = env.addSource(new SourceFunction<Tuple2<Long, String>>() {

            private boolean running = true;

            @Override
            public void run(SourceContext<Tuple2<Long, String>> ctx) throws Exception {
                long cnt = 0;
                while (running) {
                    ctx.collect(new Tuple2<Long, String>(cnt, "kafka-" + cnt));
                    cnt++;
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        }).setParallelism(1);

        Properties props = new Properties();
        props.putAll(FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings));
        props.putAll(secureProps);

        // sink that writes each record into the Kafka partition chosen by the custom partitioner
        kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(serSchema), props,
                new CustomPartitioner(parallelism)).setParallelism(parallelism);

        // ------ consuming topology ---------

        Properties consumerProps = new Properties();
        consumerProps.putAll(standardProps);
        consumerProps.putAll(secureProps);

        FlinkKafkaConsumerBase<Tuple2<Long, String>> source = kafkaServer.getConsumer(topic, deserSchema, consumerProps);

        env.addSource(source).setParallelism(parallelism)
            .map(new RichMapFunction<Tuple2<Long, String>, Integer>() {

                private int ourPartition = -1;

                @Override
                public Integer map(Tuple2<Long, String> value) {
                    int partition = value.f0.intValue() % parallelism;
                    if (ourPartition != -1) {
                        assertEquals("inconsistent partitioning", ourPartition, partition);
                    } else {
                        ourPartition = partition;
                    }
                    return partition;
                }
            }).setParallelism(parallelism)
            .addSink(new SinkFunction<Integer>() {

                private int[] valuesPerPartition = new int[parallelism];

                @Override
                public void invoke(Integer value) throws Exception {
                    valuesPerPartition[value]++;

                    boolean missing = false;
                    for (int i : valuesPerPartition) {
                        if (i < 100) {
                            missing = true;
                            break;
                        }
                    }
                    if (!missing) {
                        throw new SuccessException();
                    }
                }
            }).setParallelism(1);

        tryExecute(env, "custom partitioning test");

        deleteTestTopic(topic);

        LOG.info("Finished KafkaProducerITCase.testCustomPartitioning()");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
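The CustomPartitioner referenced above is not shown in this snippet. A minimal sketch, assuming the KafkaPartitioner base class that produceIntoKafka accepts in this Flink version, and consistent with the check the mapper performs later (partition == value.f0 % parallelism):

public static class CustomPartitioner extends KafkaPartitioner<Tuple2<Long, String>> implements Serializable {

    private final int expectedPartitions;

    public CustomPartitioner(int expectedPartitions) {
        this.expectedPartitions = expectedPartitions;
    }

    @Override
    public int partition(Tuple2<Long, String> next, byte[] serializedKey, byte[] serializedValue, int numPartitions) {
        // the test created the topic with exactly 'parallelism' partitions
        assertEquals(expectedPartitions, numPartitions);
        // route by the tuple's counter so each value lands on a deterministic partition
        return (int) (next.f0 % numPartitions);
    }
}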
Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
The class KafkaShortRetentionTestBase, method runFailOnAutoOffsetResetNone.
/**
 * Ensures that the consumer fails properly if "auto.offset.reset" is set to "none".
 *
 * @throws Exception
 */
public void runFailOnAutoOffsetResetNone() throws Exception {
    final String topic = "auto-offset-reset-none-test";
    final int parallelism = 1;

    kafkaServer.createTestTopic(topic, parallelism, 1);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flink.getLeaderRPCPort());
    env.setParallelism(parallelism);
    // fail immediately
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    // ----------- add consumer ----------

    Properties customProps = new Properties();
    customProps.putAll(standardProps);
    customProps.putAll(secureProps);
    // test that "none" leads to an exception
    customProps.setProperty("auto.offset.reset", "none");

    FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);

    DataStreamSource<String> consuming = env.addSource(source);
    consuming.addSink(new DiscardingSink<String>());

    try {
        env.execute("Test auto offset reset none");
    } catch (Throwable e) {
        // check if the correct exception has been thrown
        if (!e.getCause().getCause().getMessage().contains("Unable to find previous offset") // kafka 0.8
                && !e.getCause().getCause().getMessage().contains("Undefined offset with no reset policy for partition")) { // kafka 0.9
            throw e;
        }
    }
    kafkaServer.deleteTestTopic(topic);
}
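The hard-coded e.getCause().getCause() chain is brittle: it throws a NullPointerException if Flink changes how deeply the Kafka exception is wrapped. A hypothetical helper, not part of the original test, that walks the whole cause chain would make the check more robust:

// Hypothetical helper: returns true if any throwable in the cause chain
// carries one of the given message fragments.
private static boolean causeChainContains(Throwable t, String... fragments) {
    for (Throwable cur = t; cur != null; cur = cur.getCause()) {
        String message = cur.getMessage();
        if (message != null) {
            for (String fragment : fragments) {
                if (message.contains(fragment)) {
                    return true;
                }
            }
        }
    }
    return false;
}

The catch block would then rethrow only when causeChainContains(e, "Unable to find previous offset", "Undefined offset with no reset policy for partition") returns false.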
Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
The class ConsumeFromKinesis, method main.
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>("flink-test", new SimpleStringSchema(), kinesisConsumerConfig));
    kinesis.print();

    see.execute();
}
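ParameterTool.fromArgs expects each required key as a --key value pair, so this job is launched with --region, --accesskey, and --secretkey arguments; the stream name flink-test is hard-coded. As a hedged variation, assuming the STREAM_INITIAL_POSITION key exposed by ConsumerConfigConstants in this connector version, consumption can be made to start from the oldest available record instead of the default:

// Assumption: ConsumerConfigConstants.STREAM_INITIAL_POSITION is available in
// this flink-connector-kinesis version; TRIM_HORIZON starts from the oldest
// record in the stream rather than only records arriving after the job starts.
kinesisConsumerConfig.setProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION, "TRIM_HORIZON");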
Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
The class ProduceIntoKinesis, method main.
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    DataStream<String> simpleStringStream = see.addSource(new EventsGenerator());

    Properties kinesisProducerConfig = new Properties();
    kinesisProducerConfig.setProperty(ProducerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisProducerConfig.setProperty(ProducerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accessKey"));
    kinesisProducerConfig.setProperty(ProducerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretKey"));

    FlinkKinesisProducer<String> kinesis = new FlinkKinesisProducer<>(new SimpleStringSchema(), kinesisProducerConfig);
    kinesis.setFailOnError(true);
    kinesis.setDefaultStream("flink-test");
    kinesis.setDefaultPartition("0");

    simpleStringStream.addSink(kinesis);

    see.execute();
}
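The EventsGenerator source is referenced above but not included in this snippet. A minimal stand-in sketch (the payload format and sleep interval are assumptions) that produces an unbounded stream of strings:

// Hypothetical stand-in for the EventsGenerator referenced above: an
// unbounded source that emits a short string every few milliseconds
// until the job is cancelled.
public static class EventsGenerator implements SourceFunction<String> {

    private volatile boolean running = true;

    @Override
    public void run(SourceContext<String> ctx) throws Exception {
        long seq = 0;
        while (running) {
            Thread.sleep(10); // throttle so the Kinesis producer is not overwhelmed
            ctx.collect("event-" + (seq++));
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}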
Use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
The class KinesisEventsGeneratorProducerThread, method create.
public static Thread create(final int totalEventCount,
                            final int parallelism,
                            final String awsAccessKey,
                            final String awsSecretKey,
                            final String awsRegion,
                            final String kinesisStreamName,
                            final AtomicReference<Throwable> errorHandler,
                            final int flinkPort,
                            final Configuration flinkConfig) {
    Runnable kinesisEventsGeneratorProducer = new Runnable() {

        @Override
        public void run() {
            try {
                StreamExecutionEnvironment see = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort, flinkConfig);
                see.setParallelism(parallelism);

                // start data generator
                DataStream<String> simpleStringStream = see.addSource(
                        new KinesisEventsGeneratorProducerThread.EventsGenerator(totalEventCount)).setParallelism(1);

                Properties producerProps = new Properties();
                producerProps.setProperty(AWSConfigConstants.AWS_ACCESS_KEY_ID, awsAccessKey);
                producerProps.setProperty(AWSConfigConstants.AWS_SECRET_ACCESS_KEY, awsSecretKey);
                producerProps.setProperty(AWSConfigConstants.AWS_REGION, awsRegion);

                FlinkKinesisProducer<String> kinesis = new FlinkKinesisProducer<>(new SimpleStringSchema(), producerProps);
                kinesis.setFailOnError(true);
                kinesis.setDefaultStream(kinesisStreamName);
                kinesis.setDefaultPartition("0");

                simpleStringStream.addSink(kinesis);

                LOG.info("Starting producing topology");
                see.execute("Producing topology");
                LOG.info("Producing topo finished");
            } catch (Exception e) {
                LOG.warn("Error while running producing topology", e);
                errorHandler.set(e);
            }
        }
    };
    return new Thread(kinesisEventsGeneratorProducer);
}
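A sketch of how the returned thread might be driven from a test harness; the argument values and the surrounding harness are assumptions, not part of the original class:

// Hypothetical usage: run the producing topology to completion and surface
// any failure the runnable stored in the shared error reference.
AtomicReference<Throwable> error = new AtomicReference<>();
Thread producer = KinesisEventsGeneratorProducerThread.create(
        100000, 2, awsAccessKey, awsSecretKey, "us-east-1",
        "flink-test", error, flinkPort, flinkConfig);
producer.start();
producer.join();
if (error.get() != null) {
    throw new RuntimeException("Producing topology failed", error.get());
}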