Use of org.apache.flink.test.util.SuccessException in project flink by apache.
From the class KafkaConsumerTestBase, method runKeyValueTest:
public void runKeyValueTest() throws Exception {
    final String topic = "keyvaluetest";
    createTestTopic(topic, 1, 1);
    final int ELEMENT_COUNT = 5000;

    // ----------- Write some data into Kafka -------------------

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    DataStream<Tuple2<Long, PojoValue>> kvStream = env.addSource(new SourceFunction<Tuple2<Long, PojoValue>>() {

        @Override
        public void run(SourceContext<Tuple2<Long, PojoValue>> ctx) throws Exception {
            Random rnd = new Random(1337);
            for (long i = 0; i < ELEMENT_COUNT; i++) {
                PojoValue pojo = new PojoValue();
                pojo.when = new Date(rnd.nextLong());
                pojo.lon = rnd.nextLong();
                pojo.lat = i;
                // make every second key null to ensure proper "null" serialization
                Long key = (i % 2 == 0) ? null : i;
                ctx.collect(new Tuple2<>(key, pojo));
            }
        }

        @Override
        public void cancel() {
        }
    });

    KeyedSerializationSchema<Tuple2<Long, PojoValue>> schema = new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());

    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "3");
    kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null);
    env.execute("Write KV to Kafka");

    // ----------- Read the data again -------------------

    env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    KeyedDeserializationSchema<Tuple2<Long, PojoValue>> readSchema = new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    DataStream<Tuple2<Long, PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, readSchema, props));

    fromKafka.flatMap(new RichFlatMapFunction<Tuple2<Long, PojoValue>, Object>() {
        long counter = 0;

        @Override
        public void flatMap(Tuple2<Long, PojoValue> value, Collector<Object> out) throws Exception {
            // the elements should be in order.
            Assert.assertTrue("Wrong value " + value.f1.lat, value.f1.lat == counter);
            if (value.f1.lat % 2 == 0) {
                assertNull("key was not null", value.f0);
            } else {
                Assert.assertTrue("Wrong value " + value.f0, value.f0 == counter);
            }
            counter++;
            if (counter == ELEMENT_COUNT) {
                // we got the right number of elements
                throw new SuccessException();
            }
        }
    });

    tryExecute(env, "Read KV from Kafka");
    deleteTestTopic(topic);
}
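All of these examples signal success by throwing SuccessException out of a user function and then running the job through tryExecute, which treats that exception as a passing result rather than a failure. The helper itself is not shown on this page; a minimal sketch of how such a helper can work (the class name below is illustrative, and the real helper in flink-test-utils may differ in detail) is:

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.test.util.SuccessException;

public class TryExecuteSketch {

    // Runs the job and swallows any failure whose cause chain contains a SuccessException.
    public static void tryExecute(StreamExecutionEnvironment env, String jobName) throws Exception {
        try {
            env.execute(jobName);
        } catch (Exception e) {
            // walk the cause chain; a SuccessException anywhere means the test passed
            for (Throwable cause = e; cause != null; cause = cause.getCause()) {
                if (cause instanceof SuccessException) {
                    return;
                }
            }
            throw e;
        }
    }
}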
Use of org.apache.flink.test.util.SuccessException in project flink by apache.
From the class KafkaConsumerTestBase, method runBigRecordTestTopology:
/**
 * Test Flink's Kafka integration also with very big records (30 MB).
 *
 * See http://stackoverflow.com/questions/21020347/kafka-sending-a-15mb-message
 */
public void runBigRecordTestTopology() throws Exception {
    final String topic = "bigRecordTestTopic";
    // otherwise, the kafka mini clusters may run out of heap space
    final int parallelism = 1;
    createTestTopic(topic, parallelism, 1);

    final TypeInformation<Tuple2<Long, byte[]>> longBytesInfo = TypeInfoParser.parse("Tuple2<Long, byte[]>");
    final TypeInformationSerializationSchema<Tuple2<Long, byte[]>> serSchema = new TypeInformationSerializationSchema<>(longBytesInfo, new ExecutionConfig());

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();
    env.enableCheckpointing(100);
    env.setParallelism(parallelism);

    // add consuming topology:
    Properties consumerProps = new Properties();
    consumerProps.putAll(standardProps);
    consumerProps.setProperty("fetch.message.max.bytes", Integer.toString(1024 * 1024 * 14));
    // for the new fetcher
    consumerProps.setProperty("max.partition.fetch.bytes", Integer.toString(1024 * 1024 * 14));
    consumerProps.setProperty("queued.max.message.chunks", "1");
    consumerProps.putAll(secureProps);

    FlinkKafkaConsumerBase<Tuple2<Long, byte[]>> source = kafkaServer.getConsumer(topic, serSchema, consumerProps);
    DataStreamSource<Tuple2<Long, byte[]>> consuming = env.addSource(source);

    consuming.addSink(new SinkFunction<Tuple2<Long, byte[]>>() {
        private int elCnt = 0;

        @Override
        public void invoke(Tuple2<Long, byte[]> value) throws Exception {
            elCnt++;
            if (value.f0 == -1) {
                // we should have seen 11 elements now.
                if (elCnt == 11) {
                    throw new SuccessException();
                } else {
                    throw new RuntimeException("There have been " + elCnt + " elements");
                }
            }
            if (elCnt > 10) {
                throw new RuntimeException("More than 10 elements seen: " + elCnt);
            }
        }
    });

    // add producing topology
    Properties producerProps = new Properties();
    producerProps.setProperty("max.request.size", Integer.toString(1024 * 1024 * 15));
    producerProps.setProperty("retries", "3");
    producerProps.putAll(secureProps);
    producerProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerConnectionStrings);

    DataStream<Tuple2<Long, byte[]>> stream = env.addSource(new RichSourceFunction<Tuple2<Long, byte[]>>() {
        private boolean running;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            running = true;
        }

        @Override
        public void run(SourceContext<Tuple2<Long, byte[]>> ctx) throws Exception {
            Random rnd = new Random();
            long cnt = 0;
            int sevenMb = 1024 * 1024 * 7;
            while (running) {
                byte[] wl = new byte[sevenMb + rnd.nextInt(sevenMb)];
                ctx.collect(new Tuple2<>(cnt++, wl));
                Thread.sleep(100);
                if (cnt == 10) {
                    // signal end
                    ctx.collect(new Tuple2<>(-1L, new byte[] { 1 }));
                    break;
                }
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    });

    kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(serSchema), producerProps, null);

    tryExecute(env, "big topology test");
    deleteTestTopic(topic);
}
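The properties above only raise the client-side limits (max.request.size on the producer, fetch.message.max.bytes / max.partition.fetch.bytes on the consumer); the broker must also accept records of roughly 15 MB, or it will reject them. A hedged sketch of the matching broker-side settings, using standard Kafka broker config keys with values chosen to line up with the test:

import java.util.Properties;

public class BigRecordBrokerConfigSketch {

    // Broker-side limits that must be at least as large as the producer's
    // max.request.size, otherwise the broker rejects the ~15 MB records.
    public static Properties bigRecordBrokerProperties() {
        Properties brokerProps = new Properties();
        brokerProps.setProperty("message.max.bytes", Integer.toString(1024 * 1024 * 15));
        brokerProps.setProperty("replica.fetch.max.bytes", Integer.toString(1024 * 1024 * 15));
        return brokerProps;
    }
}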
Use of org.apache.flink.test.util.SuccessException in project flink by apache.
From the class KafkaProducerTestBase, method runCustomPartitioningTest:
/**
 * <pre>
 *             +------> (sink) --+--> [KAFKA-1] --> (source) -> (map) --+
 *            /                  |                                       \
 *           /                   |                                        \
 * (source) ----------> (sink) --+--> [KAFKA-2] --> (source) -> (map) -----+-> (sink)
 *           \                   |                                        /
 *            \                  |                                       /
 *             +------> (sink) --+--> [KAFKA-3] --> (source) -> (map) --+
 * </pre>
 *
 * The mapper validates that the values come consistently from the correct Kafka partition.
 *
 * The final sink validates that there are no duplicates and that all partitions are present.
 */
public void runCustomPartitioningTest() {
    try {
        LOG.info("Starting KafkaProducerITCase.testCustomPartitioning()");

        final String topic = "customPartitioningTestTopic";
        final int parallelism = 3;
        createTestTopic(topic, parallelism, 1);

        TypeInformation<Tuple2<Long, String>> longStringInfo = TypeInfoParser.parse("Tuple2<Long, String>");

        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
        env.setRestartStrategy(RestartStrategies.noRestart());
        env.getConfig().disableSysoutLogging();

        TypeInformationSerializationSchema<Tuple2<Long, String>> serSchema = new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());
        TypeInformationSerializationSchema<Tuple2<Long, String>> deserSchema = new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());

        // ------ producing topology ---------

        // source has DOP 1 to make sure it generates no duplicates
        DataStream<Tuple2<Long, String>> stream = env.addSource(new SourceFunction<Tuple2<Long, String>>() {
            private boolean running = true;

            @Override
            public void run(SourceContext<Tuple2<Long, String>> ctx) throws Exception {
                long cnt = 0;
                while (running) {
                    ctx.collect(new Tuple2<Long, String>(cnt, "kafka-" + cnt));
                    cnt++;
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        }).setParallelism(1);

        Properties props = new Properties();
        props.putAll(FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings));
        props.putAll(secureProps);

        // sink partitions into
        kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(serSchema), props, new CustomPartitioner(parallelism)).setParallelism(parallelism);

        // ------ consuming topology ---------

        Properties consumerProps = new Properties();
        consumerProps.putAll(standardProps);
        consumerProps.putAll(secureProps);
        FlinkKafkaConsumerBase<Tuple2<Long, String>> source = kafkaServer.getConsumer(topic, deserSchema, consumerProps);

        env.addSource(source).setParallelism(parallelism).map(new RichMapFunction<Tuple2<Long, String>, Integer>() {
            private int ourPartition = -1;

            @Override
            public Integer map(Tuple2<Long, String> value) {
                int partition = value.f0.intValue() % parallelism;
                if (ourPartition != -1) {
                    assertEquals("inconsistent partitioning", ourPartition, partition);
                } else {
                    ourPartition = partition;
                }
                return partition;
            }
        }).setParallelism(parallelism).addSink(new SinkFunction<Integer>() {
            private int[] valuesPerPartition = new int[parallelism];

            @Override
            public void invoke(Integer value) throws Exception {
                valuesPerPartition[value]++;

                boolean missing = false;
                for (int i : valuesPerPartition) {
                    if (i < 100) {
                        missing = true;
                        break;
                    }
                }
                if (!missing) {
                    throw new SuccessException();
                }
            }
        }).setParallelism(1);

        tryExecute(env, "custom partitioning test");
        deleteTestTopic(topic);

        LOG.info("Finished KafkaProducerITCase.testCustomPartitioning()");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
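The CustomPartitioner passed to produceIntoKafka is not shown on this page. A sketch of a partitioner consistent with the mapper's check (each key k lands in partition k % parallelism), assuming the KafkaPartitioner base class of the connector version these tests target, could look like this; the class and parameter names are illustrative:

import java.io.Serializable;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.connectors.kafka.partitioner.KafkaPartitioner;

// Hypothetical stand-in for the CustomPartitioner used above: routes each record
// to partition (key % expectedPartitions), which is exactly what the mapper verifies.
public class CustomPartitionerSketch extends KafkaPartitioner<Tuple2<Long, String>> implements Serializable {

    private final int expectedPartitions;

    public CustomPartitionerSketch(int expectedPartitions) {
        this.expectedPartitions = expectedPartitions;
    }

    @Override
    public int partition(Tuple2<Long, String> record, byte[] serializedKey, byte[] serializedValue, int numPartitions) {
        return (int) (record.f0 % expectedPartitions);
    }
}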
Use of org.apache.flink.test.util.SuccessException in project flink by apache.
From the class ValidatingExactlyOnceSink, method invoke:
@Override
public void invoke(Integer value) throws Exception {
    numElements++;

    if (duplicateChecker.get(value)) {
        throw new Exception("Received a duplicate: " + value);
    }
    duplicateChecker.set(value);
    if (numElements == numElementsTotal) {
        // validate
        if (duplicateChecker.cardinality() != numElementsTotal) {
            throw new Exception("Duplicate checker has wrong cardinality");
        } else if (duplicateChecker.nextClearBit(0) != numElementsTotal) {
            throw new Exception("Received sparse sequence");
        } else {
            throw new SuccessException();
        }
    }
}
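The invoke method above relies on state declared elsewhere in ValidatingExactlyOnceSink. A condensed, hypothetical outline of that surrounding state (the real sink additionally snapshots it in checkpoints so the validation survives failures and restarts):

import java.util.BitSet;

import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

// Condensed outline of the fields that invoke() relies on; declared abstract here
// because invoke(Integer) is exactly the method shown above.
public abstract class ValidatingExactlyOnceSinkSketch extends RichSinkFunction<Integer> {

    protected final int numElementsTotal;                     // expected number of distinct values

    protected final BitSet duplicateChecker = new BitSet();   // one bit per value seen so far
    protected int numElements;                                 // running count of received elements

    protected ValidatingExactlyOnceSinkSketch(int numElementsTotal) {
        this.numElementsTotal = numElementsTotal;
    }
}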
Use of org.apache.flink.test.util.SuccessException in project flink by apache.
From the class KafkaConsumerTestBase, method runAllDeletesTest:
/**
* Test delete behavior and metrics for producer
* @throws Exception
*/
public void runAllDeletesTest() throws Exception {
    final String topic = "alldeletestest";
    createTestTopic(topic, 1, 1);
    final int ELEMENT_COUNT = 300;

    // ----------- Write some data into Kafka -------------------

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    DataStream<Tuple2<byte[], PojoValue>> kvStream = env.addSource(new SourceFunction<Tuple2<byte[], PojoValue>>() {

        @Override
        public void run(SourceContext<Tuple2<byte[], PojoValue>> ctx) throws Exception {
            Random rnd = new Random(1337);
            for (long i = 0; i < ELEMENT_COUNT; i++) {
                final byte[] key = new byte[200];
                rnd.nextBytes(key);
                ctx.collect(new Tuple2<>(key, (PojoValue) null));
            }
        }

        @Override
        public void cancel() {
        }
    });

    TypeInformationKeyValueSerializationSchema<byte[], PojoValue> schema = new TypeInformationKeyValueSerializationSchema<>(byte[].class, PojoValue.class, env.getConfig());

    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "3");
    producerProperties.putAll(secureProps);
    kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null);
    env.execute("Write deletes to Kafka");

    // ----------- Read the data again -------------------

    env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    DataStream<Tuple2<byte[], PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, schema, props));

    fromKafka.flatMap(new RichFlatMapFunction<Tuple2<byte[], PojoValue>, Object>() {
        long counter = 0;

        @Override
        public void flatMap(Tuple2<byte[], PojoValue> value, Collector<Object> out) throws Exception {
            // ensure that deleted messages are passed as nulls
            assertNull(value.f1);
            counter++;
            if (counter == ELEMENT_COUNT) {
                // we got the right number of elements
                throw new SuccessException();
            }
        }
    });

    tryExecute(env, "Read deletes from Kafka");
    deleteTestTopic(topic);
}
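In Kafka, a delete is expressed as a record with a null value (a tombstone), which is what the source above produces for every key; the broker only physically removes older records for those keys once log compaction runs on the topic. A hedged sketch of the topic-level settings under which such tombstones lead to actual deletion, using standard Kafka topic config keys (the retention value is arbitrary):

import java.util.Properties;

public class CompactedTopicConfigSketch {

    // Settings for a log-compacted topic: tombstones mark keys for deletion, and
    // compaction eventually drops the older records for those keys.
    public static Properties compactedTopicConfig() {
        Properties topicConfig = new Properties();
        topicConfig.setProperty("cleanup.policy", "compact");
        topicConfig.setProperty("delete.retention.ms", "60000");  // how long tombstones stay readable
        return topicConfig;
    }
}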