Use of org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema in project flink by apache.
The class Kafka010ITCase, method testTimestamps.
/**
 * Kafka 0.10 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {
    final String topic = "tstopic";
    createTestTopic(topic, 3, 1);

    // ---------- Produce an event time stream into Kafka -------------------
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {

        boolean running = true;

        @Override
        public void run(SourceContext<Long> ctx) throws Exception {
            long i = 0;
            while (running) {
                ctx.collectWithTimestamp(i, i * 2);
                if (i++ == 1000L) {
                    running = false;
                }
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    });

    final TypeInformationSerializationSchema<Long> longSer =
            new TypeInformationSerializationSchema<>(TypeInfoParser.<Long>parse("Long"), env.getConfig());

    FlinkKafkaProducer010.FlinkKafkaProducer010Configuration prod = FlinkKafkaProducer010.writeToKafkaWithTimestamps(
            streamWithTimestamps, topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps,
            new KafkaPartitioner<Long>() {

                @Override
                public int partition(Long next, byte[] serializedKey, byte[] serializedValue, int numPartitions) {
                    return (int) (next % 3);
                }
            });
    prod.setParallelism(3);
    prod.setWriteTimestampToKafka(true);

    env.execute("Produce some");

    // ---------- Consume stream from Kafka -------------------
    env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    FlinkKafkaConsumer010<Long> kafkaSource = new FlinkKafkaConsumer010<>(topic, new LimitedLongDeserializer(), standardProps);
    kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {

        @Nullable
        @Override
        public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
            if (lastElement % 10 == 0) {
                return new Watermark(lastElement);
            }
            return null;
        }

        @Override
        public long extractTimestamp(Long element, long previousElementTimestamp) {
            return previousElementTimestamp;
        }
    });

    DataStream<Long> stream = env.addSource(kafkaSource);
    GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
    stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

    env.execute("Consume again");

    deleteTestTopic(topic);
}
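A side note on the schema used above: TypeInformationSerializationSchema is both a SerializationSchema and a DeserializationSchema, so the same construction covers the producing and the consuming side. Below is a minimal standalone sketch of that round trip outside any Flink job; the class name SchemaRoundTripSketch is ours and purely illustrative.

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema;

public class SchemaRoundTripSketch {

    public static void main(String[] args) throws Exception {
        // Build the schema from Flink's type information, as the tests above do.
        TypeInformationSerializationSchema<Long> schema =
                new TypeInformationSerializationSchema<>(BasicTypeInfo.LONG_TYPE_INFO, new ExecutionConfig());

        // Serialize a value into the byte[] that would be handed to Kafka ...
        byte[] bytes = schema.serialize(42L);

        // ... and deserialize it back, as the consumer side would.
        Long roundTripped = schema.deserialize(bytes);
        System.out.println(roundTripped); // prints 42
    }
}

TypeInfoParser.parse("Long"), as used in the test, and BasicTypeInfo.LONG_TYPE_INFO describe the same Long type; the schema only needs the type information and an ExecutionConfig.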
Use of org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema in project flink by apache.
The class KafkaConsumerTestBase, method runFailOnDeployTest.
/**
 * Tests that the job fails fast at deployment time when the requested parallelism
 * exceeds the number of available task slots.
 */
public void runFailOnDeployTest() throws Exception {
    final String topic = "failOnDeployTopic";
    createTestTopic(topic, 2, 1);

    DeserializationSchema<Integer> schema =
            new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    // needs to be more than the mini cluster has slots
    env.setParallelism(12);
    env.getConfig().disableSysoutLogging();

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

    env.addSource(kafkaSource).addSink(new DiscardingSink<Integer>());

    try {
        env.execute("test fail on deploy");
        fail("this test should fail with an exception");
    } catch (ProgramInvocationException e) {
        // validate that we failed due to a NoResourceAvailableException
        Throwable cause = e.getCause();
        int depth = 0;
        boolean foundResourceException = false;

        while (cause != null && depth++ < 20) {
            if (cause instanceof NoResourceAvailableException) {
                foundResourceException = true;
                break;
            }
            cause = cause.getCause();
        }

        assertTrue("Wrong exception", foundResourceException);
    }

    deleteTestTopic(topic);
}
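The bounded walk over the cause chain in the catch block above is a generally useful pattern when the expected failure is wrapped several levels deep. A small generic sketch of it follows; findCause is a hypothetical helper name, not a Flink utility.

// Hypothetical helper (not part of Flink): walks the cause chain, bounded to
// guard against cyclic causes, and returns the first cause of the given type.
private static <T extends Throwable> T findCause(Throwable root, Class<T> type) {
    Throwable cause = root;
    int depth = 0;
    while (cause != null && depth++ < 20) {
        if (type.isInstance(cause)) {
            return type.cast(cause);
        }
        cause = cause.getCause();
    }
    return null;
}

With such a helper, the check above collapses to assertTrue("Wrong exception", findCause(e.getCause(), NoResourceAvailableException.class) != null).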
Use of org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema in project flink by apache.
The class KafkaConsumerTestBase, method runOneToOneExactlyOnceTest.
/**
 * Tests proper consumption with a 1:1 correspondence between Kafka partitions and
 * Flink sources.
 */
public void runOneToOneExactlyOnceTest() throws Exception {
    final String topic = "oneToOneTopic";
    final int parallelism = 5;
    final int numElementsPerPartition = 1000;
    final int totalElements = parallelism * numElementsPerPartition;
    final int failAfterElements = numElementsPerPartition / 3;

    createTestTopic(topic, parallelism, 1);

    DataGenerators.generateRandomizedIntegerSequence(
            StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort),
            kafkaServer, topic, parallelism, numElementsPerPartition, true);

    // run the topology that fails and recovers
    DeserializationSchema<Integer> schema =
            new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.enableCheckpointing(500);
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
    env.getConfig().disableSysoutLogging();

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

    env.addSource(kafkaSource)
            .map(new PartitionValidatingMapper(parallelism, 1))
            .map(new FailingIdentityMapper<Integer>(failAfterElements))
            .addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

    FailingIdentityMapper.failedBefore = false;
    tryExecute(env, "One-to-one exactly once test");

    deleteTestTopic(topic);
}
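FailingIdentityMapper and ValidatingExactlyOnceSink are test utilities from the Flink source tree whose implementations are not shown here. As a rough, hypothetical sketch of the failure-injection idea only (a mapper that forwards elements unchanged and throws once after a configured count, so that the restart exercises the recovery path; the real FailingIdentityMapper is more involved and also interacts with checkpointing):

import org.apache.flink.api.common.functions.RichMapFunction;

// Hypothetical illustration, not the actual Flink test utility.
public class ThrowOnceMapper extends RichMapFunction<Integer, Integer> {

    // Static so the flag survives the task restart within the same JVM.
    public static volatile boolean failedBefore = false;

    private final int failAfterElements;
    private int seen = 0;

    public ThrowOnceMapper(int failAfterElements) {
        this.failAfterElements = failAfterElements;
    }

    @Override
    public Integer map(Integer value) throws Exception {
        if (!failedBefore && ++seen >= failAfterElements) {
            failedBefore = true;
            throw new Exception("Artificial failure for the recovery test");
        }
        return value;
    }
}

The static failedBefore flag, reset before tryExecute in the test above, ensures the artificial failure is injected only once per test run.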
Use of org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema in project flink by apache.
The class KafkaConsumerTestBase, method runOneSourceMultiplePartitionsExactlyOnceTest.
/**
 * Tests the proper consumption when having fewer Flink sources than Kafka partitions,
 * so one Flink source will read multiple Kafka partitions.
 */
public void runOneSourceMultiplePartitionsExactlyOnceTest() throws Exception {
    final String topic = "oneToManyTopic";
    final int numPartitions = 5;
    final int numElementsPerPartition = 1000;
    final int totalElements = numPartitions * numElementsPerPartition;
    final int failAfterElements = numElementsPerPartition / 3;
    final int parallelism = 2;

    createTestTopic(topic, numPartitions, 1);

    DataGenerators.generateRandomizedIntegerSequence(
            StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort),
            kafkaServer, topic, numPartitions, numElementsPerPartition, false);

    // run the topology that fails and recovers
    DeserializationSchema<Integer> schema =
            new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.enableCheckpointing(500);
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
    env.getConfig().disableSysoutLogging();

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

    env.addSource(kafkaSource)
            .map(new PartitionValidatingMapper(numPartitions, 3))
            .map(new FailingIdentityMapper<Integer>(failAfterElements))
            .addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

    FailingIdentityMapper.failedBefore = false;
    tryExecute(env, "One-source-multi-partitions exactly once test");

    deleteTestTopic(topic);
}
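Compared to the 1:1 test, five partitions are now read by two sources, and PartitionValidatingMapper is constructed with (numPartitions, 3) rather than (parallelism, 1), suggesting each source subtask may legitimately see up to three of the five partitions. A hypothetical sketch of that kind of per-subtask validation follows; it is not the actual Flink test class and assumes, like the partitioner in testTimestamps above, that value % partition count identifies the partition a record was written to.

import java.util.HashSet;
import java.util.Set;
import org.apache.flink.api.common.functions.MapFunction;

// Hypothetical illustration: fails if one subtask sees values that map to more
// distinct partitions than expected.
public class PartitionCountValidator implements MapFunction<Integer, Integer> {

    private final int numPartitions;
    private final int maxPartitionsPerSubtask;
    private final Set<Integer> seenPartitions = new HashSet<>();

    public PartitionCountValidator(int numPartitions, int maxPartitionsPerSubtask) {
        this.numPartitions = numPartitions;
        this.maxPartitionsPerSubtask = maxPartitionsPerSubtask;
    }

    @Override
    public Integer map(Integer value) throws Exception {
        seenPartitions.add(value % numPartitions);
        if (seenPartitions.size() > maxPartitionsPerSubtask) {
            throw new Exception("Subtask saw values from too many partitions: " + seenPartitions);
        }
        return value;
    }
}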
Use of org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema in project flink by apache.
The class KafkaConsumerTestBase, method runBrokerFailureTest.
public void runBrokerFailureTest() throws Exception {
    final String topic = "brokerFailureTestTopic";
    final int parallelism = 2;
    final int numElementsPerPartition = 1000;
    final int totalElements = parallelism * numElementsPerPartition;
    final int failAfterElements = numElementsPerPartition / 3;

    createTestTopic(topic, parallelism, 2);

    DataGenerators.generateRandomizedIntegerSequence(
            StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort),
            kafkaServer, topic, parallelism, numElementsPerPartition, true);

    // find leader to shut down
    int leaderId = kafkaServer.getLeaderToShutDown(topic);
    LOG.info("Leader to shutdown {}", leaderId);

    // run the topology (the consumers must handle the failures)
    DeserializationSchema<Integer> schema =
            new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(parallelism);
    env.enableCheckpointing(500);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

    env.addSource(kafkaSource)
            .map(new PartitionValidatingMapper(parallelism, 1))
            .map(new BrokerKillingMapper<Integer>(leaderId, failAfterElements))
            .addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

    BrokerKillingMapper.killedLeaderBefore = false;
    tryExecute(env, "Broker failure once test");

    // start a new broker:
    kafkaServer.restartBroker(leaderId);
}