Example 6 with TypeInformationSerializationSchema

use of org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema in project flink by apache.

the class Kafka010ITCase method testTimestamps.

/**
 * Kafka 0.10-specific test, ensuring that timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {
    final String topic = "tstopic";
    createTestTopic(topic, 3, 1);
    // ---------- Produce an event time stream into Kafka -------------------
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {

        boolean running = true;

        @Override
        public void run(SourceContext<Long> ctx) throws Exception {
            long i = 0;
            while (running) {
                ctx.collectWithTimestamp(i, i * 2);
                if (i++ == 1000L) {
                    running = false;
                }
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    });
    final TypeInformationSerializationSchema<Long> longSer =
            new TypeInformationSerializationSchema<>(TypeInfoParser.<Long>parse("Long"), env.getConfig());
    FlinkKafkaProducer010.FlinkKafkaProducer010Configuration prod = FlinkKafkaProducer010.writeToKafkaWithTimestamps(
            streamWithTimestamps, topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps,
            new KafkaPartitioner<Long>() {

                @Override
                public int partition(Long next, byte[] serializedKey, byte[] serializedValue, int numPartitions) {
                    // route by value so all three partitions receive records
                    return (int) (next % 3);
                }
            });
    prod.setParallelism(3);
    prod.setWriteTimestampToKafka(true);
    env.execute("Produce some");
    // ---------- Consume stream from Kafka -------------------
    env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    FlinkKafkaConsumer010<Long> kafkaSource = new FlinkKafkaConsumer010<>(topic, new LimitedLongDeserializer(), standardProps);
    kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {

        @Nullable
        @Override
        public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
            if (lastElement % 10 == 0) {
                return new Watermark(lastElement);
            }
            return null;
        }

        @Override
        public long extractTimestamp(Long element, long previousElementTimestamp) {
            return previousElementTimestamp;
        }
    });
    DataStream<Long> stream = env.addSource(kafkaSource);
    GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
    stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);
    env.execute("Consume again");
    deleteTestTopic(topic);
}
Also used: IOException (java.io.IOException), GenericTypeInfo (org.apache.flink.api.java.typeutils.GenericTypeInfo), TypeInformationSerializationSchema (org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), Watermark (org.apache.flink.streaming.api.watermark.Watermark), Nullable (javax.annotation.Nullable), Test (org.junit.Test)
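
For reference, TypeInformationSerializationSchema implements both SerializationSchema and DeserializationSchema, so one instance covers both directions of the Kafka round trip. A minimal stand-alone sketch (the class name SchemaRoundTrip and the value 42L are illustrative, not from the test):

import java.io.IOException;

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema;

public class SchemaRoundTrip {

    public static void main(String[] args) throws IOException {
        // build the schema from type information plus an ExecutionConfig
        TypeInformationSerializationSchema<Long> schema =
                new TypeInformationSerializationSchema<>(BasicTypeInfo.LONG_TYPE_INFO, new ExecutionConfig());
        // serialize to the byte[] form that would be written to Kafka, then read it back
        byte[] bytes = schema.serialize(42L);
        Long roundTripped = schema.deserialize(bytes);
        // prints 42
        System.out.println(roundTripped);
    }
}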

Example 7 with TypeInformationSerializationSchema

use of org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema in project flink by apache.

the class KafkaConsumerTestBase method runFailOnDeployTest.

/**
 * Tests that the job fails during deployment when the requested parallelism
 * exceeds the number of available task slots.
 */
public void runFailOnDeployTest() throws Exception {
    final String topic = "failOnDeployTopic";
    createTestTopic(topic, 2, 1);
    DeserializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    // needs to be more than the mini cluster has slots
    env.setParallelism(12);
    env.getConfig().disableSysoutLogging();
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);
    env.addSource(kafkaSource).addSink(new DiscardingSink<Integer>());
    try {
        env.execute("test fail on deploy");
        fail("this test should fail with an exception");
    } catch (ProgramInvocationException e) {
        // validate that we failed due to a NoResourceAvailableException
        Throwable cause = e.getCause();
        int depth = 0;
        boolean foundResourceException = false;
        while (cause != null && depth++ < 20) {
            if (cause instanceof NoResourceAvailableException) {
                foundResourceException = true;
                break;
            }
            cause = cause.getCause();
        }
        assertTrue("Wrong exception", foundResourceException);
    }
    deleteTestTopic(topic);
}
Also used: TypeInformationSerializationSchema (org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema), ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), Properties (java.util.Properties), NoResourceAvailableException (org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException)
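
The depth-bounded cause walk in the catch block above is a recurring pattern in these tests. Extracted into a hypothetical helper (the name containsCause is mine, not part of KafkaConsumerTestBase), it would read:

// Walks the cause chain, bounded to 20 levels as a guard against cycles,
// and reports whether any cause is an instance of the given exception type.
private static boolean containsCause(Throwable root, Class<? extends Throwable> type) {
    Throwable cause = root;
    int depth = 0;
    while (cause != null && depth++ < 20) {
        if (type.isInstance(cause)) {
            return true;
        }
        cause = cause.getCause();
    }
    return false;
}

The assertion then collapses to assertTrue("Wrong exception", containsCause(e.getCause(), NoResourceAvailableException.class)).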

Example 8 with TypeInformationSerializationSchema

use of org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema in project flink by apache.

the class KafkaConsumerTestBase method runOneToOneExactlyOnceTest.

/**
	 * Tests the proper consumption when having a 1:1 correspondence between Kafka partitions and
	 * Flink sources.
	 */
public void runOneToOneExactlyOnceTest() throws Exception {
    final String topic = "oneToOneTopic";
    final int parallelism = 5;
    final int numElementsPerPartition = 1000;
    final int totalElements = parallelism * numElementsPerPartition;
    final int failAfterElements = numElementsPerPartition / 3;
    createTestTopic(topic, parallelism, 1);
    DataGenerators.generateRandomizedIntegerSequence(StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort), kafkaServer, topic, parallelism, numElementsPerPartition, true);
    // run the topology that fails and recovers
    DeserializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.enableCheckpointing(500);
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
    env.getConfig().disableSysoutLogging();
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);
    env.addSource(kafkaSource)
            .map(new PartitionValidatingMapper(parallelism, 1))
            .map(new FailingIdentityMapper<Integer>(failAfterElements))
            .addSink(new ValidatingExactlyOnceSink(totalElements))
            .setParallelism(1);
    FailingIdentityMapper.failedBefore = false;
    tryExecute(env, "One-to-one exactly once test");
    deleteTestTopic(topic);
}
Also used: TypeInformationSerializationSchema (org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema), ValidatingExactlyOnceSink (org.apache.flink.streaming.connectors.kafka.testutils.ValidatingExactlyOnceSink), PartitionValidatingMapper (org.apache.flink.streaming.connectors.kafka.testutils.PartitionValidatingMapper), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), Properties (java.util.Properties), TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)
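
The recovery semantics hinge on FailingIdentityMapper: it forwards elements unchanged but throws once after failAfterElements records, forcing a restart from the last checkpoint, and the static failedBefore flag (reset before each run above) keeps the failure from repeating. A rough sketch of that failure-injection idea, not the actual test-utility source (which also hooks into checkpointing):

// Illustrative only: an identity mapper that throws once, then passes everything through.
// Assumes org.apache.flink.api.common.functions.MapFunction.
public static class ThrowOnceMapper<T> implements MapFunction<T, T> {

    static volatile boolean failedBefore = false;

    private final int failAfter;
    private int count;

    public ThrowOnceMapper(int failAfter) {
        this.failAfter = failAfter;
    }

    @Override
    public T map(T value) throws Exception {
        if (!failedBefore && ++count >= failAfter) {
            failedBefore = true;
            throw new Exception("artificial failure to trigger recovery");
        }
        return value;
    }
}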

Example 9 with TypeInformationSerializationSchema

use of org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema in project flink by apache.

the class KafkaConsumerTestBase method runOneSourceMultiplePartitionsExactlyOnceTest.

/**
	 * Tests the proper consumption when having fewer Flink sources than Kafka partitions, so
	 * one Flink source will read multiple Kafka partitions.
	 */
public void runOneSourceMultiplePartitionsExactlyOnceTest() throws Exception {
    final String topic = "oneToManyTopic";
    final int numPartitions = 5;
    final int numElementsPerPartition = 1000;
    final int totalElements = numPartitions * numElementsPerPartition;
    final int failAfterElements = numElementsPerPartition / 3;
    final int parallelism = 2;
    createTestTopic(topic, numPartitions, 1);
    DataGenerators.generateRandomizedIntegerSequence(StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort), kafkaServer, topic, numPartitions, numElementsPerPartition, false);
    // run the topology that fails and recovers
    DeserializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.enableCheckpointing(500);
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
    env.getConfig().disableSysoutLogging();
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);
    env.addSource(kafkaSource)
            .map(new PartitionValidatingMapper(numPartitions, 3))
            .map(new FailingIdentityMapper<Integer>(failAfterElements))
            .addSink(new ValidatingExactlyOnceSink(totalElements))
            .setParallelism(1);
    FailingIdentityMapper.failedBefore = false;
    tryExecute(env, "One-source-multi-partitions exactly once test");
    deleteTestTopic(topic);
}
Also used: TypeInformationSerializationSchema (org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema), ValidatingExactlyOnceSink (org.apache.flink.streaming.connectors.kafka.testutils.ValidatingExactlyOnceSink), PartitionValidatingMapper (org.apache.flink.streaming.connectors.kafka.testutils.PartitionValidatingMapper), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), Properties (java.util.Properties), TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)
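
The bound of 3 passed to PartitionValidatingMapper follows from the assignment arithmetic: partitions are spread roughly evenly over the sources, so with 5 partitions and parallelism 2 the busiest source owns at most ceil(5 / 2) = 3 partitions. As a quick check:

int numPartitions = 5;
int parallelism = 2;
// ceiling division: the busiest source reads at most this many partitions
int maxPartitionsPerSource = (numPartitions + parallelism - 1) / parallelism; // = 3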

Example 10 with TypeInformationSerializationSchema

use of org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema in project flink by apache.

the class KafkaConsumerTestBase method runBrokerFailureTest.

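/**
 * Tests that the consumer tolerates a broker failure: the leader broker for the
 * test topic is killed mid-job, and the pipeline must keep running and still
 * deliver every element exactly once.
 */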
public void runBrokerFailureTest() throws Exception {
    final String topic = "brokerFailureTestTopic";
    final int parallelism = 2;
    final int numElementsPerPartition = 1000;
    final int totalElements = parallelism * numElementsPerPartition;
    final int failAfterElements = numElementsPerPartition / 3;
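    // replication factor of 2, so the topic stays available after the leader broker is killed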
    createTestTopic(topic, parallelism, 2);
    DataGenerators.generateRandomizedIntegerSequence(StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort), kafkaServer, topic, parallelism, numElementsPerPartition, true);
    // find leader to shut down
    int leaderId = kafkaServer.getLeaderToShutDown(topic);
    LOG.info("Leader to shutdown {}", leaderId);
    // run the topology (the consumers must handle the failures)
    DeserializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.setParallelism(parallelism);
    env.enableCheckpointing(500);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);
    env.addSource(kafkaSource)
            .map(new PartitionValidatingMapper(parallelism, 1))
            .map(new BrokerKillingMapper<Integer>(leaderId, failAfterElements))
            .addSink(new ValidatingExactlyOnceSink(totalElements))
            .setParallelism(1);
    BrokerKillingMapper.killedLeaderBefore = false;
    tryExecute(env, "Broker failure once test");
    // restart the broker that was shut down
    kafkaServer.restartBroker(leaderId);
}
Also used: TypeInformationSerializationSchema (org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema), ValidatingExactlyOnceSink (org.apache.flink.streaming.connectors.kafka.testutils.ValidatingExactlyOnceSink), PartitionValidatingMapper (org.apache.flink.streaming.connectors.kafka.testutils.PartitionValidatingMapper), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), Properties (java.util.Properties), TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)

Aggregations

TypeInformationSerializationSchema (org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema) 14
Properties (java.util.Properties) 10
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 10
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 8
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint) 8
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 5
SuccessException (org.apache.flink.test.util.SuccessException) 5
ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException) 4
PartitionValidatingMapper (org.apache.flink.streaming.connectors.kafka.testutils.PartitionValidatingMapper) 4
ValidatingExactlyOnceSink (org.apache.flink.streaming.connectors.kafka.testutils.ValidatingExactlyOnceSink) 4
IOException (java.io.IOException) 3
JobExecutionException (org.apache.flink.runtime.client.JobExecutionException) 3
NoResourceAvailableException (org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException) 3
RetryOnException (org.apache.flink.testutils.junit.RetryOnException) 3
Test (org.junit.Test) 3
BitSet (java.util.BitSet) 2
Random (java.util.Random) 2
Configuration (org.apache.flink.configuration.Configuration) 2
JobCancellationException (org.apache.flink.runtime.client.JobCancellationException) 2
TimeoutException (org.apache.kafka.common.errors.TimeoutException) 2