
Example 1 with AscendingTimestampExtractor

Use of org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor in project flink by apache.

The class KafkaConsumerTestBase, method runCollectingSchemaTest:

/**
 * Test that ensures that a DeserializationSchema can emit multiple records via a Collector.
 *
 * @throws Exception if the test fails
 */
public void runCollectingSchemaTest() throws Exception {
    final int elementCount = 20;
    final String topic = writeSequence("testCollectingSchema", elementCount, 1, 1);
    // read using custom schema
    final StreamExecutionEnvironment env1 = StreamExecutionEnvironment.getExecutionEnvironment();
    env1.setParallelism(1);
    env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    DataStream<Tuple2<Integer, String>> fromKafka =
            env1.addSource(
                    kafkaServer
                            .getConsumer(topic, new CollectingDeserializationSchema(elementCount), props)
                            .assignTimestampsAndWatermarks(
                                    new AscendingTimestampExtractor<Tuple2<Integer, String>>() {

                                        @Override
                                        public long extractAscendingTimestamp(Tuple2<Integer, String> element) {
                                            // The event time is encoded in the payload; strip the
                                            // trailing character before parsing.
                                            String string = element.f1;
                                            return Long.parseLong(string.substring(0, string.length() - 1));
                                        }
                                    }));
    fromKafka.keyBy(t -> t.f0).process(new KeyedProcessFunction<Integer, Tuple2<Integer, String>, Void>() {

        private boolean registered = false;

        @Override
        public void processElement(Tuple2<Integer, String> value, Context ctx, Collector<Void> out) throws Exception {
            if (!registered) {
                // This timer fires once the event-time watermark passes
                // elementCount - 2, i.e. after the expected records arrived.
                ctx.timerService().registerEventTimeTimer(elementCount - 2);
                registered = true;
            }
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Void> out) throws Exception {
            // Reaching the timer is the success condition of the test.
            throw new SuccessException();
        }
    });
    tryExecute(env1, "Consume " + elementCount + " elements from Kafka");
    deleteTestTopic(topic);
}
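The CollectingDeserializationSchema used above is a test helper defined elsewhere in KafkaConsumerTestBase and not shown on this page. As a minimal sketch of the technique the test exercises, a DeserializationSchema can override the Collector-based deserialize variant to emit more than one record per Kafka message; the class shape, payload format, and end-of-stream condition below are illustrative assumptions, not the actual Flink test code:

// Sketch only: a DeserializationSchema that emits records through a
// Collector instead of returning a single value. Assumes the imports
// listed below plus java.nio.charset.StandardCharsets.
public static class CollectingSchemaSketch implements DeserializationSchema<Tuple2<Integer, String>> {

    private final int elementCount;
    private int emitted;

    public CollectingSchemaSketch(int elementCount) {
        this.elementCount = elementCount;
    }

    @Override
    public Tuple2<Integer, String> deserialize(byte[] message) {
        // Unused here; the Collector-based overload below is the interesting path.
        throw new UnsupportedOperationException("use deserialize(message, out)");
    }

    @Override
    public void deserialize(byte[] message, Collector<Tuple2<Integer, String>> out) {
        // A single Kafka record may fan out into multiple stream records.
        String payload = new String(message, StandardCharsets.UTF_8);
        out.collect(Tuple2.of(emitted, payload));
        emitted++;
    }

    @Override
    public boolean isEndOfStream(Tuple2<Integer, String> nextElement) {
        // Stop once the expected number of records has been emitted (assumed).
        return emitted >= elementCount;
    }

    @Override
    public TypeInformation<Tuple2<Integer, String>> getProducedType() {
        return TypeInformation.of(new TypeHint<Tuple2<Integer, String>>() {});
    }
}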
Also used : Properties(java.util.Properties) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) DataStream(org.apache.flink.streaming.api.datastream.DataStream) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) AscendingTimestampExtractor(org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor) Collector(org.apache.flink.util.Collector) SuccessException(org.apache.flink.test.util.SuccessException) TestUtils.tryExecute(org.apache.flink.test.util.TestUtils.tryExecute)
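A side note: AscendingTimestampExtractor is deprecated in recent Flink releases in favor of the WatermarkStrategy API (already imported in this test file). A sketch of how the same timestamp extraction could be expressed with WatermarkStrategy, here shown applied to the resulting DataStream rather than to the consumer:

// forMonotonousTimestamps() matches the ascending-timestamp contract
// that AscendingTimestampExtractor enforced.
DataStream<Tuple2<Integer, String>> withAscendingTimestamps =
        fromKafka.assignTimestampsAndWatermarks(
                WatermarkStrategy.<Tuple2<Integer, String>>forMonotonousTimestamps()
                        .withTimestampAssigner((element, previousTimestamp) -> {
                            String s = element.f1;
                            return Long.parseLong(s.substring(0, s.length() - 1));
                        }));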

Example 2 with AscendingTimestampExtractor

Use of org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor in project flink by apache.

The class JdbcDynamicTableSinkITCase, method testUpsert:

@Test
public void testUpsert() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
    Table t = tEnv.fromDataStream(
            get4TupleDataStream(env)
                    .assignTimestampsAndWatermarks(
                            new AscendingTimestampExtractor<Tuple4<Integer, Long, String, Timestamp>>() {

                                @Override
                                public long extractAscendingTimestamp(Tuple4<Integer, Long, String, Timestamp> element) {
                                    return element.f0;
                                }
                            }),
            $("id"), $("num"), $("text"), $("ts"));
    tEnv.createTemporaryView("T", t);
    tEnv.executeSql("CREATE TABLE upsertSink (" + "  cnt BIGINT," + "  lencnt BIGINT," + "  cTag INT," + "  ts TIMESTAMP(3)," + "  PRIMARY KEY (cnt, cTag) NOT ENFORCED" + ") WITH (" + "  'connector'='jdbc'," + "  'url'='" + DB_URL + "'," + "  'table-name'='" + OUTPUT_TABLE1 + "'," + "  'sink.buffer-flush.max-rows' = '2'," + "  'sink.buffer-flush.interval' = '0'," + "  'sink.max-retries' = '0'" + ")");
    tEnv.executeSql("INSERT INTO upsertSink \n" + "SELECT cnt, COUNT(len) AS lencnt, cTag, MAX(ts) AS ts\n" + "FROM (\n" + "  SELECT len, COUNT(id) as cnt, cTag, MAX(ts) AS ts\n" + "  FROM (SELECT id, CHAR_LENGTH(text) AS len, (CASE WHEN id > 0 THEN 1 ELSE 0 END) cTag, ts FROM T)\n" + "  GROUP BY len, cTag\n" + ")\n" + "GROUP BY cnt, cTag").await();
    check(new Row[] { Row.of(1, 5, 1, Timestamp.valueOf("1970-01-01 00:00:00.006")), Row.of(7, 1, 1, Timestamp.valueOf("1970-01-01 00:00:00.021")), Row.of(9, 1, 1, Timestamp.valueOf("1970-01-01 00:00:00.015")) }, DB_URL, OUTPUT_TABLE1, new String[] { "cnt", "lencnt", "cTag", "ts" });
}
Also used : Tuple4(org.apache.flink.api.java.tuple.Tuple4) Table(org.apache.flink.table.api.Table) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Timestamp(java.sql.Timestamp) AscendingTimestampExtractor(org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor) Test(org.junit.Test)
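The get4TupleDataStream(env) helper is defined in the test sources and not shown on this page. A hypothetical sketch of its shape, with placeholder rows chosen only to match the Tuple4<Integer, Long, String, Timestamp> field types:

private static DataStream<Tuple4<Integer, Long, String, Timestamp>> get4TupleDataStream(
        StreamExecutionEnvironment env) {
    // Placeholder rows; the real test data set lives in the Flink test sources.
    return env.fromElements(
            Tuple4.of(1, 1L, "Hi", Timestamp.valueOf("1970-01-01 00:00:00.001")),
            Tuple4.of(2, 2L, "Hello", Timestamp.valueOf("1970-01-01 00:00:00.002")),
            Tuple4.of(3, 2L, "Hello world", Timestamp.valueOf("1970-01-01 00:00:00.003")));
}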

Example 3 with AscendingTimestampExtractor

Use of org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor in project flink by apache.

The class OracleTableSinkITCase, method testUpsert (the same scenario as Example 2, pointed at an Oracle container URL and using DECIMAL columns in the sink table):

@Test
public void testUpsert() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
    Table t = tEnv.fromDataStream(
            get4TupleDataStream(env)
                    .assignTimestampsAndWatermarks(
                            new AscendingTimestampExtractor<Tuple4<Integer, Long, String, Timestamp>>() {

                                @Override
                                public long extractAscendingTimestamp(Tuple4<Integer, Long, String, Timestamp> element) {
                                    return element.f0;
                                }
                            }),
            $("id"), $("num"), $("text"), $("ts"));
    tEnv.createTemporaryView("T", t);
    tEnv.executeSql("CREATE TABLE upsertSink (" + "  cnt DECIMAL(18,2)," + "  lencnt DECIMAL(18,2)," + "  cTag INT," + "  ts TIMESTAMP(3)," + "  PRIMARY KEY (cnt, cTag) NOT ENFORCED" + ") WITH (" + "  'connector'='jdbc'," + "  'url'='" + containerUrl + "'," + "  'table-name'='" + OUTPUT_TABLE1 + "'," + "  'sink.buffer-flush.max-rows' = '2'," + "  'sink.buffer-flush.interval' = '0'," + "  'sink.max-retries' = '0'" + ")");
    tEnv.executeSql("INSERT INTO upsertSink \n" + "SELECT cnt, COUNT(len) AS lencnt, cTag, MAX(ts) AS ts\n" + "FROM (\n" + "  SELECT len, COUNT(id) as cnt, cTag, MAX(ts) AS ts\n" + "  FROM (SELECT id, CHAR_LENGTH(text) AS len, (CASE WHEN id > 0 THEN 1 ELSE 0 END) cTag, ts FROM T)\n" + "  GROUP BY len, cTag\n" + ")\n" + "GROUP BY cnt, cTag").await();
    check(new Row[] { Row.of(1, 5, 1, Timestamp.valueOf("1970-01-01 00:00:00.006")), Row.of(7, 1, 1, Timestamp.valueOf("1970-01-01 00:00:00.021")), Row.of(9, 1, 1, Timestamp.valueOf("1970-01-01 00:00:00.015")) }, containerUrl, OUTPUT_TABLE1, new String[] { "cnt", "lencnt", "cTag", "ts" });
}
Also used : Tuple4(org.apache.flink.api.java.tuple.Tuple4) Table(org.apache.flink.table.api.Table) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Timestamp(java.sql.Timestamp) AscendingTimestampExtractor(org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor) Test(org.junit.Test)
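Both upsert tests end by calling a check(...) helper that is not shown on this page. A hypothetical sketch of what such an assertion might look like, reading the sink table back over JDBC and comparing rows regardless of order; only the call signature is taken from the examples above, the body is an assumption:

// Sketch only: compares the sink table contents with the expected rows.
// Assumes java.sql.* plus the JUnit and util imports already used above.
private static void check(Row[] expected, String url, String table, String[] fields) throws SQLException {
    List<String> actual = new ArrayList<>();
    try (Connection conn = DriverManager.getConnection(url);
            Statement stmt = conn.createStatement();
            ResultSet rs = stmt.executeQuery("SELECT " + String.join(", ", fields) + " FROM " + table)) {
        while (rs.next()) {
            StringBuilder row = new StringBuilder();
            for (int i = 1; i <= fields.length; i++) {
                row.append(i > 1 ? "," : "").append(rs.getObject(i));
            }
            actual.add(row.toString());
        }
    }
    List<String> want = new ArrayList<>();
    for (Row r : expected) {
        StringBuilder row = new StringBuilder();
        for (int i = 0; i < r.getArity(); i++) {
            row.append(i > 0 ? "," : "").append(r.getField(i));
        }
        want.add(row.toString());
    }
    // Order-insensitive comparison; assumes matching string forms for values.
    Collections.sort(actual);
    Collections.sort(want);
    Assert.assertEquals(want, actual);
}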

Aggregations

StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 3
AscendingTimestampExtractor (org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor): 3
Timestamp (java.sql.Timestamp): 2
Tuple4 (org.apache.flink.api.java.tuple.Tuple4): 2
Table (org.apache.flink.table.api.Table): 2
StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment): 2
Test (org.junit.Test): 2
ByteArrayInputStream (java.io.ByteArrayInputStream): 1
IOException (java.io.IOException): 1
ManagementFactory (java.lang.management.ManagementFactory): 1
ArrayList (java.util.ArrayList): 1
BitSet (java.util.BitSet): 1
Collections (java.util.Collections): 1
Date (java.util.Date): 1
HashMap (java.util.HashMap): 1
List (java.util.List): 1
Map (java.util.Map): 1
Optional (java.util.Optional): 1
Properties (java.util.Properties): 1
Random (java.util.Random): 1