
Example 21 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

The class FlinkKinesisConsumerMigrationTest, method testRestoreWithReshardedStream:

@Test
public void testRestoreWithReshardedStream() throws Exception {
    final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(TEST_STATE.size());
    for (StreamShardMetadata shardMetadata : TEST_STATE.keySet()) {
        // setup the closed shard
        Shard closedShard = new Shard();
        closedShard.setShardId(shardMetadata.getShardId());
        // an ending sequence number marks this shard as closed
        SequenceNumberRange closedSequenceNumberRange =
                new SequenceNumberRange()
                        .withStartingSequenceNumber("1")
                        .withEndingSequenceNumber("1087654321");
        closedShard.setSequenceNumberRange(closedSequenceNumberRange);
        initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), closedShard));
        // setup the new shards
        Shard newSplitShard1 = new Shard();
        newSplitShard1.setShardId(KinesisShardIdGenerator.generateFromShardOrder(1));
        SequenceNumberRange newSequenceNumberRange1 = new SequenceNumberRange();
        newSequenceNumberRange1.withStartingSequenceNumber("1087654322");
        newSplitShard1.setSequenceNumberRange(newSequenceNumberRange1);
        newSplitShard1.setParentShardId(TEST_SHARD_ID);
        Shard newSplitShard2 = new Shard();
        newSplitShard2.setShardId(KinesisShardIdGenerator.generateFromShardOrder(2));
        SequenceNumberRange newSequenceNumberRange2 = new SequenceNumberRange();
        newSequenceNumberRange2.withStartingSequenceNumber("2087654322");
        newSplitShard2.setSequenceNumberRange(newSequenceNumberRange2);
        newSplitShard2.setParentShardId(TEST_SHARD_ID);
        initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), newSplitShard1));
        initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), newSplitShard2));
    }
    final TestFetcher<String> fetcher =
            new TestFetcher<>(
                    Collections.singletonList(TEST_STREAM_NAME),
                    new TestSourceContext<>(),
                    new TestRuntimeContext(true, 1, 0),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    null,
                    initialDiscoveryShards);
    final DummyFlinkKinesisConsumer<String> consumerFunction =
            new DummyFlinkKinesisConsumer<>(
                    fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));
    StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator =
            new StreamSource<>(consumerFunction);
    final AbstractStreamOperatorTestHarness<String> testHarness =
            new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
    testHarness.setup();
    testHarness.initializeState(
            OperatorSnapshotUtil.getResourceFilename(
                    "kinesis-consumer-migration-test-flink" + testMigrateVersion + "-snapshot"));
    testHarness.open();
    consumerFunction.run(new TestSourceContext<>());
    // assert that state is correctly restored
    assertNotNull(consumerFunction.getRestoredState());
    assertEquals(1, consumerFunction.getRestoredState().size());
    assertEquals(TEST_STATE, removeEquivalenceWrappers(consumerFunction.getRestoredState()));
    // assert that the fetcher is registered with all shards, including new shards
    assertEquals(3, fetcher.getSubscribedShardsState().size());
    KinesisStreamShardState restoredClosedShardState = fetcher.getSubscribedShardsState().get(0);
    assertEquals(TEST_STREAM_NAME, restoredClosedShardState.getStreamShardHandle().getStreamName());
    assertEquals(TEST_SHARD_ID, restoredClosedShardState.getStreamShardHandle().getShard().getShardId());
    assertTrue(restoredClosedShardState.getStreamShardHandle().isClosed());
    assertEquals(TEST_SEQUENCE_NUMBER, restoredClosedShardState.getLastProcessedSequenceNum());
    KinesisStreamShardState restoredNewSplitShard1 = fetcher.getSubscribedShardsState().get(1);
    assertEquals(TEST_STREAM_NAME, restoredNewSplitShard1.getStreamShardHandle().getStreamName());
    assertEquals(KinesisShardIdGenerator.generateFromShardOrder(1), restoredNewSplitShard1.getStreamShardHandle().getShard().getShardId());
    assertFalse(restoredNewSplitShard1.getStreamShardHandle().isClosed());
    // new shards should be consumed from the beginning
    assertEquals(SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get(), restoredNewSplitShard1.getLastProcessedSequenceNum());
    KinesisStreamShardState restoredNewSplitShard2 = fetcher.getSubscribedShardsState().get(2);
    assertEquals(TEST_STREAM_NAME, restoredNewSplitShard2.getStreamShardHandle().getStreamName());
    assertEquals(KinesisShardIdGenerator.generateFromShardOrder(2), restoredNewSplitShard2.getStreamShardHandle().getShard().getShardId());
    assertFalse(restoredNewSplitShard2.getStreamShardHandle().isClosed());
    // new shards should be consumed from the beginning
    assertEquals(SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get(), restoredNewSplitShard2.getLastProcessedSequenceNum());
    consumerOperator.close();
    consumerOperator.cancel();
}
Also used: SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) TestRuntimeContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) Test(org.junit.Test)
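
Every snippet on this page feeds a SimpleStringSchema into some connector. As a point of reference, here is a minimal standalone round-trip; this is an illustrative sketch that assumes only flink-core on the classpath, and the class name is made up for the example:

import java.nio.charset.StandardCharsets;

import org.apache.flink.api.common.serialization.SimpleStringSchema;

public class SimpleStringSchemaRoundTrip {

    public static void main(String[] args) throws Exception {
        // SimpleStringSchema implements both SerializationSchema<String> and
        // DeserializationSchema<String>; the default charset is UTF-8.
        SimpleStringSchema schema = new SimpleStringSchema(StandardCharsets.UTF_8);

        byte[] bytes = schema.serialize("some-sample-message");
        String roundTripped = schema.deserialize(bytes);

        // prints true: serialize/deserialize is lossless for valid UTF-8 text
        System.out.println("some-sample-message".equals(roundTripped));
    }
}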

Example 22 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

The class ConsumeFromKinesis, method main:

public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));
    DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>("flink-test", new SimpleStringSchema(), kinesisConsumerConfig));
    kinesis.print();
    see.execute();
}
Also used: ParameterTool(org.apache.flink.api.java.utils.ParameterTool) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Properties(java.util.Properties)
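
The consumer above pulls its region and AWS credentials from command-line flags via ParameterTool. For a quick local experiment, the same required keys can be supplied programmatically with ParameterTool.fromMap; a sketch with placeholder values (never hard-code real credentials):

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.api.java.utils.ParameterTool;

public class KinesisArgsSketch {

    public static void main(String[] args) {
        // the same keys that ConsumeFromKinesis reads via pt.getRequired(...)
        Map<String, String> params = new HashMap<>();
        params.put("region", "eu-west-1"); // placeholder region
        params.put("accesskey", "<aws-access-key>"); // placeholder credential
        params.put("secretkey", "<aws-secret-key>"); // placeholder credential

        ParameterTool pt = ParameterTool.fromMap(params);
        System.out.println(pt.getRequired("region")); // prints eu-west-1
    }
}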

Example 23 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

The class TestAvroConsumerConfluent, method main:

public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    if (parameterTool.getNumberOfParameters() < 7) {
        System.out.println(
                "Missing parameters!\n"
                        + "Usage: Kafka --input-topic <topic> --output-string-topic <topic> "
                        + "--output-avro-topic <topic> --output-subject <subject> "
                        + "--bootstrap.servers <kafka brokers> "
                        + "--schema-registry-url <confluent schema registry> --group.id <some id>");
        return;
    }
    Properties config = new Properties();
    config.setProperty("bootstrap.servers", parameterTool.getRequired("bootstrap.servers"));
    config.setProperty("group.id", parameterTool.getRequired("group.id"));
    String schemaRegistryUrl = parameterTool.getRequired("schema-registry-url");
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<User> input =
            env.addSource(
                    new FlinkKafkaConsumer<>(
                                    parameterTool.getRequired("input-topic"),
                                    ConfluentRegistryAvroDeserializationSchema.forSpecific(
                                            User.class, schemaRegistryUrl),
                                    config)
                            .setStartFromEarliest());
    SingleOutputStreamOperator<String> mapToString =
            input.map((MapFunction<User, String>) SpecificRecordBase::toString);
    KafkaSink<String> stringSink =
            KafkaSink.<String>builder()
                    .setBootstrapServers(config.getProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG))
                    .setRecordSerializer(
                            KafkaRecordSerializationSchema.builder()
                                    .setValueSerializationSchema(new SimpleStringSchema())
                                    .setTopic(parameterTool.getRequired("output-string-topic"))
                                    .build())
                    .setKafkaProducerConfig(config)
                    .build();
    mapToString.sinkTo(stringSink);
    KafkaSink<User> avroSink =
            KafkaSink.<User>builder()
                    .setBootstrapServers(config.getProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG))
                    .setRecordSerializer(
                            KafkaRecordSerializationSchema.builder()
                                    .setValueSerializationSchema(
                                            ConfluentRegistryAvroSerializationSchema.forSpecific(
                                                    User.class,
                                                    parameterTool.getRequired("output-subject"),
                                                    schemaRegistryUrl))
                                    .setTopic(parameterTool.getRequired("output-avro-topic"))
                                    .build())
                    .build();
    input.sinkTo(avroSink);
    env.execute("Kafka Confluent Schema Registry AVRO Example");
}
Also used: ParameterTool(org.apache.flink.api.java.utils.ParameterTool) User(example.avro.User) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Properties(java.util.Properties) FlinkKafkaConsumer(org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer)
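
Only the string sink in this pipeline touches SimpleStringSchema: it serves as the value serializer inside the KafkaRecordSerializationSchema. Isolated, and assuming the same flink-connector-kafka dependency (the topic name and class name are placeholders), that piece reduces to:

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;

public class StringRecordSerializerSketch {

    public static KafkaRecordSerializationSchema<String> build() {
        // serializes each String element to UTF-8 bytes and routes it to a fixed topic
        return KafkaRecordSerializationSchema.builder()
                .setValueSerializationSchema(new SimpleStringSchema())
                .setTopic("output-string-topic") // placeholder topic
                .build();
    }
}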

Example 24 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

The class EmulatedPubSubSinkTest, method testPubSubSinkThrowsExceptionOnFailure:

@Test(expected = Exception.class)
public void testPubSubSinkThrowsExceptionOnFailure() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(100);
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());
    // Create the test stream; use a source function to prevent the job from
    // shutting down before a checkpoint has been made.
    env.addSource(new SingleInputSourceFunction())
            .map((MapFunction<String, String>) StringUtils::reverse)
            .addSink(
                    PubSubSink.newBuilder()
                            .withSerializationSchema(new SimpleStringSchema())
                            .withProjectName(PROJECT_NAME)
                            .withTopicName(TOPIC_NAME)
                            .withHostAndPortForEmulator("unknown-host-to-force-sink-crash:1234")
                            .withCredentials(EmulatorCredentials.getInstance())
                            .build())
            .name("PubSub sink");
    // Run
    env.execute();
}
Also used: SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)
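
The unreachable host in the test above is deliberate: it forces the sink to fail so the expected exception fires. Pointed at a reachable local Pub/Sub emulator, the same builder chain looks like the sketch below; project, topic, emulator address, and class name are placeholders, and the import paths are an assumption based on the connector's test utilities:

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.gcp.pubsub.PubSubSink;
import org.apache.flink.streaming.connectors.gcp.pubsub.emulator.EmulatorCredentials;

public class EmulatorPubSubSinkSketch {

    public static PubSubSink<String> build() {
        // identical chain to the test, but aimed at a host the emulator actually listens on
        return PubSubSink.newBuilder()
                .withSerializationSchema(new SimpleStringSchema())
                .withProjectName("my-project") // placeholder project
                .withTopicName("my-topic") // placeholder topic
                .withHostAndPortForEmulator("localhost:8085") // placeholder emulator address
                .withCredentials(EmulatorCredentials.getInstance())
                .build();
    }
}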

Example 25 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

The class PulsarDeserializationSchemaTest, method createFromFlinkDeserializationSchema:

@Test
void createFromFlinkDeserializationSchema() throws Exception {
    PulsarDeserializationSchema<String> schema = flinkSchema(new SimpleStringSchema());
    schema.open(new TestingDeserializationContext(), mock(SourceConfiguration.class));
    assertDoesNotThrow(() -> InstantiationUtil.clone(schema));
    Message<byte[]> message = getMessage("some-sample-message", String::getBytes);
    SingleMessageCollector<String> collector = new SingleMessageCollector<>();
    schema.deserialize(message, collector);
    assertNotNull(collector.result);
    assertEquals("some-sample-message", collector.result);
}
Also used: SourceConfiguration(org.apache.flink.connector.pulsar.source.config.SourceConfiguration) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) TestingDeserializationContext(org.apache.flink.connector.testutils.source.deserialization.TestingDeserializationContext) Test(org.junit.jupiter.api.Test)
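
flinkSchema is the adapter that lets any Flink DeserializationSchema, including SimpleStringSchema, act as a PulsarDeserializationSchema. Wired into a source, it follows the connector's builder pattern; below is a sketch with placeholder URLs, topic, subscription, and class name, using the builder methods as documented for the Pulsar connector this test belongs to:

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.pulsar.source.PulsarSource;
import org.apache.flink.connector.pulsar.source.enumerator.cursor.StartCursor;
import org.apache.flink.connector.pulsar.source.reader.deserializer.PulsarDeserializationSchema;

public class PulsarStringSourceSketch {

    public static PulsarSource<String> build() {
        // consumes UTF-8 strings from Pulsar using the flinkSchema adapter
        return PulsarSource.builder()
                .setServiceUrl("pulsar://localhost:6650") // placeholder broker URL
                .setAdminUrl("http://localhost:8080") // placeholder admin URL
                .setStartCursor(StartCursor.earliest())
                .setTopics("sample-topic") // placeholder topic
                .setDeserializationSchema(
                        PulsarDeserializationSchema.flinkSchema(new SimpleStringSchema()))
                .setSubscriptionName("sample-subscription") // placeholder subscription
                .build();
    }
}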

Aggregations

SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema): 63 uses
Test (org.junit.Test): 35 uses
Properties (java.util.Properties): 30 uses
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 20 uses
CheckedThread (org.apache.flink.core.testutils.CheckedThread): 13 uses
StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle): 13 uses
Shard (com.amazonaws.services.kinesis.model.Shard): 11 uses
ArrayList (java.util.ArrayList): 11 uses
KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState): 11 uses
TestableKinesisDataFetcher (org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher): 11 uses
LinkedList (java.util.LinkedList): 9 uses
SequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber): 9 uses
HashMap (java.util.HashMap): 8 uses
StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata): 7 uses
OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness): 7 uses
Map (java.util.Map): 6 uses
AtomicReference (java.util.concurrent.atomic.AtomicReference): 6 uses
RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 6 uses
Matchers.anyString (org.mockito.Matchers.anyString): 6 uses
SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange): 5 uses