
Example 1 with KafkaToPubsubOptions

Use of com.google.cloud.teleport.v2.options.KafkaToPubsubOptions in project DataflowTemplates by GoogleCloudPlatform.

The class KafkaToPubsub, method run:

/**
 * Runs a pipeline which reads messages from Kafka and writes to Pub/Sub.
 *
 * @param options arguments to the pipeline
 */
public static PipelineResult run(KafkaToPubsubOptions options) {
    List<String> topicsList =
        new ArrayList<>(Arrays.asList(options.getInputTopics().split(",")));
    checkArgument(
        topicsList.size() > 0 && topicsList.stream().allMatch((s) -> s.trim().length() > 0),
        "inputTopics cannot be an empty string.");
    List<String> bootstrapServersList =
        new ArrayList<>(Arrays.asList(options.getBootstrapServers().split(",")));
    checkArgument(
        bootstrapServersList.size() > 0
            && bootstrapServersList.stream().allMatch((s) -> s.trim().length() > 0),
        "bootstrapServers cannot be an empty string.");
    // Configure Kafka consumer properties
    Map<String, Object> kafkaConfig = new HashMap<>();
    Map<String, String> sslConfig = null;
    if (options.getSecretStoreUrl() != null && options.getVaultToken() != null) {
        Map<String, Map<String, String>> credentials = getKafkaCredentialsFromVault(options.getSecretStoreUrl(), options.getVaultToken());
        kafkaConfig = configureKafka(credentials.get(KafkaPubsubConstants.KAFKA_CREDENTIALS));
        sslConfig = credentials.get(KafkaPubsubConstants.SSL_CREDENTIALS);
    } else {
        LOG.warn("No information to retrieve Kafka credentials was provided. " + "Trying to initiate an unauthorized connection.");
    }
    // Create the pipeline
    Pipeline pipeline = Pipeline.create(options);
    // Register the coder for pipeline
    FailsafeElementCoder<KV<String, String>, String> coder =
        FailsafeElementCoder.of(
            KvCoder.of(
                NullableCoder.of(StringUtf8Coder.of()), NullableCoder.of(StringUtf8Coder.of())),
            NullableCoder.of(StringUtf8Coder.of()));
    CoderRegistry coderRegistry = pipeline.getCoderRegistry();
    coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);
    TypeDescriptor<String> stringTypeDescriptor = TypeDescriptors.strings();
    LOG.info("Starting Kafka-To-PubSub Pipeline with parameters bootstrap servers:{} input topics:{}" + " output pubsub topic:{} ", options.getBootstrapServers(), options.getInputTopics(), options.getOutputTopic());
    /*
     * Steps:
     *  1) Read messages in from Kafka
     *  2) Transform message payload via UDF
     *  3) Write successful records out to Pub/Sub
     *  4) Write failed records out to Pub/Sub dead-letter topic
     */
    PCollectionTuple appliedUdf =
        pipeline
            .apply(
                "readFromKafka",
                readFromKafka(options.getBootstrapServers(), topicsList, kafkaConfig, sslConfig))
            .apply("applyUDF", new FormatTransform.UdfProcess(options));
    /* Step #3: Write the successful records out to Pub/Sub */
    appliedUdf
        .get(KafkaPubsubConstants.UDF_OUT)
        .apply(
            "getSuccessUDFOutElements",
            MapElements.into(stringTypeDescriptor).via(FailsafeElement::getPayload))
        .setCoder(NullableCoder.of(StringUtf8Coder.of()))
        .apply("writeSuccessMessages", PubsubIO.writeStrings().to(options.getOutputTopic()));
    /* Step #4: Write failed messages out to Pub/Sub */
    if (options.getOutputDeadLetterTopic() != null) {
        appliedUdf
            .get(KafkaPubsubConstants.UDF_DEADLETTER_OUT)
            .apply(
                "getFailedMessages",
                MapElements.into(TypeDescriptors.kvs(stringTypeDescriptor, stringTypeDescriptor))
                    .via(FailsafeElement::getOriginalPayload))
            .apply(
                "extractMessageValues",
                MapElements.into(stringTypeDescriptor).via(KV<String, String>::getValue))
            .setCoder(NullableCoder.of(StringUtf8Coder.of()))
            .apply(
                "writeFailureMessages",
                PubsubIO.writeStrings().to(options.getOutputDeadLetterTopic()));
    }
    return pipeline.run();
}
Also used: HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), KV (org.apache.beam.sdk.values.KV), Pipeline (org.apache.beam.sdk.Pipeline), FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement), FormatTransform (com.google.cloud.teleport.v2.transforms.FormatTransform), CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry), PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple), Map (java.util.Map)
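
The run method depends on FormatTransform.UdfProcess emitting a PCollectionTuple with the two tags held in KafkaPubsubConstants: UDF_OUT for records the UDF transformed successfully and UDF_DEADLETTER_OUT for failures, each wrapped in a FailsafeElement that keeps the original Kafka KV alongside the current payload. Below is a minimal sketch of that two-tag pattern, not the template's actual UdfProcess: the SketchUdfFn class, its SUCCESS/FAILURE tags, and the toUpperCase stand-in for the UDF call are all hypothetical, and it assumes the FailsafeElement.of(original, current) factory and fluent setErrorMessage used elsewhere in the Teleport codebase.

import com.google.cloud.teleport.v2.values.FailsafeElement;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;

/** Hypothetical sketch of the success/dead-letter branching used by UdfProcess. */
class SketchUdfFn extends DoFn<KV<String, String>, FailsafeElement<KV<String, String>, String>> {

    // Hypothetical tags; the real template keeps its tags in KafkaPubsubConstants.
    static final TupleTag<FailsafeElement<KV<String, String>, String>> SUCCESS =
        new TupleTag<FailsafeElement<KV<String, String>, String>>() {};
    static final TupleTag<FailsafeElement<KV<String, String>, String>> FAILURE =
        new TupleTag<FailsafeElement<KV<String, String>, String>>() {};

    @ProcessElement
    public void processElement(ProcessContext context) {
        KV<String, String> message = context.element();
        try {
            // Stand-in for the real UDF invocation.
            String transformed = message.getValue().toUpperCase();
            context.output(SUCCESS, FailsafeElement.of(message, transformed));
        } catch (Exception e) {
            // Keep the original payload so the dead-letter branch can recover it;
            // this is what getOriginalPayload reads in the run method above.
            context.output(
                FAILURE,
                FailsafeElement.of(message, message.getValue()).setErrorMessage(e.getMessage()));
        }
    }
}

// Usage, given a PCollection<KV<String, String>> named messages:
//   PCollectionTuple branches =
//       messages.apply(
//           ParDo.of(new SketchUdfFn())
//               .withOutputTags(SketchUdfFn.SUCCESS, TupleTagList.of(SketchUdfFn.FAILURE)));

This is also why run registers FailsafeElementCoder with the CoderRegistry up front: both branches of the tuple carry FailsafeElement values whose coder Beam cannot infer on its own.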

Example 2 with KafkaToPubsubOptions

Use of com.google.cloud.teleport.v2.options.KafkaToPubsubOptions in project DataflowTemplates by GoogleCloudPlatform.

The class KafkaToPubsub, method main:

/**
 * Main entry point for pipeline execution.
 *
 * @param args Command line arguments to the pipeline.
 */
public static void main(String[] args) {
    KafkaToPubsubOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(KafkaToPubsubOptions.class);
    run(options);
}
Also used: KafkaToPubsubOptions (com.google.cloud.teleport.v2.options.KafkaToPubsubOptions)
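
Because KafkaToPubsubOptions is a Beam PipelineOptions interface, the command-line flag names mirror its getters (getBootstrapServers becomes --bootstrapServers, getInputTopics becomes --inputTopics, and so on). A minimal launch sketch under that assumption, with placeholder broker, project, and topic names:

import org.apache.beam.sdk.options.PipelineOptionsFactory;
import com.google.cloud.teleport.v2.options.KafkaToPubsubOptions;
// Assumes KafkaToPubsub is on the classpath; adjust the import to its actual package.
import com.google.cloud.teleport.v2.templates.KafkaToPubsub;

public class KafkaToPubsubLauncher {

    public static void main(String[] ignored) {
        // Placeholder values; flag names are derived from the KafkaToPubsubOptions getters.
        String[] args =
            new String[] {
                "--bootstrapServers=localhost:9092", // comma-separated broker list
                "--inputTopics=orders,payments", // comma-separated Kafka topics
                "--outputTopic=projects/my-project/topics/output", // hypothetical Pub/Sub topic
                "--outputDeadLetterTopic=projects/my-project/topics/dead-letter" // optional
            };
        KafkaToPubsubOptions options =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(KafkaToPubsubOptions.class);
        KafkaToPubsub.run(options);
    }
}

Leaving out --secretStoreUrl and --vaultToken exercises the branch in run above that logs a warning and attempts an unauthenticated connection to Kafka.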

Aggregations

KafkaToPubsubOptions (com.google.cloud.teleport.v2.options.KafkaToPubsubOptions) 1
FormatTransform (com.google.cloud.teleport.v2.transforms.FormatTransform) 1
FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement) 1
ArrayList (java.util.ArrayList) 1
HashMap (java.util.HashMap) 1
Map (java.util.Map) 1
Pipeline (org.apache.beam.sdk.Pipeline) 1
CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry) 1
KV (org.apache.beam.sdk.values.KV) 1
PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple) 1