Search in sources:

Example 56 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class BigQueryToElasticsearch method main.

/**
 * Program entry point. Parses the command-line arguments, with validation enabled, into
 * {@link BigQueryToElasticsearchOptions} and passes them to {@code run}.
 *
 * @param args Command line arguments to the pipeline.
 */
public static void main(String[] args) {
    // Parse + validate the arguments and launch the pipeline in one step;
    // the PipelineResult returned by run() is intentionally not used here.
    run(
        PipelineOptionsFactory.fromArgs(args)
            .withValidation()
            .as(BigQueryToElasticsearchOptions.class));
}
Also used : BigQueryToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.BigQueryToElasticsearchOptions)

Example 57 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class BigQueryToElasticsearch method run.

/**
 * Builds the BigQuery-to-Elasticsearch pipeline from the given options and runs it.
 *
 * <p>The pipeline is a single linear chain of three transforms:
 *
 * <ol>
 *   <li>Read records from BigQuery.
 *   <li>Convert each table row to a JSON string.
 *   <li>Write the JSON documents to Elasticsearch.
 * </ol>
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
private static PipelineResult run(BigQueryToElasticsearchOptions options) {
    Pipeline p = Pipeline.create(options);

    p
        // Step #1: Read from BigQuery. If a query is provided then it is used to get the TableRows.
        .apply(
            "ReadFromBigQuery",
            ReadBigQuery.newBuilder()
                .setOptions(options.as(BigQueryToElasticsearchOptions.class))
                .build())
        // Step #2: Create json string from Table Row.
        .apply("TableRowsToJsonDocument", ParDo.of(new TableRowToJsonFn()))
        // Step #3: Write records to Elasticsearch.
        .apply(
            "WriteToElasticsearch",
            WriteToElasticsearch.newBuilder()
                .setOptions(options.as(BigQueryToElasticsearchOptions.class))
                .build());

    return p.run();
}
Also used : BigQueryToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.BigQueryToElasticsearchOptions) TableRowToJsonFn(com.google.cloud.teleport.v2.transforms.BigQueryConverters.TableRowToJsonFn) Pipeline(org.apache.beam.sdk.Pipeline)

Example 58 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class GCSToElasticsearch method main.

/**
 * Program entry point. Parses the command-line arguments, with validation enabled, into
 * {@link GCSToElasticsearchOptions} and passes them to {@code run}.
 *
 * @param args Command line arguments to the pipeline.
 */
public static void main(String[] args) {
    // Parse + validate the arguments and launch the pipeline in one step;
    // whatever run() returns is intentionally not used here.
    run(
        PipelineOptionsFactory.fromArgs(args)
            .withValidation()
            .as(GCSToElasticsearchOptions.class));
}
Also used : GCSToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions)

Example 59 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class PubSubToElasticsearch method run.

/**
 * Builds the Pub/Sub-to-Elasticsearch pipeline from the given options and runs it.
 *
 * <p>Outline of the pipeline:
 *
 * <ol>
 *   <li>Read PubSubMessages with attributes from the input Pub/Sub subscription.
 *   <li>Convert the messages to JSON documents, applying the Javascript UDF if provided.
 *   <li>Index the converted documents into Elasticsearch, and publish the elements that
 *       failed processing to the error output Pub/Sub topic.
 * </ol>
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
public static PipelineResult run(PubSubToElasticsearchOptions options) {
    Pipeline p = Pipeline.create(options);

    // Make both coders known to the pipeline for their respective encoded types.
    CoderRegistry registry = p.getCoderRegistry();
    registry.registerCoderForType(
        FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor(), FAILSAFE_ELEMENT_CODER);
    registry.registerCoderForType(CODER.getEncodedTypeDescriptor(), CODER);

    LOG.info("Reading from subscription: " + options.getInputSubscription());

    // Steps 1 + 2: read from the subscription and convert to JSON documents. The result is a
    // tuple that is split below by the TRANSFORM_OUT / TRANSFORM_ERROROUTPUT_OUT tags.
    PCollectionTuple conversionResults =
        p.apply(
                "ReadPubSubSubscription",
                PubsubIO.readMessagesWithAttributes()
                    .fromSubscription(options.getInputSubscription()))
            .apply(
                "ConvertMessageToJsonDocument",
                PubSubMessageToJsonDocument.newBuilder()
                    .setJavascriptTextTransformFunctionName(
                        options.getJavascriptTextTransformFunctionName())
                    .setJavascriptTextTransformGcsPath(
                        options.getJavascriptTextTransformGcsPath())
                    .build());

    // Step 3a: take the payloads of the TRANSFORM_OUT elements, insert metadata, and write
    // the documents to Elasticsearch via WriteToElasticsearch.
    conversionResults
        .get(TRANSFORM_OUT)
        .apply(
            "GetJsonDocuments",
            MapElements.into(TypeDescriptors.strings()).via(FailsafeElement::getPayload))
        .apply("Insert metadata", new ProcessEventMetadata())
        .apply(
            "WriteToElasticsearch",
            WriteToElasticsearch.newBuilder()
                .setOptions(options.as(PubSubToElasticsearchOptions.class))
                .build());

    // Step 3b: write elements that failed processing to the error output Pub/Sub topic.
    conversionResults
        .get(TRANSFORM_ERROROUTPUT_OUT)
        .apply(ParDo.of(new FailedPubsubMessageToPubsubTopicFn()))
        .apply("writeFailureMessages", PubsubIO.writeMessages().to(options.getErrorOutputTopic()));

    // Kick off execution and hand the result back to the caller.
    return p.run();
}
Also used : FailedPubsubMessageToPubsubTopicFn(com.google.cloud.teleport.v2.elasticsearch.transforms.FailedPubsubMessageToPubsubTopicFn) CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) ProcessEventMetadata(com.google.cloud.teleport.v2.elasticsearch.transforms.ProcessEventMetadata) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Pipeline(org.apache.beam.sdk.Pipeline) PubSubToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions)

Example 60 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class GCSToElasticsearchTest method testWriteToElasticsearchBuilder.

/**
 * Verifies that building {@link WriteToElasticsearch} fails with an
 * {@link IllegalArgumentException} when the retry configuration is incomplete: here the
 * maximum retry attempts are set while the maximum retry duration is {@code null}.
 */
@Test
public void testWriteToElasticsearchBuilder() {
    exceptionRule.expect(IllegalArgumentException.class);

    GCSToElasticsearchOptions esOptions =
        PipelineOptionsFactory.create().as(GCSToElasticsearchOptions.class);

    // Input / connection settings.
    esOptions.setInputFileSpec(NO_HEADER_CSV_FILE_PATH);
    esOptions.setContainsHeaders(false);
    esOptions.setConnectionUrl("http://my-node");
    esOptions.setApiKey("key");

    // Batching settings.
    esOptions.setBatchSize(10000L);
    esOptions.setBatchSizeBytes(500000L);

    // Deliberately incomplete retry configuration: attempts without a duration.
    esOptions.setMaxRetryAttempts(5);
    esOptions.setMaxRetryDuration(null);

    pipeline
        .apply(Create.of("{}").withCoder(StringUtf8Coder.of()))
        .apply(
            "BuildWriteToElasticSearchObject",
            WriteToElasticsearch.newBuilder()
                .setOptions(esOptions.as(ElasticsearchWriteOptions.class))
                .build());
    pipeline.run();
}
Also used : GCSToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions) ElasticsearchWriteOptions(com.google.cloud.teleport.v2.elasticsearch.options.ElasticsearchWriteOptions) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)63 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)25 FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement)20 Pipeline (org.apache.beam.sdk.Pipeline)19 CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry)19 BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable)15 GenericRecord (org.apache.avro.generic.GenericRecord)12 Category (org.junit.experimental.categories.Category)12 Filter (com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter)10 BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition)10 PubSubToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions)9 TableRow (com.google.api.services.bigquery.model.TableRow)8 DataplexClient (com.google.cloud.teleport.v2.clients.DataplexClient)8 FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.FileFormatConversion.FileFormatConversionOptions)8 KV (org.apache.beam.sdk.values.KV)8 ArrayList (java.util.ArrayList)7 ElasticsearchWriteOptions (com.google.cloud.teleport.v2.elasticsearch.options.ElasticsearchWriteOptions)6 GCSToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions)6 FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.DataplexFileFormatConversion.FileFormatConversionOptions)6 PubSubProtoToBigQueryOptions (com.google.cloud.teleport.v2.templates.PubsubProtoToBigQuery.PubSubProtoToBigQueryOptions)6