Search in sources :

Example 16 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class GCSToElasticsearchTest method testGCSToElasticsearchJsonSchemaE2E.

/**
 * Exercises the CSV read and line-to-JSON stages used by {@link GCSToElasticsearch} when a JSON
 * schema file is supplied and the input CSV is read without a header row.
 */
@Test
public void testGCSToElasticsearchJsonSchemaE2E() {
    // Raw CSV line and the JSON payload the conversion is asserted to produce for it.
    final String csvLine = "007,CA,26.23";
    final String expectedJson = "{\"id\":\"007\",\"state\":\"CA\",\"price\":26.23}";

    // Options: parse with the JSON schema, input file has no header row.
    GCSToElasticsearchOptions options = PipelineOptionsFactory.create().as(GCSToElasticsearchOptions.class);
    options.setJsonSchemaPath(JSON_SCHEMA_FILE_PATH);
    options.setContainsHeaders(false);
    options.setInputFileSpec(NO_HEADER_CSV_FILE_PATH);
    options.setApiKey("key");

    // Register a null-tolerant String/String coder for the failsafe elements.
    final FailsafeElementCoder<String, String> failsafeCoder =
        FailsafeElementCoder.of(
            NullableCoder.of(StringUtf8Coder.of()), NullableCoder.of(StringUtf8Coder.of()));
    CoderRegistry registry = pipeline.getCoderRegistry();
    registry.registerCoderForType(failsafeCoder.getEncodedTypeDescriptor(), failsafeCoder);

    // Stage 1: read the CSV file into header/line outputs.
    PCollectionTuple csvOut =
        pipeline.apply(
            "ReadCsv",
            CsvConverters.ReadCsv.newBuilder()
                .setCsvFormat(options.getCsvFormat())
                .setDelimiter(options.getDelimiter())
                .setHasHeaders(options.getContainsHeaders())
                .setInputFileSpec(options.getInputFileSpec())
                .setHeaderTag(GCSToElasticsearch.CSV_HEADERS)
                .setLineTag(GCSToElasticsearch.CSV_LINES)
                .setFileEncoding(options.getCsvFileEncoding())
                .build());

    // Stage 2: convert each line to a failsafe JSON element using the schema path from options.
    PCollectionTuple convertedOut =
        csvOut.apply(
            "ConvertLine",
            CsvConverters.LineToFailsafeJson.newBuilder()
                .setDelimiter(options.getDelimiter())
                .setUdfFileSystemPath(options.getJavascriptTextTransformGcsPath())
                .setUdfFunctionName(options.getJavascriptTextTransformFunctionName())
                .setJsonSchemaPath(options.getJsonSchemaPath())
                .setHeaderTag(GCSToElasticsearch.CSV_HEADERS)
                .setLineTag(GCSToElasticsearch.CSV_LINES)
                .setUdfOutputTag(GCSToElasticsearch.PROCESSING_OUT)
                .setUdfDeadletterTag(GCSToElasticsearch.PROCESSING_DEADLETTER_OUT)
                .build());

    // Assert on the first element of the processing output.
    PAssert.that(convertedOut.get(GCSToElasticsearch.PROCESSING_OUT))
        .satisfies(
            elements -> {
                FailsafeElement first = elements.iterator().next();
                assertThat(first.getOriginalPayload(), is(equalTo(csvLine)));
                assertThat(first.getPayload(), is(equalTo(expectedJson)));
                return null;
            });

    // Execute pipeline
    pipeline.run();
}
Also used : CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) GCSToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement) Test(org.junit.Test)

Example 17 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class GCSToElasticsearchTest method testGCSToElasticsearchHeadersE2E.

/**
 * Tests the {@link GCSToElasticsearch} pipeline using the headers of the Csv to parse it.
 */
@Test
public void testGCSToElasticsearchHeadersE2E() {
    // Raw CSV line and the JSON payload the conversion is asserted to produce for it; no JSON
    // schema is configured here and the expected price is a JSON string.
    final String csvLine = "007,CA,26.23";
    final String expectedJson = "{\"id\":\"007\",\"state\":\"CA\",\"price\":\"26.23\"}";

    // Options: the input file is declared to contain a header row.
    GCSToElasticsearchOptions options = PipelineOptionsFactory.create().as(GCSToElasticsearchOptions.class);
    options.setContainsHeaders(true);
    options.setInputFileSpec(HEADER_CSV_FILE_PATH);
    options.setApiKey("key");

    // Register a null-tolerant String/String coder for the failsafe elements.
    final FailsafeElementCoder<String, String> failsafeCoder =
        FailsafeElementCoder.of(
            NullableCoder.of(StringUtf8Coder.of()), NullableCoder.of(StringUtf8Coder.of()));
    CoderRegistry registry = pipeline.getCoderRegistry();
    registry.registerCoderForType(failsafeCoder.getEncodedTypeDescriptor(), failsafeCoder);

    // Stage 1: read the CSV file (with headers) into header/line outputs.
    PCollectionTuple csvOut =
        pipeline.apply(
            "ReadCsv",
            CsvConverters.ReadCsv.newBuilder()
                .setCsvFormat(options.getCsvFormat())
                .setDelimiter(options.getDelimiter())
                .setHasHeaders(options.getContainsHeaders())
                .setInputFileSpec(options.getInputFileSpec())
                .setHeaderTag(GCSToElasticsearch.CSV_HEADERS)
                .setLineTag(GCSToElasticsearch.CSV_LINES)
                .setFileEncoding(options.getCsvFileEncoding())
                .build());

    // Stage 2: convert each line to a failsafe JSON element.
    PCollectionTuple convertedOut =
        csvOut.apply(
            "ConvertLine",
            CsvConverters.LineToFailsafeJson.newBuilder()
                .setDelimiter(options.getDelimiter())
                .setUdfFileSystemPath(options.getJavascriptTextTransformGcsPath())
                .setUdfFunctionName(options.getJavascriptTextTransformFunctionName())
                .setJsonSchemaPath(options.getJsonSchemaPath())
                .setHeaderTag(GCSToElasticsearch.CSV_HEADERS)
                .setLineTag(GCSToElasticsearch.CSV_LINES)
                .setUdfOutputTag(GCSToElasticsearch.PROCESSING_OUT)
                .setUdfDeadletterTag(GCSToElasticsearch.PROCESSING_DEADLETTER_OUT)
                .build());

    // Assert on the first element of the processing output.
    PAssert.that(convertedOut.get(GCSToElasticsearch.PROCESSING_OUT))
        .satisfies(
            elements -> {
                FailsafeElement first = elements.iterator().next();
                assertThat(first.getOriginalPayload(), is(equalTo(csvLine)));
                assertThat(first.getPayload(), is(equalTo(expectedJson)));
                return null;
            });

    // Execute pipeline
    pipeline.run();
}
Also used : CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) GCSToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement) Test(org.junit.Test)

Example 18 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class PubSubToElasticsearchTest method testPubSubToElasticsearchNoUdfE2E.

/**
 * Runs the message-to-JSON-document stage of {@link PubSubToElasticsearch} with both UDF options
 * set to {@code null} and checks that the original message payload is carried through.
 */
@Test
public void testPubSubToElasticsearchNoUdfE2E() {
    // Options with the JavaScript UDF explicitly disabled (both path and function name null).
    PubSubToElasticsearchOptions options = TestPipeline.testingPipelineOptions().as(PubSubToElasticsearchOptions.class);
    options.setErrorOutputTopic("projects/test/topics/test-error-topic");
    options.setJavascriptTextTransformFunctionName(null);
    options.setJavascriptTextTransformGcsPath(null);
    options.setApiKey("key");

    // Register the coders the pipeline's element types need.
    CoderRegistry registry = pipeline.getCoderRegistry();
    registry.registerCoderForType(
        PubSubToElasticsearch.FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor(),
        PubSubToElasticsearch.FAILSAFE_ELEMENT_CODER);
    registry.registerCoderForType(
        PubSubToElasticsearch.CODER.getEncodedTypeDescriptor(), PubSubToElasticsearch.CODER);

    // Feed the first good test message through the transform.
    PCollectionTuple transformed =
        pipeline
            .apply(Create.of(goodTestMessages.get(0)))
            .apply(
                PubSubMessageToJsonDocument.newBuilder()
                    .setJavascriptTextTransformFunctionName(
                        options.getJavascriptTextTransformFunctionName())
                    .setJavascriptTextTransformGcsPath(options.getJavascriptTextTransformGcsPath())
                    .build());

    // The first output element must wrap a message whose payload equals the input's payload.
    PAssert.that(transformed.get(PubSubToElasticsearch.TRANSFORM_OUT))
        .satisfies(
            elements -> {
                FailsafeElement<PubsubMessage, String> first = elements.iterator().next();
                assertThat(
                    first.getOriginalPayload().getPayload(),
                    is(equalTo(goodTestMessages.get(0).getPayload())));
                return null;
            });

    // Execute pipeline
    pipeline.run(options);
}
Also used : CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) PubSubToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions) Test(org.junit.Test)

Example 19 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class EventMetadataBuilderTest method testEventMetadataAppend.

/**
 * Builds an {@link EventMetadataBuilder} from the sample input message with the AUDIT dataset and
 * checks the {@code agent.version} and {@code data_stream.dataset} fields of the enriched JSON.
 *
 * @throws IOException declared for reading the input message via {@code readInputMessage}
 */
@Test
public void testEventMetadataAppend() throws IOException {
    PubSubToElasticsearchOptions options = TestPipeline.testingPipelineOptions().as(PubSubToElasticsearchOptions.class);
    options.setErrorOutputTopic("projects/test/topics/test-error-topic");
    options.setApiKey("key");
    options.setDataset(Dataset.AUDIT);
    options.setNamespace("test-namespace");
    String inputMessage = readInputMessage(INPUT_MESSAGE_FILE_PATH);
    EventMetadataBuilder eventMetadataBuilder = EventMetadataBuilder.build(inputMessage, options);
    JsonNode enrichedMessageAsJson = eventMetadataBuilder.getEnrichedMessageAsJsonNode();
    // if elasticsearchTemplateVersion is not set, 1.0.0 is the default value
    Assert.assertEquals("1.0.0", enrichedMessageAsJson.get("agent").get("version").textValue());
    // Expected value first, actual second, so a failure message reports them the right way round.
    Assert.assertEquals(Dataset.AUDIT.getKeyWithPrefix(), enrichedMessageAsJson.get("data_stream").get("dataset").textValue());
}
Also used : JsonNode(com.fasterxml.jackson.databind.JsonNode) PubSubToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions) Test(org.junit.Test)

Example 20 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class EventMetadataBuilderTest method testEventMetadataAppendFailed.

/**
 * Builds an {@link EventMetadataBuilder} from the invalid sample message and expects an
 * {@link IllegalStateException} to be raised while building or enriching it.
 *
 * @throws IOException declared for reading the input message via {@code readInputMessage}
 */
@Test
public void testEventMetadataAppendFailed() throws IOException {
    PubSubToElasticsearchOptions options = TestPipeline.testingPipelineOptions().as(PubSubToElasticsearchOptions.class);
    options.setErrorOutputTopic("projects/test/topics/test-error-topic");
    options.setApiKey("key");
    options.setDataset(Dataset.AUDIT);
    options.setNamespace("test-namespace");
    String inputMessageInvalid = readInputMessage(INPUT_MESSAGE_INVALID_FILE_PATH);
    // Arm the expectation only after the fixture is ready, so an IllegalStateException thrown by
    // the setup above fails the test instead of satisfying it.
    exceptionRule.expect(IllegalStateException.class);
    EventMetadataBuilder eventMetadataBuilder = EventMetadataBuilder.build(inputMessageInvalid, options);
    JsonNode enrichedMessageAsJson = eventMetadataBuilder.getEnrichedMessageAsJsonNode();
    // NOTE(review): if the expected exception is raised by the calls above, the assertions below
    // are never reached; they are kept from the original test.
    // if elasticsearchTemplateVersion is not set, 1.0.0 is the default value
    Assert.assertEquals("1.0.0", enrichedMessageAsJson.get("agent").get("version").textValue());
    // Expected value first, actual second, so a failure message reports them the right way round.
    Assert.assertEquals(Dataset.AUDIT.getKeyWithPrefix(), enrichedMessageAsJson.get("data_stream").get("dataset").textValue());
}
Also used : JsonNode(com.fasterxml.jackson.databind.JsonNode) PubSubToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions) Test(org.junit.Test)

Aggregations

Test (org.junit.Test): 63, PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple): 25, FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement): 20, Pipeline (org.apache.beam.sdk.Pipeline): 19, CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry): 19, BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable): 15, GenericRecord (org.apache.avro.generic.GenericRecord): 12, Category (org.junit.experimental.categories.Category): 12, Filter (com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter): 10, BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition): 10, PubSubToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions): 9, TableRow (com.google.api.services.bigquery.model.TableRow): 8, DataplexClient (com.google.cloud.teleport.v2.clients.DataplexClient): 8, FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.FileFormatConversion.FileFormatConversionOptions): 8, KV (org.apache.beam.sdk.values.KV): 8, ArrayList (java.util.ArrayList): 7, ElasticsearchWriteOptions (com.google.cloud.teleport.v2.elasticsearch.options.ElasticsearchWriteOptions): 6, GCSToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions): 6, FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.DataplexFileFormatConversion.FileFormatConversionOptions): 6, PubSubProtoToBigQueryOptions (com.google.cloud.teleport.v2.templates.PubsubProtoToBigQuery.PubSubProtoToBigQueryOptions): 6