Search in sources :

Example 31 with FailsafeElement

use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.

the class GCSToElasticsearchTest method testGCSToElasticsearchUdfE2E.

/**
 * Exercises the CSV-reading and line-conversion stages used by {@link GCSToElasticsearch} on a
 * header-less CSV input, with a JavaScript UDF converting each line, and verifies the first
 * element emitted on the UDF output tag.
 */
@Test
public void testGCSToElasticsearchUdfE2E() {
    final String csvLine = "007,CA,26.23";
    final String expectedJson = "{\"id\":\"007\",\"state\":\"CA\",\"price\":26.23}";

    // Options: UDF location and function name, header-less CSV input, and an API key.
    GCSToElasticsearchOptions options =
        PipelineOptionsFactory.create().as(GCSToElasticsearchOptions.class);
    options.setJavascriptTextTransformGcsPath(TRANSFORM_FILE_PATH);
    options.setJavascriptTextTransformFunctionName("transform");
    options.setContainsHeaders(false);
    options.setInputFileSpec(NO_HEADER_CSV_FILE_PATH);
    options.setApiKey("key");

    // Register a coder for FailsafeElement<String, String> built from nullable UTF-8 coders.
    final FailsafeElementCoder<String, String> failsafeCoder =
        FailsafeElementCoder.of(
            NullableCoder.of(StringUtf8Coder.of()),
            NullableCoder.of(StringUtf8Coder.of()));
    CoderRegistry registry = pipeline.getCoderRegistry();
    registry.registerCoderForType(failsafeCoder.getEncodedTypeDescriptor(), failsafeCoder);

    // Build pipeline with no headers: read the CSV, then convert every line through the UDF.
    PCollectionTuple convertedLines =
        pipeline
            .apply(
                "ReadCsv",
                CsvConverters.ReadCsv.newBuilder()
                    .setCsvFormat(options.getCsvFormat())
                    .setDelimiter(options.getDelimiter())
                    .setHasHeaders(options.getContainsHeaders())
                    .setInputFileSpec(options.getInputFileSpec())
                    .setHeaderTag(GCSToElasticsearch.CSV_HEADERS)
                    .setLineTag(GCSToElasticsearch.CSV_LINES)
                    .setFileEncoding(options.getCsvFileEncoding())
                    .build())
            .apply(
                "ConvertLine",
                CsvConverters.LineToFailsafeJson.newBuilder()
                    .setDelimiter(options.getDelimiter())
                    .setUdfFileSystemPath(options.getJavascriptTextTransformGcsPath())
                    .setUdfFunctionName(options.getJavascriptTextTransformFunctionName())
                    .setJsonSchemaPath(options.getJsonSchemaPath())
                    .setHeaderTag(GCSToElasticsearch.CSV_HEADERS)
                    .setLineTag(GCSToElasticsearch.CSV_LINES)
                    .setUdfOutputTag(GCSToElasticsearch.PROCESSING_OUT)
                    .setUdfDeadletterTag(GCSToElasticsearch.PROCESSING_DEADLETTER_OUT)
                    .build());

    // Assert: the first UDF output keeps the raw line and carries the expected JSON payload.
    PAssert.that(convertedLines.get(GCSToElasticsearch.PROCESSING_OUT))
        .satisfies(
            outputs -> {
                FailsafeElement first = outputs.iterator().next();
                assertThat(first.getOriginalPayload(), is(equalTo(csvLine)));
                assertThat(first.getPayload(), is(equalTo(expectedJson)));
                return null;
            });

    // Execute pipeline
    pipeline.run();
}
Also used : CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) GCSToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement) Test(org.junit.Test)

Example 32 with FailsafeElement

use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.

the class PubSubToElasticsearchTest method testPubSubToElasticsearchUdfE2E.

/**
 * Runs the message-to-JSON-document transform of {@link PubSubToElasticsearch} with a JavaScript
 * UDF configured, and checks that the first element on the transform output still carries the
 * payload of the input message.
 */
@Test
public void testPubSubToElasticsearchUdfE2E() {
    // Make the pipeline aware of the coders exposed by PubSubToElasticsearch.
    CoderRegistry registry = pipeline.getCoderRegistry();
    registry.registerCoderForType(
        PubSubToElasticsearch.FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor(),
        PubSubToElasticsearch.FAILSAFE_ELEMENT_CODER);
    registry.registerCoderForType(
        PubSubToElasticsearch.CODER.getEncodedTypeDescriptor(),
        PubSubToElasticsearch.CODER);

    PubSubToElasticsearchOptions options =
        TestPipeline.testingPipelineOptions().as(PubSubToElasticsearchOptions.class);
    options.setErrorOutputTopic("projects/test/topics/test-error-topic");
    options.setJavascriptTextTransformFunctionName("transform");
    options.setJavascriptTextTransformGcsPath(TRANSFORM_FILE_PATH);
    options.setApiKey("key");

    // Feed the first good test message through the transform.
    PCollectionTuple transformed =
        pipeline
            .apply(Create.of(goodTestMessages.get(0)))
            .apply(
                PubSubMessageToJsonDocument.newBuilder()
                    .setJavascriptTextTransformFunctionName(
                        options.getJavascriptTextTransformFunctionName())
                    .setJavascriptTextTransformGcsPath(
                        options.getJavascriptTextTransformGcsPath())
                    .build());

    PAssert.that(transformed.get(PubSubToElasticsearch.TRANSFORM_OUT))
        .satisfies(
            outputs -> {
                FailsafeElement<PubsubMessage, String> first = outputs.iterator().next();
                assertThat(
                    first.getOriginalPayload().getPayload(),
                    is(equalTo(goodTestMessages.get(0).getPayload())));
                return null;
            });

    // Execute pipeline
    pipeline.run(options);
}
Also used : CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) PubSubToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions) Test(org.junit.Test)

Example 33 with FailsafeElement

use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.

the class PubSubToElasticsearchTest method testPubSubToElasticsearchOnlyAttributesE2E.

/**
 * Runs the message-to-JSON-document transform of {@link PubSubToElasticsearch} on the last entry
 * of {@code goodTestMessages} (described by the original test as a message with an empty payload
 * but populated attributes) and checks that the resulting JSON payload, parsed as a map, equals
 * the attribute map of the input message. No UDF options are set in this test.
 */
@Test
public void testPubSubToElasticsearchOnlyAttributesE2E() {
    // Make the pipeline aware of the coders exposed by PubSubToElasticsearch.
    CoderRegistry registry = pipeline.getCoderRegistry();
    registry.registerCoderForType(
        PubSubToElasticsearch.FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor(),
        PubSubToElasticsearch.FAILSAFE_ELEMENT_CODER);
    registry.registerCoderForType(
        PubSubToElasticsearch.CODER.getEncodedTypeDescriptor(),
        PubSubToElasticsearch.CODER);

    PubSubToElasticsearchOptions options =
        TestPipeline.testingPipelineOptions().as(PubSubToElasticsearchOptions.class);
    options.setErrorOutputTopic("projects/test/topics/test-error-topic");
    options.setApiKey("key");

    // The input is the final message in the good-message fixture.
    int lastIndex = goodTestMessages.size() - 1;
    PCollectionTuple transformed =
        pipeline
            .apply(Create.of(goodTestMessages.get(lastIndex)))
            .apply(
                PubSubMessageToJsonDocument.newBuilder()
                    .setJavascriptTextTransformFunctionName(
                        options.getJavascriptTextTransformFunctionName())
                    .setJavascriptTextTransformGcsPath(
                        options.getJavascriptTextTransformGcsPath())
                    .build());

    PAssert.that(transformed.get(PubSubToElasticsearch.TRANSFORM_OUT))
        .satisfies(
            outputs -> {
                FailsafeElement<PubsubMessage, String> first = outputs.iterator().next();
                assertThat(
                    new Gson().fromJson(first.getPayload(), HashMap.class),
                    is(equalTo(first.getOriginalPayload().getAttributeMap())));
                return null;
            });

    // Execute pipeline
    pipeline.run(options);
}
Also used : CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) HashMap(java.util.HashMap) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Gson(com.google.gson.Gson) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) PubSubToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions) Test(org.junit.Test)

Example 34 with FailsafeElement

use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.

the class PubSubToElasticsearchTest method testPubSubToElasticsearchBadUdfE2E.

/**
 * Runs the message-to-JSON-document transform of {@link PubSubToElasticsearch} with a bad UDF
 * and checks that the input message lands on the error output while the regular transform
 * output stays empty.
 */
@Test
public void testPubSubToElasticsearchBadUdfE2E() {
    // Make the pipeline aware of the coders exposed by PubSubToElasticsearch.
    CoderRegistry registry = pipeline.getCoderRegistry();
    registry.registerCoderForType(
        PubSubToElasticsearch.FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor(),
        PubSubToElasticsearch.FAILSAFE_ELEMENT_CODER);
    registry.registerCoderForType(
        PubSubToElasticsearch.CODER.getEncodedTypeDescriptor(),
        PubSubToElasticsearch.CODER);

    PubSubToElasticsearchOptions options =
        TestPipeline.testingPipelineOptions().as(PubSubToElasticsearchOptions.class);
    options.setErrorOutputTopic("projects/test/topics/test-error-topic");
    options.setJavascriptTextTransformFunctionName("transformBad");
    options.setJavascriptTextTransformGcsPath(BAD_TRANSFORM_FILE_PATH);
    options.setApiKey("key");

    // Feed the first bad test message through the transform.
    PCollectionTuple transformed =
        pipeline
            .apply(Create.of(badTestMessages.get(0)))
            .apply(
                PubSubMessageToJsonDocument.newBuilder()
                    .setJavascriptTextTransformFunctionName(
                        options.getJavascriptTextTransformFunctionName())
                    .setJavascriptTextTransformGcsPath(
                        options.getJavascriptTextTransformGcsPath())
                    .build());

    // The error output must hold an element wrapping the original message payload.
    PAssert.that(transformed.get(PubSubToElasticsearch.TRANSFORM_ERROROUTPUT_OUT))
        .satisfies(
            failures -> {
                FailsafeElement<PubsubMessage, String> first = failures.iterator().next();
                assertThat(
                    first.getOriginalPayload().getPayload(),
                    is(equalTo(badTestMessages.get(0).getPayload())));
                return null;
            });

    // Nothing may reach the regular transform output.
    PAssert.that(transformed.get(PubSubToElasticsearch.TRANSFORM_OUT)).empty();

    // Execute pipeline
    pipeline.run(options);
}
Also used : CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) PubSubToElasticsearchOptions(com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions) Test(org.junit.Test)

Example 35 with FailsafeElement

use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.

the class DatastreamToDML method processElement.

/**
 * Converts one failsafe JSON record into a keyed {@link DmlInfo} output.
 *
 * <p>The element's payload is parsed as JSON and passed, together with the element's original
 * payload, to {@code convertJsonToDmlInfo}. A non-null result is emitted keyed by
 * {@link DmlInfo#getStateWindowKey()}; a null result is skipped. If an {@link IOException} is
 * raised, the record is logged and dropped (there is no dead-letter output yet, see the TODO).
 *
 * @param context the process context supplying the input element and receiving the output
 */
@ProcessElement
public void processElement(ProcessContext context) {
    FailsafeElement<String, String> element = context.element();
    String jsonString = element.getPayload();
    ObjectMapper mapper = new ObjectMapper();
    try {
        JsonNode rowObj = mapper.readTree(jsonString);
        DmlInfo dmlInfo = convertJsonToDmlInfo(rowObj, element.getOriginalPayload());
        // Null rows suggest no DML is required.
        if (dmlInfo != null) {
            LOG.debug("Output Data: {}", jsonString);
            context.output(KV.of(dmlInfo.getStateWindowKey(), dmlInfo));
        } else {
            LOG.debug("Skipping Null DmlInfo: {}", jsonString);
        }
    } catch (IOException e) {
        // TODO(dhercher): Push failure to DLQ collection
        // The exception is passed as a trailing argument so the logger can record its stack
        // trace and cause chain; the formatted message text is unchanged.
        LOG.error("IOException: {} :: {}", jsonString, e.toString(), e);
    }
}
Also used : JsonNode(org.codehaus.jackson.JsonNode) IOException(java.io.IOException) DmlInfo(com.google.cloud.teleport.v2.values.DmlInfo) ObjectMapper(org.codehaus.jackson.map.ObjectMapper)

Aggregations

FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement)31 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)26 CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry)21 Test (org.junit.Test)21 Pipeline (org.apache.beam.sdk.Pipeline)14 TableRow (com.google.api.services.bigquery.model.TableRow)8 PubsubMessage (org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage)6 DoFn (org.apache.beam.sdk.transforms.DoFn)6 PubSubToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions)5 IntegrationTest (com.google.cloud.teleport.v2.spanner.IntegrationTest)5 JSONObject (org.json.JSONObject)5 DeadLetterQueueManager (com.google.cloud.teleport.v2.cdc.dlq.DeadLetterQueueManager)4 StringDeadLetterQueueSanitizer (com.google.cloud.teleport.v2.cdc.dlq.StringDeadLetterQueueSanitizer)4 DataStreamIO (com.google.cloud.teleport.v2.cdc.sources.DataStreamIO)4 FailsafeElementCoder (com.google.cloud.teleport.v2.coders.FailsafeElementCoder)4 GCSToSplunk.flattenErrorsAndConvertToString (com.google.cloud.teleport.v2.templates.GCSToSplunk.flattenErrorsAndConvertToString)4 PipelineResult (org.apache.beam.sdk.PipelineResult)4 SpannerConfig (org.apache.beam.sdk.io.gcp.spanner.SpannerConfig)4 KV (org.apache.beam.sdk.values.KV)4 GCSToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions)3