Use of com.google.cloud.teleport.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
The class ErrorConvertersTest, method testFailedStringToPubsubMessageFn:
/**
* Test successful conversion of {@link FailsafeElement} records into {@link PubsubMessage} with
* attributes.
*/
@Test
@Category(NeedsRunner.class)
public void testFailedStringToPubsubMessageFn() {
  String testMessage = "original-test-message";
  FailsafeElement<String, String> element = FailsafeElement.of(testMessage, testMessage);

  Instant expectedTimestamp = Instant.now();
  String errorMessage = "my-error-message";
  element.setErrorMessage(errorMessage);

  TimestampedValue<FailsafeElement<String, String>> input =
      TimestampedValue.of(element, expectedTimestamp);

  PCollection<PubsubMessage> pCollection =
      pipeline
          .apply(
              Create.timestamped(input)
                  .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
          .apply(ParDo.of(new FailedStringToPubsubMessageFn()));

  String expectedTimestampString =
      FailedStringToPubsubMessageFn.TIMESTAMP_FORMATTER.print(
          expectedTimestamp.toDateTime(DateTimeZone.UTC));

  PAssert.that(pCollection)
      .satisfies(
          collection -> {
            PubsubMessage actual = collection.iterator().next();

            assertThat(
                new String(actual.getPayload(), StandardCharsets.UTF_8),
                is(equalTo(testMessage)));
            assertThat(
                actual.getAttribute(FailedStringToPubsubMessageFn.ERROR_MESSAGE),
                is(equalTo(errorMessage)));
            assertThat(
                actual.getAttribute(FailedStringToPubsubMessageFn.TIMESTAMP),
                is(equalTo(expectedTimestampString)));

            return null;
          });

  pipeline.run();
}
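The test above pins down the observable contract of FailedStringToPubsubMessageFn: the payload must round-trip unchanged, and the error message and element timestamp must land in message attributes. As a reading aid, here is a minimal sketch of a DoFn that would satisfy those assertions. It is reconstructed from the test alone, not taken from the template source: the attribute key values, the formatter choice, and the getOriginalPayload() accessor are assumptions, and imports are elided as in the surrounding snippets.

static class FailedStringToPubsubMessageFn
    extends DoFn<FailsafeElement<String, String>, PubsubMessage> {

  // Attribute keys and formatter implied by the test; the real constants may differ.
  static final String ERROR_MESSAGE = "errorMessage";
  static final String TIMESTAMP = "timestamp";
  static final DateTimeFormatter TIMESTAMP_FORMATTER = ISODateTimeFormat.dateTime();

  @ProcessElement
  public void processElement(ProcessContext context) {
    FailsafeElement<String, String> failsafeElement = context.element();

    // Attach the element's event time and (if present) its error message as attributes.
    Map<String, String> attributes = new HashMap<>();
    attributes.put(
        TIMESTAMP,
        TIMESTAMP_FORMATTER.print(context.timestamp().toDateTime(DateTimeZone.UTC)));
    if (failsafeElement.getErrorMessage() != null) {
      attributes.put(ERROR_MESSAGE, failsafeElement.getErrorMessage());
    }

    // Re-emit the original payload unchanged as the Pub/Sub message body.
    context.output(
        new PubsubMessage(
            failsafeElement.getOriginalPayload().getBytes(StandardCharsets.UTF_8), attributes));
  }
}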
Use of com.google.cloud.teleport.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
The class PubSubToBigQuery, method run:
/**
* Runs the pipeline to completion with the specified options. This method does not wait until the
* pipeline is finished before returning. Invoke {@code result.waitUntilFinish()} on the result
* object to block until the pipeline is finished running if blocking programmatic execution is
* required.
*
* @param options The execution options.
* @return The pipeline result.
*/
public static PipelineResult run(Options options) {
  Pipeline pipeline = Pipeline.create(options);

  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(CODER.getEncodedTypeDescriptor(), CODER);

  /*
   * Steps:
   *  1) Read messages in from Pub/Sub
   *  2) Transform the PubsubMessages into TableRows
   *     - Transform message payload via UDF
   *     - Convert UDF result to TableRow objects
   *  3) Write successful records out to BigQuery
   *  4) Write failed records out to BigQuery
   *  5) Insert records that failed BigQuery inserts into the deadletter table
   */

  /*
   * Step #1: Read messages in from Pub/Sub, either from a Subscription or a Topic.
   */
  PCollection<PubsubMessage> messages = null;
  if (options.getUseSubscription()) {
    messages =
        pipeline.apply(
            "ReadPubSubSubscription",
            PubsubIO.readMessagesWithAttributes()
                .fromSubscription(options.getInputSubscription()));
  } else {
    messages =
        pipeline.apply(
            "ReadPubSubTopic",
            PubsubIO.readMessagesWithAttributes().fromTopic(options.getInputTopic()));
  }

  /*
   * Step #2: Transform the PubsubMessages into TableRows.
   */
  PCollectionTuple convertedTableRows =
      messages.apply("ConvertMessageToTableRow", new PubsubMessageToTableRow(options));

  /*
   * Step #3: Write the successful records out to BigQuery.
   */
  WriteResult writeResult =
      convertedTableRows
          .get(TRANSFORM_OUT)
          .apply(
              "WriteSuccessfulRecords",
              BigQueryIO.writeTableRows()
                  .withoutValidation()
                  .withCreateDisposition(CreateDisposition.CREATE_NEVER)
                  .withWriteDisposition(WriteDisposition.WRITE_APPEND)
                  .withExtendedErrorInfo()
                  .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .to(options.getOutputTableSpec()));

  /*
   * Step #3 contd.: Elements that failed inserts into BigQuery are extracted and converted to
   * FailsafeElement.
   */
  PCollection<FailsafeElement<String, String>> failedInserts =
      writeResult
          .getFailedInsertsWithErr()
          .apply(
              "WrapInsertionErrors",
              MapElements.into(FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor())
                  .via((BigQueryInsertError e) -> wrapBigQueryInsertError(e)))
          .setCoder(FAILSAFE_ELEMENT_CODER);

  /*
   * Step #4: Write records that failed table row transformation
   * or conversion out to the BigQuery deadletter table.
   */
  PCollectionList.of(
          ImmutableList.of(
              convertedTableRows.get(UDF_DEADLETTER_OUT),
              convertedTableRows.get(TRANSFORM_DEADLETTER_OUT)))
      .apply("Flatten", Flatten.pCollections())
      .apply(
          "WriteFailedRecords",
          ErrorConverters.WritePubsubMessageErrors.newBuilder()
              .setErrorRecordsTable(
                  ValueProviderUtils.maybeUseDefaultDeadletterTable(
                      options.getOutputDeadletterTable(),
                      options.getOutputTableSpec(),
                      DEFAULT_DEADLETTER_TABLE_SUFFIX))
              .setErrorRecordsTableSchema(ResourceUtils.getDeadletterTableSchemaJson())
              .build());

  /*
   * Step #5: Insert records that failed BigQuery inserts into the deadletter table.
   */
  failedInserts.apply(
      "WriteFailedRecords",
      ErrorConverters.WriteStringMessageErrors.newBuilder()
          .setErrorRecordsTable(
              ValueProviderUtils.maybeUseDefaultDeadletterTable(
                  options.getOutputDeadletterTable(),
                  options.getOutputTableSpec(),
                  DEFAULT_DEADLETTER_TABLE_SUFFIX))
          .setErrorRecordsTableSchema(ResourceUtils.getDeadletterTableSchemaJson())
          .build());

  return pipeline.run();
}
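As the Javadoc says, run() launches the pipeline and returns immediately. A typical launcher therefore looks like the hypothetical sketch below (not the template's verbatim main()): parse the template's Options from the command line, start the pipeline, and call waitUntilFinish() only when blocking execution is wanted.

public static void main(String[] args) {
  // Parse the template's Options interface from command-line arguments.
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  // run() returns as soon as the pipeline is submitted.
  PipelineResult result = run(options);

  // Uncomment to block until the pipeline terminates, e.g. in tests or simple launchers.
  // result.waitUntilFinish();
}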
Use of com.google.cloud.teleport.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
The class TextToBigQueryStreaming, method run:
/**
* Runs the pipeline with the supplied options.
*
* @param options The execution parameters to the pipeline.
* @return The result of the pipeline execution.
*/
public static PipelineResult run(TextToBigQueryStreamingOptions options) {
  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  // Register the coder for the pipeline.
  FailsafeElementCoder<String, String> coder =
      FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);

  /*
   * Steps:
   *  1) Read from the text source continuously.
   *  2) Convert to FailsafeElement.
   *  3) Apply JavaScript UDF transformation.
   *     - Tag records that were successfully transformed and those
   *       that failed transformation.
   *  4) Convert records to TableRow.
   *     - Tag records that were successfully converted and those
   *       that failed conversion.
   *  5) Insert successfully converted records into BigQuery.
   *     - Errors encountered while streaming will be sent to the deadletter table.
   *  6) Insert records that failed into the deadletter table.
   */
  PCollectionTuple transformedOutput =
      pipeline
          // 1) Read from the text source continuously.
          .apply(
              "ReadFromSource",
              TextIO.read()
                  .from(options.getInputFilePattern())
                  .watchForNewFiles(DEFAULT_POLL_INTERVAL, Growth.never()))
          // 2) Convert to FailsafeElement.
          .apply(
              "ConvertToFailsafeElement",
              MapElements.into(FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor())
                  .via(input -> FailsafeElement.of(input, input)))
          // 3) Apply JavaScript UDF transformation.
          .apply(
              "ApplyUDFTransformation",
              FailsafeJavascriptUdf.<String>newBuilder()
                  .setFileSystemPath(options.getJavascriptTextTransformGcsPath())
                  .setFunctionName(options.getJavascriptTextTransformFunctionName())
                  .setSuccessTag(UDF_OUT)
                  .setFailureTag(UDF_DEADLETTER_OUT)
                  .build());

  // 4) Convert records to TableRow.
  PCollectionTuple convertedTableRows =
      transformedOutput
          .get(UDF_OUT)
          .apply(
              "ConvertJSONToTableRow",
              FailsafeJsonToTableRow.<String>newBuilder()
                  .setSuccessTag(TRANSFORM_OUT)
                  .setFailureTag(TRANSFORM_DEADLETTER_OUT)
                  .build());

  // 5) Insert successfully converted records into BigQuery.
  WriteResult writeResult =
      convertedTableRows
          .get(TRANSFORM_OUT)
          .apply(
              "InsertIntoBigQuery",
              BigQueryIO.writeTableRows()
                  .withJsonSchema(getSchemaFromGCS(options.getJSONPath()))
                  .to(options.getOutputTable())
                  .withExtendedErrorInfo()
                  .withoutValidation()
                  .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                  .withWriteDisposition(WriteDisposition.WRITE_APPEND)
                  .withMethod(Method.STREAMING_INSERTS)
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .withCustomGcsTempLocation(options.getBigQueryLoadingTemporaryDirectory()));

  // Elements that failed inserts into BigQuery are extracted and converted to FailsafeElement.
  PCollection<FailsafeElement<String, String>> failedInserts =
      writeResult
          .getFailedInsertsWithErr()
          .apply(
              "WrapInsertionErrors",
              MapElements.into(FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor())
                  .via(TextToBigQueryStreaming::wrapBigQueryInsertError));

  // 6) Insert records that failed transformation, conversion, or insertion into the
  //    deadletter table.
  PCollectionList.of(
          ImmutableList.of(
              transformedOutput.get(UDF_DEADLETTER_OUT),
              convertedTableRows.get(TRANSFORM_DEADLETTER_OUT),
              failedInserts))
      .apply("Flatten", Flatten.pCollections())
      .apply(
          "WriteFailedRecords",
          WriteStringMessageErrors.newBuilder()
              .setErrorRecordsTable(
                  ValueProviderUtils.maybeUseDefaultDeadletterTable(
                      options.getOutputDeadletterTable(),
                      options.getOutputTable(),
                      DEFAULT_DEADLETTER_TABLE_SUFFIX))
              .setErrorRecordsTableSchema(ResourceUtils.getDeadletterTableSchemaJson())
              .build());

  return pipeline.run();
}
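Both this pipeline and PubSubToBigQuery above funnel failed BigQuery inserts through a wrapBigQueryInsertError helper that turns a BigQueryInsertError back into a FailsafeElement<String, String>. The snippets do not show its body, so the following is a plausible sketch only: JSON_FACTORY stands for an assumed com.google.api.client.json.JsonFactory instance, and the exact serialization of the row and error is a guess, not the template's verbatim code.

protected static FailsafeElement<String, String> wrapBigQueryInsertError(
    BigQueryInsertError insertError) {
  try {
    // Serialize the offending row and the insert error to JSON strings, so the row can be
    // replayed from the deadletter table and the failure reason is preserved alongside it.
    String rowPayload = JSON_FACTORY.toString(insertError.getRow());
    String errorMessage = JSON_FACTORY.toString(insertError.getError());

    FailsafeElement<String, String> failsafeElement = FailsafeElement.of(rowPayload, rowPayload);
    failsafeElement.setErrorMessage(errorMessage);
    return failsafeElement;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}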
Use of com.google.cloud.teleport.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
The class PubSubToSplunk, method run:
/**
* Runs the pipeline to completion with the specified options. This method does not wait until the
* pipeline is finished before returning. Invoke {@code result.waitUntilFinish()} on the result
* object to block until the pipeline is finished running if blocking programmatic execution is
* required.
*
* @param options The execution options.
* @return The pipeline result.
*/
public static PipelineResult run(PubSubToSplunkOptions options) {
  Pipeline pipeline = Pipeline.create(options);

  // Register coders.
  CoderRegistry registry = pipeline.getCoderRegistry();
  registry.registerCoderForClass(SplunkEvent.class, SplunkEventCoder.of());
  registry.registerCoderForType(
      FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor(), FAILSAFE_ELEMENT_CODER);

  /*
   * Steps:
   *  1) Read messages in from Pub/Sub.
   *  2) Convert each message to a FailsafeElement for processing.
   *  3) Apply the user-provided UDF (if any) on the input strings.
   *  4) Convert successfully transformed messages into SplunkEvent objects.
   *  5) Write SplunkEvents to Splunk's HEC endpoint.
   *     5a) Wrap write failures into a FailsafeElement.
   *  6) Collect errors from the UDF transform (#3), the SplunkEvent transform (#4),
   *     and the Splunk HEC write (#5), and stream them into a Pub/Sub deadletter topic.
   */

  // 1) Read messages in from Pub/Sub.
  PCollection<String> stringMessages =
      pipeline.apply(
          "ReadMessages",
          new ReadMessages(options.getInputSubscription(), options.getIncludePubsubMessage()));

  // 2) Convert each message to a FailsafeElement for processing.
  // 3) Apply the user-provided UDF (if any) on the input strings.
  PCollectionTuple transformedOutput =
      stringMessages
          .apply(
              "ConvertToFailsafeElement",
              MapElements.into(FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor())
                  .via(input -> FailsafeElement.of(input, input)))
          .apply(
              "ApplyUDFTransformation",
              FailsafeJavascriptUdf.<String>newBuilder()
                  .setFileSystemPath(options.getJavascriptTextTransformGcsPath())
                  .setFunctionName(options.getJavascriptTextTransformFunctionName())
                  .setLoggingEnabled(ValueProvider.StaticValueProvider.of(true))
                  .setSuccessTag(UDF_OUT)
                  .setFailureTag(UDF_DEADLETTER_OUT)
                  .build());

  // 4) Convert successfully transformed messages into SplunkEvent objects.
  PCollectionTuple convertToEventTuple =
      transformedOutput
          .get(UDF_OUT)
          .apply(
              "ConvertToSplunkEvent",
              SplunkConverters.failsafeStringToSplunkEvent(
                  SPLUNK_EVENT_OUT, SPLUNK_EVENT_DEADLETTER_OUT));

  // 5) Write SplunkEvents to Splunk's HEC endpoint.
  PCollection<SplunkWriteError> writeErrors =
      convertToEventTuple
          .get(SPLUNK_EVENT_OUT)
          .apply(
              "WriteToSplunk",
              SplunkIO.writeBuilder()
                  .withToken(
                      new TokenNestedValueProvider(
                          options.getTokenSecretId(),
                          options.getTokenKMSEncryptionKey(),
                          options.getToken(),
                          options.getTokenSource()))
                  .withUrl(options.getUrl())
                  .withBatchCount(options.getBatchCount())
                  .withParallelism(options.getParallelism())
                  .withDisableCertificateValidation(options.getDisableCertificateValidation())
                  .withRootCaCertificatePath(options.getRootCaCertificatePath())
                  .withEnableBatchLogs(options.getEnableBatchLogs())
                  .build());

  // 5a) Wrap write failures into a FailsafeElement.
  PCollection<FailsafeElement<String, String>> wrappedSplunkWriteErrors =
      writeErrors.apply(
          "WrapSplunkWriteErrors",
          ParDo.of(
              new DoFn<SplunkWriteError, FailsafeElement<String, String>>() {

                @ProcessElement
                public void processElement(ProcessContext context) {
                  SplunkWriteError error = context.element();
                  FailsafeElement<String, String> failsafeElement =
                      FailsafeElement.of(error.payload(), error.payload());
                  if (error.statusMessage() != null) {
                    failsafeElement.setErrorMessage(error.statusMessage());
                  }
                  // When a status code is present it takes precedence: this overwrites any
                  // status message set above.
                  if (error.statusCode() != null) {
                    failsafeElement.setErrorMessage(
                        String.format("Splunk write status code: %d", error.statusCode()));
                  }
                  context.output(failsafeElement);
                }
              }));

  // 6) Collect errors from the UDF transform (#3), the SplunkEvent transform (#4), and the
  //    Splunk HEC write (#5), and stream them into a Pub/Sub deadletter topic.
  PCollectionList.of(
          ImmutableList.of(
              convertToEventTuple.get(SPLUNK_EVENT_DEADLETTER_OUT),
              wrappedSplunkWriteErrors,
              transformedOutput.get(UDF_DEADLETTER_OUT)))
      .apply("FlattenErrors", Flatten.pCollections())
      .apply(
          "WriteFailedRecords",
          ErrorConverters.WriteStringMessageErrorsToPubSub.newBuilder()
              .setErrorRecordsTopic(options.getOutputDeadletterTopic())
              .build());

  return pipeline.run();
}
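Taken together, the three pipelines use FailsafeElement the same way: pair the raw input with its working copy so the original survives any downstream failure, and attach an error message before routing the element to a deadletter sink. A standalone illustration of that lifecycle, using only methods that appear in (or are implied by) the snippets above:

// Wrap an incoming record so the original payload travels with the working copy.
String raw = "{\"id\": 1}";
FailsafeElement<String, String> element = FailsafeElement.of(raw, raw);

// When a downstream step (UDF, JSON-to-TableRow conversion, BigQuery insert, Splunk write)
// fails, record the reason and send the element to the deadletter sink.
element.setErrorMessage("JSON parse failure");

// The untouched original remains available for later reprocessing.
String original = element.getOriginalPayload();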