use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.
In the class PubSubCdcToBigQueryTest, the method testPubSubCdcToBigQueryApplyJavaScriptUDF:
/**
* Tests the {@link PubSubCdcToBigQuery} pipeline end-to-end.
*/
@Test
public void testPubSubCdcToBigQueryApplyJavaScriptUDF() throws Exception {
  // Test input
  final String payload = "{\"ticker\": \"GOOGL\", \"price\": 1006.94}";
  final PubsubMessage message =
      new PubsubMessage(payload.getBytes(), ImmutableMap.of("id", "123", "type", "custom_event"));
  final Instant timestamp =
      new DateTime(2022, 2, 22, 22, 22, 22, 222, DateTimeZone.UTC).toInstant();
  final FailsafeElementCoder<String, String> coder =
      FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);
  // Parameters
  String transformPath = TRANSFORM_FILE_PATH;
  String transformFunction = "transform";
  PubSubCdcToBigQuery.Options options =
      PipelineOptionsFactory.create().as(PubSubCdcToBigQuery.Options.class);
  options.setJavascriptTextTransformGcsPath(transformPath);
  options.setJavascriptTextTransformFunctionName(transformFunction);
  InputUDFToTableRow<String> deadletterHandler =
      new InputUDFToTableRow<String>(
          options.getJavascriptTextTransformGcsPath(),
          options.getJavascriptTextTransformFunctionName(),
          options.getPythonTextTransformGcsPath(),
          options.getPythonTextTransformFunctionName(),
          options.getRuntimeRetries(),
          coder);
  // Build pipeline
  PCollectionTuple transformOut =
      pipeline
          .apply(
              "CreateInput",
              Create.timestamped(TimestampedValue.of(message, timestamp))
                  .withCoder(PubsubMessageWithAttributesCoder.of()))
          .apply("ConvertPubSubToFailsafe", ParDo.of(new PubSubToFailSafeElement()))
          .apply("ConvertMessageToTableRow", deadletterHandler);
  transformOut.get(deadletterHandler.udfDeadletterOut).setCoder(coder);
  transformOut.get(deadletterHandler.transformDeadletterOut).setCoder(coder);
  // Assert
  PAssert.that(transformOut.get(deadletterHandler.udfDeadletterOut)).empty();
  PAssert.that(transformOut.get(deadletterHandler.transformDeadletterOut)).empty();
  PAssert.that(transformOut.get(deadletterHandler.transformOut))
      .satisfies(
          collection -> {
            TableRow result = collection.iterator().next();
            assertThat(result.get("ticker"), is(equalTo("GOOGL")));
            assertThat(result.get("price"), is(equalTo(1006.94)));
            return null;
          });
  // Execute pipeline
  pipeline.run();
}
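For context, the ConvertPubSubToFailsafe step wraps each Pub/Sub payload so that a later UDF failure can still recover the original message for the deadletter output. A minimal sketch of such a DoFn, assuming Teleport's FailsafeElement.of(original, current) factory; the class and package names here are illustrative, and the actual PubSubToFailSafeElement in the repo may differ in detail:

import java.nio.charset.StandardCharsets;
import com.google.cloud.teleport.v2.values.FailsafeElement;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage;
import org.apache.beam.sdk.transforms.DoFn;

public class PubSubMessageToFailsafe
    extends DoFn<PubsubMessage, FailsafeElement<String, String>> {
  @ProcessElement
  public void processElement(ProcessContext context) {
    // Keep the original payload alongside the working copy so a failed UDF
    // can still emit the untouched message to the deadletter output.
    String payload = new String(context.element().getPayload(), StandardCharsets.UTF_8);
    context.output(FailsafeElement.of(payload, payload));
  }
}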
use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.
In the class KafkaToGCSTest, the method testFileFormatFactoryInvalid:
/**
 * Tests whether {@link FileFormatFactory} maps the output file format to the transform to be
 * carried out, and throws an {@link IllegalArgumentException} if an invalid file format is
 * passed.
 */
@Test
@Category(NeedsRunner.class)
public void testFileFormatFactoryInvalid() {
  // Create the test input.
  final String key = "Name";
  final String value = "Generic";
  final KV<String, String> message = KV.of(key, value);
  final String outputDirectory = "gs://bucket_name/path/to/output-location";
  final String outputFileFormat = "json".toUpperCase();
  final String outputFilenamePrefix = "output";
  final Integer numShards = 1;
  final String tempOutputDirectory = "gs://bucket_name/path/to/temp-location";
  KafkaToGCSOptions options = PipelineOptionsFactory.create().as(KafkaToGCSOptions.class);
  options.setOutputFileFormat(outputFileFormat);
  options.setOutputDirectory(outputDirectory);
  options.setOutputFilenamePrefix(outputFilenamePrefix);
  options.setNumShards(numShards);
  options.setTempLocation(tempOutputDirectory);
  exception.expect(IllegalArgumentException.class);
  PCollection<KV<String, String>> records =
      pipeline.apply(
          "CreateInput",
          Create.of(message).withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));
  records.apply("WriteToGCS", FileFormatFactory.newBuilder().setOptions(options).build());
  // Run the pipeline.
  pipeline.run();
}
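This test relies on JUnit 4's ExpectedException rule. On JUnit 4.13+, the same expectation can be written with assertThrows, assuming the IllegalArgumentException surfaces during graph construction or execution as the rule-based version implies; a sketch, reusing the same fixture:

import static org.junit.Assert.assertThrows;

@Test
@Category(NeedsRunner.class)
public void testFileFormatFactoryInvalidWithAssertThrows() {
  KafkaToGCSOptions options = PipelineOptionsFactory.create().as(KafkaToGCSOptions.class);
  options.setOutputFileFormat("JSON"); // deliberately an unsupported format
  options.setOutputDirectory("gs://bucket_name/path/to/output-location");
  options.setOutputFilenamePrefix("output");
  options.setNumShards(1);
  options.setTempLocation("gs://bucket_name/path/to/temp-location");
  PCollection<KV<String, String>> records =
      pipeline.apply(
          "CreateInput",
          Create.of(KV.of("Name", "Generic"))
              .withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));
  // The factory should reject the format either at apply time or at run time.
  assertThrows(
      IllegalArgumentException.class,
      () -> {
        records.apply("WriteToGCS", FileFormatFactory.newBuilder().setOptions(options).build());
        pipeline.run();
      });
}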
use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.
In the class KafkaToPubsub, the method main:
/**
* Main entry point for pipeline execution.
*
* @param args Command line arguments to the pipeline.
*/
public static void main(String[] args) {
  KafkaToPubsubOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(KafkaToPubsubOptions.class);
  run(options);
}
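The run(options) method is defined elsewhere in the template. A minimal sketch of what such a method conventionally does with Beam's KafkaIO and PubsubIO; the option getters used here (getBootstrapServers, getInputTopic, getOutputTopic) are illustrative only, and the real template adds UDF support, dead-lettering, and SSL configuration:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO;
import org.apache.beam.sdk.io.kafka.KafkaIO;
import org.apache.beam.sdk.transforms.Values;
import org.apache.kafka.common.serialization.StringDeserializer;

public static PipelineResult run(KafkaToPubsubOptions options) {
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(
          "ReadFromKafka",
          KafkaIO.<String, String>read()
              .withBootstrapServers(options.getBootstrapServers())
              .withTopic(options.getInputTopic())
              .withKeyDeserializer(StringDeserializer.class)
              .withValueDeserializer(StringDeserializer.class)
              .withoutMetadata())
      .apply("ExtractValues", Values.create())
      .apply("WriteToPubsub", PubsubIO.writeStrings().to(options.getOutputTopic()));
  return pipeline.run();
}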
use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.
In the class KafkaToGCS, the method main:
/**
* Main entry point for pipeline execution.
*
* @param args Command line arguments to the pipeline.
*/
public static void main(String[] args) {
  KafkaToGCSOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(KafkaToGCSOptions.class);
  run(options);
}
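The fromArgs(args).withValidation() chain is what makes missing required flags fail fast at startup. A hypothetical options interface sketch showing the Beam annotations involved; the real KafkaToGCSOptions declares more parameters than this:

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.Validation;

public interface ExampleKafkaOptions extends PipelineOptions {
  @Description("Comma-separated list of Kafka bootstrap servers.")
  @Validation.Required // withValidation() throws if this flag is absent
  String getBootstrapServers();
  void setBootstrapServers(String value);

  @Description("Output file format. Defaults to TEXT when the flag is not set.")
  @Default.String("TEXT")
  String getOutputFileFormat();
  void setOutputFileFormat(String value);
}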
use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.
In the class CsvConvertersTest, the method testLineToFailsafeJsonNoHeadersUdfDeadletter:
/**
 * Tests that {@link CsvConverters.LineToFailsafeJson} converts a line to a {@link FailsafeElement}
 * correctly using a JavaScript UDF. UDF processing is handled by {@link
 * JavascriptTextTransformer}. Should output the record to the deadletter table tag.
 */
@Test
public void testLineToFailsafeJsonNoHeadersUdfDeadletter() {
  FailsafeElementCoder<String, String> coder = FAILSAFE_ELEMENT_CODER;
  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);
  PCollection<String> lines =
      pipeline.apply(Create.of(BAD_JSON_STRING_RECORD).withCoder(StringUtf8Coder.of()));
  PCollectionTuple linesTuple = PCollectionTuple.of(CSV_LINES, lines);
  CsvConverters.CsvPipelineOptions options =
      PipelineOptionsFactory.create().as(CsvConverters.CsvPipelineOptions.class);
  options.setDelimiter(",");
  options.setJavascriptTextTransformGcsPath(SCRIPT_PARSE_EXCEPTION_FILE_PATH);
  options.setJavascriptTextTransformFunctionName("transform");
  PCollectionTuple failsafe =
      linesTuple.apply(
          "TestLineToFailsafeJsonNoHeadersUdfBad",
          CsvConverters.LineToFailsafeJson.newBuilder()
              .setDelimiter(options.getDelimiter())
              .setUdfFileSystemPath(options.getJavascriptTextTransformGcsPath())
              .setUdfFunctionName(options.getJavascriptTextTransformFunctionName())
              .setJsonSchemaPath(null)
              .setHeaderTag(CSV_HEADERS)
              .setLineTag(CSV_LINES)
              .setUdfOutputTag(PROCESSING_OUT)
              .setUdfDeadletterTag(PROCESSING_DEADLETTER_OUT)
              .build());
  PAssert.that(failsafe.get(PROCESSING_OUT)).empty();
  PAssert.that(failsafe.get(PROCESSING_DEADLETTER_OUT))
      .satisfies(
          collection -> {
            FailsafeElement result = collection.iterator().next();
            assertThat(result.getPayload(), is(equalTo(BAD_JSON_STRING_RECORD)));
            return null;
          });
  pipeline.run();
}
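If the error details matter, the deadletter assertion above could be extended along these lines; getErrorMessage is assumed from Teleport's FailsafeElement and may be named differently, and notNullValue() is org.hamcrest.CoreMatchers.notNullValue:

PAssert.that(failsafe.get(PROCESSING_DEADLETTER_OUT))
    .satisfies(
        collection -> {
          FailsafeElement result = collection.iterator().next();
          assertThat(result.getPayload(), is(equalTo(BAD_JSON_STRING_RECORD)));
          // The UDF raised a parse exception, so the element should carry an error message.
          assertThat(result.getErrorMessage(), is(notNullValue()));
          return null;
        });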