Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError in project DataflowTemplates by GoogleCloudPlatform.
From the class ErrorConvertersTest, method getBigQueryInsertError.
/**
* Generates a {@link BigQueryInsertError} with the {@link GenericRecord} and error message.
*
* @param record payload to be used for the test
* @param errorMessage error message for the test
*/
private static BigQueryInsertError getBigQueryInsertError(
    GenericRecord record, String errorMessage) {
  Row beamRow = AvroUtils.toBeamRowStrict(record, AvroUtils.toBeamSchema(record.getSchema()));
  TableRow tableRow = BigQueryUtils.toTableRow(beamRow);
  TableReference tableReference = new TableReference();
  return new BigQueryInsertError(tableRow.clone(), getInsertErrors(errorMessage), tableReference);
}
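The getInsertErrors helper this method calls is elided from the excerpt. A minimal sketch of a plausible implementation, reconstructed from the InsertErrors construction in TextToBigQueryStreamingTest near the end of this page (the helper's exact body is an assumption):

// Hypothetical reconstruction of the elided getInsertErrors helper; it builds the same
// TableDataInsertAllResponse.InsertErrors structure used in the test at the end of this page.
private static TableDataInsertAllResponse.InsertErrors getInsertErrors(String errorMessage) {
  ErrorProto errorProto = new ErrorProto().setMessage(errorMessage);
  return new TableDataInsertAllResponse.InsertErrors().setErrors(ImmutableList.of(errorProto));
}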
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError in project DataflowTemplates by GoogleCloudPlatform.
From the class ErrorConvertersTest, method transformConvertsBigQueryInsertErrorToPubsubMessage.
@Test
@Category(NeedsRunner.class)
public void transformConvertsBigQueryInsertErrorToPubsubMessage() throws IOException {
  GenericRecord expectedRecord = BigQueryConvertersTest.generateNestedAvroRecord();
  String errorMessage = "small-test-message";
  BigQueryInsertError bigQueryInsertError = getBigQueryInsertError(expectedRecord, errorMessage);
  ErrorConverters.BigQueryInsertErrorToPubsubMessage<GenericRecord> converter =
      getConverter(expectedRecord.getSchema(), AvroCoder.of(expectedRecord.getSchema()));

  PCollection<PubsubMessage> output =
      pipeline
          .apply(Create.of(bigQueryInsertError).withCoder(BigQueryInsertErrorCoder.of()))
          .apply(converter);

  PubsubMessage expectedMessage =
      getPubsubMessage(expectedRecord, bigQueryInsertError.getError().toString());
  byte[] expectedPayload = expectedMessage.getPayload();
  Map<String, String> expectedAttributes = expectedMessage.getAttributeMap();

  PAssert.thatSingleton(output)
      .satisfies(
          input -> {
            assertThat(input.getPayload()).isEqualTo(expectedPayload);
            assertThat(input.getAttributeMap()).isEqualTo(expectedAttributes);
            return null;
          });

  pipeline.run();
}
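The getConverter helper is also elided from this excerpt. One plausible shape, assuming the converter's builder takes a payload coder and a TableRow-to-GenericRecord translate function (the builder method names and the translate function are assumptions inferred from the call site, not confirmed by this excerpt):

// Hypothetical sketch of the elided getConverter helper; setPayloadCoder,
// setTranslateFunction, and TableRowToGenericRecordFn are assumed names.
private static ErrorConverters.BigQueryInsertErrorToPubsubMessage<GenericRecord> getConverter(
    Schema schema, Coder<GenericRecord> coder) {
  return ErrorConverters.BigQueryInsertErrorToPubsubMessage.<GenericRecord>newBuilder()
      .setPayloadCoder(coder)
      .setTranslateFunction(BigQueryConverters.TableRowToGenericRecordFn.of(schema))
      .build();
}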
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError in project DataflowTemplates by GoogleCloudPlatform.
From the class BigQueryDeadLetterQueueSanitizerTest, method testRunJsonError.
/**
 * Tests that {@link BigQueryDeadLetterQueueSanitizer} produces the expected dead-letter JSON
 * when a record fails insertion.
 */
@Test
@Category(NeedsRunner.class)
public void testRunJsonError() {
  String jsonMessage = "{\"key\":\"valué\"}";
  TableRow tableRow = BigQueryConverters.convertJsonToTableRow(jsonMessage);
  BigQueryInsertError errorMessage = getBigQueryInsertError(tableRow, "something happened");
  List<String> expectedJson =
      Arrays.asList(
          "{\"message\":{\"key\":\"valué\"},\"error_message\":\"GenericData{classInfo=[errors,"
              + " index], {errors=[GenericData{classInfo=[debugInfo, location, message, reason],"
              + " {message=something happened}}]}}\"}");

  PCollection<String> output =
      pipeline
          .apply(
              "CreateInput",
              Create.of(errorMessage).withCoder(BigQueryInsertErrorCoder.of()))
          .apply("BigQuery Failures", MapElements.via(new BigQueryDeadLetterQueueSanitizer()));

  PAssert.that(output).containsInAnyOrder(expectedJson);

  // Execute the test
  pipeline.run();
}
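This test relies on a TableRow overload of getBigQueryInsertError rather than the GenericRecord variant shown at the top of the page. A minimal sketch of that overload, assuming it follows the same clone-and-wrap pattern:

// Hypothetical TableRow overload of getBigQueryInsertError, mirroring the
// GenericRecord variant shown earlier on this page.
private static BigQueryInsertError getBigQueryInsertError(TableRow tableRow, String errorMessage) {
  return new BigQueryInsertError(
      tableRow.clone(), getInsertErrors(errorMessage), new TableReference());
}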
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError in project DataflowTemplates by GoogleCloudPlatform.
From the class PubSubToBigQuery, method run.
/**
 * Runs the pipeline with the specified options. This method does not wait for the pipeline to
 * finish before returning. Invoke {@code result.waitUntilFinish()} on the returned
 * {@link PipelineResult} to block until the pipeline finishes, if blocking execution is
 * required.
*
* @param options The execution options.
* @return The pipeline result.
*/
public static PipelineResult run(Options options) {
  Pipeline pipeline = Pipeline.create(options);

  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(CODER.getEncodedTypeDescriptor(), CODER);

  /*
   * Steps:
   *  1) Read messages in from Pub/Sub
   *  2) Transform the PubsubMessages into TableRows
   *     - Transform message payload via UDF
   *     - Convert UDF result to TableRow objects
   *  3) Write successful records out to BigQuery
   *  4) Write failed records out to BigQuery deadletter tables
   */

  /*
   * Step #1: Read messages in from Pub/Sub, either from a Subscription or a Topic.
   */
  PCollection<PubsubMessage> messages;
  if (options.getUseSubscription()) {
    messages =
        pipeline.apply(
            "ReadPubSubSubscription",
            PubsubIO.readMessagesWithAttributes()
                .fromSubscription(options.getInputSubscription()));
  } else {
    messages =
        pipeline.apply(
            "ReadPubSubTopic",
            PubsubIO.readMessagesWithAttributes().fromTopic(options.getInputTopic()));
  }

  /*
   * Step #2: Transform the PubsubMessages into TableRows.
   */
  PCollectionTuple convertedTableRows =
      messages.apply("ConvertMessageToTableRow", new PubsubMessageToTableRow(options));

  /*
   * Step #3: Write the successful records out to BigQuery.
   */
  WriteResult writeResult =
      convertedTableRows
          .get(TRANSFORM_OUT)
          .apply(
              "WriteSuccessfulRecords",
              BigQueryIO.writeTableRows()
                  .withoutValidation()
                  .withCreateDisposition(CreateDisposition.CREATE_NEVER)
                  .withWriteDisposition(WriteDisposition.WRITE_APPEND)
                  .withExtendedErrorInfo()
                  .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .to(options.getOutputTableSpec()));

  /*
   * Step #3 Contd.: Extract elements that failed insertion into BigQuery and convert them to
   * FailsafeElement.
   */
  PCollection<FailsafeElement<String, String>> failedInserts =
      writeResult
          .getFailedInsertsWithErr()
          .apply(
              "WrapInsertionErrors",
              MapElements.into(FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor())
                  .via((BigQueryInsertError e) -> wrapBigQueryInsertError(e)))
          .setCoder(FAILSAFE_ELEMENT_CODER);

  /*
   * Step #4: Write records that failed table row transformation or conversion out to the
   * BigQuery deadletter table.
   */
  PCollectionList.of(
          ImmutableList.of(
              convertedTableRows.get(UDF_DEADLETTER_OUT),
              convertedTableRows.get(TRANSFORM_DEADLETTER_OUT)))
      .apply("Flatten", Flatten.pCollections())
      .apply(
          "WriteFailedRecords",
          ErrorConverters.WritePubsubMessageErrors.newBuilder()
              .setErrorRecordsTable(
                  ValueProviderUtils.maybeUseDefaultDeadletterTable(
                      options.getOutputDeadletterTable(),
                      options.getOutputTableSpec(),
                      DEFAULT_DEADLETTER_TABLE_SUFFIX))
              .setErrorRecordsTableSchema(ResourceUtils.getDeadletterTableSchemaJson())
              .build());

  /*
   * Step #4 Contd.: Insert records that failed insertion into the deadletter table.
   */
  failedInserts.apply(
      "WriteFailedRecords",
      ErrorConverters.WriteStringMessageErrors.newBuilder()
          .setErrorRecordsTable(
              ValueProviderUtils.maybeUseDefaultDeadletterTable(
                  options.getOutputDeadletterTable(),
                  options.getOutputTableSpec(),
                  DEFAULT_DEADLETTER_TABLE_SUFFIX))
          .setErrorRecordsTableSchema(ResourceUtils.getDeadletterTableSchemaJson())
          .build());

  return pipeline.run();
}
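Since run() returns without blocking, a typical entry point parses the options and decides whether to wait. A minimal sketch of such a caller (the main method itself is not part of this excerpt):

// Minimal sketch: parse args into Options, start the pipeline, and block only
// when synchronous execution is required, as the javadoc above describes.
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  PipelineResult result = run(options);
  result.waitUntilFinish();
}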
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError in project DataflowTemplates by GoogleCloudPlatform.
From the class TextToBigQueryStreamingTest, method wrapBigQueryInsertErrorReturnsValidJSON.
@Test
public void wrapBigQueryInsertErrorReturnsValidJSON() {
  TableRow testRow = new TableRow().set(NAME_KEY, testPerson.name).set(AGE_KEY, testPerson.age);

  InsertErrors insertErrors = new TableDataInsertAllResponse.InsertErrors();
  ErrorProto errorProto = new ErrorProto().setMessage(ERROR_MESSAGE);
  insertErrors.setErrors(ImmutableList.of(errorProto));

  TableReference tableReference = new TableReference();
  BigQueryInsertError bigQueryInsertError =
      new BigQueryInsertError(testRow.clone(), insertErrors, tableReference);

  String expected = GSON.toJson(testPerson);

  FailsafeElement<String, String> wrappedValue =
      TextToBigQueryStreaming.wrapBigQueryInsertError(bigQueryInsertError);
  String actualOriginalPayload = wrappedValue.getOriginalPayload();
  String actualPayload = wrappedValue.getPayload();
  String actualErrorMessage = wrappedValue.getErrorMessage();

  assertThat(actualOriginalPayload).isEqualTo(expected);
  assertThat(actualPayload).isEqualTo(expected);
  assertThat(actualErrorMessage).isEqualTo(GSON.toJson(insertErrors));
}
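The assertions pin down wrapBigQueryInsertError's contract: both payload fields carry the failed row serialized as JSON, and the error message carries the serialized InsertErrors. A minimal sketch consistent with those assertions (the real implementation in TextToBigQueryStreaming may serialize differently, e.g. via the API client's JSON factory):

// Sketch only: satisfies the three assertions above under the assumption that
// GSON serialization of the TableRow matches GSON.toJson(testPerson).
static FailsafeElement<String, String> wrapBigQueryInsertError(BigQueryInsertError insertError) {
  String rowPayload = GSON.toJson(insertError.getRow());
  String errorMessage = GSON.toJson(insertError.getError());
  return FailsafeElement.of(rowPayload, rowPayload).setErrorMessage(errorMessage);
}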