Search in sources :

Example 1 with BigQueryInsertError

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError in project DataflowTemplates by GoogleCloudPlatform.

the class ErrorConvertersTest method getBigQueryInsertError.

/**
 * Builds a {@link BigQueryInsertError} for tests from an Avro {@link GenericRecord} and an error
 * message.
 *
 * <p>The record is converted to a Beam {@link Row} using the Beam schema derived from the record's
 * own Avro schema, and that row is converted to a {@link TableRow}. The returned error carries a
 * clone of the table row, the result of {@code getInsertErrors(errorMessage)}, and a newly
 * constructed {@link TableReference} with no fields set.
 *
 * @param record payload to be used for the test
 * @param errorMessage error message for the test
 * @return the insert error wrapping the converted payload and the error details
 */
private static BigQueryInsertError getBigQueryInsertError(GenericRecord record, String errorMessage) {
    // Avro record -> Beam Row -> BigQuery TableRow.
    TableRow failedRow =
        BigQueryUtils.toTableRow(
            AvroUtils.toBeamRowStrict(record, AvroUtils.toBeamSchema(record.getSchema())));
    return new BigQueryInsertError(
        failedRow.clone(), getInsertErrors(errorMessage), new TableReference());
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) TableRow(com.google.api.services.bigquery.model.TableRow) BigQueryInsertError(org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError) Row(org.apache.beam.sdk.values.Row)

Example 2 with BigQueryInsertError

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError in project DataflowTemplates by GoogleCloudPlatform.

the class ErrorConvertersTest method transformConvertsBigQueryInsertErrorToPubsubMessage.

/**
 * Checks that the {@code BigQueryInsertErrorToPubsubMessage} converter turns a single
 * {@link BigQueryInsertError} into a {@link PubsubMessage} whose payload and attribute map match
 * the message built by {@code getPubsubMessage} for the same record and error.
 */
@Test
@Category(NeedsRunner.class)
public void transformConvertsBigQueryInsertErrorToPubsubMessage() throws IOException {
    GenericRecord expectedRecord = BigQueryConvertersTest.generateNestedAvroRecord();
    String errorMessage = "small-test-message";
    BigQueryInsertError insertError = getBigQueryInsertError(expectedRecord, errorMessage);
    ErrorConverters.BigQueryInsertErrorToPubsubMessage<GenericRecord> converter =
        getConverter(expectedRecord.getSchema(), AvroCoder.of(expectedRecord.getSchema()));

    // Feed the single insert error through the converter under test.
    PCollection<BigQueryInsertError> insertErrors =
        pipeline.apply(Create.of(insertError).withCoder(BigQueryInsertErrorCoder.of()));
    PCollection<PubsubMessage> output = insertErrors.apply(converter);

    // Only the payload bytes and the attribute map are captured by the assertion lambda.
    PubsubMessage expectedMessage =
        getPubsubMessage(expectedRecord, insertError.getError().toString());
    byte[] wantPayload = expectedMessage.getPayload();
    Map<String, String> wantAttributes = expectedMessage.getAttributeMap();

    PAssert.thatSingleton(output)
        .satisfies(
            actual -> {
                assertThat(actual.getPayload()).isEqualTo(wantPayload);
                assertThat(actual.getAttributeMap()).isEqualTo(wantAttributes);
                return null;
            });

    pipeline.run();
}
Also used : BigQueryInsertError(org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError) GenericRecord(org.apache.avro.generic.GenericRecord) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 3 with BigQueryInsertError

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError in project DataflowTemplates by GoogleCloudPlatform.

the class BigQueryDeadLetterQueueSanitizerTest method testRunJsonError.

/**
 * Tests that {@link BigQueryDeadLetterQueueSanitizer} formats a {@link BigQueryInsertError} as a
 * JSON string containing the failed row under {@code message} and the string form of the insert
 * errors under {@code error_message}.
 *
 * <p>The input row contains a non-ASCII character ({@code é}), which is expected to appear
 * unchanged in the sanitized output.
 */
@Test
@Category(NeedsRunner.class)
public void testRunJsonError() {
    String jsonMessage = "{\"key\":\"valué\"}";
    TableRow tableRow = BigQueryConverters.convertJsonToTableRow(jsonMessage);
    // The insert error wraps the row together with the error text "something happened".
    BigQueryInsertError insertError = getBigQueryInsertError(tableRow, "something happened");
    List<String> expectedJson = Arrays.asList("{\"message\":{\"key\":\"valué\"},\"error_message\":\"GenericData{classInfo=[errors," + " index], {errors=[GenericData{classInfo=[debugInfo, location, message, reason]," + " {message=something happened}}]}}\"}");
    PCollection<String> output =
        pipeline
            .apply("CreateInput", Create.of(insertError).withCoder(BigQueryInsertErrorCoder.of()))
            .apply("BigQuery Failures", MapElements.via(new BigQueryDeadLetterQueueSanitizer()));
    PAssert.that(output).containsInAnyOrder(expectedJson);
    // Execute the test
    pipeline.run();
}
Also used : TableRow(com.google.api.services.bigquery.model.TableRow) BigQueryInsertError(org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 4 with BigQueryInsertError

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError in project DataflowTemplates by GoogleCloudPlatform.

the class PubSubToBigQuery method run.

/**
 * Builds the Pub/Sub to BigQuery pipeline from the given options and starts it.
 *
 * <p>This method does not wait until the pipeline is finished before returning. Invoke
 * {@code result.waitUntilFinish()} on the returned object to block until the pipeline is finished
 * running if blocking programmatic execution is required.
 *
 * @param options The execution options.
 * @return The pipeline result.
 */
public static PipelineResult run(Options options) {
    Pipeline pipeline = Pipeline.create(options);

    // Make CODER the registered coder for the type it encodes.
    pipeline.getCoderRegistry().registerCoderForType(CODER.getEncodedTypeDescriptor(), CODER);

    /*
     * Steps:
     *  1) Read messages in from Pub/Sub
     *  2) Transform the PubsubMessages into TableRows
     *     - Transform message payload via UDF
     *     - Convert UDF result to TableRow objects
     *  3) Write successful records out to BigQuery
     *  4) Write records that failed transformation out to the BigQuery deadletter table
     *  5) Write records that failed the BigQuery insert out to the BigQuery deadletter table
     */

    /*
     * Step #1: Read messages in from Pub/Sub,
     * either from a Subscription or from a Topic.
     */
    PCollection<PubsubMessage> messages =
        options.getUseSubscription()
            ? pipeline.apply(
                "ReadPubSubSubscription",
                PubsubIO.readMessagesWithAttributes()
                    .fromSubscription(options.getInputSubscription()))
            : pipeline.apply(
                "ReadPubSubTopic",
                PubsubIO.readMessagesWithAttributes().fromTopic(options.getInputTopic()));

    /*
     * Step #2: Transform the PubsubMessages into TableRows.
     */
    PCollectionTuple convertedTableRows =
        messages.apply("ConvertMessageToTableRow", new PubsubMessageToTableRow(options));

    /*
     * Step #3: Write the successful records out to BigQuery
     * using streaming inserts that append to an existing table.
     */
    WriteResult writeResult =
        convertedTableRows
            .get(TRANSFORM_OUT)
            .apply(
                "WriteSuccessfulRecords",
                BigQueryIO.writeTableRows()
                    .withoutValidation()
                    .withCreateDisposition(CreateDisposition.CREATE_NEVER)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND)
                    .withExtendedErrorInfo()
                    .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                    .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                    .to(options.getOutputTableSpec()));

    /*
     * Step #3 (continued): Elements that failed inserts into BigQuery
     * are extracted and converted to FailsafeElement.
     */
    PCollection<FailsafeElement<String, String>> failedInserts =
        writeResult
            .getFailedInsertsWithErr()
            .apply(
                "WrapInsertionErrors",
                MapElements.into(FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor())
                    .via((BigQueryInsertError insertError) -> wrapBigQueryInsertError(insertError)))
            .setCoder(FAILSAFE_ELEMENT_CODER);

    /*
     * Step #4: Write records that failed table row transformation
     * or conversion out to BigQuery deadletter table.
     */
    PCollectionList.of(
            ImmutableList.of(
                convertedTableRows.get(UDF_DEADLETTER_OUT),
                convertedTableRows.get(TRANSFORM_DEADLETTER_OUT)))
        .apply("Flatten", Flatten.pCollections())
        .apply(
            "WriteFailedRecords",
            ErrorConverters.WritePubsubMessageErrors.newBuilder()
                .setErrorRecordsTable(
                    ValueProviderUtils.maybeUseDefaultDeadletterTable(
                        options.getOutputDeadletterTable(),
                        options.getOutputTableSpec(),
                        DEFAULT_DEADLETTER_TABLE_SUFFIX))
                .setErrorRecordsTableSchema(ResourceUtils.getDeadletterTableSchemaJson())
                .build());

    /*
     * Step #5: Insert records that failed insert into deadletter table.
     * NOTE(review): this step reuses the name "WriteFailedRecords" from Step #4; it is kept
     * unchanged here so the pipeline's step names stay the same.
     */
    failedInserts.apply(
        "WriteFailedRecords",
        ErrorConverters.WriteStringMessageErrors.newBuilder()
            .setErrorRecordsTable(
                ValueProviderUtils.maybeUseDefaultDeadletterTable(
                    options.getOutputDeadletterTable(),
                    options.getOutputTableSpec(),
                    DEFAULT_DEADLETTER_TABLE_SUFFIX))
            .setErrorRecordsTableSchema(ResourceUtils.getDeadletterTableSchemaJson())
            .build());

    return pipeline.run();
}
Also used : CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) WriteResult(org.apache.beam.sdk.io.gcp.bigquery.WriteResult) TextToBigQueryStreaming.wrapBigQueryInsertError(com.google.cloud.teleport.templates.TextToBigQueryStreaming.wrapBigQueryInsertError) BigQueryInsertError(org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) Pipeline(org.apache.beam.sdk.Pipeline) FailsafeElement(com.google.cloud.teleport.values.FailsafeElement)

Example 5 with BigQueryInsertError

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError in project DataflowTemplates by GoogleCloudPlatform.

the class TextToBigQueryStreamingTest method wrapBigQueryInsertErrorReturnsValidJSON.

/**
 * Checks that {@code TextToBigQueryStreaming.wrapBigQueryInsertError} produces a
 * {@link FailsafeElement} whose original payload and payload both equal the JSON form of the test
 * person, and whose error message equals the JSON form of the insert errors.
 */
@Test
public void wrapBigQueryInsertErrorReturnsValidJSON() {
    // Failed row holding the test person's name and age.
    TableRow testRow = new TableRow().set(NAME_KEY, testPerson.name).set(AGE_KEY, testPerson.age);

    // Insert errors carrying a single error with the test error message.
    InsertErrors insertErrors = new TableDataInsertAllResponse.InsertErrors();
    insertErrors.setErrors(ImmutableList.of(new ErrorProto().setMessage(ERROR_MESSAGE)));

    BigQueryInsertError bigQueryInsertError =
        new BigQueryInsertError(testRow.clone(), insertErrors, new TableReference());

    FailsafeElement<String, String> wrappedValue =
        TextToBigQueryStreaming.wrapBigQueryInsertError(bigQueryInsertError);

    String expectedJson = GSON.toJson(testPerson);
    assertThat(wrappedValue.getOriginalPayload()).isEqualTo(expectedJson);
    assertThat(wrappedValue.getPayload()).isEqualTo(expectedJson);
    assertThat(wrappedValue.getErrorMessage()).isEqualTo(GSON.toJson(insertErrors));
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) ErrorProto(com.google.api.services.bigquery.model.ErrorProto) TableRow(com.google.api.services.bigquery.model.TableRow) BigQueryInsertError(org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError) InsertErrors(com.google.api.services.bigquery.model.TableDataInsertAllResponse.InsertErrors) Test(org.junit.Test)

Aggregations

BigQueryInsertError (org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError)7 TableRow (com.google.api.services.bigquery.model.TableRow)4 PubsubMessage (org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage)4 Test (org.junit.Test)4 TableReference (com.google.api.services.bigquery.model.TableReference)2 GenericRecord (org.apache.avro.generic.GenericRecord)2 Pipeline (org.apache.beam.sdk.Pipeline)2 CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry)2 WriteResult (org.apache.beam.sdk.io.gcp.bigquery.WriteResult)2 Category (org.junit.experimental.categories.Category)2 ErrorProto (com.google.api.services.bigquery.model.ErrorProto)1 InsertErrors (com.google.api.services.bigquery.model.TableDataInsertAllResponse.InsertErrors)1 TableId (com.google.cloud.bigquery.TableId)1 TextToBigQueryStreaming.wrapBigQueryInsertError (com.google.cloud.teleport.templates.TextToBigQueryStreaming.wrapBigQueryInsertError)1 BigQueryDeadLetterQueueSanitizer (com.google.cloud.teleport.v2.cdc.dlq.BigQueryDeadLetterQueueSanitizer)1 DeadLetterQueueManager (com.google.cloud.teleport.v2.cdc.dlq.DeadLetterQueueManager)1 StringDeadLetterQueueSanitizer (com.google.cloud.teleport.v2.cdc.dlq.StringDeadLetterQueueSanitizer)1 BigQueryMappers (com.google.cloud.teleport.v2.cdc.mappers.BigQueryMappers)1 FailsafeElementCoder (com.google.cloud.teleport.v2.coders.FailsafeElementCoder)1 WindowedFilenamePolicy (com.google.cloud.teleport.v2.io.WindowedFilenamePolicy)1