Use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
The class SpannerTransactionWriterDoFn, method processElement.
@ProcessElement
public void processElement(ProcessContext c) {
  FailsafeElement<String, String> msg = c.element();
  Ddl ddl = c.sideInput(ddlView);
  processedEvents.inc();
  /*
   * Try/catch block to capture any exceptions that might occur while processing
   * DataStream events and writing to Cloud Spanner. All exceptions that are caught
   * can be retried based on the exception type.
   */
  try {
    JsonNode changeEvent = mapper.readTree(msg.getPayload());
    ChangeEventContext changeEventContext =
        ChangeEventContextFactory.createChangeEventContext(
            changeEvent, ddl, shadowTablePrefix, sourceType);
    // Sequence information for the current change event.
    ChangeEventSequence currentChangeEventSequence =
        ChangeEventSequenceFactory.createChangeEventSequenceFromChangeEventContext(
            changeEventContext);
    // Start transaction
    spannerAccessor
        .getDatabaseClient()
        .readWriteTransaction()
        .run(
            (TransactionCallable<Void>)
                transaction -> {
                  ChangeEventSequence previousChangeEventSequence =
                      ChangeEventSequenceFactory.createChangeEventSequenceFromShadowTable(
                          transaction, changeEventContext);
                  if (previousChangeEventSequence != null
                      && previousChangeEventSequence.compareTo(currentChangeEventSequence) >= 0) {
                    return null;
                  }
                  transaction.buffer(changeEventContext.getMutations());
                  return null;
                });
    com.google.cloud.Timestamp timestamp = com.google.cloud.Timestamp.now();
    c.output(timestamp);
    sucessfulEvents.inc();
  } catch (InvalidChangeEventException e) {
    // Errors that result from invalid change events.
    outputWithErrorTag(c, msg, e, SpannerTransactionWriter.PERMANENT_ERROR_TAG);
    skippedEvents.inc();
  } catch (ChangeEventConvertorException e) {
    // Errors that occur during event conversion are not retryable.
    outputWithErrorTag(c, msg, e, SpannerTransactionWriter.PERMANENT_ERROR_TAG);
    conversionErrors.inc();
  } catch (SpannerException se) {
    /*
     * Errors that happen when writing to Cloud Spanner are considered retryable.
     * Since all event conversion errors are caught beforehand as permanent errors,
     * any other errors encountered while writing to Cloud Spanner can be retried.
     * Examples include:
     * 1. Deadline exceeded errors from Cloud Spanner.
     * 2. Failures due to foreign key/interleaved table constraints.
     * 3. Any transient errors in Cloud Spanner.
     */
    outputWithErrorTag(c, msg, se, SpannerTransactionWriter.RETRYABLE_ERROR_TAG);
    retryableErrors.inc();
  } catch (Exception e) {
    // Any other errors are considered severe and not retryable.
    outputWithErrorTag(c, msg, e, SpannerTransactionWriter.PERMANENT_ERROR_TAG);
    failedEvents.inc();
  }
}
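The helper outputWithErrorTag is not shown in this snippet. A minimal sketch, assuming it copies the failed element, attaches the exception details with the fluent FailsafeElement setters, and emits it to the given side-output tag, could look like the following (the template's actual helper may differ; Guava's Throwables and Beam's TupleTag are assumed to be imported):

// Hypothetical sketch of outputWithErrorTag; not the template's real implementation.
private void outputWithErrorTag(
    ProcessContext c,
    FailsafeElement<String, String> element,
    Exception e,
    TupleTag<FailsafeElement<String, String>> errorTag) {
  // Copy the element so the original input stays untouched, then attach error details
  // using the fluent setters shown in the ErrorConvertersTest example below.
  FailsafeElement<String, String> failedElement =
      FailsafeElement.of(element.getOriginalPayload(), element.getPayload())
          .setErrorMessage(e.getMessage())
          .setStacktrace(Throwables.getStackTraceAsString(e));
  // Route the failed element to the caller-chosen side output (permanent or retryable).
  c.output(errorTag, failedElement);
}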
Use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
The class ErrorConvertersTest, method testFailedStringMessageToTableRowFn.
/**
 * Tests that {@link ErrorConverters.FailedStringToTableRowFn} properly formats failed String
 * objects into {@link TableRow} objects to save to BigQuery.
 */
@Test
public void testFailedStringMessageToTableRowFn() {
  // Test input
  final String message = "Super secret";
  final String errorMessage = "Failed to parse input JSON";
  final String stacktrace = "Error at com.google.cloud.teleport.TextToBigQueryStreaming";
  final FailsafeElement<String, String> input =
      FailsafeElement.of(message, message)
          .setErrorMessage(errorMessage)
          .setStacktrace(stacktrace);
  final Instant timestamp =
      new DateTime(2022, 2, 22, 22, 22, 22, 222, DateTimeZone.UTC).toInstant();
  // Register the coder for the pipeline. This prevents having to invoke .setCoder() on
  // many transforms.
  FailsafeElementCoder<String, String> coder =
      FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);
  // Build pipeline
  PCollection<TableRow> output =
      pipeline
          .apply(
              "CreateInput",
              Create.timestamped(TimestampedValue.of(input, timestamp)).withCoder(coder))
          .apply("FailedRecordToTableRow", ParDo.of(new FailedStringToTableRowFn()));
  // Assert
  PAssert.that(output)
      .satisfies(
          collection -> {
            final TableRow result = collection.iterator().next();
            assertThat(result.get("timestamp")).isEqualTo("2022-02-22 22:22:22.222000");
            assertThat(result.get("attributes")).isNull();
            assertThat(result.get("payloadString")).isEqualTo(message);
            assertThat(result.get("payloadBytes")).isNotNull();
            assertThat(result.get("errorMessage")).isEqualTo(errorMessage);
            assertThat(result.get("stacktrace")).isEqualTo(stacktrace);
            return null;
          });
  // Execute pipeline
  pipeline.run();
}
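For context, a DoFn consistent with the fields the test asserts could look like the sketch below. This is not the template's actual ErrorConverters.FailedStringToTableRowFn, only an approximation of the shape of the TableRow it produces; it assumes Joda-Time's DateTimeFormat and java.nio.charset.StandardCharsets are imported.

// A sketch only: mirrors the fields asserted above, not the real FailedStringToTableRowFn.
static class FailedStringToTableRowSketchFn
    extends DoFn<FailsafeElement<String, String>, TableRow> {

  // Produces timestamps like "2022-02-22 22:22:22.222000", as the test expects.
  private static final DateTimeFormatter TIMESTAMP_FORMATTER =
      DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSSSSS").withZoneUTC();

  @ProcessElement
  public void processElement(ProcessContext context) {
    FailsafeElement<String, String> failsafeElement = context.element();
    String message = failsafeElement.getOriginalPayload();
    TableRow failedRow =
        new TableRow()
            .set("timestamp", TIMESTAMP_FORMATTER.print(context.timestamp().getMillis()))
            .set("errorMessage", failsafeElement.getErrorMessage())
            .set("stacktrace", failsafeElement.getStacktrace())
            .set("payloadString", message)
            .set("payloadBytes", message.getBytes(StandardCharsets.UTF_8));
    context.output(failedRow);
  }
}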
Use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
The class DataStreamToMongoDB, method run.
/**
* Runs the pipeline with the supplied options.
*
* @param options The execution parameters to the pipeline.
* @return The result of the pipeline execution.
*/
public static PipelineResult run(Options options) {
  /*
   * Stages:
   * 1) Ingest and Normalize Data to FailsafeElement with JSON Strings
   * 2) Push the data to MongoDB
   */
  Pipeline pipeline = Pipeline.create(options);
  /*
   * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
   * a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
   */
  PCollection<FailsafeElement<String, String>> datastreamJsonRecords =
      pipeline.apply(
          new DataStreamIO(
                  options.getStreamName(),
                  options.getInputFilePattern(),
                  options.getInputFileFormat(),
                  options.getInputSubscription(),
                  options.getRfcStartDateTime())
              .withFileReadConcurrency(options.getFileReadConcurrency()));
  PCollection<FailsafeElement<String, String>> jsonRecords =
      PCollectionList.of(datastreamJsonRecords).apply(Flatten.pCollections());
  /*
   * Performs the following steps:
   * 1. Converts JSON to BSON documents.
   * 2. Removes the metadata fields.
   * 3. Inserts the data into MongoDB collections.
   */
  jsonRecords
      .apply(
          "jsonToDocuments",
          MapElements.via(
              new SimpleFunction<FailsafeElement<String, String>, Document>() {
                @Override
                public Document apply(FailsafeElement<String, String> jsonString) {
                  String s = jsonString.getOriginalPayload();
                  Document doc = Document.parse(s);
                  return removeTableRowFields(doc, MAPPER_IGNORE_FIELDS);
                }
              }))
      .apply(
          "Write To MongoDB",
          MongoDbIO.write()
              .withUri(options.getMongoDBUri())
              .withDatabase(options.getDatabase())
              .withCollection(options.getCollection()));
  // Execute the pipeline and return the result.
  return pipeline.run();
}
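The removeTableRowFields helper and the MAPPER_IGNORE_FIELDS constant are not shown here. A plausible sketch, assuming MAPPER_IGNORE_FIELDS is a set of DataStream metadata field names to drop, is:

// Hypothetical sketch of removeTableRowFields; the template's actual helper and the
// contents of MAPPER_IGNORE_FIELDS (presumably the "_metadata_*" fields) may differ.
private static Document removeTableRowFields(Document doc, Set<String> ignoreFields) {
  Document cleanDoc = new Document();
  for (Map.Entry<String, Object> entry : doc.entrySet()) {
    // Keep every field except the DataStream metadata fields we want to drop.
    if (!ignoreFields.contains(entry.getKey())) {
      cleanDoc.append(entry.getKey(), entry.getValue());
    }
  }
  return cleanDoc;
}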
Use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
The class DataStreamToPostgres, method run.
/**
* Runs the pipeline with the supplied options.
*
* @param options The execution parameters to the pipeline.
* @return The result of the pipeline execution.
*/
public static PipelineResult run(Options options) {
  /*
   * Stages:
   * 1) Ingest and Normalize Data to FailsafeElement with JSON Strings
   * 2) Write JSON Strings to Postgres DML Objects
   * 3) Filter stale rows using stateful PK transform
   * 4) Write DML statements to Postgres
   */
  Pipeline pipeline = Pipeline.create(options);
  String jdbcDriverConnectionString =
      String.format(
          "jdbc:postgresql://%s:%s/%s",
          options.getDatabaseHost(), options.getDatabasePort(), options.getDatabaseName());
  CdcJdbcIO.DataSourceConfiguration dataSourceConfiguration =
      CdcJdbcIO.DataSourceConfiguration.create("org.postgresql.Driver", jdbcDriverConnectionString)
          .withUsername(options.getDatabaseUser())
          .withPassword(options.getDatabasePassword())
          .withMaxIdleConnections(new Integer(0));
  validateOptions(options, dataSourceConfiguration);
  /*
   * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
   * a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
   */
  PCollection<FailsafeElement<String, String>> datastreamJsonRecords =
      pipeline.apply(
          new DataStreamIO(
                  options.getStreamName(),
                  options.getInputFilePattern(),
                  options.getInputFileFormat(),
                  options.getGcsPubSubSubscription(),
                  options.getRfcStartDateTime())
              .withLowercaseSourceColumns()
              .withHashColumnValue("_metadata_row_id", "rowid"));
  /*
   * Stage 2: Write JSON Strings to Postgres DML Objects
   * a) Convert JSON String FailsafeElements to DmlInfo objects (dmlStatements)
   * Stage 3: Filter stale rows using stateful PK transform
   */
  PCollection<DmlInfo> dmlStatements =
      datastreamJsonRecords
          .apply("Format to Postgres DML", CreateDml.createDmlObjects(dataSourceConfiguration))
          .apply("DML Stateful Processing", ProcessDml.statefulOrderByPK());
  /*
   * Stage 4: Write Inserts to Cloud SQL
   */
  dmlStatements.apply(
      "Write to Postgres",
      CdcJdbcIO.<DmlInfo>write()
          .withDataSourceConfiguration(dataSourceConfiguration)
          .withStatementFormatter(
              new CdcJdbcIO.StatementFormatter<DmlInfo>() {
                public String formatStatement(DmlInfo element) {
                  return element.getDmlSql();
                }
              }));
  // Execute the pipeline and return the result.
  return pipeline.run();
}
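The Options interface referenced above is not shown. Assuming the standard Beam PipelineOptions pattern, the connection-related options used in this method would look roughly like the sketch below; the template's real Options interface declares more parameters and may use different types or defaults.

// Rough sketch of the connection-related pipeline options used above (assumption only).
public interface Options extends PipelineOptions {

  @Description("Hostname of the target Postgres instance.")
  String getDatabaseHost();
  void setDatabaseHost(String value);

  @Description("Port of the target Postgres instance.")
  @Default.String("5432")
  String getDatabasePort();
  void setDatabasePort(String value);

  @Description("Name of the target database.")
  String getDatabaseName();
  void setDatabaseName(String value);

  @Description("User for the JDBC connection.")
  String getDatabaseUser();
  void setDatabaseUser(String value);

  @Description("Password for the JDBC connection.")
  String getDatabasePassword();
  void setDatabasePassword(String value);
}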
Use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
The class DatabaseMigrationUtils, method convertJsonToDmlInfo.
public KV<String, DmlInfo> convertJsonToDmlInfo(FailsafeElement<String, String> element) {
  String jsonString = element.getPayload();
  ObjectMapper mapper = new ObjectMapper();
  JsonNode rowObj;
  try {
    rowObj = mapper.readTree(jsonString);
  } catch (IOException e) {
    LOG.error("IOException: {} :: {}", jsonString, e.toString());
    DmlInfo dmlInfo =
        DmlInfo.of(
            element.getOriginalPayload(), "", "", "",
            new ArrayList<String>(), new ArrayList<String>(),
            new ArrayList<String>(), new ArrayList<String>());
    // TODO(dhercher): how should we handle bad data?
    return KV.of(jsonString, dmlInfo);
  }
  try {
    // Oracle uses upper case while Postgres uses all lowercase.
    // We lowercase the values of these metadata fields to align with
    // our schema conversion rules.
    String schemaName = this.getPostgresSchemaName(rowObj);
    String tableName = this.getPostgresTableName(rowObj);
    Map<String, String> tableSchema = this.getTableSchema(schemaName, tableName);
    List<String> primaryKeys = this.getPrimaryKeys(schemaName, tableName, rowObj);
    List<String> orderByFields = Arrays.asList("_metadata_timestamp", "_metadata_scn");
    List<String> primaryKeyValues = getFieldValues(rowObj, primaryKeys);
    List<String> orderByValues = getFieldValues(rowObj, orderByFields);
    if (tableSchema.isEmpty()) {
      // If the table does not exist, supply an empty SQL value (NOOP).
      DmlInfo dmlInfo =
          DmlInfo.of(
              element.getOriginalPayload(), "", schemaName, tableName,
              primaryKeys, orderByFields, primaryKeyValues, orderByValues);
      return KV.of(jsonString, dmlInfo);
    }
    String dmlSql;
    if (rowObj.get("_metadata_deleted").asBoolean()) {
      dmlSql = convertJsonToDeleteSql(rowObj, tableSchema, schemaName, tableName, primaryKeys);
    } else if (primaryKeys.size() == 0) {
      // TODO(dhercher): Do we choose to support this case?
      dmlSql = convertJsonToInsertSql(rowObj, tableSchema, schemaName, tableName);
    } else {
      dmlSql = convertJsonToUpsertSql(rowObj, tableSchema, schemaName, tableName, primaryKeys);
    }
    DmlInfo dmlInfo =
        DmlInfo.of(
            element.getOriginalPayload(), dmlSql, schemaName, tableName,
            primaryKeys, orderByFields, primaryKeyValues, orderByValues);
    return KV.of(dmlInfo.getStateWindowKey(), dmlInfo);
  } catch (Exception e) {
    LOG.error("Value Error: {} :: {}", rowObj.toString(), e.toString());
    DmlInfo dmlInfo =
        DmlInfo.of(
            element.getOriginalPayload(), "", "", "",
            new ArrayList<String>(), new ArrayList<String>(),
            new ArrayList<String>(), new ArrayList<String>());
    // TODO(dhercher): how should we handle bad data?
    return KV.of(jsonString, dmlInfo);
  }
}
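The getFieldValues helper used for the primary key and ordering values is not shown. A hypothetical sketch that extracts each named field from the change-event JsonNode as text could be:

// Hypothetical sketch of getFieldValues; the real DatabaseMigrationUtils helper may
// handle missing fields and non-text types differently.
private static List<String> getFieldValues(JsonNode rowObj, List<String> fieldNames) {
  List<String> values = new ArrayList<>();
  for (String fieldName : fieldNames) {
    JsonNode field = rowObj.get(fieldName);
    // Record missing fields as empty strings so the value list stays aligned with the keys.
    values.add(field == null ? "" : field.asText());
  }
  return values;
}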