Use of com.google.cloud.datacatalog.v1.Entry in project DataflowTemplates by GoogleCloudPlatform.
From the class PubSubChangeConsumer, method handleBatch:
  @Override
  public void handleBatch(List<SourceRecord> records, RecordCommitter committer)
      throws InterruptedException {
    ImmutableList.Builder<ApiFuture<String>> futureListBuilder = ImmutableList.builder();
    Set<Publisher> usedPublishers = new HashSet<>();
    // TODO(pabloem): Improve the commit logic.
    for (SourceRecord r : records) {
      // Debezium publishes updates for each table in a separate Kafka topic, which is the fully
      // qualified name of the MySQL table (e.g. dbInstanceName.databaseName.table_name).
      String tableName = r.topic();
      if (whitelistedTables.contains(tableName)) {
        Row updateRecord = translator.translate(r);
        if (updateRecord == null) {
          continue;
        }
        if (!observedTables.contains(tableName)) {
          Entry result = schemaUpdater.updateSchemaForTable(tableName, updateRecord.getSchema());
          if (result == null) {
            throw new InterruptedException(
                "A problem occurred when communicating with Cloud Data Catalog");
          }
          observedTables.add(tableName);
        }
        Publisher pubSubPublisher = this.getPubSubPublisher(tableName);
        if (pubSubPublisher == null) {
          // Stop execution without committing any more messages.
          throw new InterruptedException("Unable to create a PubSub topic for table " + tableName);
        }
        usedPublishers.add(pubSubPublisher);
        PubsubMessage.Builder messageBuilder = PubsubMessage.newBuilder();
        LOG.debug("Update Record is: {}", updateRecord);
        try {
          RowCoder recordCoder = getCoderForRow(tableName, updateRecord);
          ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
          recordCoder.encode(updateRecord, outputStream);
          ByteString encodedUpdate = ByteString.copyFrom(outputStream.toByteArray());
          PubsubMessage message =
              messageBuilder.setData(encodedUpdate).putAttributes("table", tableName).build();
          futureListBuilder.add(pubSubPublisher.publish(message));
        } catch (IOException e) {
          LOG.error(
              "Caught exception {} when trying to encode record {}. Stopping processing.",
              e,
              updateRecord);
          return;
        }
      } else {
        LOG.debug("Discarding record: {}", r);
      }
      committer.markProcessed(r);
    }
    usedPublishers.forEach(p -> p.publishAllOutstanding());
    for (ApiFuture<String> f : futureListBuilder.build()) {
      try {
        String result = f.get();
        LOG.debug("Result from PubSub Publish Future: {}", result);
      } catch (ExecutionException e) {
        LOG.error("Exception when executing future {}: {}. Stopping execution.", f, e);
        return;
      }
    }
    committer.markBatchFinished();
  }
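The encoding step above relies on a getCoderForRow helper that is not shown in this snippet. Below is a minimal sketch of how such a helper could look, assuming it simply builds and caches a Beam RowCoder from the record's schema; the field name, method shape, and caching strategy are illustrative assumptions, not the project's actual implementation.

import java.util.HashMap;
import java.util.Map;
import org.apache.beam.sdk.coders.RowCoder;
import org.apache.beam.sdk.values.Row;

// Illustrative sketch only: cache one RowCoder per table, keyed by the Kafka topic / table name.
private final Map<String, RowCoder> tableCoders = new HashMap<>();

RowCoder getCoderForRow(String tableName, Row updateRecord) {
  // RowCoder.of(Schema) is the Beam SDK factory for coding Row values of a fixed schema.
  return tableCoders.computeIfAbsent(
      tableName, unused -> RowCoder.of(updateRecord.getSchema()));
}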
Use of com.google.cloud.datacatalog.v1.Entry in project DataflowTemplates by GoogleCloudPlatform.
From the class DataCatalogSchemaUtils, method lookupPubSubEntry:
  static Entry lookupPubSubEntry(DataCatalogClient client, String pubsubTopic, String gcpProject) {
    String linkedResource =
        String.format(DATA_CATALOG_PUBSUB_URI_TEMPLATE, gcpProject, pubsubTopic);
    LOG.info("Looking up LinkedResource {}", linkedResource);
    LookupEntryRequest request =
        LookupEntryRequest.newBuilder().setLinkedResource(linkedResource).build();
    try {
      return client.lookupEntry(request);
    } catch (ApiException e) {
      LOG.error("ApiException thrown by Data Catalog API:", e);
      return null;
    }
  }
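For completeness, here is a hedged usage sketch of lookupPubSubEntry. The URI template constant is defined elsewhere in DataCatalogSchemaUtils and is not shown in this snippet; the value below is an assumption that follows Data Catalog's linked-resource format for Pub/Sub topics, and the caller method is purely illustrative.

import com.google.cloud.datacatalog.v1.DataCatalogClient;
import com.google.cloud.datacatalog.v1.Entry;
import java.io.IOException;

// Assumed value for illustration; the real constant lives in DataCatalogSchemaUtils.
static final String DATA_CATALOG_PUBSUB_URI_TEMPLATE =
    "//pubsub.googleapis.com/projects/%s/topics/%s";

// Illustrative caller: create a client, look up the Pub/Sub topic's entry, then close the client.
static Entry lookupExample(String gcpProject, String pubsubTopic) throws IOException {
  try (DataCatalogClient client = DataCatalogClient.create()) {
    return lookupPubSubEntry(client, pubsubTopic, gcpProject);
  }
}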