use of io.debezium.function.Predicates in project debezium by debezium.
the class SnapshotReader method execute.
/**
* Perform the snapshot using the same logic as the "mysqldump" utility.
*/
protected void execute() {
context.configureLoggingContext("snapshot");
final AtomicReference<String> sql = new AtomicReference<>();
final JdbcConnection mysql = connectionContext.jdbc();
final MySqlSchema schema = context.dbSchema();
final Filters filters = schema.filters();
final SourceInfo source = context.source();
final Clock clock = context.getClock();
final long ts = clock.currentTimeInMillis();
logger.info("Starting snapshot for {} with user '{}' with locking mode '{}'", connectionContext.connectionString(), mysql.username(), snapshotLockingMode.getValue());
logRolesForCurrentUser(mysql);
logServerInformation(mysql);
boolean isLocked = false;
boolean isTxnStarted = false;
boolean tableLocks = false;
try {
metrics.startSnapshot();
// See: https://dev.mysql.com/doc/refman/5.7/en/innodb-consistent-read.html
if (!isRunning())
return;
logger.info("Step 0: disabling autocommit and enabling repeatable read transactions");
mysql.setAutoCommit(false);
sql.set("SET TRANSACTION ISOLATION LEVEL REPEATABLE READ");
mysql.execute(sql.get());
// Generate the DDL statements that set the charset-related system variables ...
Map<String, String> systemVariables = connectionContext.readMySqlCharsetSystemVariables(sql);
String setSystemVariablesStatement = connectionContext.setStatementFor(systemVariables);
AtomicBoolean interrupted = new AtomicBoolean(false);
long lockAcquired = 0L;
int step = 1;
try {
// It also ensures that everything we do while we have this lock will be consistent.
if (!isRunning())
return;
if (!snapshotLockingMode.equals(MySqlConnectorConfig.SnapshotLockingMode.NONE)) {
try {
logger.info("Step 1: flush and obtain global read lock to prevent writes to database");
sql.set("FLUSH TABLES WITH READ LOCK");
mysql.execute(sql.get());
lockAcquired = clock.currentTimeInMillis();
metrics.globalLockAcquired();
isLocked = true;
} catch (SQLException e) {
logger.info("Step 1: unable to flush and acquire global read lock, will use table read locks after reading table names");
// Continue anyway, since RDS (among others) don't allow setting a global lock
assert !isLocked;
}
}
// See http://dev.mysql.com/doc/refman/5.7/en/commit.html
if (!isRunning())
return;
logger.info("Step 2: start transaction with consistent snapshot");
sql.set("START TRANSACTION WITH CONSISTENT SNAPSHOT");
mysql.execute(sql.get());
isTxnStarted = true;
// ------------------------------------
if (!isRunning())
return;
step = 3;
if (isLocked) {
// Obtain the binlog position and update the SourceInfo in the context. This means that all source records
// generated as part of the snapshot will contain the binlog position of the snapshot.
readBinlogPosition(step++, source, mysql, sql);
}
// Get the list of databases ...
if (!isRunning())
return;
logger.info("Step {}: read list of available databases", step++);
final List<String> databaseNames = new ArrayList<>();
sql.set("SHOW DATABASES");
mysql.query(sql.get(), rs -> {
while (rs.next()) {
databaseNames.add(rs.getString(1));
}
});
logger.info("\t list of available databases is: {}", databaseNames);
// we are reading the database names from the database and not taking them from the user ...
if (!isRunning())
return;
logger.info("Step {}: read list of available tables in each database", step++);
List<TableId> tableIds = new ArrayList<>();
final Map<String, List<TableId>> tableIdsByDbName = new HashMap<>();
final Set<String> readableDatabaseNames = new HashSet<>();
for (String dbName : databaseNames) {
try {
// MySQL sometimes considers some local files as databases (see DBZ-164),
// so we will simply try each one and ignore the problematic ones ...
sql.set("SHOW FULL TABLES IN " + quote(dbName) + " where Table_Type = 'BASE TABLE'");
mysql.query(sql.get(), rs -> {
while (rs.next() && isRunning()) {
TableId id = new TableId(dbName, null, rs.getString(1));
if (filters.tableFilter().test(id)) {
tableIds.add(id);
tableIdsByDbName.computeIfAbsent(dbName, k -> new ArrayList<>()).add(id);
logger.info("\t including '{}'", id);
} else {
logger.info("\t '{}' is filtered out, discarding", id);
}
}
});
readableDatabaseNames.add(dbName);
} catch (SQLException e) {
// We were unable to execute the query or process the results, so skip this ...
logger.warn("\t skipping database '{}' due to error reading tables: {}", dbName, e.getMessage());
}
}
final Set<String> includedDatabaseNames = readableDatabaseNames.stream().filter(filters.databaseFilter()).collect(Collectors.toSet());
logger.info("\tsnapshot continuing with database(s): {}", includedDatabaseNames);
if (!isLocked) {
if (!snapshotLockingMode.equals(MySqlConnectorConfig.SnapshotLockingMode.NONE)) {
// implicitly committing our transaction ...
if (!connectionContext.userHasPrivileges("LOCK TABLES")) {
// We don't have the right privileges
throw new ConnectException("User does not have the 'LOCK TABLES' privilege required to obtain a " + "consistent snapshot by preventing concurrent writes to tables.");
}
// We have the required privileges, so try to lock all of the tables we're interested in ...
logger.info("Step {}: flush and obtain read lock for {} tables (preventing writes)", step++, tableIds.size());
String tableList = tableIds.stream().map(tid -> quote(tid)).reduce((r, element) -> r + "," + element).orElse(null);
if (tableList != null) {
sql.set("FLUSH TABLES " + tableList + " WITH READ LOCK");
mysql.execute(sql.get());
}
lockAcquired = clock.currentTimeInMillis();
metrics.globalLockAcquired();
isLocked = true;
tableLocks = true;
}
// Our tables are locked, so read the binlog position ...
readBinlogPosition(step++, source, mysql, sql);
}
try {
logger.info("Step {}: generating DROP and CREATE statements to reflect current database schemas:", step++);
schema.applyDdl(source, null, setSystemVariablesStatement, this::enqueueSchemaChanges);
// Add DROP TABLE statements for all tables that we knew about AND those tables found in the databases ...
Set<TableId> allTableIds = new HashSet<>(schema.tables().tableIds());
allTableIds.addAll(tableIds);
allTableIds.stream().filter(// ignore all subsequent tables if this reader is stopped
id -> isRunning()).forEach(tableId -> schema.applyDdl(source, tableId.schema(), "DROP TABLE IF EXISTS " + quote(tableId), this::enqueueSchemaChanges));
// Add a DROP DATABASE statement for each database that we no longer know about ...
schema.tables().tableIds().stream().map(TableId::catalog).filter(Predicates.not(readableDatabaseNames::contains)).filter(// ignore all subsequent tables if this reader is stopped
id -> isRunning()).forEach(missingDbName -> schema.applyDdl(source, missingDbName, "DROP DATABASE IF EXISTS " + quote(missingDbName), this::enqueueSchemaChanges));
// Now process all of our tables for each database ...
for (Map.Entry<String, List<TableId>> entry : tableIdsByDbName.entrySet()) {
if (!isRunning())
break;
String dbName = entry.getKey();
// First drop, create, and then use the named database ...
schema.applyDdl(source, dbName, "DROP DATABASE IF EXISTS " + quote(dbName), this::enqueueSchemaChanges);
schema.applyDdl(source, dbName, "CREATE DATABASE " + quote(dbName), this::enqueueSchemaChanges);
schema.applyDdl(source, dbName, "USE " + quote(dbName), this::enqueueSchemaChanges);
for (TableId tableId : entry.getValue()) {
if (!isRunning())
break;
sql.set("SHOW CREATE TABLE " + quote(tableId));
mysql.query(sql.get(), rs -> {
if (rs.next()) {
schema.applyDdl(source, dbName, rs.getString(2), this::enqueueSchemaChanges);
}
});
}
}
context.makeRecord().regenerate();
}// most likely, something went wrong while writing the history topic
catch (Exception e) {
interrupted.set(true);
throw e;
}
// ------
if (snapshotLockingMode.equals(MySqlConnectorConfig.SnapshotLockingMode.MINIMAL) && isLocked) {
if (tableLocks) {
// We could not acquire a global read lock and instead had to obtain individual table-level read locks
// using 'FLUSH TABLE <tableName> WITH READ LOCK'. However, if we were to do this, the 'UNLOCK TABLES'
// would implicitly commit our active transaction, and this would break our consistent snapshot logic.
// Therefore, we cannot unlock the tables here!
// https://dev.mysql.com/doc/refman/5.7/en/flush.html
logger.info("Step {}: tables were locked explicitly, but to get a consistent snapshot we cannot " + "release the locks until we've read all tables.", step++);
} else {
// We are doing minimal blocking via a global read lock, so we should release the global read lock now.
// All subsequent SELECT should still use the MVCC snapshot obtained when we started our transaction
// (since we started it "...with consistent snapshot"). So, since we're only doing very simple SELECT
// without WHERE predicates, we can release the lock now ...
logger.info("Step {}: releasing global read lock to enable MySQL writes", step);
sql.set("UNLOCK TABLES");
mysql.execute(sql.get());
isLocked = false;
long lockReleased = clock.currentTimeInMillis();
metrics.globalLockReleased();
logger.info("Step {}: blocked writes to MySQL for a total of {}", step++, Strings.duration(lockReleased - lockAcquired));
}
}
// and produce events we can update the very last event with the non-snapshot offset ...
if (!isRunning())
return;
if (includeData) {
BufferedBlockingConsumer<SourceRecord> bufferedRecordQueue = BufferedBlockingConsumer.bufferLast(super::enqueueRecord);
// Dump all of the tables and generate source records ...
logger.info("Step {}: scanning contents of {} tables while still in transaction", step, tableIds.size());
metrics.setTableCount(tableIds.size());
long startScan = clock.currentTimeInMillis();
AtomicLong totalRowCount = new AtomicLong();
int counter = 0;
int completedCounter = 0;
long largeTableCount = context.rowCountForLargeTable();
Iterator<TableId> tableIdIter = tableIds.iterator();
while (tableIdIter.hasNext()) {
TableId tableId = tableIdIter.next();
if (!isRunning())
break;
// Obtain a record maker for this table, which knows about the schema ...
RecordsForTable recordMaker = context.makeRecord().forTable(tableId, null, bufferedRecordQueue);
if (recordMaker != null) {
// Switch to the table's database ...
sql.set("USE " + quote(tableId.catalog()) + ";");
mysql.execute(sql.get());
AtomicLong numRows = new AtomicLong(-1);
AtomicReference<String> rowCountStr = new AtomicReference<>("<unknown>");
StatementFactory statementFactory = this::createStatementWithLargeResultSet;
if (largeTableCount > 0) {
try {
// Choose how we create statements based on the # of rows.
// This is approximate and less accurate then COUNT(*),
// but far more efficient for large InnoDB tables.
sql.set("SHOW TABLE STATUS LIKE '" + tableId.table() + "';");
mysql.query(sql.get(), rs -> {
if (rs.next())
numRows.set(rs.getLong(5));
});
if (numRows.get() <= largeTableCount) {
statementFactory = this::createStatement;
}
rowCountStr.set(numRows.toString());
} catch (SQLException e) {
// Log it, but otherwise just use large result set by default ...
logger.debug("Error while getting number of rows in table {}: {}", tableId, e.getMessage(), e);
}
}
// Scan the rows in the table ...
long start = clock.currentTimeInMillis();
logger.info("Step {}: - scanning table '{}' ({} of {} tables)", step, tableId, ++counter, tableIds.size());
Map<TableId, String> selectOverrides = getSnapshotSelectOverridesByTable();
String selectStatement = selectOverrides.getOrDefault(tableId, "SELECT * FROM " + quote(tableId));
logger.info("For table '{}' using select statement: '{}'", tableId, selectStatement);
sql.set(selectStatement);
try {
int stepNum = step;
mysql.query(sql.get(), statementFactory, rs -> {
long rowNum = 0;
try {
// The table is included in the connector's filters, so process all of the table records
// ...
final Table table = schema.tableFor(tableId);
final int numColumns = table.columns().size();
final Object[] row = new Object[numColumns];
while (rs.next()) {
for (int i = 0, j = 1; i != numColumns; ++i, ++j) {
Column actualColumn = table.columns().get(i);
row[i] = readField(rs, j, actualColumn);
}
// has no row number!
recorder.recordRow(recordMaker, row, ts);
++rowNum;
if (rowNum % 100 == 0 && !isRunning()) {
// We've stopped running ...
break;
}
if (rowNum % 10_000 == 0) {
long stop = clock.currentTimeInMillis();
logger.info("Step {}: - {} of {} rows scanned from table '{}' after {}", stepNum, rowNum, rowCountStr, tableId, Strings.duration(stop - start));
}
}
totalRowCount.addAndGet(rowNum);
if (isRunning()) {
long stop = clock.currentTimeInMillis();
logger.info("Step {}: - Completed scanning a total of {} rows from table '{}' after {}", stepNum, rowNum, tableId, Strings.duration(stop - start));
}
} catch (InterruptedException e) {
Thread.interrupted();
// We were not able to finish all rows in all tables ...
logger.info("Step {}: Stopping the snapshot due to thread interruption", stepNum);
interrupted.set(true);
}
});
} finally {
metrics.completeTable();
if (interrupted.get())
break;
}
}
++completedCounter;
}
// See if we've been stopped or interrupted ...
if (!isRunning() || interrupted.get())
return;
// We've copied all of the tables and we've not yet been stopped, but our buffer holds onto the
// very last record. First mark the snapshot as complete and then apply the updated offset to
// the buffered record ...
source.markLastSnapshot();
long stop = clock.currentTimeInMillis();
try {
bufferedRecordQueue.close(this::replaceOffset);
logger.info("Step {}: scanned {} rows in {} tables in {}", step, totalRowCount, tableIds.size(), Strings.duration(stop - startScan));
} catch (InterruptedException e) {
Thread.interrupted();
// We were not able to finish all rows in all tables ...
logger.info("Step {}: aborting the snapshot after {} rows in {} of {} tables {}", step, totalRowCount, completedCounter, tableIds.size(), Strings.duration(stop - startScan));
interrupted.set(true);
}
} else {
// source.markLastSnapshot(); Think we will not be needing this here it is used to mark last row entry?
logger.info("Step {}: encountered only schema based snapshot, skipping data snapshot", step);
}
step++;
} finally {
// No matter what, we always want to do these steps if necessary ...
boolean rolledBack = false;
// Either commit or roll back the transaction, BEFORE releasing the locks ...
if (isTxnStarted) {
if (interrupted.get() || !isRunning()) {
// We were interrupted or were stopped while reading the tables,
// so roll back the transaction and return immediately ...
logger.info("Step {}: rolling back transaction after abort", step++);
sql.set("ROLLBACK");
mysql.execute(sql.get());
metrics.abortSnapshot();
rolledBack = true;
} else {
// Otherwise, commit our transaction
logger.info("Step {}: committing transaction", step++);
sql.set("COMMIT");
mysql.execute(sql.get());
metrics.completeSnapshot();
}
}
// Release the read lock(s) if we have not yet done so. Locks are not released when committing/rolling back ...
if (isLocked && !rolledBack) {
if (tableLocks) {
logger.info("Step {}: releasing table read locks to enable MySQL writes", step++);
} else {
logger.info("Step {}: releasing global read lock to enable MySQL writes", step++);
}
sql.set("UNLOCK TABLES");
mysql.execute(sql.get());
isLocked = false;
long lockReleased = clock.currentTimeInMillis();
metrics.globalLockReleased();
if (tableLocks) {
logger.info("Writes to MySQL prevented for a total of {}", Strings.duration(lockReleased - lockAcquired));
} else {
logger.info("Writes to MySQL tables prevented for a total of {}", Strings.duration(lockReleased - lockAcquired));
}
}
}
if (!isRunning()) {
// The reader (and connector) was stopped and we did not finish ...
try {
// Mark this reader as having completing its work ...
completeSuccessfully();
long stop = clock.currentTimeInMillis();
logger.info("Stopped snapshot after {} but before completing", Strings.duration(stop - ts));
} finally {
// and since there's no more work to do clean up all resources ...
cleanupResources();
}
} else {
// We completed the snapshot...
try {
// Mark the source as having completed the snapshot. This will ensure the `source` field on records
// are not denoted as a snapshot ...
source.completeSnapshot();
} finally {
// Set the completion flag ...
completeSuccessfully();
long stop = clock.currentTimeInMillis();
logger.info("Completed snapshot in {}", Strings.duration(stop - ts));
}
}
} catch (Throwable e) {
failed(e, "Aborting snapshot due to error when last running '" + sql.get() + "': " + e.getMessage());
}
}
Aggregations