Examples with Predicates - io.debezium.function.Predicates

Example 1 with Predicates

use of io.debezium.function.Predicates in project debezium by debezium.

the class SnapshotReader method execute.

/**
 * Perform the snapshot using the same logic as the "mysqldump" utility.
 */
protected void execute() {
    context.configureLoggingContext("snapshot");
    final AtomicReference<String> sql = new AtomicReference<>();
    final JdbcConnection mysql = connectionContext.jdbc();
    final MySqlSchema schema = context.dbSchema();
    final Filters filters = schema.filters();
    final SourceInfo source = context.source();
    final Clock clock = context.getClock();
    final long ts = clock.currentTimeInMillis();
    logger.info("Starting snapshot for {} with user '{}' with locking mode '{}'", connectionContext.connectionString(), mysql.username(), snapshotLockingMode.getValue());
    logRolesForCurrentUser(mysql);
    logServerInformation(mysql);
    boolean isLocked = false;
    boolean isTxnStarted = false;
    boolean tableLocks = false;
    try {
        metrics.startSnapshot();
        // See: https://dev.mysql.com/doc/refman/5.7/en/innodb-consistent-read.html
        if (!isRunning())
            return;
        logger.info("Step 0: disabling autocommit and enabling repeatable read transactions");
        mysql.setAutoCommit(false);
        sql.set("SET TRANSACTION ISOLATION LEVEL REPEATABLE READ");
        mysql.execute(sql.get());
        // Generate the DDL statements that set the charset-related system variables ...
        Map<String, String> systemVariables = connectionContext.readMySqlCharsetSystemVariables(sql);
        String setSystemVariablesStatement = connectionContext.setStatementFor(systemVariables);
        AtomicBoolean interrupted = new AtomicBoolean(false);
        long lockAcquired = 0L;
        int step = 1;
        try {
            // It also ensures that everything we do while we have this lock will be consistent.
            if (!isRunning())
                return;
            if (!snapshotLockingMode.equals(MySqlConnectorConfig.SnapshotLockingMode.NONE)) {
                try {
                    logger.info("Step 1: flush and obtain global read lock to prevent writes to database");
                    sql.set("FLUSH TABLES WITH READ LOCK");
                    mysql.execute(sql.get());
                    lockAcquired = clock.currentTimeInMillis();
                    metrics.globalLockAcquired();
                    isLocked = true;
                } catch (SQLException e) {
                    logger.info("Step 1: unable to flush and acquire global read lock, will use table read locks after reading table names");
                    // Continue anyway, since RDS (among others) don't allow setting a global lock
                    assert !isLocked;
                }
            }
            // See http://dev.mysql.com/doc/refman/5.7/en/commit.html
            if (!isRunning())
                return;
            logger.info("Step 2: start transaction with consistent snapshot");
            sql.set("START TRANSACTION WITH CONSISTENT SNAPSHOT");
            mysql.execute(sql.get());
            isTxnStarted = true;
            // ------------------------------------
            if (!isRunning())
                return;
            step = 3;
            if (isLocked) {
                // Obtain the binlog position and update the SourceInfo in the context. This means that all source records
                // generated as part of the snapshot will contain the binlog position of the snapshot.
                readBinlogPosition(step++, source, mysql, sql);
            }
            // Get the list of databases ...
            if (!isRunning())
                return;
            logger.info("Step {}: read list of available databases", step++);
            final List<String> databaseNames = new ArrayList<>();
            sql.set("SHOW DATABASES");
            mysql.query(sql.get(), rs -> {
                while (rs.next()) {
                    databaseNames.add(rs.getString(1));
                }
            });
            logger.info("\t list of available databases is: {}", databaseNames);
            // we are reading the database names from the database and not taking them from the user ...
            if (!isRunning())
                return;
            logger.info("Step {}: read list of available tables in each database", step++);
            List<TableId> tableIds = new ArrayList<>();
            final Map<String, List<TableId>> tableIdsByDbName = new HashMap<>();
            final Set<String> readableDatabaseNames = new HashSet<>();
            for (String dbName : databaseNames) {
                try {
                    // MySQL sometimes considers some local files as databases (see DBZ-164),
                    // so we will simply try each one and ignore the problematic ones ...
                    sql.set("SHOW FULL TABLES IN " + quote(dbName) + " where Table_Type = 'BASE TABLE'");
                    mysql.query(sql.get(), rs -> {
                        while (rs.next() && isRunning()) {
                            TableId id = new TableId(dbName, null, rs.getString(1));
                            if (filters.tableFilter().test(id)) {
                                tableIds.add(id);
                                tableIdsByDbName.computeIfAbsent(dbName, k -> new ArrayList<>()).add(id);
                                logger.info("\t including '{}'", id);
                            } else {
                                logger.info("\t '{}' is filtered out, discarding", id);
                            }
                        }
                    });
                    readableDatabaseNames.add(dbName);
                } catch (SQLException e) {
                    // We were unable to execute the query or process the results, so skip this ...
                    logger.warn("\t skipping database '{}' due to error reading tables: {}", dbName, e.getMessage());
                }
            }
            final Set<String> includedDatabaseNames = readableDatabaseNames.stream().filter(filters.databaseFilter()).collect(Collectors.toSet());
            logger.info("\tsnapshot continuing with database(s): {}", includedDatabaseNames);
            if (!isLocked) {
                if (!snapshotLockingMode.equals(MySqlConnectorConfig.SnapshotLockingMode.NONE)) {
                    // implicitly committing our transaction ...
                    if (!connectionContext.userHasPrivileges("LOCK TABLES")) {
                        // We don't have the right privileges
                        throw new ConnectException("User does not have the 'LOCK TABLES' privilege required to obtain a " + "consistent snapshot by preventing concurrent writes to tables.");
                    }
                    // We have the required privileges, so try to lock all of the tables we're interested in ...
                    logger.info("Step {}: flush and obtain read lock for {} tables (preventing writes)", step++, tableIds.size());
                    String tableList = tableIds.stream().map(tid -> quote(tid)).reduce((r, element) -> r + "," + element).orElse(null);
                    if (tableList != null) {
                        sql.set("FLUSH TABLES " + tableList + " WITH READ LOCK");
                        mysql.execute(sql.get());
                    }
                    lockAcquired = clock.currentTimeInMillis();
                    metrics.globalLockAcquired();
                    isLocked = true;
                    tableLocks = true;
                }
                // Our tables are locked, so read the binlog position ...
                readBinlogPosition(step++, source, mysql, sql);
            }
            try {
                logger.info("Step {}: generating DROP and CREATE statements to reflect current database schemas:", step++);
                schema.applyDdl(source, null, setSystemVariablesStatement, this::enqueueSchemaChanges);
                // Add DROP TABLE statements for all tables that we knew about AND those tables found in the databases ...
                Set<TableId> allTableIds = new HashSet<>(schema.tables().tableIds());
                allTableIds.addAll(tableIds);
                allTableIds.stream().filter(// ignore all subsequent tables if this reader is stopped
                id -> isRunning()).forEach(tableId -> schema.applyDdl(source, tableId.schema(), "DROP TABLE IF EXISTS " + quote(tableId), this::enqueueSchemaChanges));
                // Add a DROP DATABASE statement for each database that we no longer know about ...
                schema.tables().tableIds().stream().map(TableId::catalog).filter(Predicates.not(readableDatabaseNames::contains)).filter(// ignore all subsequent tables if this reader is stopped
                id -> isRunning()).forEach(missingDbName -> schema.applyDdl(source, missingDbName, "DROP DATABASE IF EXISTS " + quote(missingDbName), this::enqueueSchemaChanges));
                // Now process all of our tables for each database ...
                for (Map.Entry<String, List<TableId>> entry : tableIdsByDbName.entrySet()) {
                    if (!isRunning())
                        break;
                    String dbName = entry.getKey();
                    // First drop, create, and then use the named database ...
                    schema.applyDdl(source, dbName, "DROP DATABASE IF EXISTS " + quote(dbName), this::enqueueSchemaChanges);
                    schema.applyDdl(source, dbName, "CREATE DATABASE " + quote(dbName), this::enqueueSchemaChanges);
                    schema.applyDdl(source, dbName, "USE " + quote(dbName), this::enqueueSchemaChanges);
                    for (TableId tableId : entry.getValue()) {
                        if (!isRunning())
                            break;
                        sql.set("SHOW CREATE TABLE " + quote(tableId));
                        mysql.query(sql.get(), rs -> {
                            if (rs.next()) {
                                schema.applyDdl(source, dbName, rs.getString(2), this::enqueueSchemaChanges);
                            }
                        });
                    }
                }
                context.makeRecord().regenerate();
            }// most likely, something went wrong while writing the history topic
             catch (Exception e) {
                interrupted.set(true);
                throw e;
            }
            // ------
            if (snapshotLockingMode.equals(MySqlConnectorConfig.SnapshotLockingMode.MINIMAL) && isLocked) {
                if (tableLocks) {
                    // We could not acquire a global read lock and instead had to obtain individual table-level read locks
                    // using 'FLUSH TABLE <tableName> WITH READ LOCK'. However, if we were to do this, the 'UNLOCK TABLES'
                    // would implicitly commit our active transaction, and this would break our consistent snapshot logic.
                    // Therefore, we cannot unlock the tables here!
                    // https://dev.mysql.com/doc/refman/5.7/en/flush.html
                    logger.info("Step {}: tables were locked explicitly, but to get a consistent snapshot we cannot " + "release the locks until we've read all tables.", step++);
                } else {
                    // We are doing minimal blocking via a global read lock, so we should release the global read lock now.
                    // All subsequent SELECT should still use the MVCC snapshot obtained when we started our transaction
                    // (since we started it "...with consistent snapshot"). So, since we're only doing very simple SELECT
                    // without WHERE predicates, we can release the lock now ...
                    logger.info("Step {}: releasing global read lock to enable MySQL writes", step);
                    sql.set("UNLOCK TABLES");
                    mysql.execute(sql.get());
                    isLocked = false;
                    long lockReleased = clock.currentTimeInMillis();
                    metrics.globalLockReleased();
                    logger.info("Step {}: blocked writes to MySQL for a total of {}", step++, Strings.duration(lockReleased - lockAcquired));
                }
            }
            // and produce events we can update the very last event with the non-snapshot offset ...
            if (!isRunning())
                return;
            if (includeData) {
                BufferedBlockingConsumer<SourceRecord> bufferedRecordQueue = BufferedBlockingConsumer.bufferLast(super::enqueueRecord);
                // Dump all of the tables and generate source records ...
                logger.info("Step {}: scanning contents of {} tables while still in transaction", step, tableIds.size());
                metrics.setTableCount(tableIds.size());
                long startScan = clock.currentTimeInMillis();
                AtomicLong totalRowCount = new AtomicLong();
                int counter = 0;
                int completedCounter = 0;
                long largeTableCount = context.rowCountForLargeTable();
                Iterator<TableId> tableIdIter = tableIds.iterator();
                while (tableIdIter.hasNext()) {
                    TableId tableId = tableIdIter.next();
                    if (!isRunning())
                        break;
                    // Obtain a record maker for this table, which knows about the schema ...
                    RecordsForTable recordMaker = context.makeRecord().forTable(tableId, null, bufferedRecordQueue);
                    if (recordMaker != null) {
                        // Switch to the table's database ...
                        sql.set("USE " + quote(tableId.catalog()) + ";");
                        mysql.execute(sql.get());
                        AtomicLong numRows = new AtomicLong(-1);
                        AtomicReference<String> rowCountStr = new AtomicReference<>("<unknown>");
                        StatementFactory statementFactory = this::createStatementWithLargeResultSet;
                        if (largeTableCount > 0) {
                            try {
                                // Choose how we create statements based on the # of rows.
                                // This is approximate and less accurate then COUNT(*),
                                // but far more efficient for large InnoDB tables.
                                sql.set("SHOW TABLE STATUS LIKE '" + tableId.table() + "';");
                                mysql.query(sql.get(), rs -> {
                                    if (rs.next())
                                        numRows.set(rs.getLong(5));
                                });
                                if (numRows.get() <= largeTableCount) {
                                    statementFactory = this::createStatement;
                                }
                                rowCountStr.set(numRows.toString());
                            } catch (SQLException e) {
                                // Log it, but otherwise just use large result set by default ...
                                logger.debug("Error while getting number of rows in table {}: {}", tableId, e.getMessage(), e);
                            }
                        }
                        // Scan the rows in the table ...
                        long start = clock.currentTimeInMillis();
                        logger.info("Step {}: - scanning table '{}' ({} of {} tables)", step, tableId, ++counter, tableIds.size());
                        Map<TableId, String> selectOverrides = getSnapshotSelectOverridesByTable();
                        String selectStatement = selectOverrides.getOrDefault(tableId, "SELECT * FROM " + quote(tableId));
                        logger.info("For table '{}' using select statement: '{}'", tableId, selectStatement);
                        sql.set(selectStatement);
                        try {
                            int stepNum = step;
                            mysql.query(sql.get(), statementFactory, rs -> {
                                long rowNum = 0;
                                try {
                                    // The table is included in the connector's filters, so process all of the table records
                                    // ...
                                    final Table table = schema.tableFor(tableId);
                                    final int numColumns = table.columns().size();
                                    final Object[] row = new Object[numColumns];
                                    while (rs.next()) {
                                        for (int i = 0, j = 1; i != numColumns; ++i, ++j) {
                                            Column actualColumn = table.columns().get(i);
                                            row[i] = readField(rs, j, actualColumn);
                                        }
                                        // has no row number!
                                        recorder.recordRow(recordMaker, row, ts);
                                        ++rowNum;
                                        if (rowNum % 100 == 0 && !isRunning()) {
                                            // We've stopped running ...
                                            break;
                                        }
                                        if (rowNum % 10_000 == 0) {
                                            long stop = clock.currentTimeInMillis();
                                            logger.info("Step {}: - {} of {} rows scanned from table '{}' after {}", stepNum, rowNum, rowCountStr, tableId, Strings.duration(stop - start));
                                        }
                                    }
                                    totalRowCount.addAndGet(rowNum);
                                    if (isRunning()) {
                                        long stop = clock.currentTimeInMillis();
                                        logger.info("Step {}: - Completed scanning a total of {} rows from table '{}' after {}", stepNum, rowNum, tableId, Strings.duration(stop - start));
                                    }
                                } catch (InterruptedException e) {
                                    Thread.interrupted();
                                    // We were not able to finish all rows in all tables ...
                                    logger.info("Step {}: Stopping the snapshot due to thread interruption", stepNum);
                                    interrupted.set(true);
                                }
                            });
                        } finally {
                            metrics.completeTable();
                            if (interrupted.get())
                                break;
                        }
                    }
                    ++completedCounter;
                }
                // See if we've been stopped or interrupted ...
                if (!isRunning() || interrupted.get())
                    return;
                // We've copied all of the tables and we've not yet been stopped, but our buffer holds onto the
                // very last record. First mark the snapshot as complete and then apply the updated offset to
                // the buffered record ...
                source.markLastSnapshot();
                long stop = clock.currentTimeInMillis();
                try {
                    bufferedRecordQueue.close(this::replaceOffset);
                    logger.info("Step {}: scanned {} rows in {} tables in {}", step, totalRowCount, tableIds.size(), Strings.duration(stop - startScan));
                } catch (InterruptedException e) {
                    Thread.interrupted();
                    // We were not able to finish all rows in all tables ...
                    logger.info("Step {}: aborting the snapshot after {} rows in {} of {} tables {}", step, totalRowCount, completedCounter, tableIds.size(), Strings.duration(stop - startScan));
                    interrupted.set(true);
                }
            } else {
                // source.markLastSnapshot(); Think we will not be needing this here it is used to mark last row entry?
                logger.info("Step {}: encountered only schema based snapshot, skipping data snapshot", step);
            }
            step++;
        } finally {
            // No matter what, we always want to do these steps if necessary ...
            boolean rolledBack = false;
            // Either commit or roll back the transaction, BEFORE releasing the locks ...
            if (isTxnStarted) {
                if (interrupted.get() || !isRunning()) {
                    // We were interrupted or were stopped while reading the tables,
                    // so roll back the transaction and return immediately ...
                    logger.info("Step {}: rolling back transaction after abort", step++);
                    sql.set("ROLLBACK");
                    mysql.execute(sql.get());
                    metrics.abortSnapshot();
                    rolledBack = true;
                } else {
                    // Otherwise, commit our transaction
                    logger.info("Step {}: committing transaction", step++);
                    sql.set("COMMIT");
                    mysql.execute(sql.get());
                    metrics.completeSnapshot();
                }
            }
            // Release the read lock(s) if we have not yet done so. Locks are not released when committing/rolling back ...
            if (isLocked && !rolledBack) {
                if (tableLocks) {
                    logger.info("Step {}: releasing table read locks to enable MySQL writes", step++);
                } else {
                    logger.info("Step {}: releasing global read lock to enable MySQL writes", step++);
                }
                sql.set("UNLOCK TABLES");
                mysql.execute(sql.get());
                isLocked = false;
                long lockReleased = clock.currentTimeInMillis();
                metrics.globalLockReleased();
                if (tableLocks) {
                    logger.info("Writes to MySQL prevented for a total of {}", Strings.duration(lockReleased - lockAcquired));
                } else {
                    logger.info("Writes to MySQL tables prevented for a total of {}", Strings.duration(lockReleased - lockAcquired));
                }
            }
        }
        if (!isRunning()) {
            // The reader (and connector) was stopped and we did not finish ...
            try {
                // Mark this reader as having completing its work ...
                completeSuccessfully();
                long stop = clock.currentTimeInMillis();
                logger.info("Stopped snapshot after {} but before completing", Strings.duration(stop - ts));
            } finally {
                // and since there's no more work to do clean up all resources ...
                cleanupResources();
            }
        } else {
            // We completed the snapshot...
            try {
                // Mark the source as having completed the snapshot. This will ensure the `source` field on records
                // are not denoted as a snapshot ...
                source.completeSnapshot();
            } finally {
                // Set the completion flag ...
                completeSuccessfully();
                long stop = clock.currentTimeInMillis();
                logger.info("Completed snapshot in {}", Strings.duration(stop - ts));
            }
        }
    } catch (Throwable e) {
        failed(e, "Aborting snapshot due to error when last running '" + sql.get() + "': " + e.getMessage());
    }
}

Also used : TableId(io.debezium.relational.TableId) RecordsForTable(io.debezium.connector.mysql.RecordMakers.RecordsForTable) Arrays(java.util.Arrays) Connection(java.sql.Connection) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) BufferedBlockingConsumer(io.debezium.function.BufferedBlockingConsumer) Threads(io.debezium.util.Threads) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SQLException(java.sql.SQLException) Matcher(java.util.regex.Matcher) ResultSet(java.sql.ResultSet) Duration(java.time.Duration) Map(java.util.Map) Table(io.debezium.relational.Table) Strings(io.debezium.util.Strings) ExecutorService(java.util.concurrent.ExecutorService) Predicates(io.debezium.function.Predicates) Iterator(java.util.Iterator) Set(java.util.Set) JdbcConnection(io.debezium.jdbc.JdbcConnection) TableId(io.debezium.relational.TableId) Collectors(java.util.stream.Collectors) SourceRecord(org.apache.kafka.connect.source.SourceRecord) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Column(io.debezium.relational.Column) ConnectException(org.apache.kafka.connect.errors.ConnectException) StatementFactory(io.debezium.jdbc.JdbcConnection.StatementFactory) Statement(java.sql.Statement) Clock(io.debezium.util.Clock) Pattern(java.util.regex.Pattern) Blob(java.sql.Blob) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Collections(java.util.Collections) Types(java.sql.Types) SQLException(java.sql.SQLException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) JdbcConnection(io.debezium.jdbc.JdbcConnection) Clock(io.debezium.util.Clock) SourceRecord(org.apache.kafka.connect.source.SourceRecord) Column(io.debezium.relational.Column) StatementFactory(io.debezium.jdbc.JdbcConnection.StatementFactory) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) ConnectException(org.apache.kafka.connect.errors.ConnectException) RecordsForTable(io.debezium.connector.mysql.RecordMakers.RecordsForTable) Table(io.debezium.relational.Table) AtomicReference(java.util.concurrent.atomic.AtomicReference) SQLException(java.sql.SQLException) ConnectException(org.apache.kafka.connect.errors.ConnectException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AtomicLong(java.util.concurrent.atomic.AtomicLong) RecordsForTable(io.debezium.connector.mysql.RecordMakers.RecordsForTable) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

RecordsForTable (io.debezium.connector.mysql.RecordMakers.RecordsForTable)1 BufferedBlockingConsumer (io.debezium.function.BufferedBlockingConsumer)1 Predicates (io.debezium.function.Predicates)1 JdbcConnection (io.debezium.jdbc.JdbcConnection)1 StatementFactory (io.debezium.jdbc.JdbcConnection.StatementFactory)1 Column (io.debezium.relational.Column)1 Table (io.debezium.relational.Table)1 TableId (io.debezium.relational.TableId)1 Clock (io.debezium.util.Clock)1 Strings (io.debezium.util.Strings)1 Threads (io.debezium.util.Threads)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 Blob (java.sql.Blob)1 Connection (java.sql.Connection)1 ResultSet (java.sql.ResultSet)1 SQLException (java.sql.SQLException)1 Statement (java.sql.Statement)1 Types (java.sql.Types)1 Duration (java.time.Duration)1 ArrayList (java.util.ArrayList)1