Example 1 with CatalogFilter

Use of org.apache.hadoop.hive.metastore.messaging.event.filters.CatalogFilter in the apache/hive project.

From the class ReplDumpTask, method incrementalDump:

private Long incrementalDump(Path dumpRoot, DumpMetaData dmd, Path cmRoot, Hive hiveDb) throws Exception {
    // get list of events matching dbPattern & tblPattern
    Long lastReplId;
    // go through each event and dump it to an event-level dump dir inside the dump root
    String validTxnList = null;
    long waitUntilTime = 0;
    long bootDumpBeginReplId = -1;
    List<String> tableList = work.replScope.includeAllTables() ? null : new ArrayList<>();
    SnapshotUtils.ReplSnapshotCount snapshotCount = null;
    // If ACID tables need bootstrapping during this incremental dump, record the open-txn timeout
    // deadline now and wait only for the remaining time, if any, later on.
    if (needBootstrapAcidTablesDuringIncrementalDump()) {
        work.setBootstrap(true);
        bootDumpBeginReplId = queryState.getConf().getLong(ReplUtils.LAST_REPL_ID_KEY, -1L);
        assert (bootDumpBeginReplId >= 0);
        LOG.info("Dump for bootstrapping ACID tables during an incremental dump for db {}", work.dbNameOrPattern);
        long timeoutInMs = HiveConf.getTimeVar(conf, HiveConf.ConfVars.REPL_BOOTSTRAP_DUMP_OPEN_TXN_TIMEOUT, TimeUnit.MILLISECONDS);
        waitUntilTime = System.currentTimeMillis() + timeoutInMs;
    }
    // TODO : instead of simply restricting by message format, we should eventually
    // move to a jdbc-driver-style registering of message format, and picking a message
    // factory per event to decode. For now, however, since all messages have the
    // same factory, restricting by message format is effectively a guard against
    // older leftover data that would cause us problems.
    String dbName = work.dbNameOrPattern;
    Database db = hiveDb.getDatabase(dbName);
    if (!HiveConf.getBoolVar(conf, REPL_DUMP_METADATA_ONLY)) {
        setReplSourceFor(hiveDb, dbName, db);
    }
    if (shouldFailover()) {
        if (!MetaStoreUtils.isDbBeingFailedOver(db)) {
            setReplFailoverEnabledAtSource(db);
        }
        fetchFailoverMetadata(hiveDb);
        assert work.getFailoverMetadata().isValidMetadata();
        work.overrideLastEventToDump(hiveDb, bootDumpBeginReplId, work.getFailoverMetadata().getFailoverEventId());
    } else {
        work.overrideLastEventToDump(hiveDb, bootDumpBeginReplId, -1);
    }
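    // Combine the filters: events must match the replication scope, belong to the default catalog,
    // and have an id within the [eventFrom, eventTo] range.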
    IMetaStoreClient.NotificationFilter evFilter = new AndFilter(new ReplEventFilter(work.replScope), new CatalogFilter(MetaStoreUtils.getDefaultCatalog(conf)), new EventBoundaryFilter(work.eventFrom, work.eventTo));
    EventUtils.MSClientNotificationFetcher evFetcher = new EventUtils.MSClientNotificationFetcher(hiveDb);
    int maxEventLimit = getMaxEventAllowed(work.maxEventLimit());
    EventUtils.NotificationEventIterator evIter = new EventUtils.NotificationEventIterator(evFetcher, work.eventFrom, maxEventLimit, evFilter);
    lastReplId = work.eventTo;
    Path ackFile = new Path(dumpRoot, ReplAck.EVENTS_DUMP.toString());
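    // If an events-dump ack file exists from an earlier attempt, resume from the last acknowledged event id.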
    long resumeFrom = Utils.fileExists(ackFile, conf) ? getResumeFrom(ackFile) : work.eventFrom;
    long estimatedNumEvents = evFetcher.getDbNotificationEventsCount(work.eventFrom, dbName, work.eventTo, maxEventLimit);
    try {
        IncrementalDumpLogger replLogger = new IncrementalDumpLogger(dbName, dumpRoot.toString(), estimatedNumEvents, work.eventFrom, work.eventTo, maxEventLimit);
        work.setReplLogger(replLogger);
        replLogger.startLog();
        Map<String, Long> metricMap = new HashMap<>();
        metricMap.put(ReplUtils.MetricName.EVENTS.name(), estimatedNumEvents);
        if (conf.getBoolVar(HiveConf.ConfVars.HIVE_REPL_FAILOVER_START)) {
            work.getMetricCollector().reportFailoverStart(getName(), metricMap, work.getFailoverMetadata());
        } else {
            work.getMetricCollector().reportStageStart(getName(), metricMap);
        }
        long dumpedCount = resumeFrom - work.eventFrom;
        if (dumpedCount > 0) {
            LOG.info("Event id {} to {} are already dumped, skipping {} events", work.eventFrom, resumeFrom, dumpedCount);
        }
        cleanFailedEventDirIfExists(dumpRoot, resumeFrom);
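        // Dump each remaining event into a directory named after its event id, updating the ack file
        // after every event so an interrupted dump can resume from there.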
        while (evIter.hasNext()) {
            NotificationEvent ev = evIter.next();
            lastReplId = ev.getEventId();
            if (ev.getEventId() <= resumeFrom) {
                continue;
            }
            // skip events on materialized views if materialized-view replication is not enabled
            if (!isMaterializedViewsReplEnabled()) {
                String tblName = ev.getTableName();
                if (tblName != null) {
                    try {
                        Table table = hiveDb.getTable(dbName, tblName);
                        if (table != null && TableType.MATERIALIZED_VIEW.equals(table.getTableType())) {
                            LOG.info("Attempt to dump materialized view : " + tblName);
                            continue;
                        }
                    } catch (InvalidTableException te) {
                        LOG.debug(te.getMessage());
                    }
                }
            }
            Path evRoot = new Path(dumpRoot, String.valueOf(lastReplId));
            dumpEvent(ev, evRoot, dumpRoot, cmRoot, hiveDb);
            Utils.writeOutput(String.valueOf(lastReplId), ackFile, conf);
        }
        replLogger.endLog(lastReplId.toString());
        LOG.info("Done dumping events, preparing to return {},{}", dumpRoot.toUri(), lastReplId);
    } finally {
        // write the dmd always irrespective of success/failure to enable checkpointing in table level replication
        long executionId = conf.getLong(Constants.SCHEDULED_QUERY_EXECUTIONID, 0L);
        dmd.setDump(DumpType.INCREMENTAL, work.eventFrom, lastReplId, cmRoot, executionId, previousReplScopeModified());
        // If repl policy is changed (oldReplScope is set), then pass the current replication policy,
        // so that REPL LOAD would drop the tables which are not included in current policy.
        dmd.setReplScope(work.replScope);
        dmd.write(true);
    }
    // Get snapshot related configurations for external data copy.
    boolean isSnapshotEnabled = conf.getBoolVar(REPL_SNAPSHOT_DIFF_FOR_EXTERNAL_TABLE_COPY);
    String snapshotPrefix = dbName.toLowerCase();
    ArrayList<String> prevSnaps = new ArrayList<>();
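    // File lists recording managed table dumps, external table data locations and, when snapshots are
    // enabled, snapshot paths; the first two later feed the data copy iterators.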
    try (FileList managedTblList = createTableFileList(dumpRoot, EximUtil.FILE_LIST, conf);
        FileList extTableFileList = createTableFileList(dumpRoot, EximUtil.FILE_LIST_EXTERNAL, conf);
        FileList snapPathFileList = isSnapshotEnabled ? createTableFileList(SnapshotUtils.getSnapshotFileListPath(dumpRoot), EximUtil.FILE_LIST_EXTERNAL_SNAPSHOT_CURRENT, conf) : null) {
        // Examine all the tables if required.
        if (shouldExamineTablesToDump() || (tableList != null)) {
            // If required, wait longer for any transactions that were open at the time of starting the ACID bootstrap.
            if (needBootstrapAcidTablesDuringIncrementalDump()) {
                assert (waitUntilTime > 0);
                validTxnList = getValidTxnListForReplDump(hiveDb, waitUntilTime);
            }
            /* When the same dump dir is resumed because of check-pointing, we need to clear the existing
               metadata. We need to rewrite the metadata as the write id list will be changed.
               We can't reuse the previous write id as it might be invalid due to compaction. */
            Path bootstrapRoot = new Path(dumpRoot, ReplUtils.INC_BOOTSTRAP_ROOT_DIR_NAME);
            Path metadataPath = new Path(bootstrapRoot, EximUtil.METADATA_PATH_NAME);
            FileSystem fs = FileSystem.get(metadataPath.toUri(), conf);
            try {
                fs.delete(metadataPath, true);
            } catch (FileNotFoundException e) {
                // no worries
            }
            Path dbRootMetadata = new Path(metadataPath, dbName);
            Path dbRootData = new Path(bootstrapRoot, EximUtil.DATA_PATH_NAME + File.separator + dbName);
            boolean dataCopyAtLoad = conf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET);
            ReplExternalTables externalTablesWriter = new ReplExternalTables(conf);
            boolean isSingleTaskForExternalDb = conf.getBoolVar(REPL_EXTERNAL_WAREHOUSE_SINGLE_COPY_TASK) && work.replScope.includeAllTables();
            HashMap<String, Boolean> singleCopyPaths = getNonTableLevelCopyPaths(db, isSingleTaskForExternalDb);
            boolean isExternalTablePresent = false;
            if (isSnapshotEnabled) {
                snapshotCount = new SnapshotUtils.ReplSnapshotCount();
                if (snapPathFileList.hasNext()) {
                    prevSnaps = getListFromFileList(snapPathFileList);
                }
            }
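            // Walk every database and table matching the replication scope: record external table data
            // locations, bootstrap ACID tables where required, and collect names for the table list.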
            for (String matchedDbName : Utils.matchesDb(hiveDb, work.dbNameOrPattern)) {
                for (String tableName : Utils.matchesTbl(hiveDb, matchedDbName, work.replScope)) {
                    try {
                        Table table = hiveDb.getTable(matchedDbName, tableName);
                        // Dump external table locations if required.
                        if (TableType.EXTERNAL_TABLE.equals(table.getTableType()) && shouldDumpExternalTableLocation(conf)) {
                            externalTablesWriter.dataLocationDump(table, extTableFileList, singleCopyPaths, !isSingleTaskForExternalDb, conf);
                            isExternalTablePresent = true;
                        }
                        // Dump the table to be bootstrapped if required.
                        if (shouldBootstrapDumpTable(table)) {
                            HiveWrapper.Tuple<Table> tableTuple = new HiveWrapper(hiveDb, matchedDbName).table(table);
                            dumpTable(matchedDbName, tableName, validTxnList, dbRootMetadata, dbRootData, bootDumpBeginReplId, hiveDb, tableTuple, managedTblList, dataCopyAtLoad);
                        }
                        if (tableList != null && isTableSatifiesConfig(table)) {
                            tableList.add(tableName);
                        }
                    } catch (InvalidTableException te) {
                        // Repl dump shouldn't fail if the table is dropped/renamed while dumping it.
                        // Just log a debug message and skip it.
                        LOG.debug(te.getMessage());
                    }
                }
                // If a single copy task covers the database's external tables, dump the non-table-level
                // copy paths, i.e. the database default location and the paths configured.
                if (isExternalTablePresent && shouldDumpExternalTableLocation(conf) && isSingleTaskForExternalDb) {
                    externalTablesWriter.dumpNonTableLevelCopyPaths(singleCopyPaths, extTableFileList, conf, isSnapshotEnabled, snapshotPrefix, snapshotCount, snapPathFileList, prevSnaps, false);
                }
            }
            dumpTableListToDumpLocation(tableList, dumpRoot, dbName, conf);
        }
        setDataCopyIterators(extTableFileList, managedTblList);
        work.getMetricCollector().reportStageEnd(getName(), Status.SUCCESS, lastReplId, snapshotCount, null);
        // Clean-up snapshots
        if (isSnapshotEnabled) {
            cleanupSnapshots(SnapshotUtils.getSnapshotFileListPath(dumpRoot), work.dbNameOrPattern.toLowerCase(), conf, snapshotCount, false);
        }
        return lastReplId;
    }
}
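
The part of this example that actually involves CatalogFilter is the notification-filter wiring. Below is a minimal sketch of that wiring in isolation; it is not taken from the Hive source, and it assumes a Hive client hiveDb, a HiveConf conf, a ReplScope replScope, event id bounds eventFrom/eventTo and an event limit maxEventLimit are already in scope, along with the relevant filter, EventUtils and MetaStoreUtils imports.

IMetaStoreClient.NotificationFilter evFilter = new AndFilter(
    // keep only events that fall inside the replication scope (db/table patterns)
    new ReplEventFilter(replScope),
    // keep only events raised in the default catalog
    new CatalogFilter(MetaStoreUtils.getDefaultCatalog(conf)),
    // keep only events whose id lies in [eventFrom, eventTo]
    new EventBoundaryFilter(eventFrom, eventTo));

EventUtils.MSClientNotificationFetcher evFetcher = new EventUtils.MSClientNotificationFetcher(hiveDb);
EventUtils.NotificationEventIterator evIter =
    new EventUtils.NotificationEventIterator(evFetcher, eventFrom, maxEventLimit, evFilter);

while (evIter.hasNext()) {
    NotificationEvent ev = evIter.next();
    // ReplDumpTask dumps each such event into a directory named after ev.getEventId()
}

Since the iterator applies the filter to each fetched notification, composing the three filters with AndFilter narrows what gets dumped without changing how events are fetched.
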
Also used:
EventBoundaryFilter (org.apache.hadoop.hive.metastore.messaging.event.filters.EventBoundaryFilter)
LinkedHashMap (java.util.LinkedHashMap)
HashMap (java.util.HashMap)
HiveWrapper (org.apache.hadoop.hive.ql.parse.repl.dump.HiveWrapper)
ArrayList (java.util.ArrayList)
FileNotFoundException (java.io.FileNotFoundException)
ReplChangeManager.getReplPolicyIdString (org.apache.hadoop.hive.metastore.ReplChangeManager.getReplPolicyIdString)
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient)
SnapshotUtils (org.apache.hadoop.hive.ql.exec.repl.util.SnapshotUtils)
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException)
FileSystem (org.apache.hadoop.fs.FileSystem)
OptimisedBootstrapUtils.getReplEventIdFromDatabase (org.apache.hadoop.hive.ql.exec.repl.OptimisedBootstrapUtils.getReplEventIdFromDatabase)
Database (org.apache.hadoop.hive.metastore.api.Database)
Path (org.apache.hadoop.fs.Path)
Table (org.apache.hadoop.hive.ql.metadata.Table)
FileList (org.apache.hadoop.hive.ql.exec.repl.util.FileList)
SnapshotUtils.getListFromFileList (org.apache.hadoop.hive.ql.exec.repl.util.SnapshotUtils.getListFromFileList)
EventUtils (org.apache.hadoop.hive.ql.metadata.events.EventUtils)
NotificationEvent (org.apache.hadoop.hive.metastore.api.NotificationEvent)
ReplEventFilter (org.apache.hadoop.hive.metastore.messaging.event.filters.ReplEventFilter)
AndFilter (org.apache.hadoop.hive.metastore.messaging.event.filters.AndFilter)
CatalogFilter (org.apache.hadoop.hive.metastore.messaging.event.filters.CatalogFilter)
IncrementalDumpLogger (org.apache.hadoop.hive.ql.parse.repl.dump.log.IncrementalDumpLogger)
