Search in sources :

Example 11 with ReplicationSpec

Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

The class ReplDumpWork, method managedTableCopyTasks.

public List<Task<?>> managedTableCopyTasks(TaskTracker tracker, HiveConf conf) throws IOException {
    if (conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_SKIP_IMMUTABLE_DATA_COPY)) {
        return Collections.emptyList();
    }
    List<Task<?>> tasks = new ArrayList<>();
    Retryable retryable = Retryable.builder().withHiveConf(conf).withRetryOnException(UncheckedIOException.class).build();
    try {
        retryable.executeCallable((Callable<Void>) () -> {
            try {
                int numEntriesToSkip = tasks == null ? 0 : tasks.size();
                while (managedTblCopyPathIterator.hasNext() && tracker.canAddMoreTasks()) {
                    if (numEntriesToSkip > 0) {
                        // skip tasks added in previous attempts of this retryable block
                        managedTblCopyPathIterator.next();
                        numEntriesToSkip--;
                        continue;
                    }
                    ReplicationSpec replSpec = new ReplicationSpec();
                    replSpec.setIsReplace(true);
                    replSpec.setInReplicationScope(true);
                    EximUtil.DataCopyPath managedTableCopyPath = new EximUtil.DataCopyPath(replSpec);
                    managedTableCopyPath.loadFromString(managedTblCopyPathIterator.next());
                    // If it's incremental, in the checkpointing case the dump dir may already exist. We will delete the event dir.
                    // In case of bootstrap checkpointing we will not delete the entire dir, just do a sync.
                    Task<?> copyTask = ReplCopyTask.getDumpCopyTask(managedTableCopyPath.getReplicationSpec(), managedTableCopyPath.getSrcPath(), managedTableCopyPath.getTargetPath(), conf, false, shouldOverwrite, !isBootstrap(), getCurrentDumpPath().toString(), getMetricCollector());
                    tasks.add(copyTask);
                    tracker.addTask(copyTask);
                    LOG.debug("added task for {}", managedTableCopyPath);
                }
            } catch (UncheckedIOException e) {
                LOG.error("Reading entry for data copy failed for managed tables, attempting retry.", e);
                throw e;
            }
            return null;
        });
    } catch (Exception e) {
        throw new IOException(ErrorMsg.REPL_RETRY_EXHAUSTED.format(e.getMessage()));
    }
    return tasks;
}
Also used : ReplCopyTask(org.apache.hadoop.hive.ql.exec.ReplCopyTask) Task(org.apache.hadoop.hive.ql.exec.Task) ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) Retryable(org.apache.hadoop.hive.ql.exec.util.Retryable) ArrayList(java.util.ArrayList) UncheckedIOException(java.io.UncheckedIOException) EximUtil(org.apache.hadoop.hive.ql.parse.EximUtil) IOException(java.io.IOException)
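
The interesting detail in this example is how the retry block stays idempotent: tasks already created by a failed attempt are not recreated, because each new attempt first skips tasks.size() entries of the iterator. Below is a minimal standalone sketch of the same resume-on-retry pattern; the class and method names are illustrative and not part of Hive's Retryable API.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

// A minimal sketch of the resume-on-retry pattern, assuming the source can be
// re-iterated from the beginning on each attempt. Names are illustrative.
public class ResumableRetry {

    public static List<String> consumeWithRetry(Iterable<String> source, int maxAttempts) {
        List<String> results = new ArrayList<>();
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
            try {
                // Entries turned into results by earlier attempts are skipped,
                // exactly like numEntriesToSkip = tasks.size() above.
                int toSkip = results.size();
                Iterator<String> it = source.iterator();
                while (it.hasNext()) {
                    String entry = it.next();
                    if (toSkip > 0) {
                        toSkip--;
                        continue;
                    }
                    results.add(process(entry));
                }
                return results;
            } catch (RuntimeException e) {
                if (attempt == maxAttempts) {
                    throw e; // retries exhausted, surface the failure
                }
                // otherwise fall through and retry, resuming where we left off
            }
        }
        return results;
    }

    private static String process(String entry) {
        return entry.trim(); // stand-in for building a copy task
    }
}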

Example 12 with ReplicationSpec

Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

The class ReplLoadTask, method executeIncrementalLoad.

private int executeIncrementalLoad(long loadStartTime) throws Exception {
    // If the replication policy was modified, drop the tables that are excluded in the new replication policy.
    if (work.replScopeModified) {
        dropTablesExcludedInReplScope(work.currentReplScope);
    }
    Database targetDb = getHive().getDatabase(work.dbNameToLoadIn);
    Map<String, String> props = new HashMap<>();
    // Check if it is an optimised bootstrap failover.
    if (work.isFirstFailover) {
        // The database must not already be marked as a target of replication, and it must have been a source of replication.
        if (MetaStoreUtils.isTargetOfReplication(targetDb)) {
            LOG.error("The database {} is already marked as target for replication", targetDb.getName());
            throw new Exception("Failover target is already marked as target");
        }
        if (!ReplChangeManager.isSourceOfReplication(targetDb)) {
            LOG.error("The database {} is already source of replication.", targetDb.getName());
            throw new Exception("Failover target was not source of replication");
        }
        boolean isTableDiffPresent = checkFileExists(new Path(work.dumpDirectory).getParent(), conf, TABLE_DIFF_COMPLETE_DIRECTORY);
        Long eventId = Long.parseLong(getEventIdFromFile(new Path(work.dumpDirectory).getParent(), conf)[0]);
        if (!isTableDiffPresent) {
            prepareTableDiffFile(eventId, getHive(), work, conf);
            if (this.childTasks == null) {
                this.childTasks = new ArrayList<>();
            }
            createReplLoadCompleteAckTask();
            return 0;
        }
    } else if (work.isSecondFailover) {
        // DROP the tables to be bootstrapped.
        Hive db = getHive();
        for (String table : work.tablesToBootstrap) {
            db.dropTable(work.dbNameToLoadIn + "." + table, true);
        }
    }
    if (!MetaStoreUtils.isTargetOfReplication(targetDb)) {
        props.put(ReplConst.TARGET_OF_REPLICATION, ReplConst.TRUE);
    }
    if (!work.shouldFailover() && MetaStoreUtils.isDbBeingFailedOver(targetDb)) {
        props.put(ReplConst.REPL_FAILOVER_ENDPOINT, "");
    }
    if (!props.isEmpty()) {
        AlterDatabaseSetPropertiesDesc setTargetDesc = new AlterDatabaseSetPropertiesDesc(work.dbNameToLoadIn, props, null);
        Task<?> addReplTargetPropTask = TaskFactory.get(new DDLWork(new HashSet<>(), new HashSet<>(), setTargetDesc, true, work.dumpDirectory, work.getMetricCollector()), conf);
        if (this.childTasks == null) {
            this.childTasks = new ArrayList<>();
        }
        this.childTasks.add(addReplTargetPropTask);
    }
    IncrementalLoadTasksBuilder builder = work.incrementalLoadTasksBuilder();
    // If incremental events are already applied, then check and perform if need to bootstrap any tables.
    if (!builder.hasMoreWork() && work.isLastReplIDUpdated()) {
        if (work.hasBootstrapLoadTasks()) {
            LOG.debug("Current incremental dump have tables to be bootstrapped. Switching to bootstrap " + "mode after applying all events.");
            return executeBootStrapLoad();
        }
    }
    List<Task<?>> childTasks = new ArrayList<>();
    int maxTasks = conf.getIntVar(HiveConf.ConfVars.REPL_APPROX_MAX_LOAD_TASKS);
    TaskTracker tracker = new TaskTracker(maxTasks);
    addLazyDataCopyTask(tracker, builder.getReplLogger());
    childTasks.add(builder.build(context, getHive(), LOG, tracker));
    // If all the events are applied, update the last repl ID of the database so that the next
    // incremental cycle won't consider the events in this dump again if it starts from this id.
    if (!builder.hasMoreWork()) {
        // The name of the database to be loaded into is either specified directly in the REPL LOAD
        // command, i.e. when dbNameToLoadIn has a valid dbname, or is available through the dump
        // metadata during table-level replication.
        String dbName = work.dbNameToLoadIn;
        if (dbName == null || StringUtils.isBlank(dbName)) {
            if (work.currentReplScope != null) {
                String replScopeDbName = work.currentReplScope.getDbName();
                if (replScopeDbName != null && !"*".equals(replScopeDbName)) {
                    dbName = replScopeDbName;
                }
            }
        }
        // update repl id in all those databases.
        if (StringUtils.isNotBlank(dbName)) {
            String lastEventid = builder.eventTo().toString();
            Map<String, String> mapProp = new HashMap<>();
            mapProp.put(ReplicationSpec.KEY.CURR_STATE_ID_SOURCE.toString(), lastEventid);
            AlterDatabaseSetPropertiesDesc alterDbDesc = new AlterDatabaseSetPropertiesDesc(dbName, mapProp, new ReplicationSpec(lastEventid, lastEventid));
            Task<?> updateReplIdTask = TaskFactory.get(new DDLWork(new HashSet<>(), new HashSet<>(), alterDbDesc, true, (new Path(work.dumpDirectory).getParent()).toString(), work.getMetricCollector()), conf);
            DAGTraversal.traverse(childTasks, new AddDependencyToLeaves(updateReplIdTask));
            work.setLastReplIDUpdated(true);
            LOG.debug("Added task to set last repl id of db " + dbName + " to " + lastEventid);
        }
    }
    // Once all the incremental events are applied, enable bootstrap of tables if any exist.
    if (builder.hasMoreWork() || work.hasBootstrapLoadTasks()) {
        DAGTraversal.traverse(childTasks, new AddDependencyToLeaves(TaskFactory.get(work, conf)));
    }
    if (this.childTasks == null) {
        this.childTasks = new ArrayList<>();
    }
    this.childTasks.addAll(childTasks);
    createReplLoadCompleteAckTask();
    // Clean-up snapshots
    if (conf.getBoolVar(REPL_SNAPSHOT_DIFF_FOR_EXTERNAL_TABLE_COPY)) {
        cleanupSnapshots(new Path(work.getDumpDirectory()).getParent().getParent().getParent(), work.getSourceDbName().toLowerCase(), conf, null, true);
    }
    // pass the current time at the end of repl-load stage as the starting time of the first event.
    long currentTimestamp = System.currentTimeMillis();
    ((IncrementalLoadLogger) work.incrementalLoadTasksBuilder().getReplLogger()).initiateEventTimestamp(currentTimestamp);
    LOG.info("REPL_INCREMENTAL_LOAD stage duration : {} ms", currentTimestamp - loadStartTime);
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) TaskTracker(org.apache.hadoop.hive.ql.exec.repl.util.TaskTracker) Task(org.apache.hadoop.hive.ql.exec.Task) ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) TException(org.apache.thrift.TException) IOException(java.io.IOException) LoadConstraint(org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.LoadConstraint) IncrementalLoadLogger(org.apache.hadoop.hive.ql.parse.repl.load.log.IncrementalLoadLogger) Hive(org.apache.hadoop.hive.ql.metadata.Hive) IncrementalLoadTasksBuilder(org.apache.hadoop.hive.ql.exec.repl.incremental.IncrementalLoadTasksBuilder) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) LoadDatabase(org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.LoadDatabase) Database(org.apache.hadoop.hive.metastore.api.Database) AlterDatabase(org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.LoadDatabase.AlterDatabase) AlterDatabaseSetPropertiesDesc(org.apache.hadoop.hive.ql.ddl.database.alter.poperties.AlterDatabaseSetPropertiesDesc) HashSet(java.util.HashSet) AddDependencyToLeaves(org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves)
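
The DAGTraversal.traverse(childTasks, new AddDependencyToLeaves(updateReplIdTask)) call above is what defers the repl-ID update until every load task has finished: the update task is attached as a dependent of each leaf of the task DAG. A minimal sketch of that idea follows; the Node type and helper are illustrative stand-ins, not Hive's Task or DAGTraversal API.

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch: attach a "finalizer" node after every leaf of a task DAG,
// mirroring what DAGTraversal + AddDependencyToLeaves do for Hive repl tasks.
class Node {
    final String name;
    final List<Node> children = new ArrayList<>();
    Node(String name) { this.name = name; }
}

public class LeafDependency {

    // Depth-first walk; the finalizer becomes a child of every current leaf,
    // so it cannot start until all original work has completed.
    static void addDependencyToLeaves(List<Node> roots, Node finalizer) {
        for (Node n : roots) {
            if (n.children.isEmpty()) {
                n.children.add(finalizer);
            } else {
                addDependencyToLeaves(n.children, finalizer);
            }
        }
    }

    public static void main(String[] args) {
        Node load = new Node("loadEvents");
        Node copy = new Node("copyData");
        load.children.add(copy);
        List<Node> roots = new ArrayList<>(List.of(load));
        addDependencyToLeaves(roots, new Node("updateReplId"));
        System.out.println(copy.children.get(0).name); // updateReplId
    }
}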

Example 13 with ReplicationSpec

Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

The class ReplDumpTask, method getNewEventOnlyReplicationSpec.

private ReplicationSpec getNewEventOnlyReplicationSpec(Long eventId) {
    ReplicationSpec rspec = getNewReplicationSpec(eventId.toString(), eventId.toString(), conf.getBoolean(REPL_DUMP_METADATA_ONLY.varname, false));
    rspec.setReplSpecType(ReplicationSpec.Type.INCREMENTAL_DUMP);
    return rspec;
}
Also used : ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec)
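
Here the same event id is passed as both the start and the end of the replicated state, so the spec covers exactly one event of an incremental dump. A small sketch of that single-event bounding rule, using an illustrative record rather than Hive's ReplicationSpec:

// Illustrative sketch, not Hive's ReplicationSpec API: a range whose from and
// to ids are equal covers exactly one event.
record EventRange(long fromId, long toId) {
    boolean covers(long eventId) {
        return eventId >= fromId && eventId <= toId;
    }

    static EventRange singleEvent(long eventId) {
        return new EventRange(eventId, eventId); // mirrors (eventId, eventId) above
    }
}

class EventRangeDemo {
    public static void main(String[] args) {
        EventRange range = EventRange.singleEvent(42L);
        System.out.println(range.covers(42L)); // true
        System.out.println(range.covers(43L)); // false
    }
}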

Example 14 with ReplicationSpec

Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

The class IncrementalLoadTasksBuilder, method dbUpdateReplStateTask.

private Task<?> dbUpdateReplStateTask(String dbName, String replState, Task<?> preCursor) {
    HashMap<String, String> mapProp = new HashMap<>();
    mapProp.put(ReplicationSpec.KEY.CURR_STATE_ID_SOURCE.toString(), replState);
    AlterDatabaseSetPropertiesDesc alterDbDesc = new AlterDatabaseSetPropertiesDesc(dbName, mapProp, new ReplicationSpec(replState, replState));
    Task<?> updateReplIdTask = TaskFactory.get(new DDLWork(inputs, outputs, alterDbDesc, true, dumpDirectory, metricCollector), conf);
    // Link the update repl state task with dependency collection task
    if (preCursor != null) {
        preCursor.addDependentTask(updateReplIdTask);
        log.debug("Added {}:{} as a precursor of {}:{}", preCursor.getClass(), preCursor.getId(), updateReplIdTask.getClass(), updateReplIdTask.getId());
    }
    return updateReplIdTask;
}
Also used : ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) HashMap(java.util.HashMap) AlterDatabaseSetPropertiesDesc(org.apache.hadoop.hive.ql.ddl.database.alter.poperties.AlterDatabaseSetPropertiesDesc)
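
A database property that tracks replication state is only meaningful if it advances monotonically: a load should never replace a newer state id with an older one. The guard below sketches that forward-only rule under the assumption that state ids are numeric event ids; it is an illustration, not Hive's implementation.

// Illustrative sketch: the replication state id should only move forward.
// Hive performs a comparable check when deciding whether an update may be
// applied on top of the current state; this standalone helper is not its API.
final class ReplStateGuard {
    private ReplStateGuard() {}

    static boolean shouldUpdate(String currentStateId, String newStateId) {
        if (currentStateId == null || currentStateId.isEmpty()) {
            return true; // no state recorded yet, any id is an advance
        }
        return Long.parseLong(newStateId) > Long.parseLong(currentStateId);
    }

    public static void main(String[] args) {
        System.out.println(shouldUpdate("100", "150")); // true: moves forward
        System.out.println(shouldUpdate("150", "100")); // false: would move backward
    }
}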

Example 15 with ReplicationSpec

Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

The class InsertHandler, method handle.

@Override
public List<Task<?>> handle(Context withinContext) throws SemanticException {
    try {
        FileSystem fs = FileSystem.get(new Path(withinContext.location).toUri(), withinContext.hiveConf);
        MetaData metaData = EximUtil.readMetaData(fs, new Path(withinContext.location, EximUtil.METADATA_NAME));
        ReplicationSpec replicationSpec = metaData.getReplicationSpec();
        if (replicationSpec.isNoop()) {
            return Collections.emptyList();
        }
    } catch (Exception e) {
        LOG.error("failed to load insert event", e);
        throw new SemanticException(e);
    }
    InsertMessage insertMessage = deserializer.getInsertMessage(withinContext.dmd.getPayload());
    String actualDbName = withinContext.isDbNameEmpty() ? insertMessage.getDB() : withinContext.dbName;
    Context currentContext = new Context(withinContext, actualDbName, withinContext.getDumpDirectory(), withinContext.getMetricCollector());
    // Piggybacking on the Import logic for now
    TableHandler tableHandler = new TableHandler();
    List<Task<?>> tasks = tableHandler.handle(currentContext);
    readEntitySet.addAll(tableHandler.readEntities());
    writeEntitySet.addAll(tableHandler.writeEntities());
    getUpdatedMetadata().copyUpdatedMetadata(tableHandler.getUpdatedMetadata());
    return tasks;
}
Also used : Path(org.apache.hadoop.fs.Path) ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) Task(org.apache.hadoop.hive.ql.exec.Task) InsertMessage(org.apache.hadoop.hive.metastore.messaging.InsertMessage) MetaData(org.apache.hadoop.hive.ql.parse.repl.load.MetaData) FileSystem(org.apache.hadoop.fs.FileSystem) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
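
Note the guard-first shape of the handler: it reads the dump metadata and returns an empty task list as soon as the replication spec marks the event as a no-op, before any import work is set up. A self-contained sketch of that pattern, with stand-in types rather than Hive's classes:

import java.util.Collections;
import java.util.List;

// Illustrative sketch: read event metadata first and short-circuit no-op events
// before any task-building work happens. Metadata and EventTask are stand-ins,
// not Hive classes.
class Metadata {
    private final boolean noop;
    Metadata(boolean noop) { this.noop = noop; }
    boolean isNoop() { return noop; }
}

class EventTask {
    final String description;
    EventTask(String description) { this.description = description; }
}

public class NoopGuard {
    static List<EventTask> handle(Metadata metadata) {
        if (metadata.isNoop()) {
            // Nothing to replicate for this event; return an empty task list
            // instead of delegating to the (expensive) table-import path.
            return Collections.emptyList();
        }
        return List.of(new EventTask("import table data for insert event"));
    }

    public static void main(String[] args) {
        System.out.println(handle(new Metadata(true)).size());  // 0
        System.out.println(handle(new Metadata(false)).size()); // 1
    }
}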

Aggregations

ReplicationSpec (org.apache.hadoop.hive.ql.parse.ReplicationSpec) 24
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 11
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException) 9
Table (org.apache.hadoop.hive.ql.metadata.Table) 8
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork) 7
Database (org.apache.hadoop.hive.metastore.api.Database) 6
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity) 6
ArrayList (java.util.ArrayList) 5
Partition (org.apache.hadoop.hive.ql.metadata.Partition) 5
IOException (java.io.IOException) 4
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity) 4
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException) 4
Path (org.apache.hadoop.fs.Path) 3
TableName (org.apache.hadoop.hive.common.TableName) 3
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException) 3
Task (org.apache.hadoop.hive.ql.exec.Task) 3
FileNotFoundException (java.io.FileNotFoundException) 2
HashMap (java.util.HashMap) 2
LinkedHashMap (java.util.LinkedHashMap) 2
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 2