Example 1 with ReplTxnWork

Use of org.apache.hadoop.hive.ql.plan.ReplTxnWork in project hive by apache.

From the class OpenTxnHandler, method handle:

@Override
public List<Task<?>> handle(Context context) throws SemanticException {
    if (!AcidUtils.isAcidEnabled(context.hiveConf)) {
        context.log.error("Cannot load transaction events as acid is not enabled");
        throw new SemanticException("Cannot load transaction events as acid is not enabled");
    }
    OpenTxnMessage msg = deserializer.getOpenTxnMessage(context.dmd.getPayload());
    Task<ReplTxnWork> openTxnTask = TaskFactory.get(
        new ReplTxnWork(HiveUtils.getReplPolicy(context.dbName), context.dbName, null, msg.getTxnIds(),
            ReplTxnWork.OperationType.REPL_OPEN_TXN, context.eventOnlyReplicationSpec(),
            context.getDumpDirectory(), context.getMetricCollector()),
        context.hiveConf);
    // Update the replication metadata only for a database-level load; if this
    // event gets executed again, the operation is idempotent and is taken care of.
    if (!context.isDbNameEmpty()) {
        updatedMetadata.set(context.dmd.getEventTo().toString(), context.dbName, null, null);
    }
    context.log.debug("Added Open txn task : {}", openTxnTask.getId());
    return Collections.singletonList(openTxnTask);
}
Also used: OpenTxnMessage (org.apache.hadoop.hive.metastore.messaging.OpenTxnMessage), ReplTxnWork (org.apache.hadoop.hive.ql.plan.ReplTxnWork), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
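
Every transaction handler in this listing opens with the same guard: refuse to load the event unless ACID is enabled on the target. A minimal sketch of that guard factored into a helper, using only the calls visible above and assuming context.log is an org.slf4j.Logger (the class AcidGuard and method requireAcid are hypothetical, not part of Hive):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.slf4j.Logger;

final class AcidGuard {
    private AcidGuard() {
    }

    // Hypothetical helper distilled from the handlers in this listing: log and
    // fail fast when ACID is not enabled on the target warehouse.
    static void requireAcid(HiveConf conf, Logger log, String eventKind) throws SemanticException {
        if (!AcidUtils.isAcidEnabled(conf)) {
            String message = "Cannot load " + eventKind + " as acid is not enabled";
            log.error(message);
            throw new SemanticException(message);
        }
    }
}

With such a helper, each handle method would begin with requireAcid(context.hiveConf, context.log, "transaction events") instead of repeating the three-line guard.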

Example 2 with ReplTxnWork

Use of org.apache.hadoop.hive.ql.plan.ReplTxnWork in project hive by apache.

From the class AllocWriteIdHandler, method handle:

@Override
public List<Task<?>> handle(Context context) throws SemanticException {
    if (!AcidUtils.isAcidEnabled(context.hiveConf)) {
        context.log.error("Cannot load alloc write id event as acid is not enabled");
        throw new SemanticException("Cannot load alloc write id event as acid is not enabled");
    }
    AllocWriteIdMessage msg = deserializer.getAllocWriteIdMessage(context.dmd.getPayload());
    String dbName = (context.dbName != null && !context.dbName.isEmpty() ? context.dbName : msg.getDB());
    // The table name is needed for alloc write id and is taken from the source message.
    String tableName = msg.getTableName();
    // The repl policy is derived from the database name in the context, not from the source message.
    ReplTxnWork work = new ReplTxnWork(HiveUtils.getReplPolicy(context.dbName), dbName, tableName,
        ReplTxnWork.OperationType.REPL_ALLOC_WRITE_ID, msg.getTxnToWriteIdList(),
        context.eventOnlyReplicationSpec(), context.getDumpDirectory(), context.getMetricCollector());
    Task<?> allocWriteIdTask = TaskFactory.get(work, context.hiveConf);
    context.log.info("Added alloc write id task : {}", allocWriteIdTask.getId());
    updatedMetadata.set(context.dmd.getEventTo().toString(), dbName, tableName, null);
    return Collections.singletonList(allocWriteIdTask);
}
Also used: ReplTxnWork (org.apache.hadoop.hive.ql.plan.ReplTxnWork), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), AllocWriteIdMessage (org.apache.hadoop.hive.metastore.messaging.AllocWriteIdMessage)
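
The alloc write id message carries the txn-to-write-id mappings created on the source. A hedged sketch of inspecting that list, reusing msg and context from the example above and assuming the metastore thrift struct TxnToWriteId with getTxnId()/getWriteId() accessors:

import org.apache.hadoop.hive.metastore.api.TxnToWriteId;

// Illustrative only: log each source txn id and the write id that was
// allocated for it, as carried by the alloc write id message.
for (TxnToWriteId txnToWriteId : msg.getTxnToWriteIdList()) {
    context.log.debug("txn {} mapped to write id {}", txnToWriteId.getTxnId(), txnToWriteId.getWriteId());
}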

Example 3 with ReplTxnWork

Use of org.apache.hadoop.hive.ql.plan.ReplTxnWork in project hive by apache.

From the class CommitTxnHandler, method handle:

@Override
public List<Task<?>> handle(Context context) throws SemanticException {
    if (!AcidUtils.isAcidEnabled(context.hiveConf)) {
        context.log.error("Cannot load transaction events as acid is not enabled");
        throw new SemanticException("Cannot load transaction events as acid is not enabled");
    }
    CommitTxnMessage msg = deserializer.getCommitTxnMessage(context.dmd.getPayload());
    int numEntry = (msg.getTables() == null ? 0 : msg.getTables().size());
    List<Task<?>> tasks = new ArrayList<>();
    String dbName = context.dbName;
    String tableNamePrev = null;
    String tblName = null;
    ReplTxnWork work = new ReplTxnWork(HiveUtils.getReplPolicy(context.dbName), context.dbName, null,
        msg.getTxnId(), ReplTxnWork.OperationType.REPL_COMMIT_TXN, context.eventOnlyReplicationSpec(),
        context.getDumpDirectory(), context.getMetricCollector());
    if (numEntry > 0) {
        context.log.debug("Commit txn handler for txnid " + msg.getTxnId() + " databases : " + msg.getDatabases() + " tables : " + msg.getTables() + " partitions : " + msg.getPartitions() + " files : " + msg.getFilesList() + " write ids : " + msg.getWriteIds());
    }
    for (int idx = 0; idx < numEntry; idx++) {
        String actualTblName = msg.getTables().get(idx);
        String actualDBName = msg.getDatabases().get(idx);
        String completeName = Table.getCompleteName(actualDBName, actualTblName);
        // Write events for the same table arrive grouped together in the commit txn
        // message, so a new import task is needed only when the table changes.
        if (tableNamePrev == null || !(completeName.equals(tableNamePrev))) {
            // The data location is created by source, so the location should be formed based on the table name in msg.
            Path location = HiveUtils.getDumpPath(new Path(context.location), actualDBName, actualTblName);
            tblName = actualTblName;
            // for warehouse level dump, use db name from write event
            dbName = (context.isDbNameEmpty() ? actualDBName : context.dbName);
            Context currentContext = new Context(context, dbName, context.getDumpDirectory(), context.getMetricCollector());
            currentContext.setLocation(location.toUri().toString());
            // Piggybacking on the Import logic for now.
            TableHandler tableHandler = new TableHandler();
            tasks.addAll((tableHandler.handle(currentContext)));
            readEntitySet.addAll(tableHandler.readEntities());
            writeEntitySet.addAll(tableHandler.writeEntities());
            getUpdatedMetadata().copyUpdatedMetadata(tableHandler.getUpdatedMetadata());
            tableNamePrev = completeName;
        }
        try {
            WriteEventInfo writeEventInfo = new WriteEventInfo(msg.getWriteIds().get(idx), dbName, tblName, msg.getFiles(idx));
            if (msg.getPartitions().get(idx) != null && !msg.getPartitions().get(idx).isEmpty()) {
                writeEventInfo.setPartition(msg.getPartitions().get(idx));
            }
            work.addWriteEventInfo(writeEventInfo);
        } catch (Exception e) {
            throw new SemanticException("Failed to extract write event info from commit txn message : " + e.getMessage());
        }
    }
    Task<ReplTxnWork> commitTxnTask = TaskFactory.get(work, context.hiveConf);
    // Update the replication metadata only for a database-level load; if this
    // event gets executed again, the operation is idempotent and is taken care of.
    if (!context.isDbNameEmpty()) {
        updatedMetadata.set(context.dmd.getEventTo().toString(), context.dbName, null, null);
    }
    context.log.debug("Added Commit txn task : {}", commitTxnTask.getId());
    if (tasks.isEmpty()) {
        // No per-table import tasks were created; return only the commit txn task,
        // which will be used for setting the last repl id.
        return Collections.singletonList(commitTxnTask);
    }
    DAGTraversal.traverse(tasks, new AddDependencyToLeaves(commitTxnTask));
    return tasks;
}
Also used: Path (org.apache.hadoop.fs.Path), Task (org.apache.hadoop.hive.ql.exec.Task), ArrayList (java.util.ArrayList), CommitTxnMessage (org.apache.hadoop.hive.metastore.messaging.CommitTxnMessage), WriteEventInfo (org.apache.hadoop.hive.metastore.api.WriteEventInfo), ReplTxnWork (org.apache.hadoop.hive.ql.plan.ReplTxnWork), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), AddDependencyToLeaves (org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves)
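
The commit task must run only after every per-table import task created in the loop above; the handler expresses this by hanging it below every leaf of the task DAG. A minimal sketch of that wiring as a standalone helper, assuming DAGTraversal lives at org.apache.hadoop.hive.ql.exec.util.DAGTraversal as on the Hive repl load path (the helper method commitAfterImports is hypothetical):

import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves;
import org.apache.hadoop.hive.ql.exec.util.DAGTraversal;

// Hypothetical helper mirroring the end of CommitTxnHandler#handle: every leaf
// of the import-task DAG gets the commit task as a dependent, so the commit
// executes only after all per-table imports have finished.
static List<Task<?>> commitAfterImports(List<Task<?>> importTasks, Task<?> commitTxnTask) {
    if (importTasks.isEmpty()) {
        // Nothing was imported; the commit task alone carries the event forward.
        return Collections.singletonList(commitTxnTask);
    }
    DAGTraversal.traverse(importTasks, new AddDependencyToLeaves(commitTxnTask));
    return importTasks;
}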

Example 4 with ReplTxnWork

Use of org.apache.hadoop.hive.ql.plan.ReplTxnWork in project hive by apache.

From the class AbortTxnHandler, method handle:

@Override
public List<Task<?>> handle(Context context) throws SemanticException {
    if (!AcidUtils.isAcidEnabled(context.hiveConf)) {
        context.log.error("Cannot load transaction events as acid is not enabled");
        throw new SemanticException("Cannot load transaction events as acid is not enabled");
    }
    AbortTxnMessage msg = deserializer.getAbortTxnMessage(context.dmd.getPayload());
    Task<ReplTxnWork> abortTxnTask = TaskFactory.get(
        new ReplTxnWork(HiveUtils.getReplPolicy(context.dbName), context.dbName, null, msg.getTxnId(),
            ReplTxnWork.OperationType.REPL_ABORT_TXN, context.eventOnlyReplicationSpec(),
            context.getDumpDirectory(), context.getMetricCollector()),
        context.hiveConf);
    // Update the replication metadata only for a database-level load; if this
    // event gets executed again, the operation is idempotent and is taken care of.
    if (!context.isDbNameEmpty()) {
        updatedMetadata.set(context.dmd.getEventTo().toString(), context.dbName, null, null);
    }
    context.log.debug("Added Abort txn task : {}", abortTxnTask.getId());
    return Collections.singletonList(abortTxnTask);
}
Also used: AbortTxnMessage (org.apache.hadoop.hive.metastore.messaging.AbortTxnMessage), ReplTxnWork (org.apache.hadoop.hive.ql.plan.ReplTxnWork), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
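
None of these handlers is invoked directly; the incremental load path selects one per event from the event's dump type. A hedged sketch of that dispatch, assuming the DumpType enum on the repl load path exposes a handler() factory that returns the matching MessageHandler (e.g. an AbortTxnHandler for an abort txn event):

// Illustrative dispatch: context.dmd is the event's dump metadata, and its
// dump type selects the handler that builds the replay tasks.
MessageHandler handler = context.dmd.getDumpType().handler();
List<Task<?>> tasks = handler.handle(context);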

Example 5 with ReplTxnWork

Use of org.apache.hadoop.hive.ql.plan.ReplTxnWork in project hive by apache.

From the class LoadTable, method newTableTasks:

private void newTableTasks(ImportTableDesc tblDesc, Task<?> tblRootTask, TableLocationTuple tuple) throws Exception {
    Table table = tblDesc.toTable(context.hiveConf);
    ReplicationSpec replicationSpec = event.replicationSpec();
    if (!tblDesc.isExternal()) {
        tblDesc.setLocation(null);
    }
    Task<?> createTableTask = tblDesc.getCreateTableTask(new HashSet<>(), new HashSet<>(), context.hiveConf,
        true, (new Path(context.dumpDirectory)).getParent().toString(), metricCollector, true);
    if (tblRootTask == null) {
        tblRootTask = createTableTask;
    } else {
        tblRootTask.addDependentTask(createTableTask);
    }
    if (replicationSpec.isMetadataOnly()) {
        tracker.addTask(tblRootTask);
        return;
    }
    Task<?> parentTask = createTableTask;
    if (replicationSpec.isTransactionalTableDump()) {
        List<String> partNames = isPartitioned(tblDesc) ? event.partitions(tblDesc) : null;
        ReplTxnWork replTxnWork = new ReplTxnWork(tblDesc.getDatabaseName(), tblDesc.getTableName(), partNames,
            replicationSpec.getValidWriteIdList(), ReplTxnWork.OperationType.REPL_WRITEID_STATE,
            (new Path(context.dumpDirectory)).getParent().toString(), metricCollector);
        Task<?> replTxnTask = TaskFactory.get(replTxnWork, context.hiveConf);
        parentTask.addDependentTask(replTxnTask);
        parentTask = replTxnTask;
    }
    boolean shouldCreateLoadTableTask = (!isPartitioned(tblDesc) && !TableType.EXTERNAL_TABLE.equals(table.getTableType())) || tuple.isConvertedFromManagedToExternal;
    if (shouldCreateLoadTableTask) {
        LOG.debug("adding dependent ReplTxnTask/CopyWork/MoveWork for table");
        Task<?> loadTableTask = loadTableTask(table, replicationSpec, table.getDataLocation(), event.dataPath());
        parentTask.addDependentTask(loadTableTask);
    }
    tracker.addTask(tblRootTask);
}
Also used: Path (org.apache.hadoop.fs.Path), ReplicationSpec (org.apache.hadoop.hive.ql.parse.ReplicationSpec), Table (org.apache.hadoop.hive.ql.metadata.Table), ReplTxnWork (org.apache.hadoop.hive.ql.plan.ReplTxnWork)
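
For a transactional table dump, the method above chains three tasks so that the table exists before its write-id state is replicated, and the write-id state is in place before any data is moved. A minimal sketch of the resulting chain, using the Task#addDependentTask calls visible above (the task variables are the ones built in newTableTasks):

// Resulting chain for an ACID table dump, per newTableTasks above:
//   createTableTask -> replTxnTask (REPL_WRITEID_STATE) -> loadTableTask
createTableTask.addDependentTask(replTxnTask); // write-id state once the table exists
replTxnTask.addDependentTask(loadTableTask);   // data load once write-id state is set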

Aggregations

ReplTxnWork (org.apache.hadoop.hive.ql.plan.ReplTxnWork): 5
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 4
Path (org.apache.hadoop.fs.Path): 2
ArrayList (java.util.ArrayList): 1
WriteEventInfo (org.apache.hadoop.hive.metastore.api.WriteEventInfo): 1
AbortTxnMessage (org.apache.hadoop.hive.metastore.messaging.AbortTxnMessage): 1
AllocWriteIdMessage (org.apache.hadoop.hive.metastore.messaging.AllocWriteIdMessage): 1
CommitTxnMessage (org.apache.hadoop.hive.metastore.messaging.CommitTxnMessage): 1
OpenTxnMessage (org.apache.hadoop.hive.metastore.messaging.OpenTxnMessage): 1
Task (org.apache.hadoop.hive.ql.exec.Task): 1
AddDependencyToLeaves (org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves): 1
Table (org.apache.hadoop.hive.ql.metadata.Table): 1
ReplicationSpec (org.apache.hadoop.hive.ql.parse.ReplicationSpec): 1