Example 76 with DDLWork

use of org.apache.hadoop.hive.ql.ddl.DDLWork in project hive by apache.

the class SemanticAnalyzer method createPreInsertDesc.

private void createPreInsertDesc(Table table, boolean overwrite) {
    PreInsertTableDesc preInsertTableDesc = new PreInsertTableDesc(table, overwrite);
    this.rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), preInsertTableDesc)));
}
Also used : DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) PreInsertTableDesc(org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc)
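
All of these examples share one pattern: wrap a concrete DDLDesc in a DDLWork and let TaskFactory turn it into an executable task. The sketch below is not taken from the Hive sources; the class and variable names are illustrative, and only the call shape matters.

import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.hive.ql.ddl.DDLDesc;
import org.apache.hadoop.hive.ql.ddl.DDLWork;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

public class DdlWorkPatternSketch {
    // 'desc' stands in for any concrete DDLDesc (PreInsertTableDesc, RevokeDesc, TruncateTableDesc, ...).
    static Task<?> toTask(Set<ReadEntity> inputs, Set<WriteEntity> outputs, DDLDesc desc) {
        // DDLWork carries the descriptor plus the read/write entities tracked by the analyzer;
        // TaskFactory resolves it to the task type registered for DDLWork.
        return TaskFactory.get(new DDLWork(inputs, outputs, desc));
    }

    public static void main(String[] args) {
        // Empty sets only demonstrate the call shape; a real analyzer passes its tracked
        // inputs/outputs and adds the returned task to rootTasks, as createPreInsertDesc does above.
        Set<ReadEntity> inputs = new HashSet<>();
        Set<WriteEntity> outputs = new HashSet<>();
        System.out.println(inputs.size() + outputs.size());
    }
}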

Example 77 with DDLWork

use of org.apache.hadoop.hive.ql.ddl.DDLWork in project hive by apache.

the class ImportSemanticAnalyzer method addSinglePartition.

private static Task<?> addSinglePartition(ImportTableDesc tblDesc, Table table, Warehouse wh, AlterTableAddPartitionDesc addPartitionDesc, ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x, Long writeId, int stmtId, boolean isReplication, String dumpRoot, ReplicationMetricCollector metricCollector) throws MetaException, IOException, HiveException {
    AlterTableAddPartitionDesc.PartitionDesc partSpec = addPartitionDesc.getPartitions().get(0);
    boolean isSkipTrash = false;
    boolean needRecycle = false;
    if (shouldSkipDataCopyInReplScope(tblDesc, replicationSpec) || (tblDesc.isExternal() && tblDesc.getLocation() == null)) {
        x.getLOG().debug("Adding AddPart and skipped data copy for partition " + partSpecToString(partSpec.getPartSpec()));
        // addPartitionDesc already has the right partition location
        @SuppressWarnings("unchecked") Task<?> addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc, isReplication, dumpRoot, metricCollector), x.getConf());
        return addPartTask;
    } else {
        String srcLocation = partSpec.getLocation();
        if (replicationSpec.isInReplicationScope() && !ReplicationSpec.Type.IMPORT.equals(replicationSpec.getReplSpecType())) {
            Path partLocation = new Path(partSpec.getLocation());
            Path dataDirBase = partLocation.getParent();
            String bucketDir = partLocation.getName();
            for (int i = 1; i < partSpec.getPartSpec().size(); i++) {
                bucketDir = dataDirBase.getName() + File.separator + bucketDir;
                dataDirBase = dataDirBase.getParent();
            }
            String relativePartDataPath = EximUtil.DATA_PATH_NAME + File.separator + bucketDir;
            srcLocation = new Path(dataDirBase, relativePartDataPath).toString();
        }
        fixLocationInPartSpec(tblDesc, table, wh, replicationSpec, partSpec, x);
        x.getLOG().debug("adding dependent CopyWork/AddPart/MoveWork for partition " + partSpecToString(partSpec.getPartSpec()) + " with source location: " + srcLocation);
        Path tgtLocation = new Path(partSpec.getLocation());
        LoadFileType loadFileType;
        Path destPath;
        if (replicationSpec.isInReplicationScope()) {
            loadFileType = LoadFileType.IGNORE;
            destPath = tgtLocation;
            isSkipTrash = MetaStoreUtils.isSkipTrash(table.getParameters());
            if (table.isTemporary()) {
                needRecycle = false;
            } else {
                org.apache.hadoop.hive.metastore.api.Database db = x.getHive().getDatabase(table.getDbName());
                needRecycle = db != null && ReplChangeManager.shouldEnableCm(db, table.getTTable());
            }
        } else {
            loadFileType = replicationSpec.isReplace() ? LoadFileType.REPLACE_ALL : LoadFileType.OVERWRITE_EXISTING;
            // In replication scope, the write id will be invalid
            boolean useStagingDirectory = !AcidUtils.isTransactionalTable(table.getParameters()) || replicationSpec.isInReplicationScope();
            destPath = useStagingDirectory ? x.getCtx().getExternalTmpPath(tgtLocation) : new Path(tgtLocation, AcidUtils.deltaSubdir(writeId, writeId, stmtId));
        }
        Path moveTaskSrc = !AcidUtils.isTransactionalTable(table.getParameters()) || replicationSpec.isInReplicationScope() ? destPath : tgtLocation;
        if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
            Utilities.FILE_OP_LOGGER.trace("adding import work for partition with source location: " + srcLocation + "; target: " + tgtLocation + "; copy dest " + destPath + "; mm " + writeId + " for " + partSpecToString(partSpec.getPartSpec()) + ": " + (AcidUtils.isFullAcidTable(table) ? "acid" : (AcidUtils.isInsertOnlyTable(table) ? "mm" : "flat")));
        }
        Task<?> copyTask = null;
        if (replicationSpec.isInReplicationScope()) {
            boolean copyAtLoad = x.getConf().getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET);
            copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, new Path(srcLocation), destPath, x.getConf(), isSkipTrash, needRecycle, copyAtLoad, dumpRoot, metricCollector);
        } else {
            copyTask = TaskFactory.get(new CopyWork(new Path(srcLocation), destPath, false, dumpRoot, metricCollector, isReplication));
        }
        Task<?> addPartTask = null;
        if (x.getEventType() != DumpType.EVENT_COMMIT_TXN) {
            // During replication, by the time we apply the commit transaction event, we expect the
            // partition(s) to have already been added or altered by previous events, so there is no
            // need to create the add partition task again.
            addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc, isReplication, dumpRoot, metricCollector), x.getConf());
        }
        MoveWork moveWork = new MoveWork(x.getInputs(), x.getOutputs(), null, null, false, dumpRoot, metricCollector, isReplication);
        // See setLoadFileType and setIsAcidIow calls elsewhere for an example.
        if (replicationSpec.isInReplicationScope() && AcidUtils.isTransactionalTable(tblDesc.getTblProps())) {
            LoadMultiFilesDesc loadFilesWork = new LoadMultiFilesDesc(Collections.singletonList(destPath), Collections.singletonList(tgtLocation), true, null, null);
            moveWork.setMultiFilesDesc(loadFilesWork);
            moveWork.setNeedCleanTarget(replicationSpec.isReplace());
        } else {
            LoadTableDesc loadTableWork = new LoadTableDesc(moveTaskSrc, Utilities.getTableDesc(table), partSpec.getPartSpec(), loadFileType, writeId);
            loadTableWork.setStmtId(stmtId);
            loadTableWork.setInheritTableSpecs(false);
            moveWork.setLoadTableWork(loadTableWork);
        }
        if (loadFileType == LoadFileType.IGNORE) {
            // If the data file was copied directly to the target location, no move task is needed when
            // replaying an add partition event: the add partition task already takes care of the partition
            // info and metadata update, which is otherwise done by the load operations in the move task.
            if (x.getEventType() == DumpType.EVENT_INSERT) {
                copyTask.addDependentTask(TaskFactory.get(moveWork, x.getConf()));
            } else {
                if (addPartTask != null) {
                    copyTask.addDependentTask(addPartTask);
                }
            }
            return copyTask;
        }
        Task<?> loadPartTask = TaskFactory.get(moveWork, x.getConf());
        copyTask.addDependentTask(loadPartTask);
        if (addPartTask != null) {
            addPartTask.addDependentTask(loadPartTask);
            x.getTasks().add(copyTask);
            return addPartTask;
        }
        return copyTask;
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) AlterTableAddPartitionDesc(org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc) LoadFileType(org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType) LoadMultiFilesDesc(org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) CopyWork(org.apache.hadoop.hive.ql.plan.CopyWork) Database(org.apache.hadoop.hive.metastore.api.Database)
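
One non-obvious piece above is the path arithmetic inside the replication-scope branch: the code climbs one parent directory per additional partition column so the full "pcol=val/..." suffix is preserved, then re-roots it under the dump's data directory (EximUtil.DATA_PATH_NAME). Below is a standalone sketch of that computation, using a hypothetical two-column partition path and the "data" constant hard-coded as an assumption.

import java.io.File;
import org.apache.hadoop.fs.Path;

public class PartitionDataPathSketch {
    public static void main(String[] args) {
        // Hypothetical dumped location for a partition with two partition columns (p1, p2).
        Path partLocation = new Path("/repl/dump/db/tbl/p1=a/p2=b");
        int partitionColumns = 2; // stands in for partSpec.getPartSpec().size()

        Path dataDirBase = partLocation.getParent(); // /repl/dump/db/tbl/p1=a
        String bucketDir = partLocation.getName();   // p2=b
        // Climb one level per extra partition column so bucketDir keeps the full p1=a/p2=b suffix.
        for (int i = 1; i < partitionColumns; i++) {
            bucketDir = dataDirBase.getName() + File.separator + bucketDir;
            dataDirBase = dataDirBase.getParent();
        }
        // EximUtil.DATA_PATH_NAME is assumed to resolve to "data"; hard-coded to keep the sketch standalone.
        String relativePartDataPath = "data" + File.separator + bucketDir;
        // Prints /repl/dump/db/tbl/data/p1=a/p2=b
        System.out.println(new Path(dataDirBase, relativePartDataPath));
    }
}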

Example 78 with DDLWork

use of org.apache.hadoop.hive.ql.ddl.DDLWork in project hive by apache.

the class TestReplicationMetricUpdateOnFailure method testDDLTaskFailure.

/*
   * Check the update on metrics upon intermediate task failures (not repl-dump / repl-load).
   * Here, DDLTask is used as the intermediate task; other task failures should behave in a similar fashion.
   */
@Test
public void testDDLTaskFailure() throws Exception {
    // task-setup for DDL-Task
    DDLWork ddlWork = Mockito.mock(DDLWork.class);
    Context context = Mockito.mock(Context.class);
    Mockito.when(context.getExplainAnalyze()).thenReturn(ExplainConfiguration.AnalyzeState.ANALYZING);
    Mockito.when(ddlWork.isReplication()).thenReturn(true);
    String dumpDir = TEST_PATH + Path.SEPARATOR + testName.getMethodName();
    Mockito.when(ddlWork.getDumpDirectory()).thenReturn(dumpDir);
    Task<DDLWork> ddlTask = TaskFactory.get(ddlWork, conf);
    ddlTask.initialize(null, null, null, context);
    IncrementalLoadMetricCollector metricCollector = new IncrementalLoadMetricCollector(null, TEST_PATH, 1, conf);
    Mockito.when(ddlWork.getMetricCollector()).thenReturn(metricCollector);
    // setup for 2 runs - first recoverable and second non-recoverable
    Mockito.when(ddlWork.getDDLDesc()).thenThrow(recoverableException, nonRecoverableException);
    String stageName = "REPL_LOAD";
    // test recoverable error during DDL-Task
    metricCollector.reportStageStart(stageName, new HashMap<>());
    ddlTask.execute();
    performRecoverableChecks(stageName);
    // test non-recoverable error during DDL-Task
    metricCollector.reportStageStart(stageName, new HashMap<>());
    ddlTask.execute();
    performNonRecoverableChecks(dumpDir, stageName);
}
Also used : Context(org.apache.hadoop.hive.ql.Context) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) IncrementalLoadMetricCollector(org.apache.hadoop.hive.ql.parse.repl.load.metric.IncrementalLoadMetricCollector) Test(org.junit.Test)
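
The two ddlTask.execute() calls rely on Mockito's consecutive stubbing: passing two throwables to thenThrow makes the first getDDLDesc() call raise the recoverable exception and every later call raise the non-recoverable one. A minimal, self-contained sketch of that behaviour follows; the Work interface here is hypothetical and stands in for DDLWork.

import org.mockito.Mockito;

public class ConsecutiveStubbingSketch {
    // Hypothetical stand-in for DDLWork; only the stubbing behaviour matters here.
    interface Work {
        Object getDesc();
    }

    public static void main(String[] args) {
        Work work = Mockito.mock(Work.class);
        RuntimeException recoverable = new RuntimeException("recoverable");
        RuntimeException nonRecoverable = new RuntimeException("non-recoverable");
        // First call throws 'recoverable', every subsequent call throws 'nonRecoverable'.
        Mockito.when(work.getDesc()).thenThrow(recoverable, nonRecoverable);

        try { work.getDesc(); } catch (RuntimeException e) { System.out.println(e.getMessage()); }
        try { work.getDesc(); } catch (RuntimeException e) { System.out.println(e.getMessage()); }
    }
}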

Example 79 with DDLWork

use of org.apache.hadoop.hive.ql.ddl.DDLWork in project hive by apache.

the class HiveAuthorizationTaskFactoryImpl method createRevokeTask.

@Override
public Task<?> createRevokeTask(ASTNode ast, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    List<PrivilegeDesc> privilegeDesc = analyzePrivilegeListDef((ASTNode) ast.getChild(0));
    List<PrincipalDesc> principalDesc = AuthorizationParseUtils.analyzePrincipalListDef((ASTNode) ast.getChild(1));
    PrivilegeObjectDesc hiveObj = null;
    boolean grantOption = false;
    if (ast.getChildCount() > 2) {
        ASTNode astChild = (ASTNode) ast.getChild(2);
        hiveObj = analyzePrivilegeObject(astChild, outputs);
        if (null != ast.getFirstChildWithType(HiveParser.TOK_GRANT_OPTION_FOR)) {
            grantOption = true;
        }
    }
    RevokeDesc revokeDesc = new RevokeDesc(privilegeDesc, principalDesc, hiveObj, grantOption);
    return TaskFactory.get(new DDLWork(inputs, outputs, revokeDesc));
}
Also used : PrincipalDesc(org.apache.hadoop.hive.ql.ddl.privilege.PrincipalDesc) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) PrivilegeObjectDesc(org.apache.hadoop.hive.ql.ddl.privilege.PrivilegeObjectDesc) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) RevokeDesc(org.apache.hadoop.hive.ql.ddl.privilege.revoke.RevokeDesc) PrivilegeDesc(org.apache.hadoop.hive.ql.ddl.privilege.PrivilegeDesc)
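
The descriptor built here follows the same wrap-in-DDLWork pattern as the other examples. The sketch below is not part of the factory class: empty lists and a null privilege object stand in for what analyzePrivilegeListDef, analyzePrincipalListDef and analyzePrivilegeObject would produce from the AST, so only the RevokeDesc construction and task creation are shown.

import java.util.Collections;
import java.util.HashSet;
import org.apache.hadoop.hive.ql.ddl.DDLWork;
import org.apache.hadoop.hive.ql.ddl.privilege.revoke.RevokeDesc;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

public class RevokeTaskSketch {
    public static void main(String[] args) {
        // grantOption is true only when the AST contains TOK_GRANT_OPTION_FOR (REVOKE GRANT OPTION FOR ...).
        boolean grantOption = true;
        // Empty descriptor lists and a null privilege object keep the sketch self-contained.
        RevokeDesc revokeDesc = new RevokeDesc(Collections.emptyList(), Collections.emptyList(), null, grantOption);
        Task<?> revokeTask = TaskFactory.get(new DDLWork(new HashSet<ReadEntity>(), new HashSet<WriteEntity>(), revokeDesc));
        System.out.println(revokeTask != null);
    }
}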

Example 80 with DDLWork

use of org.apache.hadoop.hive.ql.ddl.DDLWork in project hive by apache.

the class TruncateTableHandler method handle.

@Override
public List<Task<?>> handle(Context context) throws SemanticException {
    AlterTableMessage msg = deserializer.getAlterTableMessage(context.dmd.getPayload());
    final TableName tName = TableName.fromString(msg.getTable(), null, context.isDbNameEmpty() ? msg.getDB() : context.dbName);
    TruncateTableDesc truncateTableDesc = new TruncateTableDesc(tName, null, context.eventOnlyReplicationSpec());
    truncateTableDesc.setWriteId(msg.getWriteId());
    Task<DDLWork> truncateTableTask = TaskFactory.get(new DDLWork(readEntitySet, writeEntitySet, truncateTableDesc, true, context.getDumpDirectory(), context.getMetricCollector()), context.hiveConf);
    context.log.debug("Added truncate tbl task : {}:{}:{}", truncateTableTask.getId(), truncateTableDesc.getTableName(), truncateTableDesc.getWriteId());
    updatedMetadata.set(context.dmd.getEventTo().toString(), tName.getDb(), tName.getTable(), null);
    try {
        return ReplUtils.addChildTask(truncateTableTask);
    } catch (Exception e) {
        throw new SemanticException(e.getMessage());
    }
}
Also used : TableName(org.apache.hadoop.hive.common.TableName) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) TruncateTableDesc(org.apache.hadoop.hive.ql.ddl.table.misc.truncate.TruncateTableDesc) AlterTableMessage(org.apache.hadoop.hive.metastore.messaging.AlterTableMessage) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
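
One detail worth noting is how the target table gets its database: TableName.fromString takes the name from the event payload plus a default database (context.dbName, or msg.getDB() when the context has no db name), and the default is used only when the payload carries an unqualified table name. A small sketch of that resolution, with hypothetical names:

import org.apache.hadoop.hive.common.TableName;

public class TableNameSketch {
    public static void main(String[] args) throws Exception {
        // A db-qualified name keeps its own database; the supplied default is ignored.
        TableName qualified = TableName.fromString("sales.orders", null, "fallback_db");
        // A bare table name picks up the supplied default database.
        TableName bare = TableName.fromString("orders", null, "fallback_db");
        System.out.println(qualified.getDb() + "." + qualified.getTable()); // sales.orders
        System.out.println(bare.getDb() + "." + bare.getTable());           // fallback_db.orders
    }
}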

Aggregations

DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork): 153
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 61
Table (org.apache.hadoop.hive.ql.metadata.Table): 34
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 31
ASTNode (org.apache.hadoop.hive.ql.parse.ASTNode): 24
TableName (org.apache.hadoop.hive.common.TableName): 23
Test (org.junit.Test): 23
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 22
PrincipalDesc (org.apache.hadoop.hive.ql.ddl.privilege.PrincipalDesc): 21
ArrayList (java.util.ArrayList): 18
Path (org.apache.hadoop.fs.Path): 15
HashMap (java.util.HashMap): 14
Database (org.apache.hadoop.hive.metastore.api.Database): 12
Task (org.apache.hadoop.hive.ql.exec.Task): 12
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 11
Tree (org.antlr.runtime.tree.Tree): 10
HashSet (java.util.HashSet): 9
Context (org.apache.hadoop.hive.ql.Context): 9
PrivilegeDesc (org.apache.hadoop.hive.ql.ddl.privilege.PrivilegeDesc): 9
ShowRoleGrantDesc (org.apache.hadoop.hive.ql.ddl.privilege.show.rolegrant.ShowRoleGrantDesc): 8