Examples with DDLWork - org.apache.hadoop.hive.ql.ddl.DDLWork

Example 71 with DDLWork

use of org.apache.hadoop.hive.ql.ddl.DDLWork in project hive by apache.

the class TestHiveAuthorizationTaskFactory method testShowGrantGroupOnTable.

/**
 * SHOW GRANT GROUP ... ON TABLE ...
 */
@Test
public void testShowGrantGroupOnTable() throws Exception {
    DDLWork work = analyze("SHOW GRANT GROUP " + GROUP + " ON TABLE " + TABLE);
    ShowGrantDesc grantDesc = (ShowGrantDesc) work.getDDLDesc();
    Assert.assertNotNull("Show grant should not be null", grantDesc);
    Assert.assertEquals(PrincipalType.GROUP, grantDesc.getPrincipalDesc().getType());
    Assert.assertEquals(GROUP, grantDesc.getPrincipalDesc().getName());
    Assert.assertTrue("Expected table", grantDesc.getHiveObj().getTable());
    Assert.assertEquals(TABLE_QNAME, grantDesc.getHiveObj().getObject());
    Assert.assertTrue("Expected table", grantDesc.getHiveObj().getTable());
}

Also used : DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) ShowGrantDesc(org.apache.hadoop.hive.ql.ddl.privilege.show.grant.ShowGrantDesc) Test(org.junit.Test)

Example 72 with DDLWork

use of org.apache.hadoop.hive.ql.ddl.DDLWork in project hive by apache.

the class TestHiveAuthorizationTaskFactory method testRevokeUserTable.

/**
 * REVOKE ... ON TABLE ... FROM USER ...
 */
@Test
public void testRevokeUserTable() throws Exception {
    DDLWork work = analyze("REVOKE " + SELECT + " ON TABLE " + TABLE + " FROM USER " + USER);
    RevokeDesc grantDesc = (RevokeDesc) work.getDDLDesc();
    Assert.assertNotNull("Revoke should not be null", grantDesc);
    for (PrincipalDesc principal : ListSizeMatcher.inList(grantDesc.getPrincipals()).ofSize(1)) {
        Assert.assertEquals(PrincipalType.USER, principal.getType());
        Assert.assertEquals(USER, principal.getName());
    }
    for (PrivilegeDesc privilege : ListSizeMatcher.inList(grantDesc.getPrivileges()).ofSize(1)) {
        Assert.assertEquals(Privilege.SELECT, privilege.getPrivilege());
    }
    Assert.assertTrue("Expected table", grantDesc.getPrivilegeSubject().getTable());
    Assert.assertEquals(TABLE_QNAME, grantDesc.getPrivilegeSubject().getObject());
}

Also used : PrincipalDesc(org.apache.hadoop.hive.ql.ddl.privilege.PrincipalDesc) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) RevokeDesc(org.apache.hadoop.hive.ql.ddl.privilege.revoke.RevokeDesc) PrivilegeDesc(org.apache.hadoop.hive.ql.ddl.privilege.PrivilegeDesc) Test(org.junit.Test)

Example 73 with DDLWork

use of org.apache.hadoop.hive.ql.ddl.DDLWork in project hive by apache.

the class AuthorizationTestUtil method analyze.

public static DDLWork analyze(ASTNode ast, QueryState queryState, Hive db) throws Exception {
    BaseSemanticAnalyzer analyzer = DDLSemanticAnalyzerFactory.getAnalyzer(ast, queryState, db);
    SessionState.start(queryState.getConf());
    analyzer.analyze(ast, new Context(queryState.getConf()));
    List<Task<?>> rootTasks = analyzer.getRootTasks();
    return (DDLWork) inList(rootTasks).ofSize(1).get(0).getWork();
}

Also used : Context(org.apache.hadoop.hive.ql.Context) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) Task(org.apache.hadoop.hive.ql.exec.Task) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork)

Example 74 with DDLWork

use of org.apache.hadoop.hive.ql.ddl.DDLWork in project hive by apache.

the class TaskCompiler method compile.

@SuppressWarnings("nls")
public void compile(final ParseContext pCtx, final List<Task<?>> rootTasks, final Set<ReadEntity> inputs, final Set<WriteEntity> outputs) throws SemanticException {
    Context ctx = pCtx.getContext();
    GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx();
    List<Task<MoveWork>> mvTask = new ArrayList<>();
    List<LoadTableDesc> loadTableWork = pCtx.getLoadTableWork();
    List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork();
    boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite();
    int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit();
    boolean directInsertCtas = false;
    if (pCtx.getCreateTable() != null && pCtx.getCreateTable().getStorageHandler() != null) {
        try {
            directInsertCtas = HiveUtils.getStorageHandler(conf, pCtx.getCreateTable().getStorageHandler()).directInsertCTAS();
        } catch (HiveException e) {
            throw new SemanticException("Failed to load storage handler:  " + e.getMessage());
        }
    }
    if (pCtx.getFetchTask() != null) {
        if (pCtx.getFetchTask().getTblDesc() == null) {
            return;
        }
        pCtx.getFetchTask().getWork().setHiveServerQuery(SessionState.get().isHiveServerQuery());
        TableDesc resultTab = pCtx.getFetchTask().getTblDesc();
        // then either the ThriftFormatter or the DefaultFetchFormatter should be used.
        if (!resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
            if (SessionState.get().isHiveServerQuery()) {
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName());
            } else {
                String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER);
                if (formatterName == null || formatterName.isEmpty()) {
                    conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName());
                }
            }
        }
        return;
    }
    if (!pCtx.getQueryProperties().isAnalyzeCommand()) {
        LOG.debug("Skipping optimize operator plan for analyze command.");
        optimizeOperatorPlan(pCtx);
    }
    /*
     * In case of a select, use a fetch task instead of a move task.
     * If the select is from analyze table column rewrite, don't create a fetch task. Instead create
     * a column stats task later.
     */
    if (pCtx.getQueryProperties().isQuery() && !isCStats) {
        if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
            throw new SemanticException(ErrorMsg.INVALID_LOAD_TABLE_FILE_WORK.getMsg());
        }
        LoadFileDesc loadFileDesc = loadFileWork.get(0);
        String cols = loadFileDesc.getColumns();
        String colTypes = loadFileDesc.getColumnTypes();
        TableDesc resultTab = pCtx.getFetchTableDesc();
        boolean shouldSetOutputFormatter = false;
        if (resultTab == null) {
            ResultFileFormat resFileFormat = conf.getResultFileFormat();
            String fileFormat;
            Class<? extends Deserializer> serdeClass;
            if (SessionState.get().getIsUsingThriftJDBCBinarySerDe() && resFileFormat == ResultFileFormat.SEQUENCEFILE) {
                fileFormat = resFileFormat.toString();
                serdeClass = ThriftJDBCBinarySerDe.class;
                shouldSetOutputFormatter = true;
            } else if (resFileFormat == ResultFileFormat.SEQUENCEFILE) {
                // file format is changed so that IF file sink provides list of files to fetch from (instead
                // of whole directory) list status is done on files (which is what HiveSequenceFileInputFormat does)
                fileFormat = "HiveSequenceFile";
                serdeClass = LazySimpleSerDe.class;
            } else {
                // All other cases we use the defined file format and LazySimpleSerde
                fileFormat = resFileFormat.toString();
                serdeClass = LazySimpleSerDe.class;
            }
            resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, serdeClass);
        } else {
            shouldSetOutputFormatter = resultTab.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB).equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName());
        }
        if (shouldSetOutputFormatter) {
            // Set the fetch formatter to be a no-op for the ListSinkOperator, since we will
            // read formatted thrift objects from the output SequenceFile written by Tasks.
            conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
        }
        FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit);
        boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
        fetch.setHiveServerQuery(isHiveServerQuery);
        fetch.setSource(pCtx.getFetchSource());
        fetch.setSink(pCtx.getFetchSink());
        if (isHiveServerQuery && null != resultTab && resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
            fetch.setIsUsingThriftJDBCBinarySerDe(true);
        } else {
            fetch.setIsUsingThriftJDBCBinarySerDe(false);
        }
        // The idea here is to keep an object reference both in FileSink and in FetchTask for list of files
        // to be fetched. During Job close file sink will populate the list and fetch task later will use it
        // to fetch the results.
        Collection<Operator<?>> tableScanOps = Lists.<Operator<?>>newArrayList(pCtx.getTopOps().values());
        Set<FileSinkOperator> fsOps = OperatorUtils.findOperators(tableScanOps, FileSinkOperator.class);
        if (fsOps != null && fsOps.size() == 1) {
            FileSinkOperator op = fsOps.iterator().next();
            Set<FileStatus> filesToFetch = new HashSet<>();
            op.getConf().setFilesToFetch(filesToFetch);
            fetch.setFilesToFetch(filesToFetch);
        }
        pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch));
        // For the FetchTask, the limit optimization requires we fetch all the rows
        // in memory and count how many rows we get. It's not practical if the
        // limit factor is too big
        int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
        if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
            LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit + ". Doesn't qualify limit optimization.");
            globalLimitCtx.disableOpt();
        }
        if (outerQueryLimit == 0) {
            // Believe it or not, some tools do generate queries with limit 0 and than expect
            // query to run quickly. Lets meet their requirement.
            LOG.info("Limit 0. No query execution needed.");
            return;
        }
    } else if (!isCStats) {
        for (LoadTableDesc ltd : loadTableWork) {
            Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
            mvTask.add(tsk);
        }
        boolean oneLoadFileForCtas = true;
        for (LoadFileDesc lfd : loadFileWork) {
            if (pCtx.getQueryProperties().isCTAS() || pCtx.getQueryProperties().isMaterializedView()) {
                if (!oneLoadFileForCtas) {
                    // should not have more than 1 load file for CTAS.
                    throw new SemanticException("One query is not expected to contain multiple CTAS loads statements");
                }
                setLoadFileLocation(pCtx, lfd);
                oneLoadFileForCtas = false;
            }
            mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false)));
        }
    }
    generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs);
    // For each task, set the key descriptor for the reducer
    for (Task<?> rootTask : rootTasks) {
        GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask);
    }
    // to be used, please do so
    for (Task<?> rootTask : rootTasks) {
        setInputFormat(rootTask);
    }
    optimizeTaskPlan(rootTasks, pCtx, ctx);
    /*
     * If the query was the result of analyze table column compute statistics rewrite, create
     * a column stats task instead of a fetch task to persist stats to the metastore.
     * As per HIVE-15903, we will also collect table stats when user computes column stats.
     * That means, if isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()
     * We need to collect table stats
     * if isCStats, we need to include a basic stats task
     * else it is ColumnStatsAutoGather, which should have a move task with a stats task already.
     */
    if (isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()) {
        // map from tablename to task (ColumnStatsTask which includes a BasicStatsTask)
        Map<String, StatsTask> map = new LinkedHashMap<>();
        if (isCStats) {
            if (rootTasks == null || rootTasks.size() != 1 || pCtx.getTopOps() == null || pCtx.getTopOps().size() != 1) {
                throw new SemanticException("Can not find correct root task!");
            }
            try {
                Task<?> root = rootTasks.iterator().next();
                StatsTask tsk = (StatsTask) genTableStats(pCtx, pCtx.getTopOps().values().iterator().next(), root, outputs);
                root.addDependentTask(tsk);
                map.put(extractTableFullName(tsk), tsk);
            } catch (HiveException e) {
                throw new SemanticException(e);
            }
            genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, map, outerQueryLimit, 0);
        } else {
            Set<Task<?>> leafTasks = new LinkedHashSet<Task<?>>();
            getLeafTasks(rootTasks, leafTasks);
            List<Task<?>> nonStatsLeafTasks = new ArrayList<>();
            for (Task<?> tsk : leafTasks) {
                // map table name to the correct ColumnStatsTask
                if (tsk instanceof StatsTask) {
                    map.put(extractTableFullName((StatsTask) tsk), (StatsTask) tsk);
                } else {
                    nonStatsLeafTasks.add(tsk);
                }
            }
            // add cStatsTask as a dependent of all the nonStatsLeafTasks
            for (Task<?> tsk : nonStatsLeafTasks) {
                for (Task<?> cStatsTask : map.values()) {
                    tsk.addDependentTask(cStatsTask);
                }
            }
            for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx.getColumnStatsAutoGatherContexts()) {
                if (!columnStatsAutoGatherContext.isInsertInto()) {
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), map, outerQueryLimit, 0);
                } else {
                    int numBitVector;
                    try {
                        numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
                    } catch (Exception e) {
                        throw new SemanticException(e.getMessage());
                    }
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), map, outerQueryLimit, numBitVector);
                }
            }
        }
    }
    decideExecMode(rootTasks, ctx, globalLimitCtx);
    // ahead of time by the non-native table
    if (pCtx.getQueryProperties().isCTAS() && !pCtx.getCreateTable().isMaterialization() && !directInsertCtas) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateTableDesc crtTblDesc = pCtx.getCreateTable();
        crtTblDesc.validate(conf);
        Task<?> crtTblTask = TaskFactory.get(new DDLWork(inputs, outputs, crtTblDesc));
        patchUpAfterCTASorMaterializedView(rootTasks, inputs, outputs, crtTblTask, CollectionUtils.isEmpty(crtTblDesc.getPartColNames()));
    } else if (pCtx.getQueryProperties().isMaterializedView()) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateMaterializedViewDesc viewDesc = pCtx.getCreateViewDesc();
        Task<?> crtViewTask = TaskFactory.get(new DDLWork(inputs, outputs, viewDesc));
        patchUpAfterCTASorMaterializedView(rootTasks, inputs, outputs, crtViewTask, CollectionUtils.isEmpty(viewDesc.getPartColNames()));
    } else if (pCtx.getMaterializedViewUpdateDesc() != null) {
        // If there is a materialized view update desc, we create introduce it at the end
        // of the tree.
        MaterializedViewUpdateDesc materializedViewDesc = pCtx.getMaterializedViewUpdateDesc();
        DDLWork ddlWork = new DDLWork(inputs, outputs, materializedViewDesc);
        Set<Task<?>> leafTasks = new LinkedHashSet<Task<?>>();
        getLeafTasks(rootTasks, leafTasks);
        Task<?> materializedViewTask = TaskFactory.get(ddlWork, conf);
        for (Task<?> task : leafTasks) {
            task.addDependentTask(materializedViewTask);
        }
    }
    if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) {
        LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
        pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
    }
    if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
        LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
        globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());
    }
    Interner<TableDesc> interner = Interners.newStrongInterner();
    // Perform Final chores on generated Map works
    // 1.  Intern the table descriptors
    // 2.  Derive final explain attributes based on previous compilation.
    GenMapRedUtils.finalMapWorkChores(rootTasks, pCtx.getConf(), interner);
}

Also used : FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LinkedHashSet(java.util.LinkedHashSet) DDLTask(org.apache.hadoop.hive.ql.ddl.DDLTask) BasicStatsNoJobTask(org.apache.hadoop.hive.ql.stats.BasicStatsNoJobTask) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) Task(org.apache.hadoop.hive.ql.exec.Task) StatsTask(org.apache.hadoop.hive.ql.exec.StatsTask) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FileStatus(org.apache.hadoop.fs.FileStatus) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) StatsTask(org.apache.hadoop.hive.ql.exec.StatsTask) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) ThriftFormatter(org.apache.hadoop.hive.serde2.thrift.ThriftFormatter) ThriftJDBCBinarySerDe(org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe) NoOpFetchFormatter(org.apache.hadoop.hive.serde2.NoOpFetchFormatter) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Context(org.apache.hadoop.hive.ql.Context) AnalyzeRewriteContext(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) CreateMaterializedViewDesc(org.apache.hadoop.hive.ql.ddl.view.create.CreateMaterializedViewDesc) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ResultFileFormat(org.apache.hadoop.hive.conf.HiveConf.ResultFileFormat) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc) MaterializedViewUpdateDesc(org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) DefaultFetchFormatter(org.apache.hadoop.hive.serde2.DefaultFetchFormatter) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc)

Example 75 with DDLWork

use of org.apache.hadoop.hive.ql.ddl.DDLWork in project hive by apache.

the class SemanticAnalyzer method analyzeCreateTable.

/**
 * Analyze the create table command. If it is a regular create-table or
 * create-table-like statements, we create a DDLWork and return true. If it is
 * a create-table-as-select, we get the necessary info such as the SerDe and
 * Storage Format and put it in QB, and return false, indicating the rest of
 * the semantic analyzer need to deal with the select statement with respect
 * to the SerDe and Storage Format.
 */
ASTNode analyzeCreateTable(ASTNode ast, QB qb, PlannerContext plannerCtx) throws SemanticException {
    TableName qualifiedTabName = getQualifiedTableName((ASTNode) ast.getChild(0));
    final String dbDotTab = qualifiedTabName.getNotEmptyDbTable();
    String likeTableName = null;
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    List<FieldSchema> partCols = new ArrayList<FieldSchema>();
    List<String> partColNames = new ArrayList<>();
    List<String> bucketCols = new ArrayList<String>();
    List<SQLPrimaryKey> primaryKeys = new ArrayList<SQLPrimaryKey>();
    List<SQLForeignKey> foreignKeys = new ArrayList<SQLForeignKey>();
    List<SQLUniqueConstraint> uniqueConstraints = new ArrayList<>();
    List<SQLNotNullConstraint> notNullConstraints = new ArrayList<>();
    List<SQLDefaultConstraint> defaultConstraints = new ArrayList<>();
    List<SQLCheckConstraint> checkConstraints = new ArrayList<>();
    List<Order> sortCols = new ArrayList<Order>();
    int numBuckets = -1;
    String comment = null;
    String location = null;
    Map<String, String> tblProps = null;
    boolean ifNotExists = false;
    boolean isExt = false;
    boolean isTemporary = false;
    boolean isManaged = false;
    boolean isMaterialization = false;
    boolean isTransactional = false;
    ASTNode selectStmt = null;
    // regular CREATE TABLE
    final int CREATE_TABLE = 0;
    // CREATE TABLE LIKE ... (CTLT)
    final int CTLT = 1;
    // CREATE TABLE AS SELECT ... (CTAS)
    final int CTAS = 2;
    // CREATE TRANSACTIONAL TABLE
    final int ctt = 3;
    int command_type = CREATE_TABLE;
    List<String> skewedColNames = new ArrayList<String>();
    List<List<String>> skewedValues = new ArrayList<List<String>>();
    Map<List<String>, String> listBucketColValuesMapping = new HashMap<List<String>, String>();
    boolean storedAsDirs = false;
    boolean isUserStorageFormat = false;
    boolean partitionTransformSpecExists = false;
    RowFormatParams rowFormatParams = new RowFormatParams();
    StorageFormat storageFormat = new StorageFormat(conf);
    LOG.info("Creating table " + dbDotTab + " position=" + ast.getCharPositionInLine());
    int numCh = ast.getChildCount();
    // set storage handler if default handler is provided in config
    String defaultStorageHandler = HiveConf.getVar(conf, HIVE_DEFAULT_STORAGE_HANDLER);
    if (defaultStorageHandler != null && !defaultStorageHandler.isEmpty()) {
        LOG.info("Default storage handler class detected in config. Using storage handler class if exists: '{}'", defaultStorageHandler);
        storageFormat.setStorageHandler(defaultStorageHandler);
        isUserStorageFormat = true;
    }
    /*
     * Check the 1st-level children and do simple semantic checks: 1) CTLT and
     * CTAS should not coexists. 2) CTLT or CTAS should not coexists with column
     * list (target table schema). 3) CTAS does not support partitioning (for
     * now).
     */
    for (int num = 1; num < numCh; num++) {
        ASTNode child = (ASTNode) ast.getChild(num);
        if (storageFormat.fillStorageFormat(child)) {
            isUserStorageFormat = true;
            continue;
        }
        switch(child.getToken().getType()) {
            case HiveParser.TOK_IFNOTEXISTS:
                ifNotExists = true;
                break;
            case HiveParser.KW_EXTERNAL:
                isExt = true;
                break;
            case HiveParser.KW_MANAGED:
                isManaged = true;
                isTransactional = true;
                break;
            case HiveParser.KW_TEMPORARY:
                isTemporary = true;
                isMaterialization = MATERIALIZATION_MARKER.equals(child.getText());
                break;
            case HiveParser.KW_TRANSACTIONAL:
                isTransactional = true;
                command_type = ctt;
                break;
            case HiveParser.TOK_LIKETABLE:
                if (child.getChildCount() > 0) {
                    likeTableName = getUnescapedName((ASTNode) child.getChild(0));
                    if (likeTableName != null) {
                        if (command_type == CTAS) {
                            throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg());
                        }
                        if (cols.size() != 0) {
                            throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE.getMsg());
                        }
                    }
                    command_type = CTLT;
                }
                break;
            case // CTAS
            HiveParser.TOK_QUERY:
                if (command_type == CTLT) {
                    throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg());
                }
                if (cols.size() != 0) {
                    throw new SemanticException(ErrorMsg.CTAS_COLLST_COEXISTENCE.getMsg());
                }
                if (partCols.size() != 0 || bucketCols.size() != 0) {
                    boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING);
                    if (dynPart == false) {
                        throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg());
                    } else {
                        // TODO: support dynamic partition for CTAS
                        throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg());
                    }
                }
                if (!conf.getBoolVar(ConfVars.HIVE_CTAS_EXTERNAL_TABLES) && isExt) {
                    throw new SemanticException(ErrorMsg.CTAS_EXTTBL_COEXISTENCE.getMsg());
                }
                command_type = CTAS;
                if (plannerCtx != null) {
                    plannerCtx.setCTASToken(child);
                }
                selectStmt = child;
                break;
            case HiveParser.TOK_TABCOLLIST:
                cols = getColumns(child, true, ctx.getTokenRewriteStream(), primaryKeys, foreignKeys, uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints, conf);
                break;
            case HiveParser.TOK_TABLECOMMENT:
                comment = unescapeSQLString(child.getChild(0).getText());
                break;
            case HiveParser.TOK_TABLEPARTCOLS:
                partCols = getColumns(child, false, ctx.getTokenRewriteStream(), primaryKeys, foreignKeys, uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints, conf);
                if (hasConstraints(partCols, defaultConstraints, notNullConstraints, checkConstraints)) {
                    // TODO: these constraints should be supported for partition columns
                    throw new SemanticException(ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("NOT NULL,DEFAULT and CHECK Constraints are not allowed with " + "partition columns. "));
                }
                break;
            case HiveParser.TOK_TABLEPARTCOLSBYSPEC:
                List<PartitionTransformSpec> partitionTransformSpec = PartitionTransform.getPartitionTransformSpec(child);
                if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, partitionTransformSpec)) {
                    throw new SemanticException("Query state attached to Session state must be not null. " + "Partition transform metadata cannot be saved.");
                }
                partitionTransformSpecExists = true;
                break;
            case HiveParser.TOK_TABLEPARTCOLNAMES:
                partColNames = getColumnNames(child);
                break;
            case HiveParser.TOK_ALTERTABLE_BUCKETS:
                bucketCols = getColumnNames((ASTNode) child.getChild(0));
                if (child.getChildCount() == 2) {
                    numBuckets = Integer.parseInt(child.getChild(1).getText());
                } else {
                    sortCols = getColumnNamesOrder((ASTNode) child.getChild(1));
                    numBuckets = Integer.parseInt(child.getChild(2).getText());
                }
                break;
            case HiveParser.TOK_TABLEROWFORMAT:
                rowFormatParams.analyzeRowFormat(child);
                break;
            case HiveParser.TOK_TABLELOCATION:
                location = unescapeSQLString(child.getChild(0).getText());
                location = EximUtil.relativeToAbsolutePath(conf, location);
                inputs.add(toReadEntity(location));
                break;
            case HiveParser.TOK_TABLEPROPERTIES:
                tblProps = getProps((ASTNode) child.getChild(0));
                addPropertyReadEntry(tblProps, inputs);
                break;
            case HiveParser.TOK_TABLESERIALIZER:
                child = (ASTNode) child.getChild(0);
                storageFormat.setSerde(unescapeSQLString(child.getChild(0).getText()));
                if (child.getChildCount() == 2) {
                    readProps((ASTNode) (child.getChild(1).getChild(0)), storageFormat.getSerdeProps());
                }
                break;
            case HiveParser.TOK_TABLESKEWED:
                /**
                 * Throw an error if the user tries to use the DDL with
                 * hive.internal.ddl.list.bucketing.enable set to false.
                 */
                HiveConf hiveConf = SessionState.get().getConf();
                // skewed column names
                skewedColNames = SkewedTableUtils.analyzeSkewedTableDDLColNames(child);
                // skewed value
                skewedValues = SkewedTableUtils.analyzeDDLSkewedValues(child);
                // stored as directories
                storedAsDirs = analyzeStoredAdDirs(child);
                break;
            default:
                throw new AssertionError("Unknown token: " + child.getToken());
        }
    }
    HiveStorageHandler handler;
    try {
        handler = HiveUtils.getStorageHandler(conf, storageFormat.getStorageHandler());
    } catch (HiveException e) {
        throw new SemanticException("Failed to load storage handler:  " + e.getMessage());
    }
    if (handler != null) {
        if (partitionTransformSpecExists && !handler.supportsPartitionTransform()) {
            throw new SemanticException("Partition transform is not supported for " + handler.getClass().getName());
        }
        String fileFormatPropertyKey = handler.getFileFormatPropertyKey();
        if (fileFormatPropertyKey != null) {
            if (tblProps != null && tblProps.containsKey(fileFormatPropertyKey) && storageFormat.getSerdeProps() != null && storageFormat.getSerdeProps().containsKey(fileFormatPropertyKey)) {
                String fileFormat = tblProps.get(fileFormatPropertyKey);
                throw new SemanticException("Provide only one of the following: STORED BY " + fileFormat + " or WITH SERDEPROPERTIES('" + fileFormatPropertyKey + "'='" + fileFormat + "') or" + " TBLPROPERTIES('" + fileFormatPropertyKey + "'='" + fileFormat + "')");
            }
        }
    }
    if (command_type == CREATE_TABLE || command_type == CTLT || command_type == ctt) {
        queryState.setCommandType(HiveOperation.CREATETABLE);
    } else if (command_type == CTAS) {
        queryState.setCommandType(HiveOperation.CREATETABLE_AS_SELECT);
    } else {
        throw new SemanticException("Unrecognized command.");
    }
    if (isExt && ConstraintsUtils.hasEnabledOrValidatedConstraints(notNullConstraints, defaultConstraints, checkConstraints)) {
        throw new SemanticException(ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("Constraints are disallowed with External tables. " + "Only RELY is allowed."));
    }
    if (checkConstraints != null && !checkConstraints.isEmpty()) {
        ConstraintsUtils.validateCheckConstraint(cols, checkConstraints, ctx.getConf());
    }
    storageFormat.fillDefaultStorageFormat(isExt, false);
    // check for existence of table
    if (ifNotExists) {
        try {
            Table table = getTable(qualifiedTabName, false);
            if (table != null) {
                // table exists
                return null;
            }
        } catch (HiveException e) {
            // should not occur since second parameter to getTableWithQN is false
            throw new IllegalStateException("Unexpected Exception thrown: " + e.getMessage(), e);
        }
    }
    if (isTemporary) {
        if (location == null) {
            // it has the same life cycle as the tmp table
            try {
                // Generate a unique ID for temp table path.
                // This path will be fixed for the life of the temp table.
                location = SessionState.generateTempTableLocation(conf);
            } catch (MetaException err) {
                throw new SemanticException("Error while generating temp table path:", err);
            }
        }
    }
    switch(command_type) {
        case // REGULAR CREATE TABLE DDL
        CREATE_TABLE:
            if (!CollectionUtils.isEmpty(partColNames)) {
                throw new SemanticException("Partition columns can only declared using their name and types in regular CREATE TABLE statements");
            }
            tblProps = validateAndAddDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, isTemporary, isTransactional, isManaged);
            addDbAndTabToOutputs(new String[] { qualifiedTabName.getDb(), qualifiedTabName.getTable() }, TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat);
            CreateTableDesc crtTblDesc = new CreateTableDesc(qualifiedTabName, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, ifNotExists, skewedColNames, skewedValues, primaryKeys, foreignKeys, uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints);
            crtTblDesc.setStoredAsSubDirectories(storedAsDirs);
            crtTblDesc.setNullFormat(rowFormatParams.nullFormat);
            crtTblDesc.validate(conf);
            // outputs is empty, which means this create table happens in the current
            // database.
            rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblDesc)));
            break;
        case // CREATE TRANSACTIONAL TABLE
        ctt:
            if (isExt) {
                throw new SemanticException(qualifiedTabName.getTable() + " cannot be declared transactional because it's an external table");
            }
            tblProps = validateAndAddDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, isTemporary, isTransactional, isManaged);
            addDbAndTabToOutputs(new String[] { qualifiedTabName.getDb(), qualifiedTabName.getTable() }, TableType.MANAGED_TABLE, false, tblProps, storageFormat);
            CreateTableDesc crtTranTblDesc = new CreateTableDesc(qualifiedTabName, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, ifNotExists, skewedColNames, skewedValues, primaryKeys, foreignKeys, uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints);
            crtTranTblDesc.setStoredAsSubDirectories(storedAsDirs);
            crtTranTblDesc.setNullFormat(rowFormatParams.nullFormat);
            crtTranTblDesc.validate(conf);
            // outputs is empty, which means this create table happens in the current
            // database.
            rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTranTblDesc)));
            break;
        case // create table like <tbl_name>
        CTLT:
            tblProps = validateAndAddDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, isTemporary, isTransactional, isManaged);
            addDbAndTabToOutputs(new String[] { qualifiedTabName.getDb(), qualifiedTabName.getTable() }, TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat);
            Table likeTable = getTable(likeTableName, false);
            if (likeTable != null) {
                if (isTemporary || isExt) {
                    updateDefaultTblProps(likeTable.getParameters(), tblProps, new ArrayList<>(Arrays.asList(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES)));
                } else {
                    updateDefaultTblProps(likeTable.getParameters(), tblProps, null);
                }
            }
            CreateTableLikeDesc crtTblLikeDesc = new CreateTableLikeDesc(dbDotTab, isExt, isTemporary, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getSerdeProps(), tblProps, ifNotExists, likeTableName, isUserStorageFormat);
            rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblLikeDesc)));
            break;
        case // create table as select
        CTAS:
            if (isTemporary) {
                if (!ctx.isExplainSkipExecution() && !isMaterialization) {
                    SessionState ss = SessionState.get();
                    if (ss == null) {
                        throw new SemanticException("No current SessionState, cannot create temporary table " + qualifiedTabName.getNotEmptyDbTable());
                    }
                    Map<String, Table> tables = SessionHiveMetaStoreClient.getTempTablesForDatabase(qualifiedTabName.getDb(), qualifiedTabName.getTable());
                    if (tables != null && tables.containsKey(qualifiedTabName.getTable())) {
                        throw new SemanticException("Temporary table " + qualifiedTabName.getNotEmptyDbTable() + " already exists");
                    }
                }
            } else {
                // dumpTable is only used to check the conflict for non-temporary tables
                try {
                    Table dumpTable = db.newTable(dbDotTab);
                    if (null != db.getTable(dumpTable.getDbName(), dumpTable.getTableName(), false) && !ctx.isExplainSkipExecution()) {
                        throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(dbDotTab));
                    }
                } catch (HiveException e) {
                    throw new SemanticException(e);
                }
            }
            if (location != null && location.length() != 0) {
                Path locPath = new Path(location);
                FileSystem curFs = null;
                FileStatus locStats = null;
                try {
                    curFs = locPath.getFileSystem(conf);
                    if (curFs != null) {
                        locStats = curFs.getFileStatus(locPath);
                    }
                    if (locStats != null && locStats.isDir()) {
                        FileStatus[] lStats = curFs.listStatus(locPath);
                        if (lStats != null && lStats.length != 0) {
                            // Don't throw an exception if the target location only contains the staging-dirs
                            for (FileStatus lStat : lStats) {
                                if (!lStat.getPath().getName().startsWith(HiveConf.getVar(conf, HiveConf.ConfVars.STAGINGDIR))) {
                                    throw new SemanticException(ErrorMsg.CTAS_LOCATION_NONEMPTY.getMsg(location));
                                }
                            }
                        }
                    }
                } catch (FileNotFoundException nfe) {
                // we will create the folder if it does not exist.
                } catch (IOException ioE) {
                    LOG.debug("Exception when validate folder", ioE);
                }
            }
            if (!CollectionUtils.isEmpty(partCols)) {
                throw new SemanticException("Partition columns can only declared using their names in CTAS statements");
            }
            tblProps = validateAndAddDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, isTemporary, isTransactional, isManaged);
            tblProps.put(TABLE_IS_CTAS, "true");
            addDbAndTabToOutputs(new String[] { qualifiedTabName.getDb(), qualifiedTabName.getTable() }, TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat);
            tableDesc = new CreateTableDesc(qualifiedTabName, isExt, isTemporary, cols, partColNames, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, ifNotExists, skewedColNames, skewedValues, true, primaryKeys, foreignKeys, uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints);
            tableDesc.setMaterialization(isMaterialization);
            tableDesc.setStoredAsSubDirectories(storedAsDirs);
            tableDesc.setNullFormat(rowFormatParams.nullFormat);
            qb.setTableDesc(tableDesc);
            return selectStmt;
        default:
            throw new SemanticException("Unrecognized command.");
    }
    return null;
}

Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) FileStatus(org.apache.hadoop.fs.FileStatus) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) FileSystem(org.apache.hadoop.fs.FileSystem) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) List(java.util.List) HiveConf(org.apache.hadoop.hive.conf.HiveConf) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) Order(org.apache.hadoop.hive.metastore.api.Order) SQLPrimaryKey(org.apache.hadoop.hive.metastore.api.SQLPrimaryKey) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) TableName(org.apache.hadoop.hive.common.TableName) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SQLForeignKey(org.apache.hadoop.hive.metastore.api.SQLForeignKey) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) CreateTableLikeDesc(org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) Path(org.apache.hadoop.fs.Path) SourceTable(org.apache.hadoop.hive.metastore.api.SourceTable) Table(org.apache.hadoop.hive.ql.metadata.Table) IOException(java.io.IOException) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork)

Aggregations

DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork)153 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)61 Table (org.apache.hadoop.hive.ql.metadata.Table)34 ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity)31 ASTNode (org.apache.hadoop.hive.ql.parse.ASTNode)24 TableName (org.apache.hadoop.hive.common.TableName)23 Test (org.junit.Test)23 WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)22 PrincipalDesc (org.apache.hadoop.hive.ql.ddl.privilege.PrincipalDesc)21 ArrayList (java.util.ArrayList)18 Path (org.apache.hadoop.fs.Path)15 HashMap (java.util.HashMap)14 Database (org.apache.hadoop.hive.metastore.api.Database)12 Task (org.apache.hadoop.hive.ql.exec.Task)12 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)11 Tree (org.antlr.runtime.tree.Tree)10 HashSet (java.util.HashSet)9 Context (org.apache.hadoop.hive.ql.Context)9 PrivilegeDesc (org.apache.hadoop.hive.ql.ddl.privilege.PrivilegeDesc)9 ShowRoleGrantDesc (org.apache.hadoop.hive.ql.ddl.privilege.show.rolegrant.ShowRoleGrantDesc)8