use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class UpdateDeleteSemanticAnalyzer, method reparseAndSuperAnalyze.
/**
 * This supports update and delete statements.
 */
private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException {
  List<? extends Node> children = tree.getChildren();
  // The first child should be the table we are deleting from
  ASTNode tabName = (ASTNode) children.get(0);
  assert tabName.getToken().getType() == HiveParser.TOK_TABNAME : "Expected tablename as first child of " + operation() + " but found " + tabName.getName();
  // Rewrite the delete or update into an insert. Crazy, but it works as deletes and updates
  // actually are inserts into the delta file in Hive. A delete
  // DELETE FROM _tablename_ [WHERE ...]
  // will be rewritten as
  // INSERT INTO TABLE _tablename_ [PARTITION (_partcols_)] SELECT ROW__ID[,
  // _partcols_] from _tablename_ SORT BY ROW__ID
  // An update
  // UPDATE _tablename_ SET x = _expr_ [WHERE...]
  // will be rewritten as
  // INSERT INTO TABLE _tablename_ [PARTITION (_partcols_)] SELECT _all_,
  // _partcols_ from _tablename_ SORT BY ROW__ID
  // where _all_ is all the non-partition columns. The expressions from the set clause will be
  // re-attached later.
  // The where clause will also be re-attached later.
  // The sort by clause is put in there so that records come out in the right order to enable
  // merge on read.
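  // As a concrete sketch of the rewrite (the table and predicate here are made up for
  // illustration), given a table t partitioned by (ds):
  //   DELETE FROM t WHERE a < 5
  // becomes roughly
  //   INSERT INTO TABLE t PARTITION (ds) SELECT ROW__ID, ds FROM t SORT BY ROW__ID
  // with the original WHERE subtree grafted back onto the rewritten AST further down.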
  StringBuilder rewrittenQueryStr = new StringBuilder();
  Table mTable = getTargetTable(tabName);
  validateTargetTable(mTable);
  rewrittenQueryStr.append("insert into table ");
  rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
  addPartitionColsToInsert(mTable.getPartCols(), rewrittenQueryStr);
  rewrittenQueryStr.append(" select ROW__ID");
  Map<Integer, ASTNode> setColExprs = null;
  Map<String, ASTNode> setCols = null;
  // Must be deterministic order set for consistent q-test output across Java versions
  Set<String> setRCols = new LinkedHashSet<String>();
  if (updating()) {
    // The set list from update should be the second child (index 1)
    assert children.size() >= 2 : "Expected update token to have at least two children";
    ASTNode setClause = (ASTNode) children.get(1);
    setCols = collectSetColumnsAndExpressions(setClause, setRCols, mTable);
    setColExprs = new HashMap<>(setClause.getChildCount());
    List<FieldSchema> nonPartCols = mTable.getCols();
    for (int i = 0; i < nonPartCols.size(); i++) {
      rewrittenQueryStr.append(',');
      String name = nonPartCols.get(i).getName();
      ASTNode setCol = setCols.get(name);
      rewrittenQueryStr.append(HiveUtils.unparseIdentifier(name, this.conf));
      if (setCol != null) {
        // This is one of the columns we're setting, record its position so we can come back
        // later and patch it up.
        // Add one to the index because the select has the ROW__ID as the first column.
        setColExprs.put(i + 1, setCol);
      }
    }
  }
  addPartitionColsToSelect(mTable.getPartCols(), rewrittenQueryStr, null);
  rewrittenQueryStr.append(" from ");
  rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
  ASTNode where = null;
  int whereIndex = deleting() ? 1 : 2;
  if (children.size() > whereIndex) {
    where = (ASTNode) children.get(whereIndex);
    assert where.getToken().getType() == HiveParser.TOK_WHERE : "Expected where clause, but found " + where.getName();
  }
  // Add a sort by clause so that the row ids come out in the correct order
  rewrittenQueryStr.append(" sort by ROW__ID ");
  ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
  Context rewrittenCtx = rr.rewrittenCtx;
  ASTNode rewrittenTree = rr.rewrittenTree;
  ASTNode rewrittenInsert = (ASTNode) rewrittenTree.getChildren().get(1);
  assert rewrittenInsert.getToken().getType() == HiveParser.TOK_INSERT : "Expected TOK_INSERT as second child of TOK_QUERY but found " + rewrittenInsert.getName();
  if (updating()) {
    rewrittenCtx.setOperation(Context.Operation.UPDATE);
    rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.UPDATE);
  } else if (deleting()) {
    rewrittenCtx.setOperation(Context.Operation.DELETE);
    rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.DELETE);
  }
  if (where != null) {
    // The structure of the AST for the rewritten insert statement is:
    // TOK_QUERY -> TOK_FROM
    //          \-> TOK_INSERT -> TOK_INSERT_INTO
    //                        \-> TOK_SELECT
    //                        \-> TOK_SORTBY
    // The following adds the TOK_WHERE and its subtree from the original query as a child of
    // TOK_INSERT, which is where it would have landed if it had been there originally in the
    // string. We do it this way because it's easier than turning the original AST back into a
    // string and reparsing it. We have to move the SORT_BY over one,
    // so grab it, append it at the end, and put the where in its old slot.
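    // After the splice below, the children of TOK_INSERT read (a sketch of the result,
    // inferred from the two calls that follow):
    // TOK_INSERT -> TOK_INSERT_INTO
    //           \-> TOK_SELECT
    //           \-> TOK_WHERE   (spliced in from the original query)
    //           \-> TOK_SORTBY  (moved over one slot)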
    ASTNode sortBy = (ASTNode) rewrittenInsert.getChildren().get(2);
    assert sortBy.getToken().getType() == HiveParser.TOK_SORTBY : "Expected TOK_SORTBY as third child of TOK_INSERT, but found " + sortBy.getName();
    rewrittenInsert.addChild(sortBy);
    rewrittenInsert.setChild(2, where);
  }
  // Patch up the projection list for updates, putting back the original set expressions.
  if (updating() && setColExprs != null) {
    // Walk through the projection list and replace the column names with the
    // expressions from the original update. Under the TOK_SELECT (see above) the structure
    // looks like:
    // TOK_SELECT -> TOK_SELEXPR -> expr
    //           \-> TOK_SELEXPR -> expr ...
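    // For example (a hypothetical update), with UPDATE t SET b = 5 the TOK_SELEXPR that
    // currently projects column b gets its child replaced by the parsed expression for 5,
    // at the position recorded in setColExprs earlier.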
    ASTNode rewrittenSelect = (ASTNode) rewrittenInsert.getChildren().get(1);
    assert rewrittenSelect.getToken().getType() == HiveParser.TOK_SELECT : "Expected TOK_SELECT as second child of TOK_INSERT but found " + rewrittenSelect.getName();
    for (Map.Entry<Integer, ASTNode> entry : setColExprs.entrySet()) {
      ASTNode selExpr = (ASTNode) rewrittenSelect.getChildren().get(entry.getKey());
      assert selExpr.getToken().getType() == HiveParser.TOK_SELEXPR : "Expected child of TOK_SELECT to be TOK_SELEXPR but was " + selExpr.getName();
      // Now, change its child
      selExpr.setChild(0, entry.getValue());
    }
  }
  try {
    useSuper = true;
    super.analyze(rewrittenTree, rewrittenCtx);
  } finally {
    useSuper = false;
  }
  updateOutputs(mTable);
  if (updating()) {
    setUpAccessControlInfoForUpdate(mTable, setCols);
    // Add the setRCols to the input list
    for (String colName : setRCols) {
      if (columnAccessInfo != null) {
        // a null columnAccessInfo means we are not doing Auth, so only record when it is present
        columnAccessInfo.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), colName);
      }
    }
  }
}
use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class UpdateDeleteSemanticAnalyzer, method parseRewrittenQuery.
/**
 * Parse the newly generated SQL statement to get a new AST.
 */
private ReparseResult parseRewrittenQuery(StringBuilder rewrittenQueryStr, String originalQuery) throws SemanticException {
  // Parse the rewritten query string
  Context rewrittenCtx;
  try {
    // Set dynamic partitioning to nonstrict so that queries do not need any partition
    // references.
    // TODO: this may be a perf issue as it prevents the optimizer... or not
    HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
    rewrittenCtx = new Context(conf);
    rewrittenCtx.setExplainConfig(ctx.getExplainConfig());
    rewrittenCtx.setIsUpdateDeleteMerge(true);
  } catch (IOException e) {
    throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg(), e);
  }
  rewrittenCtx.setCmd(rewrittenQueryStr.toString());
  ASTNode rewrittenTree;
  try {
    LOG.info("Going to reparse <" + originalQuery + "> as \n<" + rewrittenQueryStr.toString() + ">");
    rewrittenTree = ParseUtils.parse(rewrittenQueryStr.toString(), rewrittenCtx);
  } catch (ParseException e) {
    throw new SemanticException(ErrorMsg.UPDATEDELETE_PARSE_ERROR.getMsg(), e);
  }
  return new ReparseResult(rewrittenTree, rewrittenCtx);
}
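Taken together, the two methods implement a rewrite-and-reparse pattern: build replacement SQL as a string, parse it into a fresh Context, patch the resulting AST, and hand it to the regular analyzer. A minimal sketch of that flow from inside the analyzer, assuming its existing ctx field and a made-up table t:

// Illustrative sketch only; the SQL and table name are invented, and this simply
// strings together the calls shown in the two methods above.
StringBuilder rewritten = new StringBuilder("insert into table t select ROW__ID from t sort by ROW__ID");
ReparseResult rr = parseRewrittenQuery(rewritten, ctx.getCmd());
// Mark the rewritten plan as a DELETE so downstream phases treat the insert accordingly.
rr.rewrittenCtx.setOperation(Context.Operation.DELETE);
rr.rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.DELETE);
super.analyze(rr.rewrittenTree, rr.rewrittenCtx);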