use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class UpdateDeleteSemanticAnalyzer, method reparseAndSuperAnalyze.
/**
 * This supports update and delete statements.
 */
private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException {
  List<? extends Node> children = tree.getChildren();
  // The first child should be the table we are deleting from
  ASTNode tabName = (ASTNode) children.get(0);
  assert tabName.getToken().getType() == HiveParser.TOK_TABNAME : "Expected tablename as first child of " + operation() + " but found " + tabName.getName();
  // Rewrite the delete or update into an insert. Crazy, but it works as deletes and updates
  // actually are inserts into the delta file in Hive. A delete
  // DELETE FROM _tablename_ [WHERE ...]
  // will be rewritten as
  // INSERT INTO TABLE _tablename_ [PARTITION (_partcols_)] SELECT ROW__ID[,
  // _partcols_] from _tablename_ SORT BY ROW__ID
  // An update
  // UPDATE _tablename_ SET x = _expr_ [WHERE...]
  // will be rewritten as
  // INSERT INTO TABLE _tablename_ [PARTITION (_partcols_)] SELECT _all_,
  // _partcols_ from _tablename_ SORT BY ROW__ID
  // where _all_ is all the non-partition columns. The expressions from the set clause will be
  // re-attached later.
  // The where clause will also be re-attached later.
  // The sort by clause is put in there so that records come out in the right order to enable
  // merge on read.
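  // As a concrete sketch of the rewrite (the table and predicate here are made up for
  // illustration), given a table t partitioned by (ds):
  //   DELETE FROM t WHERE a < 5
  // becomes roughly
  //   INSERT INTO TABLE t PARTITION (ds) SELECT ROW__ID, ds FROM t SORT BY ROW__ID
  // with the original WHERE subtree grafted back onto the rewritten AST further down.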
  StringBuilder rewrittenQueryStr = new StringBuilder();
  Table mTable = getTargetTable(tabName);
  validateTargetTable(mTable);
  rewrittenQueryStr.append("insert into table ");
  rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
  addPartitionColsToInsert(mTable.getPartCols(), rewrittenQueryStr);
  rewrittenQueryStr.append(" select ROW__ID");
  Map<Integer, ASTNode> setColExprs = null;
  Map<String, ASTNode> setCols = null;
  // Must be deterministic order set for consistent q-test output across Java versions
  Set<String> setRCols = new LinkedHashSet<String>();
  if (updating()) {
    // The set list from update should be the second child (index 1)
    assert children.size() >= 2 : "Expected update token to have at least two children";
    ASTNode setClause = (ASTNode) children.get(1);
    setCols = collectSetColumnsAndExpressions(setClause, setRCols, mTable);
    setColExprs = new HashMap<>(setClause.getChildCount());
    List<FieldSchema> nonPartCols = mTable.getCols();
    for (int i = 0; i < nonPartCols.size(); i++) {
      rewrittenQueryStr.append(',');
      String name = nonPartCols.get(i).getName();
      ASTNode setCol = setCols.get(name);
      rewrittenQueryStr.append(HiveUtils.unparseIdentifier(name, this.conf));
      if (setCol != null) {
        // This is one of the columns we're setting, record its position so we can come back
        // later and patch it up.
        // Add one to the index because the select has the ROW__ID as the first column.
        setColExprs.put(i + 1, setCol);
      }
    }
  }
  addPartitionColsToSelect(mTable.getPartCols(), rewrittenQueryStr, null);
  rewrittenQueryStr.append(" from ");
  rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
  ASTNode where = null;
  int whereIndex = deleting() ? 1 : 2;
  if (children.size() > whereIndex) {
    where = (ASTNode) children.get(whereIndex);
    assert where.getToken().getType() == HiveParser.TOK_WHERE : "Expected where clause, but found " + where.getName();
  }
  // Add a sort by clause so that the row ids come out in the correct order
  rewrittenQueryStr.append(" sort by ROW__ID ");
  ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
  Context rewrittenCtx = rr.rewrittenCtx;
  ASTNode rewrittenTree = rr.rewrittenTree;
  ASTNode rewrittenInsert = (ASTNode) rewrittenTree.getChildren().get(1);
  assert rewrittenInsert.getToken().getType() == HiveParser.TOK_INSERT : "Expected TOK_INSERT as second child of TOK_QUERY but found " + rewrittenInsert.getName();
  if (updating()) {
    rewrittenCtx.setOperation(Context.Operation.UPDATE);
    rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.UPDATE);
  } else if (deleting()) {
    rewrittenCtx.setOperation(Context.Operation.DELETE);
    rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.DELETE);
  }
  if (where != null) {
    // The structure of the AST for the rewritten insert statement is:
    // TOK_QUERY -> TOK_FROM
    //          \-> TOK_INSERT -> TOK_INSERT_INTO
    //                        \-> TOK_SELECT
    //                        \-> TOK_SORTBY
    // The following adds the TOK_WHERE and its subtree from the original query as a child of
    // TOK_INSERT, which is where it would have landed if it had been there originally in the
    // string. We do it this way because it's easier than turning the original AST back into a
    // string and reparsing it. We have to move the SORT_BY over one,
    // so grab it, append it at the end, and put the where in its old slot.
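    // After the splice below, the children of TOK_INSERT read (a sketch of the result,
    // inferred from the two calls that follow):
    // TOK_INSERT -> TOK_INSERT_INTO
    //           \-> TOK_SELECT
    //           \-> TOK_WHERE   (spliced in from the original query)
    //           \-> TOK_SORTBY  (moved over one slot)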
    ASTNode sortBy = (ASTNode) rewrittenInsert.getChildren().get(2);
    assert sortBy.getToken().getType() == HiveParser.TOK_SORTBY : "Expected TOK_SORTBY as third child of TOK_INSERT, but found " + sortBy.getName();
    rewrittenInsert.addChild(sortBy);
    rewrittenInsert.setChild(2, where);
  }
  // Patch up the projection list for updates, putting back the original set expressions.
  if (updating() && setColExprs != null) {
    // Walk through the projection list and replace the column names with the
    // expressions from the original update. Under the TOK_SELECT (see above) the structure
    // looks like:
    // TOK_SELECT -> TOK_SELEXPR -> expr
    //           \-> TOK_SELEXPR -> expr ...
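    // For example (a hypothetical update), with UPDATE t SET b = 5 the TOK_SELEXPR that
    // currently projects column b gets its child replaced by the parsed expression for 5,
    // at the position recorded in setColExprs earlier.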
    ASTNode rewrittenSelect = (ASTNode) rewrittenInsert.getChildren().get(1);
    assert rewrittenSelect.getToken().getType() == HiveParser.TOK_SELECT : "Expected TOK_SELECT as second child of TOK_INSERT but found " + rewrittenSelect.getName();
    for (Map.Entry<Integer, ASTNode> entry : setColExprs.entrySet()) {
      ASTNode selExpr = (ASTNode) rewrittenSelect.getChildren().get(entry.getKey());
      assert selExpr.getToken().getType() == HiveParser.TOK_SELEXPR : "Expected child of TOK_SELECT to be TOK_SELEXPR but was " + selExpr.getName();
      // Now, change its child
      selExpr.setChild(0, entry.getValue());
    }
  }
  try {
    useSuper = true;
    super.analyze(rewrittenTree, rewrittenCtx);
  } finally {
    useSuper = false;
  }
  updateOutputs(mTable);
  if (updating()) {
    setUpAccessControlInfoForUpdate(mTable, setCols);
    // Add the setRCols to the input list
    for (String colName : setRCols) {
      if (columnAccessInfo != null) {
        // a null columnAccessInfo means we are not doing Auth, so only record when it is present
        columnAccessInfo.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), colName);
      }
    }
  }
}
use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class UpdateDeleteSemanticAnalyzer, method parseRewrittenQuery.
/**
 * Parse the newly generated SQL statement to get a new AST.
 */
private ReparseResult parseRewrittenQuery(StringBuilder rewrittenQueryStr, String originalQuery) throws SemanticException {
  // Parse the rewritten query string
  Context rewrittenCtx;
  try {
    // Set dynamic partitioning to nonstrict so that queries do not need any partition
    // references.
    // TODO: this may be a perf issue as it prevents the optimizer... or not
    HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
    rewrittenCtx = new Context(conf);
    rewrittenCtx.setExplainConfig(ctx.getExplainConfig());
    rewrittenCtx.setIsUpdateDeleteMerge(true);
  } catch (IOException e) {
    throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg(), e);
  }
  rewrittenCtx.setCmd(rewrittenQueryStr.toString());
  ASTNode rewrittenTree;
  try {
    LOG.info("Going to reparse <" + originalQuery + "> as \n<" + rewrittenQueryStr.toString() + ">");
    rewrittenTree = ParseUtils.parse(rewrittenQueryStr.toString(), rewrittenCtx);
  } catch (ParseException e) {
    throw new SemanticException(ErrorMsg.UPDATEDELETE_PARSE_ERROR.getMsg(), e);
  }
  return new ReparseResult(rewrittenTree, rewrittenCtx);
}
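Taken together, the two methods implement a rewrite-and-reparse pattern: build replacement SQL as a string, parse it into a fresh Context, patch the resulting AST, and hand it to the regular analyzer. A minimal sketch of that flow from inside the analyzer, assuming its existing ctx field and a made-up table t:

// Illustrative sketch only; the SQL and table name are invented, and this simply
// strings together the calls shown in the two methods above.
StringBuilder rewritten = new StringBuilder("insert into table t select ROW__ID from t sort by ROW__ID");
ReparseResult rr = parseRewrittenQuery(rewritten, ctx.getCmd());
// Mark the rewritten plan as a DELETE so downstream phases treat the insert accordingly.
rr.rewrittenCtx.setOperation(Context.Operation.DELETE);
rr.rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.DELETE);
super.analyze(rr.rewrittenTree, rr.rewrittenCtx);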