use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class UpdateDeleteSemanticAnalyzer method reparseAndSuperAnalyze.
/**
* This supports update and delete statements.
* It rewrites the delete or update into an insert. Crazy, but it works, as deletes and updates
* actually are inserts into the delta file in Hive. A delete
* DELETE FROM _tablename_ [WHERE ...]
* will be rewritten as
* INSERT INTO TABLE _tablename_ [PARTITION (_partcols_)] SELECT ROW__ID[,
* _partcols_] from _tablename_ SORT BY ROW__ID
* An update
* UPDATE _tablename_ SET x = _expr_ [WHERE...]
* will be rewritten as
* INSERT INTO TABLE _tablename_ [PARTITION (_partcols_)] SELECT ROW__ID, _all_[,
* _partcols_] from _tablename_ SORT BY ROW__ID
* where _all_ is all the non-partition columns. The expressions from the set clause and the
* where clause will be re-attached later.
* The sort by clause is put in there so that records come out in the right order to enable
* merge on read. A concrete example of the rewritten text follows this method.
*/
private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException {
List<? extends Node> children = tree.getChildren();
// The first child should be the table we are updating / deleting from
ASTNode tabName = (ASTNode) children.get(0);
assert tabName.getToken().getType() == HiveParser.TOK_TABNAME : "Expected tablename as first child of " + operation + " but found " + tabName.getName();
Table mTable = getTargetTable(tabName);
validateTargetTable(mTable);
StringBuilder rewrittenQueryStr = new StringBuilder();
rewrittenQueryStr.append("insert into table ");
rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
addPartitionColsToInsert(mTable.getPartCols(), rewrittenQueryStr);
rewrittenQueryStr.append(" select ROW__ID");
Map<Integer, ASTNode> setColExprs = null;
Map<String, ASTNode> setCols = null;
// Must be deterministic order set for consistent q-test output across Java versions
Set<String> setRCols = new LinkedHashSet<String>();
if (updating()) {
// The set list from update should be the second child (index 1)
assert children.size() >= 2 : "Expected update token to have at least two children";
ASTNode setClause = (ASTNode) children.get(1);
setCols = collectSetColumnsAndExpressions(setClause, setRCols, mTable);
setColExprs = new HashMap<>(setClause.getChildCount());
List<FieldSchema> nonPartCols = mTable.getCols();
for (int i = 0; i < nonPartCols.size(); i++) {
rewrittenQueryStr.append(',');
String name = nonPartCols.get(i).getName();
ASTNode setCol = setCols.get(name);
rewrittenQueryStr.append(HiveUtils.unparseIdentifier(name, this.conf));
if (setCol != null) {
// This is one of the columns we're setting; record its position so we can come back
// later and patch it up.
// Add one to the index because the select has the ROW__ID as the first column.
setColExprs.put(i + 1, setCol);
}
}
}
addPartitionColsToSelect(mTable.getPartCols(), rewrittenQueryStr, null);
rewrittenQueryStr.append(" from ");
rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
ASTNode where = null;
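// For a DELETE the optional WHERE clause is the second child of the tree; for an UPDATE it
// comes after the SET clause, so it is the third child.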
int whereIndex = deleting() ? 1 : 2;
if (children.size() > whereIndex) {
where = (ASTNode) children.get(whereIndex);
assert where.getToken().getType() == HiveParser.TOK_WHERE : "Expected where clause, but found " + where.getName();
}
// Add a sort by clause so that the row ids come out in the correct order
rewrittenQueryStr.append(" sort by ROW__ID ");
ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
Context rewrittenCtx = rr.rewrittenCtx;
ASTNode rewrittenTree = rr.rewrittenTree;
ASTNode rewrittenInsert = (ASTNode) rewrittenTree.getChildren().get(1);
assert rewrittenInsert.getToken().getType() == HiveParser.TOK_INSERT : "Expected TOK_INSERT as second child of TOK_QUERY but found " + rewrittenInsert.getName();
if (updating()) {
rewrittenCtx.setOperation(Context.Operation.UPDATE);
rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.UPDATE);
} else if (deleting()) {
rewrittenCtx.setOperation(Context.Operation.DELETE);
rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.DELETE);
}
if (where != null) {
// The structure of the AST for the rewritten insert statement is:
// TOK_QUERY -> TOK_FROM
// \-> TOK_INSERT -> TOK_INSERT_INTO
// \-> TOK_SELECT
// \-> TOK_SORTBY
// The following adds the TOK_WHERE and its subtree from the original query as a child of
// TOK_INSERT, which is where it would have landed if it had been there originally in the
// string. We do it this way because it's easier than turning the original AST back into a
// string and reparsing it. We have to shift the TOK_SORTBY down one slot:
// append it at the end, then overwrite its old slot with the TOK_WHERE.
ASTNode sortBy = (ASTNode) rewrittenInsert.getChildren().get(2);
assert sortBy.getToken().getType() == HiveParser.TOK_SORTBY : "Expected TOK_SORTBY as third child of TOK_INSERT, but found " + sortBy.getName();
rewrittenInsert.addChild(sortBy);
rewrittenInsert.setChild(2, where);
}
// Patch up the projection list for updates, putting back the original set expressions.
if (updating() && setColExprs != null) {
// Walk through the projection list and replace the column names with the
// expressions from the original update. Under the TOK_SELECT (see above) the structure
// looks like:
// TOK_SELECT -> TOK_SELEXPR -> expr
// \-> TOK_SELEXPR -> expr ...
ASTNode rewrittenSelect = (ASTNode) rewrittenInsert.getChildren().get(1);
assert rewrittenSelect.getToken().getType() == HiveParser.TOK_SELECT : "Expected TOK_SELECT as second child of TOK_INSERT but found " + rewrittenSelect.getName();
for (Map.Entry<Integer, ASTNode> entry : setColExprs.entrySet()) {
ASTNode selExpr = (ASTNode) rewrittenSelect.getChildren().get(entry.getKey());
assert selExpr.getToken().getType() == HiveParser.TOK_SELEXPR : "Expected child of TOK_SELECT to be TOK_SELEXPR but was " + selExpr.getName();
// Now, change its child
selExpr.setChild(0, entry.getValue());
}
}
try {
useSuper = true;
// Note: this will overwrite this.ctx with rewrittenCtx
rewrittenCtx.setEnableUnparse(false);
super.analyze(rewrittenTree, rewrittenCtx);
} finally {
useSuper = false;
}
updateOutputs(mTable);
if (updating()) {
setUpAccessControlInfoForUpdate(mTable, setCols);
// Add the setRCols to the input list
for (String colName : setRCols) {
if (columnAccessInfo != null) {
// a null columnAccessInfo means we are not doing Auth, so there is nothing to record
columnAccessInfo.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), colName);
}
}
}
}
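As a concrete illustration of the rewrite above, here is a minimal, self-contained sketch for a hypothetical transactional table t(a int, b string) partitioned by (ds string). The helper below only mirrors the string building in reparseAndSuperAnalyze (identifier quoting via HiveUtils.unparseIdentifier is omitted); it is not a Hive API.

public class RewriteSketch {
    static String buildRewrite(String table, java.util.List<String> nonPartCols, String partCol, boolean updating) {
        StringBuilder sb = new StringBuilder("insert into table ").append(table)
            .append(" partition (").append(partCol).append(")")
            .append(" select ROW__ID");
        if (updating) {
            // For updates every non-partition column is projected; the SET expressions are patched in later.
            for (String col : nonPartCols) {
                sb.append(',').append(col);
            }
        }
        return sb.append(", ").append(partCol)
                 .append(" from ").append(table)
                 .append(" sort by ROW__ID").toString();
    }

    public static void main(String[] args) {
        // DELETE FROM t WHERE b = 'x' becomes roughly:
        //   insert into table t partition (ds) select ROW__ID, ds from t sort by ROW__ID
        System.out.println(buildRewrite("t", java.util.Arrays.asList("a", "b"), "ds", false));
        // UPDATE t SET b = 'y' WHERE a = 1 becomes roughly:
        //   insert into table t partition (ds) select ROW__ID,a,b, ds from t sort by ROW__ID
        System.out.println(buildRewrite("t", java.util.Arrays.asList("a", "b"), "ds", true));
        // In both cases the WHERE predicate (and, for updates, the SET expressions) are
        // re-attached to the reparsed AST afterwards, as the method above shows.
    }
}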
use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class TestShowPartitionAnalyzer method testGetShowPartitionsFilter.
@Test
public void testGetShowPartitionsFilter() throws Exception {
List<FieldSchema> partColumns = new ArrayList<FieldSchema>();
partColumns.add(new FieldSchema("ds", TypeInfoFactory.dateTypeInfo.getTypeName(), null));
partColumns.add(new FieldSchema("hr", TypeInfoFactory.intTypeInfo.getTypeName(), null));
partColumns.add(new FieldSchema("rs", TypeInfoFactory.stringTypeInfo.getTypeName(), null));
RowResolver rwsch = new RowResolver();
rwsch.put("tableBar", "ds", new ColumnInfo("ds", TypeInfoFactory.dateTypeInfo, null, true));
rwsch.put("tableBar", "hr", new ColumnInfo("hr", TypeInfoFactory.intTypeInfo, null, true));
rwsch.put("tableBar", "rs", new ColumnInfo("rs", TypeInfoFactory.stringTypeInfo, null, true));
TypeCheckCtx tcCtx = new TypeCheckCtx(rwsch);
// The string partition column rs is compared against a numeric literal
String showPart1 = "show partitions databaseFoo.tableBar " + "where ds > '2010-03-03' and " + "rs <= 421021";
ASTNode command = ParseUtils.parse(showPart1, new Context(conf));
ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) genExprNodeByDefault(tcCtx, command);
// rs <= 421021
ExprNodeGenericFuncDesc child = (ExprNodeGenericFuncDesc) funcDesc.getChildren().get(1);
Assert.assertEquals("rs", ((ExprNodeColumnDesc) child.getChildren().get(0)).getColumn());
Assert.assertEquals(421021, ((ExprNodeConstantDesc) child.getChildren().get(1)).getValue());
Table table = new Table(new org.apache.hadoop.hive.metastore.api.Table("databaseFoo", "tableBar", "foo", 1, 1, -1, null, partColumns, null, null, null, TableType.MANAGED_TABLE.name()));
ShowPartitionAnalyzer analyzer = new ShowPartitionAnalyzer(QueryState.getNewQueryState(new HiveConf(), null));
funcDesc = (ExprNodeGenericFuncDesc) analyzer.getShowPartitionsFilter(table, command);
Assert.assertTrue(funcDesc.getChildren().size() == 2);
// ds > '2010-03-03'
child = (ExprNodeGenericFuncDesc) funcDesc.getChildren().get(0);
Assert.assertEquals("ds", ((ExprNodeColumnDesc) child.getChildren().get(0)).getColumn());
Assert.assertEquals(TypeInfoFactory.dateTypeInfo, child.getChildren().get(0).getTypeInfo());
Assert.assertEquals(child.getChildren().get(0).getTypeString(), child.getChildren().get(1).getTypeString());
// rs <= 421021
child = (ExprNodeGenericFuncDesc) funcDesc.getChildren().get(1);
Assert.assertEquals("rs", ((ExprNodeColumnDesc) child.getChildren().get(0)).getColumn());
Assert.assertEquals(TypeInfoFactory.stringTypeInfo, child.getChildren().get(0).getTypeInfo());
Assert.assertEquals(child.getChildren().get(0).getTypeString(), child.getChildren().get(1).getTypeString());
// invalid input
String showPart2 = "show partitions databaseFoo.tableBar " + "where hr > 'a123' and hr <= '2346b'";
command = ParseUtils.parse(showPart2, new Context(conf));
try {
analyzer.getShowPartitionsFilter(table, command);
Assert.fail("show throw semantic exception");
} catch (Exception e) {
Assert.assertTrue(e.getMessage().contains("Cannot convert to int from string"));
}
funcDesc = (ExprNodeGenericFuncDesc) genExprNodeByDefault(tcCtx, command);
List<String> partColumnNames = new ArrayList<>();
List<PrimitiveTypeInfo> partColumnTypeInfos = new ArrayList<>();
for (FieldSchema fs : partColumns) {
partColumnNames.add(fs.getName());
partColumnTypeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()));
}
List<String> partNames = new LinkedList<String>();
partNames.add("ds=2010-11-10/hr=12/rs=NA");
partNames.add("ds=2010-11-10/hr=13/rs=AS");
partNames.add("ds=2010-11-10/hr=23/rs=AE");
// The metastore uses this to filter partition names by default
PartitionPruner.prunePartitionNames(partColumnNames, partColumnTypeInfos, funcDesc, "__HIVE_DEFAULT_PARTITION__", partNames);
// hr > 'a123' and hr <= '2346b' filters out nothing
Assert.assertTrue(partNames.contains("ds=2010-11-10/hr=12/rs=NA"));
Assert.assertTrue(partNames.contains("ds=2010-11-10/hr=13/rs=AS"));
Assert.assertTrue(partNames.contains("ds=2010-11-10/hr=23/rs=AE"));
}
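The partition names pruned above are plain '/'-joined key=value strings. A minimal parser for that shape (not a Hive API, and ignoring the escaping Hive applies to special characters in names) could look like this:

public class PartitionNameSketch {
    static java.util.Map<String, String> parse(String partName) {
        java.util.Map<String, String> spec = new java.util.LinkedHashMap<>();
        for (String kv : partName.split("/")) {
            int eq = kv.indexOf('=');
            spec.put(kv.substring(0, eq), kv.substring(eq + 1));
        }
        return spec;
    }

    public static void main(String[] args) {
        // Prints {ds=2010-11-10, hr=12, rs=NA}
        System.out.println(parse("ds=2010-11-10/hr=12/rs=NA"));
    }
}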
use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class TestContext method setUp.
@Before
public void setUp() {
/* Only called to create session directories used by the Context class */
SessionState.start(conf);
SessionState.detachSession();
context = new Context(conf);
}
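A hypothetical companion teardown (not part of the original test, assuming a JUnit @After hook) could release the scratch directories the Context creates; context.clear() is the same call TezTask uses for its cleanup:

@After
public void tearDown() throws Exception {
    if (context != null) {
        // Best-effort removal of the per-query scratch/staging directories.
        context.clear();
    }
}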
use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class UnlockDatabaseOperation method execute.
@Override
public int execute() throws HiveException {
Context ctx = context.getContext();
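// The transaction/lock manager is obtained from the query-level Context and performs the actual unlock.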
HiveTxnManager txnManager = ctx.getHiveTxnManager();
return txnManager.unlockDatabase(context.getDb(), desc);
}
use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class TezTask method execute.
@Override
public int execute() {
int rc = 1;
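// Assume failure until the DAG monitor reports success (rc == 0).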
boolean cleanContext = false;
Context ctx = null;
Ref<TezSessionState> sessionRef = Ref.from(null);
final String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
try {
// Get or create Context object. If we create it we have to clean it later as well.
ctx = context;
if (ctx == null) {
ctx = new Context(conf);
cleanContext = true;
// Some DDL tasks that directly execute a TezTask do not set up a Context and hence no TriggerContext.
// The queryId handling is also messed up: some DDL tasks carry an executionId instead of a proper queryId.
WmContext wmContext = new WmContext(System.currentTimeMillis(), queryId);
ctx.setWmContext(wmContext);
}
// Need to remove this static hack, but currently this is the way to get a session.
SessionState ss = SessionState.get();
// Note: given that we return pool sessions to the pool in the finally block below, and that
// we need to set the global to null to do that, this "reuse" may be pointless.
TezSessionState session = sessionRef.value = ss.getTezSession();
if (session != null && !session.isOpen()) {
LOG.warn("The session: " + session + " has not been opened");
}
// We only need a username for UGI to use for groups; getGroups will fetch the groups
// based on Hadoop configuration, as documented at
// https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html
String userName = getUserNameForGroups(ss);
List<String> groups = null;
if (userName == null) {
userName = "anonymous";
} else {
try {
groups = UserGroupInformation.createRemoteUser(userName).getGroups();
} catch (Exception ex) {
LOG.warn("Cannot obtain groups for " + userName, ex);
}
}
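// The workload manager maps this input (user, groups, and the optional wmpool/wmapp hive variables)
// to a resource pool when a session is fetched below.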
MappingInput mi = new MappingInput(userName, groups, ss.getHiveVariables().get("wmpool"), ss.getHiveVariables().get("wmapp"));
WmContext wmContext = ctx.getWmContext();
// jobConf will hold all the configuration for hadoop, tez, and hive, which are not set in AM defaults
JobConf jobConf = utils.createConfiguration(conf, false);
// Setup the job specific keystore path if exists and put the password into the environment variables of tez am/tasks.
HiveConfUtil.updateJobCredentialProviders(jobConf);
// Get all user jars from work (e.g. input format stuff).
String[] allNonConfFiles = work.configureJobConfAndExtractJars(jobConf);
// DAG scratch dir. We get a session from the pool so it may be different from Tez one.
// TODO: we could perhaps reuse the same directory for HiveResources?
Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), conf);
CallerContext callerContext = CallerContext.create("HIVE", queryPlan.getQueryId(), "HIVE_QUERY_ID", queryPlan.getQueryStr());
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
session = sessionRef.value = WorkloadManagerFederation.getSession(sessionRef.value, conf, mi, getWork().getLlapMode(), wmContext);
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
try {
ss.setTezSession(session);
LOG.info("Subscribed to counters: {} for queryId: {}", wmContext.getSubscribedCounters(), wmContext.getQueryId());
// Ensure the session is open and has the necessary local resources.
// This would refresh any conf resources and also local resources.
ensureSessionHasResources(session, allNonConfFiles);
// This is a combination of the jar stuff from conf, and not from conf.
List<LocalResource> allNonAppResources = session.getLocalizedResources();
logResources(allNonAppResources);
Map<String, LocalResource> allResources = DagUtils.createTezLrMap(session.getAppJarLr(), allNonAppResources);
// next we translate the TezWork to a Tez DAG
DAG dag = build(jobConf, work, scratchDir, ctx, allResources);
dag.setCallerContext(callerContext);
// Check isShutdown opportunistically; it's never unset.
if (this.isShutdown) {
throw new HiveException("Operation cancelled");
}
DAGClient dagClient = submit(dag, sessionRef);
session = sessionRef.value;
boolean wasShutdown = false;
synchronized (dagClientLock) {
assert this.dagClient == null;
wasShutdown = this.isShutdown;
if (!wasShutdown) {
this.dagClient = dagClient;
}
}
if (wasShutdown) {
closeDagClientOnCancellation(dagClient);
throw new HiveException("Operation cancelled");
}
// Log all the info required to find the various logs for this query
LOG.info("HS2 Host: [{}], Query ID: [{}], Dag ID: [{}], DAG Session ID: [{}]", ServerUtils.hostname(), queryId, this.dagClient.getDagIdentifierString(), this.dagClient.getSessionIdentifierString());
// finally monitor will print progress until the job is done
TezJobMonitor monitor = new TezJobMonitor(work.getAllWork(), dagClient, conf, dag, ctx, counters);
rc = monitor.monitorExecution();
if (rc != 0) {
this.setException(new HiveException(monitor.getDiagnostics()));
}
try {
// fetch the counters
Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
TezCounters dagCounters = dagClient.getDAGStatus(statusGetOpts).getDAGCounters();
// if initial counters exists, merge it with dag counters to get aggregated view
TezCounters mergedCounters = counters == null ? dagCounters : Utils.mergeTezCounters(dagCounters, counters);
counters = mergedCounters;
} catch (Exception err) {
// Don't fail execution due to counters - just don't print summary info
LOG.warn("Failed to get counters. Ignoring, summary info will be incomplete.", err);
counters = null;
}
// save useful commit information into query state, e.g. for custom commit hooks, like Iceberg
if (rc == 0) {
collectCommitInformation(work);
}
} finally {
// Note: due to TEZ-3846, the session may actually be invalid in case of some errors.
// Currently, reopen on an attempted reuse will take care of that; we cannot tell
// if the session is usable until we try.
// We return this to the pool even if it's unusable; reopen is supposed to handle this.
wmContext = ctx.getWmContext();
try {
if (sessionRef.value != null) {
sessionRef.value.returnToSessionManager();
}
} catch (Exception e) {
LOG.error("Failed to return session: {} to pool", session, e);
throw e;
}
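// Print the workload manager event summary in the configured format (json or text), unless disabled.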
if (!conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("none") && wmContext != null) {
if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("json")) {
wmContext.printJson(console);
} else if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("text")) {
wmContext.print(console);
}
}
}
if (LOG.isInfoEnabled() && counters != null && (HiveConf.getBoolVar(conf, HiveConf.ConfVars.TEZ_EXEC_SUMMARY) || Utilities.isPerfOrAboveLogging(conf))) {
for (CounterGroup group : counters) {
LOG.info(group.getDisplayName() + ":");
for (TezCounter counter : group) {
LOG.info(" " + counter.getDisplayName() + ": " + counter.getValue());
}
}
}
updateNumRows();
} catch (Exception e) {
LOG.error("Failed to execute tez graph.", e);
setException(e);
// rc will be 1 at this point indicating failure.
} finally {
Utilities.clearWork(conf);
// Clear gWorkMap
for (BaseWork w : work.getAllWork()) {
JobConf workCfg = workToConf.get(w);
if (workCfg != null) {
Utilities.clearWorkMapForConf(workCfg);
}
}
if (cleanContext) {
try {
ctx.clear();
} catch (Exception e) {
/*best effort*/
LOG.warn("Failed to clean up after tez job", e);
}
}
// need to either move tmp files or remove them
DAGClient dagClient = null;
synchronized (dagClientLock) {
dagClient = this.dagClient;
this.dagClient = null;
}
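// From here on this task no longer owns the DAGClient; close() below shuts it down and handles
// job close, moving or removing the tmp files.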
// DagClient as such should have no bearing on jobClose.
if (dagClient != null) {
// rc will only be overwritten if close errors out
rc = close(work, rc, dagClient);
}
}
return rc;
}
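The get-or-create Context handling in execute() follows a common ownership pattern: clean up only what you created. A generic, self-contained sketch of that pattern (illustrative names, not Hive APIs):

public class OwnedContextSketch {
    interface QueryContext { void clear() throws Exception; }

    static int run(QueryContext provided, java.util.function.Supplier<QueryContext> factory) {
        boolean cleanContext = false;
        QueryContext ctx = provided;
        if (ctx == null) {
            ctx = factory.get();      // we created it, so we must clean it later
            cleanContext = true;
        }
        int rc = 1;                   // assume failure until the work succeeds
        try {
            // ... run the actual work against ctx ...
            rc = 0;
        } finally {
            if (cleanContext) {
                try {
                    ctx.clear();      // best effort, mirrors ctx.clear() in TezTask
                } catch (Exception e) {
                    // swallow: cleanup failure should not mask the real result
                }
            }
        }
        return rc;
    }
}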