Use of org.apache.hadoop.hive.ql.plan.DDLWork in project hive by apache.
Class DDLSemanticAnalyzer, method analyzeAlterResourcePlan.
private void analyzeAlterResourcePlan(ASTNode ast) throws SemanticException {
  if (ast.getChildCount() < 1) {
    throw new SemanticException("Incorrect syntax");
  }
  Tree nameOrGlobal = ast.getChild(0);
  switch (nameOrGlobal.getType()) {
  case HiveParser.TOK_ENABLE:
    // This command exists solely to output this message. TODO: can we do it w/o an error?
    throw new SemanticException("Activate a resource plan to enable workload management");
  case HiveParser.TOK_DISABLE:
    WMNullableResourcePlan anyRp = new WMNullableResourcePlan();
    anyRp.setStatus(WMResourcePlanStatus.ENABLED);
    AlterResourcePlanDesc desc = new AlterResourcePlanDesc(anyRp, null, false, false, true, false);
    addServiceOutput();
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
    return;
  // Continue to handle changes to a specific plan.
  default:
  }
  if (ast.getChildCount() < 2) {
    throw new SemanticException("Invalid syntax for ALTER RESOURCE PLAN statement");
  }
  String rpName = unescapeIdentifier(ast.getChild(0).getText());
  WMNullableResourcePlan resourcePlan = new WMNullableResourcePlan();
  boolean isEnableActivate = false, isReplace = false;
  boolean validate = false;
  for (int i = 1; i < ast.getChildCount(); ++i) {
    Tree child = ast.getChild(i);
    switch (child.getType()) {
    case HiveParser.TOK_VALIDATE:
      validate = true;
      break;
    case HiveParser.TOK_ACTIVATE:
      if (resourcePlan.getStatus() == WMResourcePlanStatus.ENABLED) {
        isEnableActivate = true;
      }
      if (child.getChildCount() > 1) {
        throw new SemanticException("Expected 0 or 1 arguments " + ast.toStringTree());
      } else if (child.getChildCount() == 1) {
        if (child.getChild(0).getType() != HiveParser.TOK_REPLACE) {
          throw new SemanticException("Incorrect syntax " + ast.toStringTree());
        }
        isReplace = true;
        // Implied.
        isEnableActivate = false;
      }
      resourcePlan.setStatus(WMResourcePlanStatus.ACTIVE);
      break;
    case HiveParser.TOK_ENABLE:
      if (resourcePlan.getStatus() == WMResourcePlanStatus.ACTIVE) {
        isEnableActivate = !isReplace;
      } else {
        resourcePlan.setStatus(WMResourcePlanStatus.ENABLED);
      }
      break;
    case HiveParser.TOK_DISABLE:
      resourcePlan.setStatus(WMResourcePlanStatus.DISABLED);
      break;
    case HiveParser.TOK_REPLACE:
      isReplace = true;
      if (child.getChildCount() > 1) {
        throw new SemanticException("Expected 0 or 1 arguments " + ast.toStringTree());
      } else if (child.getChildCount() == 1) {
        // Replace is essentially renaming a plan to the name of an existing plan, with backup.
        resourcePlan.setName(unescapeIdentifier(child.getChild(0).getText()));
      } else {
        resourcePlan.setStatus(WMResourcePlanStatus.ACTIVE);
      }
      break;
    case HiveParser.TOK_QUERY_PARALLELISM: {
      if (child.getChildCount() != 1) {
        throw new SemanticException("Expected one argument");
      }
      Tree val = child.getChild(0);
      resourcePlan.setIsSetQueryParallelism(true);
      if (val.getType() == HiveParser.TOK_NULL) {
        resourcePlan.unsetQueryParallelism();
      } else {
        resourcePlan.setQueryParallelism(Integer.parseInt(val.getText()));
      }
      break;
    }
    case HiveParser.TOK_DEFAULT_POOL: {
      if (child.getChildCount() != 1) {
        throw new SemanticException("Expected one argument");
      }
      Tree val = child.getChild(0);
      resourcePlan.setIsSetDefaultPoolPath(true);
      if (val.getType() == HiveParser.TOK_NULL) {
        resourcePlan.unsetDefaultPoolPath();
      } else {
        resourcePlan.setDefaultPoolPath(poolPath(child.getChild(0)));
      }
      break;
    }
    case HiveParser.TOK_RENAME:
      if (child.getChildCount() != 1) {
        throw new SemanticException("Expected one argument");
      }
      resourcePlan.setName(unescapeIdentifier(child.getChild(0).getText()));
      break;
    default:
      throw new SemanticException("Unexpected token in alter resource plan statement: " + child.getType());
    }
  }
  AlterResourcePlanDesc desc = new AlterResourcePlanDesc(resourcePlan, rpName, validate, isEnableActivate, false, isReplace);
  if (validate) {
    ctx.setResFile(ctx.getLocalTmpPath());
    desc.setResFile(ctx.getResFile().toString());
  }
  addServiceOutput();
  rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
  if (validate) {
    setFetchTask(createFetchTask(AlterResourcePlanDesc.getSchema()));
  }
}
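Every branch of this analyzer finishes the same way: build an AlterResourcePlanDesc, wrap it in a DDLWork together with the statement's read/write entity sets, and let TaskFactory produce the executable task. A minimal sketch of that shared tail, written as a hypothetical helper (the helper name and the all-false flag values are illustrative; only calls that appear in the method above are used):

  // Hypothetical helper, assumed to sit inside DDLSemanticAnalyzer, which supplies
  // getInputs(), getOutputs(), addServiceOutput() and rootTasks.
  private void scheduleResourcePlanChange(WMNullableResourcePlan plan, String rpName) {
    // Same constructor call as the end of analyzeAlterResourcePlan, with the
    // validate/activate/replace flags left at false for this sketch.
    AlterResourcePlanDesc desc = new AlterResourcePlanDesc(plan, rpName, false, false, false, false);
    // addServiceOutput() is invoked here exactly as in the method above.
    addServiceOutput();
    // DDLWork carries the descriptor plus the statement's read/write entities;
    // TaskFactory turns it into the task that the driver executes.
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
  }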
Use of org.apache.hadoop.hive.ql.plan.DDLWork in project hive by apache.
Class DDLSemanticAnalyzer, method analyzeAlterTableAddConstraint.
private void analyzeAlterTableAddConstraint(ASTNode ast, String tableName) throws SemanticException {
  ASTNode parent = (ASTNode) ast.getParent();
  String[] qualifiedTabName = getQualifiedTableName((ASTNode) parent.getChild(0));
  ASTNode child = (ASTNode) ast.getChild(0);
  List<SQLPrimaryKey> primaryKeys = new ArrayList<>();
  List<SQLForeignKey> foreignKeys = new ArrayList<>();
  List<SQLUniqueConstraint> uniqueConstraints = new ArrayList<>();
  switch (child.getToken().getType()) {
  case HiveParser.TOK_UNIQUE:
    BaseSemanticAnalyzer.processUniqueConstraints(qualifiedTabName[0], qualifiedTabName[1], child, uniqueConstraints);
    break;
  case HiveParser.TOK_PRIMARY_KEY:
    BaseSemanticAnalyzer.processPrimaryKeys(qualifiedTabName[0], qualifiedTabName[1], child, primaryKeys);
    break;
  case HiveParser.TOK_FOREIGN_KEY:
    BaseSemanticAnalyzer.processForeignKeys(qualifiedTabName[0], qualifiedTabName[1], child, foreignKeys);
    break;
  default:
    throw new SemanticException(ErrorMsg.NOT_RECOGNIZED_CONSTRAINT.getMsg(child.getToken().getText()));
  }
  AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, primaryKeys, foreignKeys, uniqueConstraints, null);
  rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc)));
}
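For a statement such as ALTER TABLE db.tbl ADD CONSTRAINT pk_tbl_id PRIMARY KEY (id) DISABLE NOVALIDATE, the TOK_PRIMARY_KEY branch fills the primaryKeys list before the AlterTableDesc is built. A hedged sketch of the resulting objects built by hand; the SQLPrimaryKey setters shown are assumed from the metastore's thrift-generated class, and the database, table, column, and constraint names are illustrative:

  // Hand-built stand-in for what processPrimaryKeys produces for the statement above.
  List<SQLPrimaryKey> primaryKeys = new ArrayList<>();
  SQLPrimaryKey pk = new SQLPrimaryKey();
  pk.setTable_db("db");          // assumed thrift setter names
  pk.setTable_name("tbl");
  pk.setColumn_name("id");
  pk.setKey_seq(1);
  pk.setPk_name("pk_tbl_id");
  primaryKeys.add(pk);
  // Same AlterTableDesc constructor as in the method above; foreign key and
  // unique constraint lists are empty for this statement.
  AlterTableDesc alterTblDesc = new AlterTableDesc("db.tbl", primaryKeys,
      new ArrayList<SQLForeignKey>(), new ArrayList<SQLUniqueConstraint>(), null);
  rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc)));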
Use of org.apache.hadoop.hive.ql.plan.DDLWork in project hive by apache.
Class DDLSemanticAnalyzer, method analyzeCreatePool.
private void analyzeCreatePool(ASTNode ast) throws SemanticException {
  // TODO: allow defaults for e.g. scheduling policy.
  if (ast.getChildCount() < 3) {
    throw new SemanticException("Expected more arguments: " + ast.toStringTree());
  }
  String rpName = unescapeIdentifier(ast.getChild(0).getText());
  String poolPath = poolPath(ast.getChild(1));
  WMPool pool = new WMPool(rpName, poolPath);
  for (int i = 2; i < ast.getChildCount(); ++i) {
    Tree child = ast.getChild(i);
    if (child.getChildCount() != 1) {
      throw new SemanticException("Expected 1 parameter for: " + child.getText());
    }
    String param = child.getChild(0).getText();
    switch (child.getType()) {
    case HiveParser.TOK_ALLOC_FRACTION:
      pool.setAllocFraction(Double.parseDouble(param));
      break;
    case HiveParser.TOK_QUERY_PARALLELISM:
      pool.setQueryParallelism(Integer.parseInt(param));
      break;
    case HiveParser.TOK_SCHEDULING_POLICY:
      String schedulingPolicyStr = PlanUtils.stripQuotes(param);
      if (!MetaStoreUtils.isValidSchedulingPolicy(schedulingPolicyStr)) {
        throw new SemanticException("Invalid scheduling policy " + schedulingPolicyStr);
      }
      pool.setSchedulingPolicy(schedulingPolicyStr);
      break;
    case HiveParser.TOK_PATH:
      throw new SemanticException("Invalid parameter path in create pool");
    }
  }
  if (!pool.isSetAllocFraction()) {
    throw new SemanticException("alloc_fraction should be specified for a pool");
  }
  if (!pool.isSetQueryParallelism()) {
    throw new SemanticException("query_parallelism should be specified for a pool");
  }
  CreateOrAlterWMPoolDesc desc = new CreateOrAlterWMPoolDesc(pool, poolPath, false);
  addServiceOutput();
  rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
}
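For a statement along the lines of CREATE POOL plan1.default.bi WITH ALLOC_FRACTION=0.5, QUERY_PARALLELISM=4, SCHEDULING_POLICY='fair', the method above ends up building roughly the following. The plan name, pool path, and parameter values are illustrative; only constructors and setters that appear in the code above are used:

  // Illustrative reconstruction of the objects analyzeCreatePool builds.
  WMPool pool = new WMPool("plan1", "default.bi");
  pool.setAllocFraction(0.5);        // TOK_ALLOC_FRACTION branch
  pool.setQueryParallelism(4);       // TOK_QUERY_PARALLELISM branch
  pool.setSchedulingPolicy("fair");  // TOK_SCHEDULING_POLICY branch, after validation
  // Third argument is false here, matching the create path above.
  CreateOrAlterWMPoolDesc desc = new CreateOrAlterWMPoolDesc(pool, "default.bi", false);
  addServiceOutput();
  rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));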
Use of org.apache.hadoop.hive.ql.plan.DDLWork in project hive by apache.
Class DDLSemanticAnalyzer, method analyzeAbortTxns.
/**
 * Add a task to execute "ABORT TRANSACTIONS"
 * @param ast The parsed command tree
 * @throws SemanticException Parsing failed
 */
private void analyzeAbortTxns(ASTNode ast) throws SemanticException {
  List<Long> txnids = new ArrayList<Long>();
  int numChildren = ast.getChildCount();
  for (int i = 0; i < numChildren; i++) {
    txnids.add(Long.parseLong(ast.getChild(i).getText()));
  }
  AbortTxnsDesc desc = new AbortTxnsDesc(txnids);
  rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
}
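The effect of analyzing, say, ABORT TRANSACTIONS 1 2 3 is a single DDLWork-backed task. A condensed sketch assuming the same analyzer context as above (the transaction ids are illustrative):

  // Each child token of the AST is a transaction id; three illustrative ids here.
  List<Long> txnids = Arrays.asList(1L, 2L, 3L);   // requires java.util.Arrays
  AbortTxnsDesc desc = new AbortTxnsDesc(txnids);
  rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));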
Use of org.apache.hadoop.hive.ql.plan.DDLWork in project hive by apache.
Class TaskCompiler, method compile.
@SuppressWarnings({ "nls", "unchecked" })
public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>> rootTasks, final HashSet<ReadEntity> inputs, final HashSet<WriteEntity> outputs) throws SemanticException {
  Context ctx = pCtx.getContext();
  GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx();
  List<Task<MoveWork>> mvTask = new ArrayList<>();
  List<LoadTableDesc> loadTableWork = pCtx.getLoadTableWork();
  List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork();
  boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite();
  int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit();
  if (pCtx.getFetchTask() != null) {
    if (pCtx.getFetchTask().getTblDesc() == null) {
      return;
    }
    pCtx.getFetchTask().getWork().setHiveServerQuery(SessionState.get().isHiveServerQuery());
    TableDesc resultTab = pCtx.getFetchTask().getTblDesc();
    // If the result serde is not ThriftJDBCBinarySerDe,
    // then either the ThriftFormatter or the DefaultFetchFormatter should be used.
    if (!resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
      if (SessionState.get().isHiveServerQuery()) {
        conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName());
      } else {
        String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER);
        if (formatterName == null || formatterName.isEmpty()) {
          conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName());
        }
      }
    }
    return;
  }
  optimizeOperatorPlan(pCtx, inputs, outputs);
  /*
   * In case of a select, use a fetch task instead of a move task.
   * If the select is from analyze table column rewrite, don't create a fetch task. Instead create
   * a column stats task later.
   */
  if (pCtx.getQueryProperties().isQuery() && !isCStats) {
    if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
      throw new SemanticException(ErrorMsg.INVALID_LOAD_TABLE_FILE_WORK.getMsg());
    }
    LoadFileDesc loadFileDesc = loadFileWork.get(0);
    String cols = loadFileDesc.getColumns();
    String colTypes = loadFileDesc.getColumnTypes();
    String resFileFormat;
    TableDesc resultTab = pCtx.getFetchTableDesc();
    if (resultTab == null) {
      resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
      if (SessionState.get().getIsUsingThriftJDBCBinarySerDe() && (resFileFormat.equalsIgnoreCase("SequenceFile"))) {
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, ThriftJDBCBinarySerDe.class);
        // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
        // read formatted thrift objects from the output SequenceFile written by Tasks.
        conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
      } else {
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, LazySimpleSerDe.class);
      }
    } else {
      if (resultTab.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB).equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
        // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
        // read formatted thrift objects from the output SequenceFile written by Tasks.
        conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
      }
    }
    FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit);
    boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
    fetch.setHiveServerQuery(isHiveServerQuery);
    fetch.setSource(pCtx.getFetchSource());
    fetch.setSink(pCtx.getFetchSink());
    if (isHiveServerQuery && null != resultTab && resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
      fetch.setIsUsingThriftJDBCBinarySerDe(true);
    } else {
      fetch.setIsUsingThriftJDBCBinarySerDe(false);
    }
    pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch));
    // For the FetchTask, the limit optimization requires we fetch all the rows
    // in memory and count how many rows we get. It's not practical if the
    // limit factor is too big.
    int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
    if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
      LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit + ". Doesn't qualify limit optimization.");
      globalLimitCtx.disableOpt();
    }
    if (outerQueryLimit == 0) {
      // Believe it or not, some tools do generate queries with limit 0 and then expect
      // the query to run quickly. Let's meet their requirement.
      LOG.info("Limit 0. No query execution needed.");
      return;
    }
  } else if (!isCStats) {
    for (LoadTableDesc ltd : loadTableWork) {
      Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
      mvTask.add(tsk);
    }
    boolean oneLoadFileForCtas = true;
    for (LoadFileDesc lfd : loadFileWork) {
      if (pCtx.getQueryProperties().isCTAS() || pCtx.getQueryProperties().isMaterializedView()) {
        if (!oneLoadFileForCtas) {
          // Should not have more than 1 load file for CTAS.
          throw new SemanticException("One query is not expected to contain multiple CTAS load statements");
        }
        setLoadFileLocation(pCtx, lfd);
        oneLoadFileForCtas = false;
      }
      mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false)));
    }
  }
  generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs);
  // For each task, set the key descriptor for the reducer.
  for (Task<? extends Serializable> rootTask : rootTasks) {
    GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask);
  }
  // If a task contains an operator which instructs bucketizedhiveinputformat
  // to be used, please do so.
  for (Task<? extends Serializable> rootTask : rootTasks) {
    setInputFormat(rootTask);
  }
  optimizeTaskPlan(rootTasks, pCtx, ctx);
  /*
   * If the query was the result of analyze table column compute statistics rewrite, create
   * a column stats task instead of a fetch task to persist stats to the metastore.
   * As per HIVE-15903, we will also collect table stats when the user computes column stats.
   * That means, if isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty(),
   * we need to collect table stats:
   * if isCStats, we need to include a basic stats task;
   * else it is ColumnStatsAutoGather, which should have a move task with a stats task already.
   */
  if (isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()) {
    // Map from table name to task (ColumnStatsTask which includes a BasicStatsTask).
    Map<String, StatsTask> map = new LinkedHashMap<>();
    if (isCStats) {
      if (rootTasks == null || rootTasks.size() != 1 || pCtx.getTopOps() == null || pCtx.getTopOps().size() != 1) {
        throw new SemanticException("Can not find correct root task!");
      }
      try {
        Task<? extends Serializable> root = rootTasks.iterator().next();
        StatsTask tsk = (StatsTask) genTableStats(pCtx, pCtx.getTopOps().values().iterator().next(), root, outputs);
        root.addDependentTask(tsk);
        map.put(extractTableFullName(tsk), tsk);
      } catch (HiveException e) {
        throw new SemanticException(e);
      }
      genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, map, outerQueryLimit, 0);
    } else {
      Set<Task<? extends Serializable>> leafTasks = new LinkedHashSet<Task<? extends Serializable>>();
      getLeafTasks(rootTasks, leafTasks);
      List<Task<? extends Serializable>> nonStatsLeafTasks = new ArrayList<>();
      for (Task<? extends Serializable> tsk : leafTasks) {
        // Map table name to the correct ColumnStatsTask.
        if (tsk instanceof StatsTask) {
          map.put(extractTableFullName((StatsTask) tsk), (StatsTask) tsk);
        } else {
          nonStatsLeafTasks.add(tsk);
        }
      }
      // Add cStatsTask as a dependent of all the nonStatsLeafTasks.
      for (Task<? extends Serializable> tsk : nonStatsLeafTasks) {
        for (Task<? extends Serializable> cStatsTask : map.values()) {
          tsk.addDependentTask(cStatsTask);
        }
      }
      for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx.getColumnStatsAutoGatherContexts()) {
        if (!columnStatsAutoGatherContext.isInsertInto()) {
          genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), map, outerQueryLimit, 0);
        } else {
          int numBitVector;
          try {
            numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
          } catch (Exception e) {
            throw new SemanticException(e.getMessage());
          }
          genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), map, outerQueryLimit, numBitVector);
        }
      }
    }
  }
  decideExecMode(rootTasks, ctx, globalLimitCtx);
  if (pCtx.getQueryProperties().isCTAS() && !pCtx.getCreateTable().isMaterialization()) {
    // Generate a DDL task and make it a dependent task of the leaf.
    CreateTableDesc crtTblDesc = pCtx.getCreateTable();
    crtTblDesc.validate(conf);
    Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(inputs, outputs, crtTblDesc));
    patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtTblTask);
  } else if (pCtx.getQueryProperties().isMaterializedView()) {
    // Generate a DDL task and make it a dependent task of the leaf.
    CreateViewDesc viewDesc = pCtx.getCreateViewDesc();
    Task<? extends Serializable> crtViewTask = TaskFactory.get(new DDLWork(inputs, outputs, viewDesc));
    patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtViewTask);
  } else if (pCtx.getMaterializedViewUpdateDesc() != null) {
    // If there is a materialized view update desc, we introduce it at the end
    // of the tree.
    MaterializedViewDesc materializedViewDesc = pCtx.getMaterializedViewUpdateDesc();
    Set<Task<? extends Serializable>> leafTasks = new LinkedHashSet<Task<? extends Serializable>>();
    getLeafTasks(rootTasks, leafTasks);
    Task<? extends Serializable> materializedViewTask = TaskFactory.get(materializedViewDesc, conf);
    for (Task<? extends Serializable> task : leafTasks) {
      task.addDependentTask(materializedViewTask);
    }
  }
  if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) {
    LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
    pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
  }
  if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
    LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
    globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());
  }
  Interner<TableDesc> interner = Interners.newStrongInterner();
  for (Task<? extends Serializable> rootTask : rootTasks) {
    GenMapRedUtils.internTableDesc(rootTask, interner);
    GenMapRedUtils.deriveFinalExplainAttributes(rootTask, pCtx.getConf());
  }
}
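Near the end of compile(), the materialized view update task is attached after every leaf of the task tree, and the CTAS/materialized-view branches wrap their descriptors in a DDLWork in the same way the analyzer methods above do. The leaf-wiring step is worth isolating; a hypothetical helper that mirrors the inline code (getLeafTasks and addDependentTask are the calls used above, the helper name is invented):

  // Hypothetical helper mirroring the inline wiring in compile(): make 'last'
  // run only after every current leaf task of the plan has finished.
  private void runAfterAllLeaves(List<Task<? extends Serializable>> rootTasks, Task<? extends Serializable> last) {
    Set<Task<? extends Serializable>> leafTasks = new LinkedHashSet<Task<? extends Serializable>>();
    getLeafTasks(rootTasks, leafTasks);
    for (Task<? extends Serializable> leaf : leafTasks) {
      leaf.addDependentTask(last);
    }
  }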