use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class DDLTask method unlockTable.
/**
 * Unlock the table/partition specified.
 *
 * @param db
 *          the Hive database handle
 * @param unlockTbl
 *          the table/partition to be unlocked
 * @return 0 when execution succeeds, above 0 if it fails
 * @throws HiveException
 *           if an unexpected error occurs
 */
private int unlockTable(Hive db, UnlockTableDesc unlockTbl) throws HiveException {
  Context ctx = driverContext.getCtx();
  HiveTxnManager txnManager = ctx.getHiveTxnManager();
  return txnManager.unlockTable(db, unlockTbl);
}
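The same delegation works from any code path that already holds the query Context; a minimal sketch (the helper name and its placement are illustrative, only getHiveTxnManager() and unlockTable(db, desc) come from the snippet above):

// Illustrative sketch: unlock through whatever Context the caller holds.
// The 0 / non-zero return convention follows the Javadoc above.
private static int unlockViaContext(Context ctx, Hive db, UnlockTableDesc desc) throws HiveException {
  HiveTxnManager txnManager = ctx.getHiveTxnManager();  // lock state is scoped to the query Context
  return txnManager.unlockTable(db, desc);
}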
use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class UpdateDeleteSemanticAnalyzer method parseRewrittenQuery.
/**
 * Parse the newly generated SQL statement to get a new AST.
 *
 * @param rewrittenQueryStr the rewritten SQL text
 * @param originalQuery the original statement, used only for logging
 * @return the new AST together with the Context it was parsed under
 */
private ReparseResult parseRewrittenQuery(StringBuilder rewrittenQueryStr, String originalQuery)
    throws SemanticException {
  // Set dynamic partitioning to nonstrict so that queries do not need any partition
  // references.
  // TODO: this may be a perf issue as it prevents the optimizer.. or not
  HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
  // Parse the rewritten query string
  Context rewrittenCtx;
  try {
    rewrittenCtx = new Context(conf);
    // We keep track of all the contexts that are created by this query
    // so we can clear them when we finish execution
    ctx.addRewrittenStatementContext(rewrittenCtx);
  } catch (IOException e) {
    throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg());
  }
  rewrittenCtx.setExplainConfig(ctx.getExplainConfig());
  rewrittenCtx.setIsUpdateDeleteMerge(true);
  rewrittenCtx.setCmd(rewrittenQueryStr.toString());
  ASTNode rewrittenTree;
  try {
    LOG.info("Going to reparse <" + originalQuery + "> as \n<" + rewrittenQueryStr.toString() + ">");
    rewrittenTree = ParseUtils.parse(rewrittenQueryStr.toString(), rewrittenCtx);
  } catch (ParseException e) {
    throw new SemanticException(ErrorMsg.UPDATEDELETE_PARSE_ERROR.getMsg(), e);
  }
  return new ReparseResult(rewrittenTree, rewrittenCtx);
}
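The Context-specific part of this method can be read as a small, reusable pattern; a minimal sketch under the assumption that the caller supplies the original Context, the HiveConf, and the rewritten SQL (the helper name is hypothetical, every call appears in the snippet above, and the update/delete-specific flag is left out):

// Illustrative sketch of the reparse pattern: build a child Context for the rewritten
// SQL, register it on the original Context so it is cleaned up with the query, and
// parse the string against the child Context.
private static ASTNode reparse(Context origCtx, HiveConf conf, String rewrittenSql)
    throws IOException, ParseException {
  Context childCtx = new Context(conf);              // may throw IOException (see the catch block above)
  origCtx.addRewrittenStatementContext(childCtx);    // cleared when the outer statement finishes
  childCtx.setExplainConfig(origCtx.getExplainConfig());
  childCtx.setCmd(rewrittenSql);
  return ParseUtils.parse(rewrittenSql, childCtx);
}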
use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class ColumnStatsAutoGatherContext method genSelOpForAnalyze.
@SuppressWarnings("rawtypes")
private Operator genSelOpForAnalyze(String analyzeCommand, Context origCtx)
    throws IOException, ParseException, SemanticException {
  // 0. initialization
  Context ctx = new Context(conf);
  ctx.setExplainConfig(origCtx.getExplainConfig());
  ASTNode tree = ParseUtils.parse(analyzeCommand, ctx);
  // 1. get the ColumnStatsSemanticAnalyzer
  QueryState queryState = new QueryState.Builder().withHiveConf(conf).build();
  BaseSemanticAnalyzer baseSem = SemanticAnalyzerFactory.get(queryState, tree);
  ColumnStatsSemanticAnalyzer colSem = (ColumnStatsSemanticAnalyzer) baseSem;
  // 2. get the rewritten AST
  ASTNode ast = colSem.rewriteAST(tree, this);
  baseSem = SemanticAnalyzerFactory.get(queryState, ast);
  SemanticAnalyzer sem = (SemanticAnalyzer) baseSem;
  QB qb = new QB(null, null, false);
  ASTNode child = ast;
  ParseContext subPCtx = sem.getParseContext();
  subPCtx.setContext(ctx);
  sem.initParseCtx(subPCtx);
  sem.doPhase1(child, qb, sem.initPhase1Ctx(), null);
  // This will trigger new calls to the metastore to collect metadata
  // TODO: cache the information from the metastore
  sem.getMetaData(qb);
  Operator<?> operator = sem.genPlan(qb);
  // 3. populate the load file work so that ColumnStatsTask can work
  loadFileWork.addAll(sem.getLoadFileWork());
  // 4. because there is only one TS for the analyze statement, we can get it
  if (sem.topOps.values().size() != 1) {
    throw new SemanticException(
        "ColumnStatsAutoGatherContext is expecting exactly one TS, but finds " + sem.topOps.values().size());
  }
  operator = sem.topOps.values().iterator().next();
  // 5. get the first SEL after TS
  while (!(operator instanceof SelectOperator)) {
    operator = operator.getChildOperators().get(0);
  }
  return operator;
}
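Step 5 above assumes a linear TS -> ... -> SEL chain; a minimal sketch of that walk in isolation, with an explicit guard added as an assumption (the original loop relies on the analyze plan always containing a SelectOperator):

// Illustrative sketch: follow single-child edges from a TableScan until the first
// SelectOperator. The empty-children guard is an addition; the loop in the snippet
// above does without it.
private static Operator<?> firstSelectBelow(Operator<?> op) throws SemanticException {
  while (!(op instanceof SelectOperator)) {
    if (op.getChildOperators() == null || op.getChildOperators().isEmpty()) {
      throw new SemanticException("No SelectOperator found below the TableScan");
    }
    op = op.getChildOperators().get(0);   // analyze plans are linear, so follow the first child
  }
  return op;
}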
use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class MapReduceCompiler method decideExecMode.
@Override
protected void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
    GlobalLimitCtx globalLimitCtx) throws SemanticException {
  // bypass for explain queries for now
  if (ctx.isExplainSkipExecution()) {
    return;
  }
  // user has told us to run in local mode or doesn't want auto-local mode
  if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
    return;
  }
  final Context lCtx = ctx;
  PathFilter p = new PathFilter() {
    @Override
    public boolean accept(Path file) {
      return !lCtx.isMRTmpFileURI(file.toUri().getPath());
    }
  };
  List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);
  // map-reduce jobs will be run locally based on data size
  // first find out if any of the jobs needs to run non-locally
  boolean hasNonLocalJob = false;
  for (ExecDriver mrtask : mrtasks) {
    try {
      ContentSummary inputSummary = Utilities.getInputSummary(ctx, mrtask.getWork().getMapWork(), p);
      int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
      long estimatedInput;
      if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
        // If the global limit optimization is triggered, estimate the input data actually
        // needed based on the limit rows:
        //   estimatedInput = (offset + limit) * max_size_per_row * (estimated_num_map + 1)
        long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
        estimatedInput = (globalLimitCtx.getGlobalOffset() + globalLimitCtx.getGlobalLimit()) * sizePerRow;
        long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
        long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
        estimatedInput = estimatedInput * (estimatedNumMap + 1);
      } else {
        estimatedInput = inputSummary.getLength();
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + ","
            + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: " + estimatedInput);
      }
      if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput, inputSummary.getFileCount()) != null) {
        hasNonLocalJob = true;
        break;
      } else {
        mrtask.setLocalMode(true);
      }
    } catch (IOException e) {
      throw new SemanticException(e);
    }
  }
  if (!hasNonLocalJob) {
    // Entire query can be run locally.
    // Save the current tracker value and restore it when done.
    ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(conf));
    ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
    console.printInfo("Automatically selecting local only mode for query");
  }
}
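The input estimate in the global-limit branch is plain arithmetic; a minimal sketch of just that formula (the method and parameter names are illustrative, the constants come from HIVELIMITMAXROWSIZE and MAPREDMINSPLITSIZE as above):

// Illustrative sketch of the estimate used when the global limit optimization is on:
//   estimatedInput = (offset + limit) * sizePerRow * (estimatedNumMap + 1)
// where estimatedNumMap is derived from the total input length and the minimum split size.
static long estimateLimitedInput(long offset, long limit, long sizePerRow,
    long inputLength, long minSplitSize) {
  long estimatedNumMap = inputLength / minSplitSize + 1;   // rough mapper count
  return (offset + limit) * sizePerRow * (estimatedNumMap + 1);
}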
use of org.apache.hadoop.hive.ql.Context in project hive by apache.
the class GenSparkUtils method createMoveTask.
/**
* Create and add any dependent move tasks.
*
* This is forked from {@link GenMapRedUtils}. The difference is that it doesn't check
* 'isLinkedFileSink' and does not set parent dir for the linked file sinks.
*/
public static Path createMoveTask(Task<? extends Serializable> currTask, boolean chDir, FileSinkOperator fsOp,
    ParseContext parseCtx, List<Task<MoveWork>> mvTasks, HiveConf hconf, DependencyCollectionTask dependencyTask) {
  Path dest = null;
  FileSinkDesc fileSinkDesc = fsOp.getConf();
  if (chDir) {
    dest = fsOp.getConf().getFinalDirName();
    // generate the temporary file
    // it must be on the same file system as the current destination
    Context baseCtx = parseCtx.getContext();
    Path tmpDir = baseCtx.getExternalTmpPath(dest);
    // Change all the linked file sink descriptors
    if (fileSinkDesc.getLinkedFileSinkDesc() != null) {
      for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
        fsConf.setDirName(tmpDir);
      }
    } else {
      fileSinkDesc.setDirName(tmpDir);
    }
  }
  Task<MoveWork> mvTask = null;
  if (!chDir) {
    mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fileSinkDesc.getFinalDirName(), false);
  }
  // Set the move task to be dependent on the current task
  if (mvTask != null) {
    GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask);
  }
  return dest;
}
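The chDir branch is where the Context comes in; a minimal sketch of that step on its own (the helper name is illustrative, the calls mirror the snippet above):

// Illustrative sketch: derive an external temp path from the final destination via the
// Context and repoint the file sink descriptor(s) at it.
private static Path redirectToTmp(Context baseCtx, FileSinkDesc fileSinkDesc) {
  Path dest = fileSinkDesc.getFinalDirName();
  Path tmpDir = baseCtx.getExternalTmpPath(dest);       // same file system as the final destination
  if (fileSinkDesc.getLinkedFileSinkDesc() != null) {
    for (FileSinkDesc linked : fileSinkDesc.getLinkedFileSinkDesc()) {
      linked.setDirName(tmpDir);                        // repoint every linked sink
    }
  } else {
    fileSinkDesc.setDirName(tmpDir);
  }
  return dest;
}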