Use of org.apache.hadoop.hive.ql.Context in project hive by apache: class ColumnStatsAutoGatherContext, method genSelOpForAnalyze.
@SuppressWarnings("rawtypes")
private Operator genSelOpForAnalyze(String analyzeCommand, Context origCtx)
    throws IOException, ParseException, SemanticException {
  // 0. initialization
  Context ctx = new Context(conf);
  ctx.setExplainConfig(origCtx.getExplainConfig());
  ASTNode tree = ParseUtils.parse(analyzeCommand, ctx);
  // 1. get the ColumnStatsSemanticAnalyzer
  QueryState queryState = new QueryState.Builder().withHiveConf(conf).build();
  BaseSemanticAnalyzer baseSem = SemanticAnalyzerFactory.get(queryState, tree);
  ColumnStatsSemanticAnalyzer colSem = (ColumnStatsSemanticAnalyzer) baseSem;
  // 2. get the rewritten AST
  ASTNode ast = colSem.rewriteAST(tree, this);
  baseSem = SemanticAnalyzerFactory.get(queryState, ast);
  SemanticAnalyzer sem = (SemanticAnalyzer) baseSem;
  QB qb = new QB(null, null, false);
  ASTNode child = ast;
  ParseContext subPCtx = sem.getParseContext();
  subPCtx.setContext(ctx);
  sem.initParseCtx(subPCtx);
  sem.doPhase1(child, qb, sem.initPhase1Ctx(), null);
  // This will trigger new calls to the metastore to collect metadata.
  // TODO: cache the information from the metastore
  sem.getMetaData(qb);
  Operator<?> operator = sem.genPlan(qb);
  // 3. populate the load file work so that ColumnStatsTask can work
  loadFileWork.addAll(sem.getLoadFileWork());
  // 4. because there is only one TS for the analyze statement, we can fetch it directly
  if (sem.topOps.values().size() != 1) {
    throw new SemanticException("ColumnStatsAutoGatherContext is expecting exactly one TS, but finds "
        + sem.topOps.values().size());
  }
  operator = sem.topOps.values().iterator().next();
  // 5. get the first SEL after the TS
  while (!(operator instanceof SelectOperator)) {
    operator = operator.getChildOperators().get(0);
  }
  return operator;
}
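
The Context-specific detail above is in step 0: the rewritten analyze command is compiled against a fresh Context that inherits the original compilation's explain configuration. A minimal sketch of that pattern, assuming only the calls used above (the class and method names here are hypothetical, not part of the Hive source):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ParseUtils;

public class ChildContextSketch {
  // Parse a command in its own Context while carrying over the EXPLAIN settings of the
  // Context it was derived from, so both compilations behave consistently under EXPLAIN.
  public static ASTNode parseWithInheritedExplainConfig(HiveConf conf, Context origCtx,
      String command) throws Exception {
    Context childCtx = new Context(conf);                   // fresh compilation scope
    childCtx.setExplainConfig(origCtx.getExplainConfig());  // inherit EXPLAIN configuration
    return ParseUtils.parse(command, childCtx);             // parse against the child Context
  }
}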
Use of org.apache.hadoop.hive.ql.Context in project hive by apache: class ExplainSemanticAnalyzer, method analyzeInternal.
@SuppressWarnings("unchecked")
@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
  final int childCount = ast.getChildCount();
  // Skip child 0 (the statement being explained); the remaining children are EXPLAIN options.
  int i = 1;
  while (i < childCount) {
    int explainOptions = ast.getChild(i).getType();
    if (explainOptions == HiveParser.KW_FORMATTED) {
      config.setFormatted(true);
    } else if (explainOptions == HiveParser.KW_EXTENDED) {
      config.setExtended(true);
    } else if (explainOptions == HiveParser.KW_DEPENDENCY) {
      config.setDependency(true);
    } else if (explainOptions == HiveParser.KW_LOGICAL) {
      config.setLogical(true);
    } else if (explainOptions == HiveParser.KW_AUTHORIZATION) {
      config.setAuthorize(true);
    } else if (explainOptions == HiveParser.KW_ANALYZE) {
      config.setAnalyze(AnalyzeState.RUNNING);
      config.setExplainRootPath(ctx.getMRTmpPath());
    } else if (explainOptions == HiveParser.KW_VECTORIZATION) {
      config.setVectorization(true);
      if (i + 1 < childCount) {
        int vectorizationOption = ast.getChild(i + 1).getType();
        // [ONLY]
        if (vectorizationOption == HiveParser.TOK_ONLY) {
          config.setVectorizationOnly(true);
          i++;
          if (i + 1 >= childCount) {
            break;
          }
          vectorizationOption = ast.getChild(i + 1).getType();
        }
        // [SUMMARY|OPERATOR|EXPRESSION|DETAIL]
        if (vectorizationOption == HiveParser.TOK_SUMMARY) {
          config.setVectorizationDetailLevel(VectorizationDetailLevel.SUMMARY);
          i++;
        } else if (vectorizationOption == HiveParser.TOK_OPERATOR) {
          config.setVectorizationDetailLevel(VectorizationDetailLevel.OPERATOR);
          i++;
        } else if (vectorizationOption == HiveParser.TOK_EXPRESSION) {
          config.setVectorizationDetailLevel(VectorizationDetailLevel.EXPRESSION);
          i++;
        } else if (vectorizationOption == HiveParser.TOK_DETAIL) {
          config.setVectorizationDetailLevel(VectorizationDetailLevel.DETAIL);
          i++;
        }
      }
    } else {
      // UNDONE: UNKNOWN OPTION?
    }
    i++;
  }
  ctx.setExplainConfig(config);
  ctx.setExplainPlan(true);
  ASTNode input = (ASTNode) ast.getChild(0);
  // EXPLAIN ANALYZE has two phases: first (AnalyzeState.RUNNING) the query is executed to
  // collect the runtime #rows, then (AnalyzeState.ANALYZING) the plan is explained with
  // those collected counts attached.
  if (config.getAnalyze() == AnalyzeState.RUNNING) {
    String query = ctx.getTokenRewriteStream().toString(input.getTokenStartIndex(),
        input.getTokenStopIndex());
    LOG.info("Explain analyze (running phase) for query " + query);
    Context runCtx = null;
    try {
      runCtx = new Context(conf);
      // runCtx and ctx share the configuration, but not isExplainPlan()
      runCtx.setExplainConfig(config);
      Driver driver = new Driver(conf, runCtx, queryState.getLineageState());
      CommandProcessorResponse ret = driver.run(query);
      if (ret.getResponseCode() == 0) {
        // Drain and discard the results; only the runtime statistics are needed.
        while (driver.getResults(new ArrayList<String>())) {
        }
      } else {
        throw new SemanticException(ret.getErrorMessage(), ret.getException());
      }
      config.setOpIdToRuntimeNumRows(aggregateStats(config.getExplainRootPath()));
    } catch (IOException e1) {
      throw new SemanticException(e1);
    }
    ctx.resetOpContext();
    ctx.resetStream();
    TaskFactory.resetId();
    LOG.info("Explain analyze (analyzing phase) for query " + query);
    config.setAnalyze(AnalyzeState.ANALYZING);
  }
  // Creating a new QueryState unfortunately causes all .q.out files to change - do this in a separate ticket.
  // Sharing QueryState between generating the plan and executing the query seems bad.
  // BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(new QueryState(queryState.getConf()), input);
  BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, input);
  sem.analyze(input, ctx);
  sem.validate();
  ctx.setResFile(ctx.getLocalTmpPath());
  List<Task<? extends Serializable>> tasks = sem.getAllRootTasks();
  if (tasks == null) {
    tasks = Collections.emptyList();
  }
  FetchTask fetchTask = sem.getFetchTask();
  if (fetchTask != null) {
    // Initialize the fetch work so that the operator tree will be constructed.
    fetchTask.getWork().initializeForFetch(ctx.getOpContext());
  }
  ParseContext pCtx = null;
  if (sem instanceof SemanticAnalyzer) {
    pCtx = ((SemanticAnalyzer) sem).getParseContext();
  }
  config.setUserLevelExplain(!config.isExtended()
      && !config.isFormatted()
      && !config.isDependency()
      && !config.isLogical()
      && !config.isAuthorize()
      && ((HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_EXPLAIN_USER)
              && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez"))
          || (HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_SPARK_EXPLAIN_USER)
              && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark"))));
  ExplainWork work = new ExplainWork(ctx.getResFile(), pCtx, tasks, fetchTask, sem, config, ctx.getCboInfo());
  work.setAppendTaskType(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES));
  ExplainTask explTask = (ExplainTask) TaskFactory.get(work);
  fieldList = explTask.getResultSchema();
  rootTasks.add(explTask);
}
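
For reference, a hedged sketch of how a few EXPLAIN statements map onto the flags set by the option-parsing loop above. The statements are illustrative only; exact grammar support depends on the Hive version, and the table name "src" is just a placeholder.

public class ExplainOptionExamples {
  // KW_FORMATTED  -> config.setFormatted(true)
  static final String FORMATTED =
      "EXPLAIN FORMATTED SELECT count(*) FROM src";

  // KW_ANALYZE    -> config.setAnalyze(AnalyzeState.RUNNING),
  //                  config.setExplainRootPath(ctx.getMRTmpPath())
  static final String ANALYZE =
      "EXPLAIN ANALYZE SELECT key FROM src WHERE key > 10";

  // KW_VECTORIZATION TOK_ONLY TOK_SUMMARY
  //               -> config.setVectorization(true), config.setVectorizationOnly(true),
  //                  config.setVectorizationDetailLevel(VectorizationDetailLevel.SUMMARY)
  static final String VECTORIZATION_ONLY_SUMMARY =
      "EXPLAIN VECTORIZATION ONLY SUMMARY SELECT key FROM src";
}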
Use of org.apache.hadoop.hive.ql.Context in project hive by apache: class MapReduceCompiler, method decideExecMode.
@Override
protected void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
    GlobalLimitCtx globalLimitCtx) throws SemanticException {
  // bypass for explain queries for now
  if (ctx.isExplainSkipExecution()) {
    return;
  }
  // user has told us to run in local mode or doesn't want auto-local mode
  if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
    return;
  }
  final Context lCtx = ctx;
  PathFilter p = new PathFilter() {
    @Override
    public boolean accept(Path file) {
      return !lCtx.isMRTmpFileURI(file.toUri().getPath());
    }
  };
  List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);
  // map-reduce jobs will be run locally based on data size;
  // first find out if any of the jobs needs to run non-locally
  boolean hasNonLocalJob = false;
  for (ExecDriver mrtask : mrtasks) {
    try {
      ContentSummary inputSummary = Utilities.getInputSummary(ctx, mrtask.getWork().getMapWork(), p);
      int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
      long estimatedInput;
      if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
        // If the global limit optimization is triggered, estimate the input data actually
        // needed from the limit rows:
        //   estimatedInput = (offset + limit) * max_size_per_row * (estimatedNumMap + 1)
        //   where estimatedNumMap = input_length / min_split_size + 1
        long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
        estimatedInput = (globalLimitCtx.getGlobalOffset() + globalLimitCtx.getGlobalLimit()) * sizePerRow;
        long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
        long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
        estimatedInput = estimatedInput * (estimatedNumMap + 1);
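        // Worked example with hypothetical numbers (not from the Hive source): limit 1000 rows,
        // offset 0, max row size 100 bytes, input length 1 GB, min split size 128 MB:
        //   estimatedNumMap = 1,073,741,824 / 134,217,728 + 1 = 9
        //   estimatedInput  = (0 + 1000) * 100 * (9 + 1)      = 1,000,000 bytes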
      } else {
        estimatedInput = inputSummary.getLength();
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + ","
            + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: " + estimatedInput);
      }
      if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput,
          inputSummary.getFileCount()) != null) {
        hasNonLocalJob = true;
        break;
      } else {
        mrtask.setLocalMode(true);
      }
    } catch (IOException e) {
      throw new SemanticException(e);
    }
  }
  if (!hasNonLocalJob) {
    // Entire query can be run locally.
    // Save the current tracker value and restore it when done.
    ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(conf));
    ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
    console.printInfo("Automatically selecting local only mode for query");
  }
}
Use of org.apache.hadoop.hive.ql.Context in project hive by apache: class GenSparkUtils, method createMoveTask.
/**
 * Create and add any dependent move tasks.
 *
 * This is forked from {@link GenMapRedUtils}. The difference is that it does not check
 * 'isLinkedFileSink' and does not set a parent dir for the linked file sinks.
 */
public static Path createMoveTask(Task<? extends Serializable> currTask, boolean chDir,
    FileSinkOperator fsOp, ParseContext parseCtx, List<Task<MoveWork>> mvTasks, HiveConf hconf,
    DependencyCollectionTask dependencyTask) {
  Path dest = null;
  FileSinkDesc fileSinkDesc = fsOp.getConf();
  if (chDir) {
    dest = fsOp.getConf().getFinalDirName();
    // Generate the temporary file; it must be on the same file system as the current destination.
    Context baseCtx = parseCtx.getContext();
    Path tmpDir = baseCtx.getExternalTmpPath(dest);
    // Change all the linked file sink descriptors
    if (fileSinkDesc.getLinkedFileSinkDesc() != null) {
      for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
        fsConf.setDirName(tmpDir);
      }
    } else {
      fileSinkDesc.setDirName(tmpDir);
    }
  }
  Task<MoveWork> mvTask = null;
  if (!chDir) {
    mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fileSinkDesc.getFinalDirName(), false);
  }
  // Set the move task to be dependent on the current task
  if (mvTask != null) {
    GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask);
  }
  return dest;
}
Use of org.apache.hadoop.hive.ql.Context in project hive by apache: class TestDbTxnManager2, method setUp.
@Before
public void setUp() throws Exception {
  SessionState.start(conf);
  ctx = new Context(conf);
  driver = new Driver(new QueryState.Builder().withHiveConf(conf).nonIsolated().build(), null);
  TxnDbUtil.cleanDb(conf);
  TxnDbUtil.prepDb(conf);
  SessionState ss = SessionState.get();
  ss.initTxnMgr(conf);
  txnMgr = ss.getTxnMgr();
  Assert.assertTrue(txnMgr instanceof DbTxnManager);
  txnHandler = TxnUtils.getTxnStore(conf);
}
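
A hedged sketch of how the objects prepared in setUp are typically exercised in this style of test: a statement is compiled (not run) with the shared Driver, and the transaction manager then acquires locks for the resulting plan against the shared Context. The helper method, table, and user names below are illustrative, not part of the test class, and assume the driver, txnMgr, and ctx fields initialized above.

private void compileAndAcquireLocks(String query, String user) throws Exception {
  // Compile only; the resulting plan is what the lock manager needs.
  CommandProcessorResponse cpr = driver.compileAndRespond(query);
  Assert.assertEquals("failed to compile: " + query, 0, cpr.getResponseCode());
  // Acquire locks for the compiled plan against the shared Context from setUp.
  txnMgr.acquireLocks(driver.getPlan(), ctx, user);
}

// Illustrative call: compileAndAcquireLocks("select * from T1", "fifer");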