Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class GenMapRedUtils, method createMoveTask.
/**
 * Create and add any dependent move tasks.
 *
 * @param currTask the task whose output may need to be moved
 * @param chDir whether the file sink should write to a temporary directory that is moved later
 * @param fsOp the FileSinkOperator whose output is being handled
 * @param parseCtx the parse context of the current query
 * @param mvTasks the candidate move tasks already generated for the query
 * @param hconf the Hive configuration
 * @param dependencyTask optional dependency collection task the move task should be chained to
 * @return the merge input directory when {@code chDir} is true, otherwise {@code null}
 */
public static Path createMoveTask(Task<?> currTask, boolean chDir, FileSinkOperator fsOp, ParseContext parseCtx,
    List<Task<MoveWork>> mvTasks, HiveConf hconf, DependencyCollectionTask dependencyTask) {
  Path dest = null;
  FileSinkDesc fileSinkDesc = fsOp.getConf();
  boolean isMmTable = fileSinkDesc.isMmTable();
  boolean isDirectInsert = fileSinkDesc.isDirectInsert();
  if (chDir) {
    dest = fileSinkDesc.getMergeInputDirName();
    /*
     * Skip temporary file generation for:
     * 1. MM tables
     * 2. INSERT operation on full ACID table
     */
    if (!isMmTable && !isDirectInsert) {
      // generate the temporary file
      // it must be on the same file system as the current destination
      Context baseCtx = parseCtx.getContext();
      // Create the required temporary file in the HDFS location if the destination
      // path of the FileSinkOperator table is a blobstore path.
      Path tmpDir = baseCtx.getTempDirForFinalJobPath(fileSinkDesc.getDestPath());
      // Change all the linked file sink descriptors
      if (fileSinkDesc.isLinkedFileSink()) {
        for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
          fsConf.setDirName(new Path(tmpDir, fsConf.getDirName().getName()));
          if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
            Utilities.FILE_OP_LOGGER.trace("createMoveTask setting tmpDir for LinkedFileSink chDir "
                + fsConf.getDirName() + "; dest was " + fileSinkDesc.getDestPath());
          }
        }
      } else {
        fileSinkDesc.setDirName(tmpDir);
        if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
          Utilities.FILE_OP_LOGGER.trace("createMoveTask setting tmpDir chDir " + tmpDir
              + "; dest was " + fileSinkDesc.getDestPath());
        }
      }
    }
  }
  Task<MoveWork> mvTask = null;
  if (!chDir) {
    mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fsOp.getConf().getFinalDirName(), isMmTable,
        isDirectInsert, fsOp.getConf().getMoveTaskId(), fsOp.getConf().getAcidOperation());
  }
  // Set the move task to be dependent on the current task
  if (mvTask != null) {
    GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask);
  }
  return dest;
}
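The central Context interaction above is obtaining a staging directory on the same file system as the final destination. The following is a minimal sketch of that call, assuming a ParseContext is at hand; the destination path is purely illustrative.

  // Hedged sketch: ask the query Context for a scratch directory to stage
  // FileSinkOperator output before a move/merge task relocates it.
  Context baseCtx = parseCtx.getContext();
  Path finalDest = new Path("/warehouse/demo_db/demo_table");   // hypothetical destination
  Path stagingDir = baseCtx.getTempDirForFinalJobPath(finalDest);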
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class ColumnStatsAutoGatherContext, method genSelOp.
private Operator genSelOp(String command, boolean rewritten, Context origCtx) throws ParseException, SemanticException {
  // 1. Initialization
  Context ctx = new Context(conf);
  origCtx.addSubContext(ctx);
  ctx.setOpContext(origCtx.getOpContext());
  ctx.setExplainConfig(origCtx.getExplainConfig());
  // 2. Parse the tree and create the semantic analyzer. If we need to rewrite
  // the analyze statement, we do it now.
  final ASTNode ast;
  final SemanticAnalyzer sem;
  final QueryState queryState = new QueryState.Builder().withHiveConf(conf).build();
  if (rewritten) {
    // Create the context object that is needed to store the column stats
    this.analyzeRewrite = ColumnStatsSemanticAnalyzer.genAnalyzeRewriteContext(conf, tbl);
    // The analyze statement has already been rewritten; we just need to create
    // the AST and the corresponding semantic analyzer.
    ast = ParseUtils.parse(command, ctx);
    BaseSemanticAnalyzer baseSem = SemanticAnalyzerFactory.get(queryState, ast);
    sem = (SemanticAnalyzer) baseSem;
  } else {
    // We need to rewrite the analyze command and get the rewritten AST
    ASTNode analyzeTree = ParseUtils.parse(command, ctx);
    BaseSemanticAnalyzer baseSem = SemanticAnalyzerFactory.get(queryState, analyzeTree);
    ColumnStatsSemanticAnalyzer colSem = (ColumnStatsSemanticAnalyzer) baseSem;
    ast = colSem.rewriteAST(analyzeTree, this);
    // Obtain the context object that is needed to store the column stats
    this.analyzeRewrite = colSem.getAnalyzeRewriteContext();
    // Analyze the rewritten statement
    baseSem = SemanticAnalyzerFactory.get(queryState, ast);
    sem = (SemanticAnalyzer) baseSem;
  }
  QB qb = new QB(null, null, false);
  ASTNode child = ast;
  ParseContext subPCtx = sem.getParseContext();
  subPCtx.setContext(ctx);
  sem.initParseCtx(subPCtx);
  sem.doPhase1(child, qb, sem.initPhase1Ctx(), null);
  // This will trigger new calls to the metastore to collect metadata
  // TODO: cache the information from the metastore
  sem.getMetaData(qb);
  sem.genPlan(qb);
  // 3. Populate the load file work so that ColumnStatsTask can work
  loadFileWork.addAll(sem.getLoadFileWork());
  // 4. Because there is only one TS for the analyze statement, we can get it directly.
  if (sem.topOps.values().size() != 1) {
    throw new SemanticException("ColumnStatsAutoGatherContext is expecting exactly one TS, but finds "
        + sem.topOps.values().size());
  }
  Operator<?> operator = sem.topOps.values().iterator().next();
  // If the statement was rewritten, descend to the operator below the UDTF;
  // otherwise, get the first SEL after the TS.
  if (rewritten) {
    while (!(operator instanceof UDTFOperator)) {
      operator = operator.getChildOperators().get(0);
    }
    operator = operator.getChildOperators().get(0);
  } else {
    while (!(operator instanceof SelectOperator)) {
      operator = operator.getChildOperators().get(0);
    }
  }
  return operator;
}
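The method wires the freshly created Context into the caller's Context as a sub-context so that operator tracking and explain settings are shared. A condensed sketch of just that wiring, assuming conf and origCtx are in scope as in the method above:

  // Hedged sketch: create a child Context for an internally generated query
  // and let it share the parent's operator context and explain configuration.
  Context subCtx = new Context(conf);
  origCtx.addSubContext(subCtx);
  subCtx.setOpContext(origCtx.getOpContext());
  subCtx.setExplainConfig(origCtx.getExplainConfig());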
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class MapReduceCompiler, method decideExecMode.
@Override
protected void decideExecMode(List<Task<?>> rootTasks, Context ctx, GlobalLimitCtx globalLimitCtx)
    throws SemanticException {
  // bypass for explain queries for now
  if (ctx.isExplainSkipExecution()) {
    return;
  }
  // user has told us to run in local mode or doesn't want auto-local mode
  if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
    return;
  }
  final Context lCtx = ctx;
  PathFilter p = new PathFilter() {
    @Override
    public boolean accept(Path file) {
      return !lCtx.isMRTmpFileURI(file.toUri().getPath());
    }
  };
  List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);
  // map-reduce jobs will be run locally based on data size;
  // first find out if any of the jobs needs to run non-locally
  boolean hasNonLocalJob = false;
  for (ExecDriver mrtask : mrtasks) {
    try {
      ContentSummary inputSummary = Utilities.getInputSummary(ctx, mrtask.getWork().getMapWork(), p);
      int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
      long estimatedInput;
      if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
        // If the global limit optimization is triggered, we will
        // estimate input data actually needed based on limit rows:
        //   estimated Input = (num_limit * max_size_per_row) * (estimated_map + 2)
        long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
        estimatedInput = (globalLimitCtx.getGlobalOffset() + globalLimitCtx.getGlobalLimit()) * sizePerRow;
        long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
        long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
        estimatedInput = estimatedInput * (estimatedNumMap + 1);
      } else {
        estimatedInput = inputSummary.getLength();
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + ","
            + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: " + estimatedInput);
      }
      if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput, inputSummary.getFileCount()) != null) {
        hasNonLocalJob = true;
        break;
      } else {
        mrtask.setLocalMode(true);
      }
    } catch (IOException e) {
      throw new SemanticException(e);
    }
  }
  if (!hasNonLocalJob) {
    // Entire query can be run locally.
    // Save the current tracker value and restore it when done.
    ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(conf));
    ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
    console.printInfo("Automatically selecting local only mode for query");
  }
}
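The only Context call in this local-mode decision is isMRTmpFileURI, used to keep Hive's intermediate scratch files out of the input-size estimate. On Java 8+ the anonymous PathFilter above could equally be written as a lambda; a sketch assuming ctx is the live Context:

  // Hedged sketch: exclude intermediate MR scratch files when summarizing
  // the input size of a candidate map-reduce task.
  PathFilter nonTmpFilter = file -> !ctx.isMRTmpFileURI(file.toUri().getPath());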
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class ExplainSemanticAnalyzer, method analyzeInternal.
@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
  final int childCount = ast.getChildCount();
  // Skip TOK_QUERY.
  int i = 1;
  while (i < childCount) {
    int explainOptions = ast.getChild(i).getType();
    if (explainOptions == HiveParser.KW_FORMATTED) {
      config.setFormatted(true);
    } else if (explainOptions == HiveParser.KW_EXTENDED) {
      config.setExtended(true);
    } else if (explainOptions == HiveParser.KW_DEPENDENCY) {
      config.setDependency(true);
    } else if (explainOptions == HiveParser.KW_CBO) {
      config.setCbo(true);
    } else if (explainOptions == HiveParser.KW_COST) {
      config.setCboCost(true);
    } else if (explainOptions == HiveParser.KW_JOINCOST) {
      config.setCboJoinCost(true);
    } else if (explainOptions == HiveParser.KW_LOGICAL) {
      config.setLogical(true);
    } else if (explainOptions == HiveParser.KW_AUTHORIZATION) {
      config.setAuthorize(true);
    } else if (explainOptions == HiveParser.KW_ANALYZE) {
      config.setAnalyze(AnalyzeState.RUNNING);
      config.setExplainRootPath(ctx.getMRTmpPath());
    } else if (explainOptions == HiveParser.KW_VECTORIZATION) {
      config.setVectorization(true);
      if (i + 1 < childCount) {
        int vectorizationOption = ast.getChild(i + 1).getType();
        // [ONLY]
        if (vectorizationOption == HiveParser.TOK_ONLY) {
          config.setVectorizationOnly(true);
          i++;
          if (i + 1 >= childCount) {
            break;
          }
          vectorizationOption = ast.getChild(i + 1).getType();
        }
        // [SUMMARY|OPERATOR|EXPRESSION|DETAIL]
        if (vectorizationOption == HiveParser.TOK_SUMMARY) {
          config.setVectorizationDetailLevel(VectorizationDetailLevel.SUMMARY);
          i++;
        } else if (vectorizationOption == HiveParser.TOK_OPERATOR) {
          config.setVectorizationDetailLevel(VectorizationDetailLevel.OPERATOR);
          i++;
        } else if (vectorizationOption == HiveParser.TOK_EXPRESSION) {
          config.setVectorizationDetailLevel(VectorizationDetailLevel.EXPRESSION);
          i++;
        } else if (vectorizationOption == HiveParser.TOK_DETAIL) {
          config.setVectorizationDetailLevel(VectorizationDetailLevel.DETAIL);
          i++;
        }
      }
    } else if (explainOptions == HiveParser.KW_LOCKS) {
      config.setLocks(true);
    } else if (explainOptions == HiveParser.KW_AST) {
      config.setAst(true);
    } else if (explainOptions == HiveParser.KW_DEBUG) {
      config.setDebug(true);
    } else if (explainOptions == HiveParser.KW_DDL) {
      config.setDDL(true);
      config.setCbo(true);
      config.setVectorization(true);
    } else {
      // UNDONE: UNKNOWN OPTION?
    }
    i++;
  }
  ctx.setExplainConfig(config);
  ctx.setExplainPlan(true);
  ASTNode input = (ASTNode) ast.getChild(0);
  // For EXPLAIN ANALYZE, first run the query (AnalyzeState.RUNNING) so that the runtime #rows can be collected.
  if (config.getAnalyze() == AnalyzeState.RUNNING) {
    String query = ctx.getTokenRewriteStream().toString(input.getTokenStartIndex(), input.getTokenStopIndex());
    LOG.info("Explain analyze (running phase) for query " + query);
    conf.unset(ValidTxnList.VALID_TXNS_KEY);
    conf.unset(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
    Context runCtx = null;
    try {
      runCtx = new Context(conf);
      // runCtx and ctx share the configuration, but not isExplainPlan()
      runCtx.setExplainConfig(config);
      try (Driver driver = new Driver(conf, runCtx, queryState.getLineageState())) {
        driver.run(query);
        while (driver.getResults(new ArrayList<String>())) {
        }
      } catch (CommandProcessorException e) {
        if (e.getCause() instanceof ReCompileException) {
          throw (ReCompileException) e.getCause();
        } else {
          throw new SemanticException(e.getMessage(), e);
        }
      }
      config.setOpIdToRuntimeNumRows(aggregateStats(config.getExplainRootPath()));
    } catch (IOException e1) {
      throw new SemanticException(e1);
    }
    ctx.resetOpContext();
    ctx.resetStream();
    TaskFactory.resetId();
    LOG.info("Explain analyze (analyzing phase) for query " + query);
    config.setAnalyze(AnalyzeState.ANALYZING);
  }
  // Creating a new QueryState unfortunately causes all .q.out files to change - do this in a separate ticket.
  // Sharing QueryState between generating the plan and executing the query seems bad.
  // BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(new QueryState(queryState.getConf()), input);
  BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, input);
  sem.analyze(input, ctx);
  sem.validate();
  inputs = sem.getInputs();
  outputs = sem.getOutputs();
  ctx.setResFile(ctx.getLocalTmpPath());
  List<Task<?>> tasks = sem.getAllRootTasks();
  if (tasks == null) {
    tasks = Collections.emptyList();
  }
  FetchTask fetchTask = sem.getFetchTask();
  if (fetchTask != null) {
    // Initialize fetch work such that the operator tree will be constructed.
    fetchTask.getWork().initializeForFetch(ctx.getOpContext());
  }
  if (sem instanceof SemanticAnalyzer) {
    pCtx = sem.getParseContext();
  }
  config.setUserLevelExplain(!config.isExtended() && !config.isFormatted() && !config.isDependency()
      && !config.isCbo() && !config.isLogical() && !config.isVectorization() && !config.isAuthorize()
      && ((HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_EXPLAIN_USER)
            && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez"))
          || (HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_SPARK_EXPLAIN_USER)
            && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark"))));
  ExplainWork work = new ExplainWork(ctx.getResFile(), pCtx, tasks, fetchTask, input, sem, config,
      ctx.getCboInfo(), ctx.getOptimizedSql(), ctx.getCalcitePlan());
  work.setAppendTaskType(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES));
  ExplainTask explTask = (ExplainTask) TaskFactory.get(work);
  fieldList = ExplainTask.getResultSchema();
  rootTasks.add(explTask);
}
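For the EXPLAIN ANALYZE running phase, the analyzer builds a second, short-lived Context that shares only the explain configuration with the compile-time one and executes the query through a Driver. A condensed sketch of that pattern, assuming conf, config, queryState, and query are in scope as above; exception handling around Context creation and result draining is elided.

  // Hedged sketch: execute the query against a fresh Context during the
  // EXPLAIN ANALYZE running phase; only the explain config is shared.
  Context runCtx = new Context(conf);
  runCtx.setExplainConfig(config);
  try (Driver driver = new Driver(conf, runCtx, queryState.getLineageState())) {
    driver.run(query);
  } catch (CommandProcessorException e) {
    throw new SemanticException(e.getMessage(), e);
  }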
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class TestDummyTxnManager, method setUp.
@Before
public void setUp() throws Exception {
  conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true);
  conf.setVar(HiveConf.ConfVars.HIVE_TXN_MANAGER, DummyTxnManager.class.getName());
  conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
      "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
  SessionState.start(conf);
  ctx = new Context(conf);
  txnMgr = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
  Assert.assertTrue(txnMgr instanceof DummyTxnManager);
  // Use reflection to set the lock manager, since creating it via reflection
  // inside DummyTxnManager would not pick up the mocked object.
  Field field = DummyTxnManager.class.getDeclaredField("lockMgr");
  field.setAccessible(true);
  field.set(txnMgr, mockLockManager);
  Field field2 = DummyTxnManager.class.getDeclaredField("lockManagerCtx");
  field2.setAccessible(true);
  field2.set(txnMgr, mockLockManagerCtx);
}
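In tests, a Context is created directly from a HiveConf once a SessionState has been started, as above. A minimal sketch of that setup, assuming a configured HiveConf named conf and a surrounding method that declares throws Exception; the HiveTxnManager type is assumed to be the factory's return type.

  // Hedged sketch: minimal plumbing before a test can hand a Context to a
  // transaction/lock manager.
  SessionState.start(conf);
  Context ctx = new Context(conf);
  HiveTxnManager txnMgr = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);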