Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
The class Driver, method execute().
private void execute() throws CommandProcessorResponse {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_EXECUTE);
boolean noName = StringUtils.isEmpty(conf.get(MRJobConfig.JOB_NAME));
int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
Metrics metrics = MetricsFactory.getInstance();
String queryId = queryState.getQueryId();
// Get the query string from the conf file as the compileInternal() method might
// hide sensitive information during query redaction.
String queryStr = conf.getQueryString();
lDrvState.stateLock.lock();
try {
// if the query is not in COMPILED state, or in EXECUTING state (which is carried over for
// a combined compile/execute in runInternal), throw the error
if (lDrvState.driverState != DriverState.COMPILED && lDrvState.driverState != DriverState.EXECUTING) {
SQLState = "HY008";
errorMessage = "FAILED: unexpected driverstate: " + lDrvState + ", for query " + queryStr;
console.printError(errorMessage);
throw createProcessorResponse(1000);
} else {
lDrvState.driverState = DriverState.EXECUTING;
}
} finally {
lDrvState.stateLock.unlock();
}
maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
HookContext hookContext = null;
// Whether any error occurred during query execution; used for the query lifetime hook.
boolean executionError = false;
try {
LOG.info("Executing command(queryId=" + queryId + "): " + queryStr);
// compile and execute can get called from different threads in case of HS2
// so clear timing in this thread's Hive object before proceeding.
Hive.get().clearMetaCallTiming();
plan.setStarted();
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().startQuery(queryStr, queryId);
SessionState.get().getHiveHistory().logPlanProgress(plan);
}
resStream = null;
SessionState ss = SessionState.get();
hookContext = new PrivateHookContext(plan, queryState, ctx.getPathToCS(), SessionState.get().getUserName(), ss.getUserIpAddress(), InetAddress.getLocalHost().getHostAddress(), operationId, ss.getSessionId(), Thread.currentThread().getName(), ss.isHiveServerQuery(), perfLogger, queryInfo, ctx);
hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
hookRunner.runPreHooks(hookContext);
// Trigger query hooks before query execution.
hookRunner.runBeforeExecutionHook(queryStr, hookContext);
setQueryDisplays(plan.getRootTasks());
int mrJobs = Utilities.getMRTasks(plan.getRootTasks()).size();
int jobs = mrJobs + Utilities.getTezTasks(plan.getRootTasks()).size() + Utilities.getSparkTasks(plan.getRootTasks()).size();
if (jobs > 0) {
logMrWarning(mrJobs);
console.printInfo("Query ID = " + queryId);
console.printInfo("Total jobs = " + jobs);
}
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_NUM_TASKS, String.valueOf(jobs));
SessionState.get().getHiveHistory().setIdToTableMap(plan.getIdToTableNameMap());
}
String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
// A runtime that launches runnable tasks as separate Threads through
// TaskRunners
// As soon as a task isRunnable, it is put in a queue
// At any time, at most maxthreads tasks can be running
// The main thread polls the TaskRunners to check if they have finished.
checkInterrupted("before running tasks.", hookContext, perfLogger);
DriverContext driverCxt = new DriverContext(ctx);
driverCxt.prepare(plan);
ctx.setHDFSCleanup(true);
// for canceling the query (should be bound to session?)
this.driverCxt = driverCxt;
SessionState.get().setMapRedStats(new LinkedHashMap<>());
SessionState.get().setStackTraces(new HashMap<>());
SessionState.get().setLocalMapRedErrors(new HashMap<>());
// Add root Tasks to runnable
for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
// This should never happen; if it does, it is a bug with the potential to produce incorrect results.
assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();
driverCxt.addToRunnable(tsk);
if (metrics != null) {
tsk.updateTaskMetrics(metrics);
}
}
preExecutionCacheActions();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RUN_TASKS);
// Loop while you either have tasks running, or tasks queued up
while (driverCxt.isRunning()) {
// Launch up to maxthreads tasks
Task<? extends Serializable> task;
while ((task = driverCxt.getRunnable(maxthreads)) != null) {
TaskRunner runner = launchTask(task, queryId, noName, jobname, jobs, driverCxt);
if (!runner.isRunning()) {
break;
}
}
// poll the Tasks to see which one completed
TaskRunner tskRun = driverCxt.pollFinished();
if (tskRun == null) {
continue;
}
/*
This should be removed eventually. HIVE-17814 gives a more detailed
explanation of what is happening, and HIVE-17815 explains why this is done.
Briefly: for replication the graph is huge, so memory pressure would be huge if
we kept a lot of references around.
*/
String opName = plan.getOperationName();
boolean isReplicationOperation = opName.equals(HiveOperation.REPLDUMP.getOperationName()) || opName.equals(HiveOperation.REPLLOAD.getOperationName());
if (!isReplicationOperation) {
hookContext.addCompleteTask(tskRun);
}
queryDisplay.setTaskResult(tskRun.getTask().getId(), tskRun.getTaskResult());
Task<? extends Serializable> tsk = tskRun.getTask();
TaskResult result = tskRun.getTaskResult();
int exitVal = result.getExitVal();
checkInterrupted("when checking the execution result.", hookContext, perfLogger);
if (exitVal != 0) {
Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
if (backupTask != null) {
setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
console.printError(errorMessage);
errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();
console.printError(errorMessage);
// add backup task to runnable
if (DriverContext.isLaunchable(backupTask)) {
driverCxt.addToRunnable(backupTask);
}
continue;
} else {
setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
if (driverCxt.isShutdown()) {
errorMessage = "FAILED: Operation cancelled. " + errorMessage;
}
invokeFailureHooks(perfLogger, hookContext, errorMessage + Strings.nullToEmpty(tsk.getDiagnosticsMessage()), result.getTaskError());
SQLState = "08S01";
// 08S01 (communication error) is the default SQLState; override it
// based on the ErrorMsg set in HiveException.
if (result.getTaskError() instanceof HiveException) {
ErrorMsg errorMsg = ((HiveException) result.getTaskError()).getCanonicalErrorMsg();
if (errorMsg != ErrorMsg.GENERIC_ERROR) {
SQLState = errorMsg.getSQLState();
}
}
console.printError(errorMessage);
driverCxt.shutdown();
// in case we decided to run everything in local mode, restore the
// jobtracker setting to its initial value
ctx.restoreOriginalTracker();
throw createProcessorResponse(exitVal);
}
}
driverCxt.finished(tskRun);
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().setTaskProperty(queryId, tsk.getId(), Keys.TASK_RET_CODE, String.valueOf(exitVal));
SessionState.get().getHiveHistory().endTask(queryId, tsk);
}
if (tsk.getChildTasks() != null) {
for (Task<? extends Serializable> child : tsk.getChildTasks()) {
if (DriverContext.isLaunchable(child)) {
driverCxt.addToRunnable(child);
}
}
}
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RUN_TASKS);
postExecutionCacheActions();
// in case we decided to run everything in local mode, restore the
// jobtracker setting to its initial value
ctx.restoreOriginalTracker();
if (driverCxt.isShutdown()) {
SQLState = "HY008";
errorMessage = "FAILED: Operation cancelled";
invokeFailureHooks(perfLogger, hookContext, errorMessage, null);
console.printError(errorMessage);
throw createProcessorResponse(1000);
}
// Remove incomplete outputs: some incomplete outputs may be added at the beginning,
// e.g. for dynamic partitions, so remove them here.
HashSet<WriteEntity> remOutputs = new LinkedHashSet<WriteEntity>();
for (WriteEntity output : plan.getOutputs()) {
if (!output.isComplete()) {
remOutputs.add(output);
}
}
for (WriteEntity output : remOutputs) {
plan.getOutputs().remove(output);
}
hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK);
hookRunner.runPostExecHooks(hookContext);
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0));
SessionState.get().getHiveHistory().printRowCount(queryId);
}
releasePlan(plan);
} catch (CommandProcessorResponse cpr) {
executionError = true;
throw cpr;
} catch (Throwable e) {
executionError = true;
checkInterrupted("during query execution: \n" + e.getMessage(), hookContext, perfLogger);
ctx.restoreOriginalTracker();
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12));
}
// TODO: do better with handling types of Exception here
errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
if (hookContext != null) {
try {
invokeFailureHooks(perfLogger, hookContext, errorMessage, e);
} catch (Exception t) {
LOG.warn("Failed to invoke failure hook", t);
}
}
SQLState = "08S01";
downstreamError = e;
console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
throw createProcessorResponse(12);
} finally {
// Trigger query hooks after query completes its execution.
try {
hookRunner.runAfterExecutionHook(queryStr, hookContext, executionError);
} catch (Exception e) {
LOG.warn("Failed when invoking query after execution hook", e);
}
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().endQuery(queryId);
}
if (noName) {
conf.set(MRJobConfig.JOB_NAME, "");
}
double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_EXECUTE) / 1000.00;
ImmutableMap<String, Long> executionHMSTimings = dumpMetaCallTimingWithoutEx("execution");
queryDisplay.setHmsTimings(QueryDisplay.Phase.EXECUTION, executionHMSTimings);
Map<String, MapRedStats> stats = SessionState.get().getMapRedStats();
if (stats != null && !stats.isEmpty()) {
long totalCpu = 0;
console.printInfo("MapReduce Jobs Launched: ");
for (Map.Entry<String, MapRedStats> entry : stats.entrySet()) {
console.printInfo("Stage-" + entry.getKey() + ": " + entry.getValue());
totalCpu += entry.getValue().getCpuMSec();
}
console.printInfo("Total MapReduce CPU Time Spent: " + Utilities.formatMsecToStr(totalCpu));
}
lDrvState.stateLock.lock();
try {
lDrvState.driverState = executionError ? DriverState.ERROR : DriverState.EXECUTED;
} finally {
lDrvState.stateLock.unlock();
}
if (lDrvState.isAborted()) {
LOG.info("Executing command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds");
} else {
LOG.info("Completed executing command(queryId=" + queryId + "); Time taken: " + duration + " seconds");
}
}
if (console != null) {
console.printInfo("OK");
}
}
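Stripped of the task-scheduling machinery, the timing discipline in execute() reduces to a begin/end pair whose end call lives in the finally block, so the elapsed time is recorded even when execution fails. A minimal sketch of that shape, assuming CLASS_NAME, LOG, and queryId from the surrounding Driver class:
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_EXECUTE);
try {
    // ... launch and poll tasks as in the method above ...
} finally {
    // PerfLogEnd returns the elapsed time in milliseconds.
    double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_EXECUTE) / 1000.00;
    LOG.info("Completed executing command(queryId=" + queryId + "); Time taken: " + duration + " seconds");
}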
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
The class TezCompiler, method optimizeTaskPlan().
@Override
protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, Context ctx) throws SemanticException {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask());
if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
} else {
LOG.debug("Skipping null scan query optimization");
}
if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
physicalCtx = new MetadataOnlyOptimizer().resolve(physicalCtx);
} else {
LOG.debug("Skipping metadata only query optimization");
}
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
physicalCtx = new CrossProductHandler().resolve(physicalCtx);
} else {
LOG.debug("Skipping cross product analysis");
}
if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
physicalCtx = new LlapPreVectorizationPass().resolve(physicalCtx);
} else {
LOG.debug("Skipping llap pre-vectorization pass");
}
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
physicalCtx = new Vectorizer().resolve(physicalCtx);
} else {
LOG.debug("Skipping vectorization");
}
if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
physicalCtx = new StageIDsRearranger().resolve(physicalCtx);
} else {
LOG.debug("Skipping stage id rearranger");
}
if ((conf.getBoolVar(HiveConf.ConfVars.HIVE_TEZ_ENABLE_MEMORY_MANAGER)) && (conf.getBoolVar(HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN))) {
physicalCtx = new MemoryDecider().resolve(physicalCtx);
}
if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
LlapClusterStateForCompile llapInfo = LlapClusterStateForCompile.getClusterInfo(conf);
physicalCtx = new LlapDecider(llapInfo).resolve(physicalCtx);
} else {
LOG.debug("Skipping llap decider");
}
// This optimizer will serialize all filters that made it to the
// table scan operator to avoid having to do it multiple times on
// the backend. If you have a physical optimization that changes
// table scans or filters, you have to invoke it before this one.
physicalCtx = new SerializeFilter().resolve(physicalCtx);
if (physicalCtx.getContext().getExplainAnalyze() != null) {
new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
}
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "optimizeTaskPlan");
}
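Here the whole resolver pipeline is timed as one unit: a single PerfLogBegin before the first pass and a single PerfLogEnd, tagged "optimizeTaskPlan", after the last. A trimmed sketch of that shape, with one representative pass standing in for the sequence above:
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
// ... each enabled physical optimizer resolves the context in turn, e.g.:
physicalCtx = new SerializeFilter().resolve(physicalCtx);
// The three-argument overload labels this measurement under the TEZ_COMPILER key.
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "optimizeTaskPlan");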
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
The class MoveTask, method moveFile().
private void moveFile(Path sourcePath, Path targetPath, boolean isDfsDir) throws HiveException {
try {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.perfLogBegin("MoveTask", PerfLogger.FILE_MOVES);
String mesg = "Moving data to " + (isDfsDir ? "" : "local ") + "directory " + targetPath.toString();
String mesg_detail = " from " + sourcePath.toString();
console.printInfo(mesg, mesg_detail);
FileSystem fs = sourcePath.getFileSystem(conf);
if (work.isCTAS() && BlobStorageUtils.isBlobStorageFileSystem(conf, fs)) {
if (fs.exists(new Path(sourcePath, BLOB_MANIFEST_FILE))) {
LOG.debug("Attempting to copy using the paths available in {}", new Path(sourcePath, BLOB_MANIFEST_FILE));
ArrayList<String> filesKept;
try (FSDataInputStream inStream = fs.open(new Path(sourcePath, BLOB_MANIFEST_FILE))) {
String paths = IOUtils.toString(inStream, Charset.defaultCharset());
filesKept = new ArrayList<>(Arrays.asList(paths.split(System.lineSeparator())));
}
// Remove the first entry from the list; it is the source path.
Path srcPath = new Path(filesKept.remove(0));
LOG.info("Copying files {} from {} to {}", filesKept, srcPath, targetPath);
// Do the move using the filesKept now directly to the target dir.
Utilities.moveSpecifiedFilesInParallel(conf, fs, srcPath, targetPath, new HashSet<>(filesKept));
perfLogger.perfLogEnd("MoveTask", PerfLogger.FILE_MOVES);
return;
}
// Fallback case: if the _blob_files_kept manifest was not created, we can follow the normal move logic.
// The manifest is also not created when the source table is empty.
}
if (isDfsDir) {
moveFileInDfs(sourcePath, targetPath, conf);
} else {
// This is a local file
FileSystem dstFs = FileSystem.getLocal(conf);
moveFileFromDfsToLocal(sourcePath, targetPath, fs, dstFs);
}
perfLogger.perfLogEnd("MoveTask", PerfLogger.FILE_MOVES);
} catch (Exception e) {
throw new HiveException("Unable to move source " + sourcePath + " to destination " + targetPath, e);
}
}
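Note that perfLogEnd runs on both the manifest early-return path and the normal path, but an exception thrown before either call leaves the FILE_MOVES timer open. A hedged variant, assuming the same perfLogBegin/perfLogEnd API, closes the timer in a finally block instead:
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.perfLogBegin("MoveTask", PerfLogger.FILE_MOVES);
try {
    // ... manifest-based parallel copy, or moveFileInDfs / moveFileFromDfsToLocal ...
} finally {
    // Ensure the FILE_MOVES timer is closed even when the move throws.
    perfLogger.perfLogEnd("MoveTask", PerfLogger.FILE_MOVES);
}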
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
The class CombineHiveInputFormat, method getSplits().
/**
* Create Hive splits based on CombineFileSplit.
*/
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
init(job);
ArrayList<InputSplit> result = new ArrayList<InputSplit>();
Path[] paths = getInputPaths(job);
List<Path> nonCombinablePaths = new ArrayList<Path>(paths.length / 2);
List<Path> combinablePaths = new ArrayList<Path>(paths.length / 2);
int numThreads = Math.min(MAX_CHECK_NONCOMBINABLE_THREAD_NUM, (int) Math.ceil((double) paths.length / DEFAULT_NUM_PATH_PER_THREAD));
try {
Set<Integer> nonCombinablePathIndices = getNonCombinablePathIndices(job, paths, numThreads);
for (int i = 0; i < paths.length; i++) {
if (nonCombinablePathIndices.contains(i)) {
nonCombinablePaths.add(paths[i]);
} else {
combinablePaths.add(paths[i]);
}
}
} catch (Exception e) {
LOG.error("Error checking non-combinable path", e);
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
throw new IOException(e);
}
// Store the previous value for the path specification
String oldPaths = job.get(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR);
LOG.debug("The received input paths are: [{}] against the property {}", oldPaths, org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR);
// Process the normal splits
if (nonCombinablePaths.size() > 0) {
FileInputFormat.setInputPaths(job, nonCombinablePaths.toArray(new Path[nonCombinablePaths.size()]));
InputSplit[] splits = super.getSplits(job, numSplits);
for (InputSplit split : splits) {
result.add(split);
}
}
// Process the combine splits
if (combinablePaths.size() > 0) {
FileInputFormat.setInputPaths(job, combinablePaths.toArray(new Path[combinablePaths.size()]));
Map<Path, PartitionDesc> pathToPartitionInfo = this.pathToPartitionInfo != null ? this.pathToPartitionInfo : Utilities.getMapWork(job).getPathToPartitionInfo();
InputSplit[] splits = getCombineSplits(job, numSplits, pathToPartitionInfo);
for (InputSplit split : splits) {
result.add(split);
}
}
// Restore the original path specification, in case some application depends on the original value being set.
if (oldPaths != null) {
job.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, oldPaths);
}
// Clear the work map from the ThreadLocal after splits are generated, in case the thread is reused in a pool.
Utilities.clearWorkMapForConf(job);
if (result.isEmpty() && paths.length > 0 && job.getBoolean(Utilities.ENSURE_OPERATORS_EXECUTED, false)) {
// If there are no inputs, the execution engine skips the operator tree.
// To prevent that, an opaque ZeroRows input is added here when needed.
result.add(new HiveInputSplit(new NullRowsInputFormat.DummyInputSplit(paths[0]), ZeroRowsInputFormat.class.getName()));
}
LOG.info("Number of all splits " + result.size());
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
return result.toArray(new InputSplit[result.size()]);
}
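The instrumentation in getSplits() follows a begin-at-entry pattern with an explicit perfLogEnd on the error path, before the exception is wrapped and rethrown, and another before the normal return. Condensed, with the split computation elided:
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
try {
    // ... partition input paths into combinable and non-combinable sets ...
} catch (Exception e) {
    // Close the timer before propagating the failure.
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
    throw new IOException(e);
}
// ... compute the normal and combined splits ...
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);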
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
The class TezCompiler, method optimizeOperatorPlan().
@Override
protected void optimizeOperatorPlan(ParseContext pCtx) throws SemanticException {
PerfLogger perfLogger = SessionState.getPerfLogger();
// Create the context for the walker
OptimizeTezProcContext procCtx = new OptimizeTezProcContext(conf, pCtx);
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
runTopNKeyOptimization(procCtx);
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run top n key optimization");
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
// setup dynamic partition pruning where possible
runDynamicPartitionPruning(procCtx);
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Setup dynamic partition pruning");
if (procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_MULTICOLUMN)) {
SemiJoinReductionMerge sjmerge = new SemiJoinReductionMerge();
sjmerge.beginPerfLogging();
sjmerge.transform(procCtx.parseContext);
sjmerge.endPerfLogging("Merge single column semi-join reducers to composite");
}
// Need to run this to get consistent FilterOperator conditions (for operator tree matching).
if (procCtx.conf.getBoolVar(ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
new ConstantPropagate(ConstantPropagateOption.SHORTCUT).transform(procCtx.parseContext);
}
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
// setup stats in the operator plan
runStatsAnnotation(procCtx);
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Setup stats in the operator plan");
// run Sorted dynamic partition optimization
if (HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.DYNAMICPARTITIONING) && HiveConf.getVar(procCtx.conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE).equals("nonstrict") && !HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) {
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
new SortedDynPartitionOptimizer().transform(procCtx.parseContext);
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Sorted dynamic partition optimization");
}
if (HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
// Dynamic sort partition adds an extra ReduceSink, so we need to de-duplicate
new ReduceSinkDeDuplication().transform(procCtx.parseContext);
// There is an issue with the dedup logic wherein a SELECT is created with the wrong columns;
// NonBlockingOpDeDupProc fixes that
// (kind of hackish; the issue in de-dup should be fixed, but it needs more investigation)
new NonBlockingOpDeDupProc().transform(procCtx.parseContext);
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Reduce Sink de-duplication");
}
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
// run the optimizations that use stats for optimization
runStatsDependentOptimizations(procCtx);
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization");
// Repopulate bucket versions; join conversion may have created some new ReduceSinks
new BucketVersionPopulator().transform(pCtx);
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
if (procCtx.conf.getBoolVar(ConfVars.HIVEOPTJOINREDUCEDEDUPLICATION)) {
new ReduceSinkJoinDeDuplication().transform(procCtx.parseContext);
}
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run reduce sink after join algorithm selection");
semijoinRemovalBasedTransformations(procCtx);
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
if (procCtx.conf.getBoolVar(ConfVars.HIVE_SHARED_WORK_OPTIMIZATION)) {
new SharedWorkOptimizer().transform(procCtx.parseContext);
new ParallelEdgeFixer().transform(procCtx.parseContext);
}
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Shared scans optimization");
// Need a new run of constant folding because we might have created lots of "and true and true" conditions.
// Rather than running full constant folding, we only need to shortcut AND/OR expressions
// involving constant true/false values.
if (procCtx.conf.getBoolVar(ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
new ConstantPropagate(ConstantPropagateOption.SHORTCUT).transform(procCtx.parseContext);
}
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
AuxOpTreeSignature.linkAuxSignatures(procCtx.parseContext);
markOperatorsWithUnstableRuntimeStats(procCtx);
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "markOperatorsWithUnstableRuntimeStats");
if (procCtx.conf.getBoolVar(ConfVars.HIVE_IN_TEST)) {
bucketingVersionSanityCheck(procCtx);
}
}
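Unlike optimizeTaskPlan, this method reopens the shared TEZ_COMPILER key around each stage and gives every perfLogEnd its own label, so each optimization is timed individually. A hypothetical helper (timedStage is not part of the Hive codebase) that captures the begin/run/end-with-label rhythm; stages that throw checked exceptions such as SemanticException would need a throwing functional interface instead of Runnable:
// Hypothetical helper: wraps one optimization stage in a labeled PerfLogger interval.
private void timedStage(PerfLogger perfLogger, String label, Runnable stage) {
    perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
    stage.run();
    // The label distinguishes this stage's entry under the shared TEZ_COMPILER key.
    perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, label);
}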