Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
From the class Utilities, method mvFileToFinalPath:
public static void mvFileToFinalPath(Path specPath, Configuration hconf, boolean success, Logger log,
    DynamicPartitionCtx dpCtx, FileSinkDesc conf, Reporter reporter) throws IOException, HiveException {
  //
  // Runaway task attempts (which are unable to be killed by MR/YARN) can cause HIVE-17113,
  // where they can write duplicate output files to tmpPath after de-duplicating the files,
  // but before tmpPath is moved to specPath.
  // Fixing this issue will be done differently for blobstore (e.g. S3)
  // vs non-blobstore (local filesystem, HDFS) filesystems due to differences in
  // implementation - a directory move in a blobstore effectively results in file-by-file
  // moves for every file in a directory, while in HDFS/localFS a directory move is just a
  // single filesystem operation.
  // - For non-blobstore FS, do the following:
  //   1) Rename tmpPath to a new directory name to prevent additional files
  //      from being added by runaway processes.
  //   2) Remove duplicates from the temp directory
  //   3) Rename/move the temp directory to specPath
  //
  // - For blobstore FS, do the following:
  //   1) Remove duplicates from tmpPath
  //   2) Use moveSpecifiedFiles() to perform a file-by-file move of the de-duped files
  //      to specPath. On blobstore FS, assuming n files in the directory, this results
  //      in n file moves, compared to 2*n file moves with the previous solution
  //      (each directory move would result in a file-by-file move of the files in the directory)
  //
  FileSystem fs = specPath.getFileSystem(hconf);
  boolean isBlobStorage = BlobStorageUtils.isBlobStorageFileSystem(hconf, fs);
  Path tmpPath = Utilities.toTempPath(specPath);
  Path taskTmpPath = Utilities.toTaskTempPath(specPath);
  if (success) {
    if (!isBlobStorage && fs.exists(tmpPath)) {
      // 1) Rename tmpPath to a new directory name to prevent additional files
      //    from being added by runaway processes.
      Path tmpPathOriginal = tmpPath;
      tmpPath = new Path(tmpPath.getParent(), tmpPath.getName() + ".moved");
      Utilities.rename(fs, tmpPathOriginal, tmpPath);
    }
    // Remove duplicates from tmpPath
    List<FileStatus> statusList = HiveStatsUtils.getFileStatusRecurse(tmpPath,
        ((dpCtx == null) ? 1 : dpCtx.getNumDPCols()), fs);
    FileStatus[] statuses = statusList.toArray(new FileStatus[statusList.size()]);
    if (statuses != null && statuses.length > 0) {
      PerfLogger perfLogger = SessionState.getPerfLogger();
      Set<Path> filesKept = new HashSet<Path>();
      perfLogger.PerfLogBegin("FileSinkOperator", "RemoveTempOrDuplicateFiles");
      // remove any tmp file or double-committed output files
      List<Path> emptyBuckets = Utilities.removeTempOrDuplicateFiles(fs, statuses, dpCtx, conf, hconf, filesKept, false);
      perfLogger.PerfLogEnd("FileSinkOperator", "RemoveTempOrDuplicateFiles");
      // create empty buckets if necessary
      if (!emptyBuckets.isEmpty()) {
        perfLogger.PerfLogBegin("FileSinkOperator", "CreateEmptyBuckets");
        createEmptyBuckets(hconf, emptyBuckets, conf.getCompressed(), conf.getTableInfo(), reporter);
        filesKept.addAll(emptyBuckets);
        perfLogger.PerfLogEnd("FileSinkOperator", "CreateEmptyBuckets");
      }
      // move to the file destination
      Utilities.FILE_OP_LOGGER.trace("Moving tmp dir: {} to: {}", tmpPath, specPath);
      perfLogger.PerfLogBegin("FileSinkOperator", "RenameOrMoveFiles");
      if (isBlobStorage) {
        // HIVE-17113 - avoid copying files that may have been written to the temp dir by runaway tasks,
        // by moving just the files we've tracked from removeTempOrDuplicateFiles().
        Utilities.moveSpecifiedFiles(fs, tmpPath, specPath, filesKept);
      } else {
        // For non-blobstore case, can just move the directory - the initial directory rename
        // at the start of this method should prevent files written by runaway tasks.
        Utilities.renameOrMoveFiles(fs, tmpPath, specPath);
      }
      perfLogger.PerfLogEnd("FileSinkOperator", "RenameOrMoveFiles");
    }
  } else {
    Utilities.FILE_OP_LOGGER.trace("deleting tmpPath {}", tmpPath);
    fs.delete(tmpPath, true);
  }
  Utilities.FILE_OP_LOGGER.trace("deleting taskTmpPath {}", taskTmpPath);
  fs.delete(taskTmpPath, true);
}
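The method above brackets each expensive step (RemoveTempOrDuplicateFiles, CreateEmptyBuckets, RenameOrMoveFiles) with a matching PerfLogBegin/PerfLogEnd pair on the session's PerfLogger. Below is a minimal, hypothetical sketch of that bracketing idiom in isolation; the component name "ExampleComponent" and stage name "ExampleStage" are placeholders rather than real Hive constants, and a live Hive session is assumed so that SessionState.getPerfLogger() returns a usable logger.

import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.session.SessionState;

public class PerfLoggerBracketSketch {
  public static void runTimedStage() {
    // Fetch the PerfLogger associated with the current Hive session.
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin("ExampleComponent", "ExampleStage");
    try {
      // ... the work being timed goes here ...
    } finally {
      // Ends the span opened by the PerfLogBegin call with the same caller/stage keys.
      perfLogger.PerfLogEnd("ExampleComponent", "ExampleStage");
    }
  }
}

The try/finally here is a defensive choice for the sketch only; mvFileToFinalPath simply calls PerfLogEnd inline after each step.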
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
From the class Driver, method compileInternal:
private void compileInternal(String command, boolean deferClose) throws CommandProcessorResponse {
  Metrics metrics = MetricsFactory.getInstance();
  if (metrics != null) {
    metrics.incrementCounter(MetricsConstant.WAITING_COMPILE_OPS, 1);
  }
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.WAIT_COMPILE);
  final ReentrantLock compileLock = tryAcquireCompileLock(isParallelEnabled, command);
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.WAIT_COMPILE);
  if (metrics != null) {
    metrics.decrementCounter(MetricsConstant.WAITING_COMPILE_OPS, 1);
  }
  if (compileLock == null) {
    throw createProcessorResponse(ErrorMsg.COMPILE_LOCK_TIMED_OUT.getErrorCode());
  }
  try {
    compile(command, true, deferClose);
  } catch (CommandProcessorResponse cpr) {
    try {
      releaseLocksAndCommitOrRollback(false);
    } catch (LockException e) {
      LOG.warn("Exception in releasing locks. " + org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
    throw cpr;
  } finally {
    compileLock.unlock();
  }
  // Save compile-time PerfLogging for WebUI.
  // Execution-time Perf logs are done by either another thread's PerfLogger
  // or a reset PerfLogger.
  queryDisplay.setPerfLogStarts(QueryDisplay.Phase.COMPILATION, perfLogger.getStartTimes());
  queryDisplay.setPerfLogEnds(QueryDisplay.Phase.COMPILATION, perfLogger.getEndTimes());
}
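compileInternal hands the PerfLogger's getStartTimes()/getEndTimes() maps to QueryDisplay so the WebUI can show compile-phase timings. As a rough illustration of how such maps could be consumed, here is a hypothetical helper (not part of Hive) that pairs up start and end timestamps into per-stage durations; it assumes both maps are keyed by stage name and hold millisecond timestamps.

import java.util.LinkedHashMap;
import java.util.Map;

public final class PerfTimingsSketch {
  public static Map<String, Long> durationsMillis(Map<String, Long> startTimes, Map<String, Long> endTimes) {
    Map<String, Long> durations = new LinkedHashMap<>();
    for (Map.Entry<String, Long> start : startTimes.entrySet()) {
      Long end = endTimes.get(start.getKey());
      if (end != null) {
        // Only stages that have both a begin and an end timestamp are reported.
        durations.put(start.getKey(), end - start.getValue());
      }
    }
    return durations;
  }
}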
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
From the class Driver, method compile:
// deferClose indicates whether the close/destroy should be deferred when the process has been
// interrupted. It should be set to true if compile is called from within another method, such as
// runInternal, which defers the close so that it is handled in that calling method.
private void compile(String command, boolean resetTaskIds, boolean deferClose) throws CommandProcessorResponse {
  PerfLogger perfLogger = SessionState.getPerfLogger(true);
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_RUN);
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);
  lDrvState.stateLock.lock();
  try {
    lDrvState.driverState = DriverState.COMPILING;
  } finally {
    lDrvState.stateLock.unlock();
  }
  command = new VariableSubstitution(new HiveVariableSource() {
    @Override
    public Map<String, String> getHiveVariable() {
      return SessionState.get().getHiveVariables();
    }
  }).substitute(conf, command);
  String queryStr = command;
  try {
    // command should be redacted to avoid logging sensitive data
    queryStr = HookUtils.redactLogString(conf, command);
  } catch (Exception e) {
    LOG.warn("WARNING! Query command could not be redacted." + e);
  }
  checkInterrupted("at beginning of compilation.", null, null);
  if (ctx != null && ctx.getExplainAnalyze() != AnalyzeState.RUNNING) {
    // close the existing ctx etc before compiling a new query, but does not destroy driver
    closeInProcess(false);
  }
  if (resetTaskIds) {
    TaskFactory.resetId();
  }
  LockedDriverState.setLockedDriverState(lDrvState);
  String queryId = queryState.getQueryId();
  if (ctx != null) {
    setTriggerContext(queryId);
  }
  // save some info for webUI for use after plan is freed
  this.queryDisplay.setQueryStr(queryStr);
  this.queryDisplay.setQueryId(queryId);
  LOG.info("Compiling command(queryId=" + queryId + "): " + queryStr);
  conf.setQueryString(queryStr);
  // FIXME: sideeffect will leave the last query set at the session level
  SessionState.get().getConf().setQueryString(queryStr);
  SessionState.get().setupQueryCurrentTimestamp();
  // Whether any error occurred during query compilation. Used for query lifetime hook.
  boolean compileError = false;
  boolean parseError = false;
  try {
    // Initialize the transaction manager. This must be done before analyze is called.
    if (initTxnMgr != null) {
      queryTxnMgr = initTxnMgr;
    } else {
      queryTxnMgr = SessionState.get().initTxnMgr(conf);
    }
    if (queryTxnMgr instanceof Configurable) {
      ((Configurable) queryTxnMgr).setConf(conf);
    }
    queryState.setTxnManager(queryTxnMgr);
    // In case when user Ctrl-C twice to kill Hive CLI JVM, we want to release locks
    // if compile is being called multiple times, clear the old shutdownhook
    ShutdownHookManager.removeShutdownHook(shutdownRunner);
    final HiveTxnManager txnMgr = queryTxnMgr;
    shutdownRunner = new Runnable() {
      @Override
      public void run() {
        try {
          releaseLocksAndCommitOrRollback(false, txnMgr);
        } catch (LockException e) {
          LOG.warn("Exception when releasing locks in ShutdownHook for Driver: " + e.getMessage());
        }
      }
    };
    ShutdownHookManager.addShutdownHook(shutdownRunner, SHUTDOWN_HOOK_PRIORITY);
    checkInterrupted("before parsing and analysing the query", null, null);
    if (ctx == null) {
      ctx = new Context(conf);
      setTriggerContext(queryId);
    }
    ctx.setRuntimeStatsSource(runtimeStatsSource);
    ctx.setCmd(command);
    ctx.setHDFSCleanup(true);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARSE);
    // Trigger query hook before compilation
    hookRunner.runBeforeParseHook(command);
    ASTNode tree;
    try {
      tree = ParseUtils.parse(command, ctx);
    } catch (ParseException e) {
      parseError = true;
      throw e;
    } finally {
      hookRunner.runAfterParseHook(command, parseError);
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE);
    hookRunner.runBeforeCompileHook(command);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
    // Flush the metastore cache. This assures that we don't pick up objects from a previous
    // query running in this same thread. This has to be done after we get our semantic
    // analyzer (this is when the connection to the metastore is made) but before we analyze,
    // because at that point we need access to the objects.
    Hive.get().getMSC().flushCache();
    BaseSemanticAnalyzer sem;
    // Do semantic analysis and plan generation
    if (hookRunner.hasPreAnalyzeHooks()) {
      HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
      hookCtx.setConf(conf);
      hookCtx.setUserName(userName);
      hookCtx.setIpAddress(SessionState.get().getUserIpAddress());
      hookCtx.setCommand(command);
      hookCtx.setHiveOperation(queryState.getHiveOperation());
      tree = hookRunner.runPreAnalyzeHooks(hookCtx, tree);
      sem = SemanticAnalyzerFactory.get(queryState, tree);
      openTransaction();
      sem.analyze(tree, ctx);
      hookCtx.update(sem);
      hookRunner.runPostAnalyzeHooks(hookCtx, sem.getAllRootTasks());
    } else {
      sem = SemanticAnalyzerFactory.get(queryState, tree);
      openTransaction();
      sem.analyze(tree, ctx);
    }
    LOG.info("Semantic Analysis Completed");
    // Retrieve information about cache usage for the query.
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED)) {
      cacheUsage = sem.getCacheUsage();
    }
    // validate the plan
    sem.validate();
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);
    checkInterrupted("after analyzing query.", null, null);
    // get the output schema
    schema = getSchema(sem, conf);
    plan = new QueryPlan(queryStr, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN), queryId,
        queryState.getHiveOperation(), schema);
    conf.set("mapreduce.workflow.id", "hive_" + queryId);
    conf.set("mapreduce.workflow.name", queryStr);
    // initialize FetchTask right here
    if (plan.getFetchTask() != null) {
      plan.getFetchTask().initialize(queryState, plan, null, ctx.getOpContext());
    }
    // do the authorization check
    if (!sem.skipAuthorization() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
      try {
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
        doAuthorization(queryState.getHiveOperation(), sem, command);
      } catch (AuthorizationException authExp) {
        console.printError("Authorization failed:" + authExp.getMessage() + ". Use SHOW GRANT to get more details.");
        errorMessage = authExp.getMessage();
        SQLState = "42000";
        throw createProcessorResponse(403);
      } finally {
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
      }
    }
    if (conf.getBoolVar(ConfVars.HIVE_LOG_EXPLAIN_OUTPUT)) {
      String explainOutput = getExplainOutput(sem, plan, tree);
      if (explainOutput != null) {
        LOG.info("EXPLAIN output for queryid " + queryId + " : " + explainOutput);
        if (conf.isWebUiQueryInfoCacheEnabled()) {
          queryDisplay.setExplainPlan(explainOutput);
        }
      }
    }
  } catch (CommandProcessorResponse cpr) {
    throw cpr;
  } catch (Exception e) {
    checkInterrupted("during query compilation: " + e.getMessage(), null, null);
    compileError = true;
    ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage());
    errorMessage = "FAILED: " + e.getClass().getSimpleName();
    if (error != ErrorMsg.GENERIC_ERROR) {
      errorMessage += " [Error " + error.getErrorCode() + "]:";
    }
    // HIVE-4889
    if ((e instanceof IllegalArgumentException) && e.getMessage() == null && e.getCause() != null) {
      errorMessage += " " + e.getCause().getMessage();
    } else {
      errorMessage += " " + e.getMessage();
    }
    if (error == ErrorMsg.TXNMGR_NOT_ACID) {
      errorMessage += ". Failed command: " + queryStr;
    }
    SQLState = error.getSQLState();
    downstreamError = e;
    console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw createProcessorResponse(error.getErrorCode());
  } finally {
    // Run the after-compilation hook here: if compilation failed, the before/after execution hooks will never be executed.
    if (!parseError) {
      try {
        hookRunner.runAfterCompilationHook(command, compileError);
      } catch (Exception e) {
        LOG.warn("Failed when invoking query after-compilation hook.", e);
      }
    }
    double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE) / 1000.00;
    ImmutableMap<String, Long> compileHMSTimings = dumpMetaCallTimingWithoutEx("compilation");
    queryDisplay.setHmsTimings(QueryDisplay.Phase.COMPILATION, compileHMSTimings);
    boolean isInterrupted = lDrvState.isAborted();
    if (isInterrupted && !deferClose) {
      closeInProcess(true);
    }
    lDrvState.stateLock.lock();
    try {
      if (isInterrupted) {
        lDrvState.driverState = deferClose ? DriverState.EXECUTING : DriverState.ERROR;
      } else {
        lDrvState.driverState = compileError ? DriverState.ERROR : DriverState.COMPILED;
      }
    } finally {
      lDrvState.stateLock.unlock();
    }
    if (isInterrupted) {
      LOG.info("Compiling command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds");
    } else {
      LOG.info("Completed compiling command(queryId=" + queryId + "); Time taken: " + duration + " seconds");
    }
  }
}
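compile() layers its PerfLogger spans: DRIVER_RUN and COMPILE are opened up front, PARSE, ANALYZE, and DO_AUTHORIZATION are nested inside, and the finally block converts the value returned by PerfLogEnd (elapsed milliseconds) into seconds for the completion log line. The sketch below shows that nesting and the seconds conversion in a stripped-down, hypothetical form; the surrounding class and its CLASS_NAME field are placeholders, and a live Hive session is assumed.

import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.session.SessionState;

public class CompilePhaseTimingSketch {
  private static final String CLASS_NAME = CompilePhaseTimingSketch.class.getName();

  public void timedCompile() {
    // Passing true asks the session for a fresh PerfLogger, as compile() does,
    // so timings from a previous query on this thread are not mixed in.
    PerfLogger perfLogger = SessionState.getPerfLogger(true);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);
    try {
      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARSE);
      // ... parsing work ...
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE);

      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
      // ... semantic analysis work ...
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);
    } finally {
      // PerfLogEnd returns the elapsed time in milliseconds; divide to report seconds.
      double seconds = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE) / 1000.0;
      System.out.println("Compilation took " + seconds + " seconds");
    }
  }
}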
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
From the class Driver, method postExecutionCacheActions:
private void postExecutionCacheActions() throws Exception {
  if (cacheUsage != null) {
    if (cacheUsage.getStatus() == CacheUsage.CacheStatus.QUERY_USING_CACHE) {
      // Using a previously cached result.
      CacheEntry cacheEntry = cacheUsage.getCacheEntry();
      // Reader count already incremented during cache lookup.
      // Save to usedCacheEntry to ensure reader is released after query.
      this.usedCacheEntry = cacheEntry;
    } else if (cacheUsage.getStatus() == CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS &&
        cacheUsage.getCacheEntry() != null && plan.getFetchTask() != null) {
      // Save results to the cache for future queries to use.
      PerfLogger perfLogger = SessionState.getPerfLogger();
      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SAVE_TO_RESULTS_CACHE);
      boolean savedToCache = QueryResultsCache.getInstance().setEntryValid(cacheUsage.getCacheEntry(),
          plan.getFetchTask().getWork());
      LOG.info("savedToCache: {}", savedToCache);
      if (savedToCache) {
        useFetchFromCache(cacheUsage.getCacheEntry());
        // setEntryValid() already increments the reader count. Set usedCacheEntry so it gets released.
        this.usedCacheEntry = cacheUsage.getCacheEntry();
      }
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SAVE_TO_RESULTS_CACHE);
    }
  }
}
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
From the class Driver, method releaseLocksAndCommitOrRollback:
/**
 * @param commit if there is an open transaction and if true, commit,
 *               if false rollback. If there is no open transaction this parameter is ignored.
 * @param txnManager an optional existing transaction manager retrieved earlier from the session
 */
@VisibleForTesting
public void releaseLocksAndCommitOrRollback(boolean commit, HiveTxnManager txnManager) throws LockException {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RELEASE_LOCKS);
  HiveTxnManager txnMgr;
  if (txnManager == null) {
    // Default to driver's txn manager if no txn manager specified
    txnMgr = queryTxnMgr;
  } else {
    txnMgr = txnManager;
  }
  // If we've opened a transaction we need to commit or rollback rather than explicitly
  // releasing the locks.
  conf.unset(ValidTxnList.VALID_TXNS_KEY);
  conf.unset(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
  if (!checkConcurrency()) {
    return;
  }
  if (txnMgr.isTxnOpen()) {
    if (commit) {
      if (conf.getBoolVar(ConfVars.HIVE_IN_TEST) && conf.getBoolVar(ConfVars.HIVETESTMODEROLLBACKTXN)) {
        txnMgr.rollbackTxn();
      } else {
        // both commit & rollback clear ALL locks for this tx
        txnMgr.commitTxn();
      }
    } else {
      txnMgr.rollbackTxn();
    }
  } else {
    // since there is no tx, we only have locks for current query (if any)
    if (ctx != null && ctx.getHiveLocks() != null) {
      hiveLocks.addAll(ctx.getHiveLocks());
    }
    txnMgr.releaseLocks(hiveLocks);
  }
  hiveLocks.clear();
  if (ctx != null) {
    ctx.setHiveLocks(null);
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RELEASE_LOCKS);
}
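The Javadoc above defines the contract: with an open transaction, commit=true commits and commit=false rolls back, and a null txnManager makes the method fall back to the driver's own queryTxnMgr. A short hypothetical caller-side sketch of that contract (not taken from the Hive codebase, where runInternal drives this call):

import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.lockmgr.LockException;

public class ReleaseLocksUsageSketch {
  public static void finishQuery(Driver driver, boolean success) throws LockException {
    // Commit when the query succeeded, roll back otherwise; passing null for the
    // transaction manager selects the driver's own queryTxnMgr, per the branch above.
    driver.releaseLocksAndCommitOrRollback(success, null);
  }
}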