use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
the class SerializationUtilities method deserializePlan.
private static <T> T deserializePlan(Kryo kryo, InputStream in, Class<T> planClass, boolean cloningPlan) {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DESERIALIZE_PLAN);
T plan;
LOG.info("Deserializing " + planClass.getSimpleName() + " using kryo");
// Note: both branches currently call the same Kryo deserialization path, so the
// cloningPlan flag does not change how the plan is read here.
if (cloningPlan) {
plan = deserializeObjectByKryo(kryo, in, planClass);
} else {
plan = deserializeObjectByKryo(kryo, in, planClass);
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DESERIALIZE_PLAN);
return plan;
}
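All of these snippets share the same instrumentation idiom: fetch the session's PerfLogger, call PerfLogBegin before the measured work and PerfLogEnd after it. The sketch below is a minimal illustration of that pattern; the timed() helper, its Supplier parameter and the try/finally guard are illustrative additions (deserializePlan above does not use a finally block), while SessionState.getPerfLogger(), PerfLogBegin and PerfLogEnd are the calls shown in the snippet.

import java.util.function.Supplier;

import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.session.SessionState;

public class PerfLoggedStep {
  private static final String CLASS_NAME = PerfLoggedStep.class.getName();

  // Hypothetical helper: wraps an arbitrary piece of work in the begin/end markers.
  // The try/finally ensures the end marker is written even if the work throws.
  static <T> T timed(String stage, Supplier<T> work) {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, stage);
    try {
      return work.get();
    } finally {
      perfLogger.PerfLogEnd(CLASS_NAME, stage);
    }
  }
}

With such a helper, the body above would reduce to something like plan = timed(PerfLogger.DESERIALIZE_PLAN, () -> deserializeObjectByKryo(kryo, in, planClass)).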
use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
the class Driver method acquireLocksAndOpenTxn.
/**
* Acquire read and write locks needed by the statement. The list of objects to be locked is
* obtained from the inputs and outputs populated by the compiler. The lock acquisition scheme is
* simple: if all the locks cannot be obtained, error out. Deadlock is avoided by making sure
* that the locks are lexicographically sorted.
*
* This method also records the list of valid transactions. This must be done after any
* transactions have been opened and locks acquired.
* @param startTxnImplicitly when autoCommit=false, the first DML statement starts a transaction
*/
private int acquireLocksAndOpenTxn(boolean startTxnImplicitly) {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
SessionState ss = SessionState.get();
HiveTxnManager txnMgr = ss.getTxnMgr();
if (startTxnImplicitly) {
assert !txnMgr.getAutoCommit();
}
try {
// Don't use the userName member, as it may or may not have been set. Get the value from
// conf, which calls into getUGI to figure out who the process is running as.
String userFromUGI;
try {
userFromUGI = conf.getUser();
} catch (IOException e) {
errorMessage = "FAILED: Error in determining user while acquiring locks: " + e.getMessage();
SQLState = ErrorMsg.findSQLState(e.getMessage());
downstreamError = e;
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
return 10;
}
boolean initiatingTransaction = false;
boolean readOnlyQueryInAutoCommit = false;
if ((txnMgr.getAutoCommit() && haveAcidWrite()) || plan.getOperation() == HiveOperation.START_TRANSACTION || (!txnMgr.getAutoCommit() && startTxnImplicitly)) {
if (txnMgr.isTxnOpen()) {
throw new RuntimeException("Already have an open transaction txnid:" + txnMgr.getCurrentTxnId());
}
// We are writing to tables in an ACID compliant way, so we need to open a transaction
txnMgr.openTxn(ctx, userFromUGI);
initiatingTransaction = true;
} else {
readOnlyQueryInAutoCommit = txnMgr.getAutoCommit() && plan.getOperation() == HiveOperation.QUERY && !haveAcidWrite();
}
// Set the transaction id in all of the acid file sinks
if (haveAcidWrite()) {
for (FileSinkDesc desc : acidSinks) {
desc.setTransactionId(txnMgr.getCurrentTxnId());
//it's possible to have > 1 FileSink writing to the same table/partition
//e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes
desc.setStatementId(txnMgr.getWriteIdAndIncrement());
}
}
/*Note: we have to record the snapshot after lock acquisition to prevent the lost update problem.
Consider 2 concurrent "update table T set x = x + 1" statements: the 1st gets the locks and the
2nd blocks until the 1st one commits, and only then locks in its snapshot, i.e. it will
see the changes made by the 1st one. This takes care of the autoCommit=true case.
For multi-stmt txns this is not sufficient and will be managed via WriteSet tracking
in the lock manager.*/
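// Illustrative scenario (added for clarity, not in the original source): suppose T.x is 7 and
// txns A and B both run "update T set x = x + 1". A acquires the lock first, records a snapshot
// showing x = 7, writes 8 and commits. B, blocked on the lock, records its snapshot only after
// A commits, so it sees 8 and writes 9. Had B taken its snapshot before waiting on the lock it
// would also have written 8, silently losing A's update.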
txnMgr.acquireLocks(plan, ctx, userFromUGI, lDrvState);
if (initiatingTransaction || (readOnlyQueryInAutoCommit && acidInQuery)) {
// For multi-stmt txns we should record the snapshot when the txn starts and not update it
// again until the txn completes; hence the check for initiatingTransaction.
// For autoCommit=true read-only statements the txn is implicit, i.e. the snapshot is locked in
// for each statement.
recordValidTxns();
}
return 0;
} catch (Exception e) {
errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
SQLState = ErrorMsg.findSQLState(e.getMessage());
downstreamError = e;
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
return 10;
} finally {
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
}
}
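The Javadoc's deadlock argument relies on every statement acquiring its locks in one global (lexicographic) order. The standalone sketch below illustrates the idea with plain strings in place of Hive's lock objects; the class and method names are hypothetical.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class LockOrdering {
  /**
   * Illustration only: if every statement sorts the paths it needs to lock and acquires them
   * in that order, two statements competing for the same pair of locks always request them in
   * the same order. Neither can hold one lock while waiting on the other, so the classic
   * deadlock cycle cannot form.
   */
  public static List<String> sortForAcquisition(List<String> lockPaths) {
    List<String> sorted = new ArrayList<>(lockPaths);
    Collections.sort(sorted); // lexicographic order
    return sorted;
  }
}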
use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
the class Driver method acquireLocks.
/**
* Acquire read and write locks needed by the statement. The list of objects to be locked is
* obtained from the inputs and outputs populated by the compiler. The locking strategy depends on
* the HiveTxnManager and HiveLockManager configured.
*
* This method also records the list of valid transactions. This must be done after any
* transactions have been opened.
* @throws CommandProcessorResponse
*/
private void acquireLocks() throws CommandProcessorResponse {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
if (!queryTxnMgr.isTxnOpen() && queryTxnMgr.supportsAcid()) {
/*Non-ACID txn managers don't support txns but still forward lock requests to lock managers.
An ACID txn manager requires all locks to be associated with a txn, so if we
end up here without an open txn it's because we are processing something like "use <database>",
which by definition needs no locks.*/
return;
}
try {
String userFromUGI = getUserFromUGI();
if (userFromUGI == null) {
throw createProcessorResponse(10);
}
// Set the table write id in all of the acid file sinks
if (haveAcidWrite()) {
List<FileSinkDesc> acidSinks = new ArrayList<>(plan.getAcidSinks());
// Sorting makes tests easier to write: file names and ROW__IDs depend on statementId,
// so a deterministic order keeps the (file name -> data) mapping stable.
acidSinks.sort((FileSinkDesc fsd1, FileSinkDesc fsd2) -> fsd1.getDirName().compareTo(fsd2.getDirName()));
for (FileSinkDesc desc : acidSinks) {
TableDesc tableInfo = desc.getTableInfo();
long writeId = queryTxnMgr.getTableWriteId(Utilities.getDatabaseName(tableInfo.getTableName()), Utilities.getTableName(tableInfo.getTableName()));
desc.setTableWriteId(writeId);
// it's possible to have > 1 FileSink writing to the same table/partition
// e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes
desc.setStatementId(queryTxnMgr.getStmtIdAndIncrement());
}
}
/*It's imperative that {@code acquireLocks()} is called for all commands so that
HiveTxnManager can transition its state machine correctly*/
queryTxnMgr.acquireLocks(plan, ctx, userFromUGI, lDrvState);
if (queryTxnMgr.recordSnapshot(plan)) {
recordValidTxns(queryTxnMgr);
}
if (plan.hasAcidResourcesInQuery()) {
recordValidWriteIds(queryTxnMgr);
}
} catch (Exception e) {
errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
SQLState = ErrorMsg.findSQLState(e.getMessage());
downstreamError = e;
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
throw createProcessorResponse(10);
} finally {
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
}
}
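The sort on acidSinks above is what makes statementId assignment deterministic. Below is a small sketch of the same idea, with a hypothetical Sink class standing in for FileSinkDesc and a local counter standing in for queryTxnMgr.getStmtIdAndIncrement(), showing why ordering by directory name keeps the (file name -> data) mapping stable.

import java.util.Comparator;
import java.util.List;

public class StatementIdAssignment {
  // Hypothetical stand-in for FileSinkDesc with only the fields this illustration needs.
  static class Sink {
    final String dirName;
    int statementId;
    Sink(String dirName) { this.dirName = dirName; }
    String getDirName() { return dirName; }
  }

  // Assign statement ids in directory-name order. Because the order no longer depends on how
  // the compiler happened to enumerate the sinks, re-running the same query gives each
  // directory the same id, which keeps file names and ROW__IDs reproducible in tests.
  static void assignStatementIds(List<Sink> sinks) {
    sinks.sort(Comparator.comparing(Sink::getDirName));
    int nextId = 0; // stands in for queryTxnMgr.getStmtIdAndIncrement()
    for (Sink sink : sinks) {
      sink.statementId = nextId++;
    }
  }
}

The comparator lambda in the real code could equally be written as acidSinks.sort(Comparator.comparing(FileSinkDesc::getDirName)); the behavior is the same.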
use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
the class Driver method execute.
private void execute() throws CommandProcessorResponse {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_EXECUTE);
boolean noName = StringUtils.isEmpty(conf.get(MRJobConfig.JOB_NAME));
int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
Metrics metrics = MetricsFactory.getInstance();
String queryId = queryState.getQueryId();
// Get the query string from the conf, since the compileInternal() method may have
// redacted sensitive information in it.
String queryStr = conf.getQueryString();
lDrvState.stateLock.lock();
try {
// The driver must be in COMPILED state, or in EXECUTING state for a combined
// compile/execute in runInternal; otherwise report the error.
if (lDrvState.driverState != DriverState.COMPILED && lDrvState.driverState != DriverState.EXECUTING) {
SQLState = "HY008";
errorMessage = "FAILED: unexpected driverstate: " + lDrvState + ", for query " + queryStr;
console.printError(errorMessage);
throw createProcessorResponse(1000);
} else {
lDrvState.driverState = DriverState.EXECUTING;
}
} finally {
lDrvState.stateLock.unlock();
}
maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
HookContext hookContext = null;
// Whether any error occurred during query execution. Used for query lifetime hooks.
boolean executionError = false;
try {
LOG.info("Executing command(queryId=" + queryId + "): " + queryStr);
// compile and execute can get called from different threads in case of HS2
// so clear timing in this thread's Hive object before proceeding.
Hive.get().clearMetaCallTiming();
plan.setStarted();
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().startQuery(queryStr, queryId);
SessionState.get().getHiveHistory().logPlanProgress(plan);
}
resStream = null;
SessionState ss = SessionState.get();
hookContext = new PrivateHookContext(plan, queryState, ctx.getPathToCS(), SessionState.get().getUserName(), ss.getUserIpAddress(), InetAddress.getLocalHost().getHostAddress(), operationId, ss.getSessionId(), Thread.currentThread().getName(), ss.isHiveServerQuery(), perfLogger, queryInfo, ctx);
hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
hookRunner.runPreHooks(hookContext);
// Trigger query hooks before query execution.
hookRunner.runBeforeExecutionHook(queryStr, hookContext);
setQueryDisplays(plan.getRootTasks());
int mrJobs = Utilities.getMRTasks(plan.getRootTasks()).size();
int jobs = mrJobs + Utilities.getTezTasks(plan.getRootTasks()).size() + Utilities.getSparkTasks(plan.getRootTasks()).size();
if (jobs > 0) {
logMrWarning(mrJobs);
console.printInfo("Query ID = " + queryId);
console.printInfo("Total jobs = " + jobs);
}
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_NUM_TASKS, String.valueOf(jobs));
SessionState.get().getHiveHistory().setIdToTableMap(plan.getIdToTableNameMap());
}
String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
// A runtime that launches runnable tasks as separate threads through TaskRunners.
// As soon as a task isRunnable, it is put in a queue.
// At any time, at most maxthreads tasks can be running.
// The main thread polls the TaskRunners to check if they have finished.
checkInterrupted("before running tasks.", hookContext, perfLogger);
DriverContext driverCxt = new DriverContext(ctx);
driverCxt.prepare(plan);
ctx.setHDFSCleanup(true);
// for canceling the query (should be bound to session?)
this.driverCxt = driverCxt;
SessionState.get().setMapRedStats(new LinkedHashMap<>());
SessionState.get().setStackTraces(new HashMap<>());
SessionState.get().setLocalMapRedErrors(new HashMap<>());
// Add root Tasks to runnable
for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
// Root tasks should have no parent tasks; if they do, it is a bug that can lead to
// incorrect results.
assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();
driverCxt.addToRunnable(tsk);
if (metrics != null) {
tsk.updateTaskMetrics(metrics);
}
}
preExecutionCacheActions();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RUN_TASKS);
// Loop while you either have tasks running, or tasks queued up
while (driverCxt.isRunning()) {
// Launch up to maxthreads tasks
Task<? extends Serializable> task;
while ((task = driverCxt.getRunnable(maxthreads)) != null) {
TaskRunner runner = launchTask(task, queryId, noName, jobname, jobs, driverCxt);
if (!runner.isRunning()) {
break;
}
}
// poll the Tasks to see which one completed
TaskRunner tskRun = driverCxt.pollFinished();
if (tskRun == null) {
continue;
}
/*
This should be removed eventually. HIVE-17814 gives a more detailed
explanation of what is happening, and HIVE-17815 explains why this is done.
Briefly: for replication the task graph is huge, so memory pressure becomes severe if
we keep a lot of references around.
*/
String opName = plan.getOperationName();
boolean isReplicationOperation = opName.equals(HiveOperation.REPLDUMP.getOperationName()) || opName.equals(HiveOperation.REPLLOAD.getOperationName());
if (!isReplicationOperation) {
hookContext.addCompleteTask(tskRun);
}
queryDisplay.setTaskResult(tskRun.getTask().getId(), tskRun.getTaskResult());
Task<? extends Serializable> tsk = tskRun.getTask();
TaskResult result = tskRun.getTaskResult();
int exitVal = result.getExitVal();
checkInterrupted("when checking the execution result.", hookContext, perfLogger);
if (exitVal != 0) {
Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
if (backupTask != null) {
setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
console.printError(errorMessage);
errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();
console.printError(errorMessage);
// add backup task to runnable
if (DriverContext.isLaunchable(backupTask)) {
driverCxt.addToRunnable(backupTask);
}
continue;
} else {
setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
if (driverCxt.isShutdown()) {
errorMessage = "FAILED: Operation cancelled. " + errorMessage;
}
invokeFailureHooks(perfLogger, hookContext, errorMessage + Strings.nullToEmpty(tsk.getDiagnosticsMessage()), result.getTaskError());
SQLState = "08S01";
// Override the default SQLState based on the ErrorMsg set in HiveException, when available.
if (result.getTaskError() instanceof HiveException) {
ErrorMsg errorMsg = ((HiveException) result.getTaskError()).getCanonicalErrorMsg();
if (errorMsg != ErrorMsg.GENERIC_ERROR) {
SQLState = errorMsg.getSQLState();
}
}
console.printError(errorMessage);
driverCxt.shutdown();
// in case we decided to run everything in local mode, restore the
// jobtracker setting to its initial value
ctx.restoreOriginalTracker();
throw createProcessorResponse(exitVal);
}
}
driverCxt.finished(tskRun);
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().setTaskProperty(queryId, tsk.getId(), Keys.TASK_RET_CODE, String.valueOf(exitVal));
SessionState.get().getHiveHistory().endTask(queryId, tsk);
}
if (tsk.getChildTasks() != null) {
for (Task<? extends Serializable> child : tsk.getChildTasks()) {
if (DriverContext.isLaunchable(child)) {
driverCxt.addToRunnable(child);
}
}
}
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RUN_TASKS);
postExecutionCacheActions();
// in case we decided to run everything in local mode, restore the
// jobtracker setting to its initial value
ctx.restoreOriginalTracker();
if (driverCxt.isShutdown()) {
SQLState = "HY008";
errorMessage = "FAILED: Operation cancelled";
invokeFailureHooks(perfLogger, hookContext, errorMessage, null);
console.printError(errorMessage);
throw createProcessorResponse(1000);
}
// Remove incomplete outputs: some may have been added at the beginning of execution,
// e.g. for dynamic partitions, and never completed.
HashSet<WriteEntity> remOutputs = new LinkedHashSet<WriteEntity>();
for (WriteEntity output : plan.getOutputs()) {
if (!output.isComplete()) {
remOutputs.add(output);
}
}
for (WriteEntity output : remOutputs) {
plan.getOutputs().remove(output);
}
hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK);
hookRunner.runPostExecHooks(hookContext);
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0));
SessionState.get().getHiveHistory().printRowCount(queryId);
}
releasePlan(plan);
} catch (CommandProcessorResponse cpr) {
executionError = true;
throw cpr;
} catch (Throwable e) {
executionError = true;
checkInterrupted("during query execution: \n" + e.getMessage(), hookContext, perfLogger);
ctx.restoreOriginalTracker();
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12));
}
// TODO: do better with handling types of Exception here
errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
if (hookContext != null) {
try {
invokeFailureHooks(perfLogger, hookContext, errorMessage, e);
} catch (Exception t) {
LOG.warn("Failed to invoke failure hook", t);
}
}
SQLState = "08S01";
downstreamError = e;
console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
throw createProcessorResponse(12);
} finally {
// Trigger query hooks after query completes its execution.
try {
hookRunner.runAfterExecutionHook(queryStr, hookContext, executionError);
} catch (Exception e) {
LOG.warn("Failed when invoking query after execution hook", e);
}
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().endQuery(queryId);
}
if (noName) {
conf.set(MRJobConfig.JOB_NAME, "");
}
double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_EXECUTE) / 1000.00;
ImmutableMap<String, Long> executionHMSTimings = dumpMetaCallTimingWithoutEx("execution");
queryDisplay.setHmsTimings(QueryDisplay.Phase.EXECUTION, executionHMSTimings);
Map<String, MapRedStats> stats = SessionState.get().getMapRedStats();
if (stats != null && !stats.isEmpty()) {
long totalCpu = 0;
console.printInfo("MapReduce Jobs Launched: ");
for (Map.Entry<String, MapRedStats> entry : stats.entrySet()) {
console.printInfo("Stage-" + entry.getKey() + ": " + entry.getValue());
totalCpu += entry.getValue().getCpuMSec();
}
console.printInfo("Total MapReduce CPU Time Spent: " + Utilities.formatMsecToStr(totalCpu));
}
lDrvState.stateLock.lock();
try {
lDrvState.driverState = executionError ? DriverState.ERROR : DriverState.EXECUTED;
} finally {
lDrvState.stateLock.unlock();
}
if (lDrvState.isAborted()) {
LOG.info("Executing command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds");
} else {
LOG.info("Completed executing command(queryId=" + queryId + "); Time taken: " + duration + " seconds");
}
}
if (console != null) {
console.printInfo("OK");
}
}
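At its core, execute() is a small scheduler: launch runnable tasks (up to maxthreads), poll for a finished one, handle its exit code, then promote its children. The single-threaded sketch below keeps only that queueing logic; the SimpleTask type is hypothetical, there is no concurrency, and the check that all of a child's parents have completed (DriverContext.isLaunchable in the real code) is omitted.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;

public class TaskLoopSketch {
  // Hypothetical task with an exit code and child tasks, standing in for Hive's Task/TaskRunner.
  static class SimpleTask {
    final String name;
    final List<SimpleTask> children = new ArrayList<>();
    SimpleTask(String name) { this.name = name; }
    int run() { return 0; }
  }

  // Launch whatever is runnable, "poll" a finished task, then enqueue its children.
  // The real loop is concurrent (TaskRunner threads, pollFinished); this keeps only the
  // queueing and child-promotion structure.
  static int runAll(List<SimpleTask> rootTasks) {
    Deque<SimpleTask> runnable = new ArrayDeque<>(rootTasks);
    while (!runnable.isEmpty()) {
      SimpleTask task = runnable.poll();
      int exitVal = task.run();
      if (exitVal != 0) {
        return exitVal;                  // analogous to shutdown + createProcessorResponse(exitVal)
      }
      runnable.addAll(task.children);    // children become runnable once the parent finishes
    }
    return 0;
  }
}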
use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
the class TezCompiler method optimizeTaskPlan.
@Override
protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, Context ctx) throws SemanticException {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask());
if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
} else {
LOG.debug("Skipping null scan query optimization");
}
if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
physicalCtx = new MetadataOnlyOptimizer().resolve(physicalCtx);
} else {
LOG.debug("Skipping metadata only query optimization");
}
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
physicalCtx = new CrossProductHandler().resolve(physicalCtx);
} else {
LOG.debug("Skipping cross product analysis");
}
if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
physicalCtx = new LlapPreVectorizationPass().resolve(physicalCtx);
} else {
LOG.debug("Skipping llap pre-vectorization pass");
}
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
physicalCtx = new Vectorizer().resolve(physicalCtx);
} else {
LOG.debug("Skipping vectorization");
}
if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
physicalCtx = new StageIDsRearranger().resolve(physicalCtx);
} else {
LOG.debug("Skipping stage id rearranger");
}
if ((conf.getBoolVar(HiveConf.ConfVars.HIVE_TEZ_ENABLE_MEMORY_MANAGER)) && (conf.getBoolVar(HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN))) {
physicalCtx = new MemoryDecider().resolve(physicalCtx);
}
if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
LlapClusterStateForCompile llapInfo = LlapClusterStateForCompile.getClusterInfo(conf);
physicalCtx = new LlapDecider(llapInfo).resolve(physicalCtx);
} else {
LOG.debug("Skipping llap decider");
}
// This optimizer will serialize all filters that made it to the
// table scan operator to avoid having to do it multiple times on
// the backend. If you have a physical optimization that changes
// table scans or filters, you have to invoke it before this one.
physicalCtx = new SerializeFilter().resolve(physicalCtx);
if (physicalCtx.getContext().getExplainAnalyze() != null) {
new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
}
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "optimizeTaskPlan");
return;
}
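optimizeTaskPlan is a chain of physical-plan passes, each gated by a configuration flag and each handed the context returned by the previous pass. The sketch below captures that shape with hypothetical types; it is not Hive's PhysicalPlanResolver API.

import java.util.Map;
import java.util.function.UnaryOperator;

public class ResolverChainSketch {
  // Hypothetical plan container standing in for PhysicalContext.
  static class Plan { }

  // Each entry maps a config key to a pass that rewrites the plan, mirroring the
  // "if (conf.getBoolVar(...)) physicalCtx = new X().resolve(physicalCtx)" blocks above.
  // The passes map should preserve insertion order (e.g. a LinkedHashMap), since pass order
  // matters: SerializeFilter, for instance, must run after passes that change table scans
  // or filters.
  static Plan optimize(Plan plan, Map<String, Boolean> conf, Map<String, UnaryOperator<Plan>> passes) {
    for (Map.Entry<String, UnaryOperator<Plan>> pass : passes.entrySet()) {
      if (conf.getOrDefault(pass.getKey(), false)) {
        plan = pass.getValue().apply(plan);
      }
      // a disabled pass is simply skipped, like the LOG.debug branches in TezCompiler
    }
    return plan;
  }
}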