Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
Class PartitionPruner, method pruneBySequentialScan.
/**
 * Prunes partitions by fetching the partition names first and then evaluating the pruning
 * expression on the client with the Hive expression evaluator.
 * @param tab the table containing the partitions.
 * @param partitions the resulting partitions.
 * @param prunerExpr the SQL predicate that involves partition columns.
 * @param conf the Hive configuration object; must not be null.
 * @return true iff the partition pruning expression contains non-partition columns.
 */
private static boolean pruneBySequentialScan(Table tab, List<Partition> partitions,
    ExprNodeGenericFuncDesc prunerExpr, HiveConf conf) throws HiveException, MetaException {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PRUNE_LISTING);
  List<String> partNames = Hive.get().getPartitionNames(tab.getDbName(), tab.getTableName(), (short) -1);
  String defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
  List<String> partCols = extractPartColNames(tab);
  List<PrimitiveTypeInfo> partColTypeInfos = extractPartColTypes(tab);
  boolean hasUnknownPartitions = prunePartitionNames(partCols, partColTypeInfos, prunerExpr,
      defaultPartitionName, partNames);
  perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PRUNE_LISTING);
  perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
  if (!partNames.isEmpty()) {
    partitions.addAll(Hive.get().getPartitionsByNames(tab, partNames));
  }
  perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
  return hasUnknownPartitions;
}
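The snippet above brackets each phase (metastore listing, partition retrieval) with a begin/end pair on the per-session PerfLogger. Below is a minimal sketch of that timing pattern in isolation, assuming the lower-case perfLogBegin/perfLogEnd API shown in this snippet; CLASS_NAME and the doPartitionListing() helper are hypothetical placeholders, and the try/finally guard is an illustration rather than part of the original method.

// Hedged sketch: wrap the work to be timed with a begin/end pair so the elapsed
// time is attributed to a named phase. CLASS_NAME and doPartitionListing() are
// hypothetical placeholders, not Hive API.
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PRUNE_LISTING);
try {
  doPartitionListing();   // the work being timed
} finally {
  // ending in a finally block keeps the begin/end pair balanced even on exceptions
  perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PRUNE_LISTING);
}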
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
Class SerializationUtilities, method serializePlan.
private static void serializePlan(Kryo kryo, Object plan, OutputStream out, boolean cloningPlan) {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SERIALIZE_PLAN);
  LOG.info("Serializing " + plan.getClass().getSimpleName() + " using kryo");
  if (cloningPlan) {
    serializeObjectByKryo(kryo, plan, out);
  } else {
    serializeObjectByKryo(kryo, plan, out);
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SERIALIZE_PLAN);
}
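Note that both branches of the if above call the same helper in this version. The body of serializeObjectByKryo is not shown here; a minimal stand-alone sketch of writing an object with the plain Kryo API, under the assumption that the helper simply wraps the stream in a Kryo Output, might look like the following (writeWithKryo is a hypothetical name, not the actual Hive helper).

// Hedged illustration using plain Kryo API (com.esotericsoftware.kryo.Kryo,
// com.esotericsoftware.kryo.io.Output, java.io.OutputStream); this is not the
// actual body of SerializationUtilities.serializeObjectByKryo.
private static void writeWithKryo(Kryo kryo, Object obj, OutputStream out) {
  Output output = new Output(out);
  try {
    kryo.writeObject(output, obj);
  } finally {
    output.close();   // flushes any buffered bytes to the underlying stream
  }
}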
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
Class SerializationUtilities, method deserializePlan.
private static <T> T deserializePlan(Kryo kryo, InputStream in, Class<T> planClass, boolean cloningPlan) {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DESERIALIZE_PLAN);
  T plan;
  LOG.info("Deserializing " + planClass.getSimpleName() + " using kryo");
  if (cloningPlan) {
    plan = deserializeObjectByKryo(kryo, in, planClass);
  } else {
    plan = deserializeObjectByKryo(kryo, in, planClass);
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DESERIALIZE_PLAN);
  return plan;
}
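Taken together, serializePlan and deserializePlan support a serialize-then-deserialize round trip, which is how a plan can be deep-copied through Kryo. The sketch below shows how such a round trip could be exercised in memory; roundTripPlan is a hypothetical helper assumed to live inside SerializationUtilities so the two private methods are visible, and the byte-array streams come from java.io.

// Hedged sketch of an in-memory round trip through the two private helpers above.
// The SERIALIZE_PLAN and DESERIALIZE_PLAN timings are recorded by the callees.
private static <T> T roundTripPlan(Kryo kryo, T plan, Class<T> planClass) {
  ByteArrayOutputStream bytes = new ByteArrayOutputStream();
  serializePlan(kryo, plan, bytes, true);                        // cloningPlan = true
  ByteArrayInputStream in = new ByteArrayInputStream(bytes.toByteArray());
  return deserializePlan(kryo, in, planClass, true);
}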
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
Class Driver, method acquireLocksAndOpenTxn.
/**
 * Acquires the read and write locks needed by the statement. The list of objects to be
 * locked is obtained from the inputs and outputs populated by the compiler. The lock
 * acquisition scheme is simple: if all the locks cannot be obtained, error out. Deadlock
 * is avoided by making sure that the locks are acquired in lexicographic order.
 *
 * This method also records the list of valid transactions. This must be done after any
 * transactions have been opened and locks acquired.
 * @param startTxnImplicitly with autoCommit=false, the first DML statement starts a transaction
 **/
private int acquireLocksAndOpenTxn(boolean startTxnImplicitly) {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
  SessionState ss = SessionState.get();
  HiveTxnManager txnMgr = ss.getTxnMgr();
  if (startTxnImplicitly) {
    assert !txnMgr.getAutoCommit();
  }
  try {
    // Don't use the userName member, as it may or may not have been set. Get the value from
    // conf, which calls into getUGI to figure out who the process is running as.
    String userFromUGI;
    try {
      userFromUGI = conf.getUser();
    } catch (IOException e) {
      errorMessage = "FAILED: Error in determining user while acquiring locks: " + e.getMessage();
      SQLState = ErrorMsg.findSQLState(e.getMessage());
      downstreamError = e;
      console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return 10;
    }
    boolean initiatingTransaction = false;
    boolean readOnlyQueryInAutoCommit = false;
    if ((txnMgr.getAutoCommit() && haveAcidWrite()) || plan.getOperation() == HiveOperation.START_TRANSACTION
        || (!txnMgr.getAutoCommit() && startTxnImplicitly)) {
      if (txnMgr.isTxnOpen()) {
        throw new RuntimeException("Already have an open transaction txnid:" + txnMgr.getCurrentTxnId());
      }
      // We are writing to tables in an ACID compliant way, so we need to open a transaction
      txnMgr.openTxn(ctx, userFromUGI);
      initiatingTransaction = true;
    } else {
      readOnlyQueryInAutoCommit = txnMgr.getAutoCommit() && plan.getOperation() == HiveOperation.QUERY
          && !haveAcidWrite();
    }
    // Set the transaction id in all of the acid file sinks
    if (haveAcidWrite()) {
      for (FileSinkDesc desc : acidSinks) {
        desc.setTransactionId(txnMgr.getCurrentTxnId());
        // it's possible to have > 1 FileSink writing to the same table/partition
        // e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes
        desc.setStatementId(txnMgr.getWriteIdAndIncrement());
      }
    }
    /* Note: we have to record the snapshot after lock acquisition to prevent the lost update problem.
       Consider 2 concurrent "update table T set x = x + 1": the 1st gets the locks and the
       2nd blocks until the 1st one commits and only then locks in the snapshot, i.e. it will
       see the changes made by the 1st one. This takes care of the autoCommit=true case.
       For multi-stmt txns this is not sufficient and will be managed via WriteSet tracking
       in the lock manager. */
    txnMgr.acquireLocks(plan, ctx, userFromUGI, lDrvState);
    if (initiatingTransaction || (readOnlyQueryInAutoCommit && acidInQuery)) {
      // For multi-stmt txns we should record the snapshot when the txn starts but
      // not update it after that until the txn completes; hence the check for {@code initiatingTransaction}.
      // For autoCommit=true, read-only statements, the txn is implicit, i.e. lock in the snapshot
      // for each statement.
      recordValidTxns();
    }
    return 0;
  } catch (Exception e) {
    errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
    SQLState = ErrorMsg.findSQLState(e.getMessage());
    downstreamError = e;
    console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    return 10;
  } finally {
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
  }
}
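The javadoc above says deadlock is avoided by acquiring locks in lexicographic order; the idea is that if every statement requests its locks in the same global order, no two statements can each hold a lock that the other is waiting on. A minimal illustration of that ordering discipline follows; LockRequest, getName(), neededLocks and lockManager are all hypothetical placeholders, not Hive's actual lock manager API.

// Hedged sketch of the lexicographic-ordering idea only; the types here are
// placeholders and do not correspond to Hive classes.
List<LockRequest> requests = new ArrayList<>(neededLocks);
requests.sort(Comparator.comparing(LockRequest::getName));
for (LockRequest r : requests) {
  lockManager.lock(r);   // acquire in a globally consistent order
}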
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
Class Driver, method acquireLocks.
/**
 * Acquires the read and write locks needed by the statement. The list of objects to be
 * locked is obtained from the inputs and outputs populated by the compiler. The locking
 * strategy depends on the configured HiveTxnManager and HiveLockManager.
 *
 * This method also records the list of valid transactions. This must be done after any
 * transactions have been opened.
 * @throws CommandProcessorResponse
 */
private void acquireLocks() throws CommandProcessorResponse {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
  if (!queryTxnMgr.isTxnOpen() && queryTxnMgr.supportsAcid()) {
    /* Non-acid txn managers don't support txns but forward lock requests to lock managers.
       The acid txn manager requires all locks to be associated with a txn, so if we end up
       here without an open txn it's because we are processing something like "use <database>",
       which by definition needs no locks. */
    return;
  }
  try {
    String userFromUGI = getUserFromUGI();
    if (userFromUGI == null) {
      throw createProcessorResponse(10);
    }
    // Set the table write id in all of the acid file sinks
    if (haveAcidWrite()) {
      List<FileSinkDesc> acidSinks = new ArrayList<>(plan.getAcidSinks());
      // sorting makes tests easier to write since file names and ROW__IDs depend on statementId,
      // so this makes the (file name -> data) mapping stable
      acidSinks.sort((FileSinkDesc fsd1, FileSinkDesc fsd2) -> fsd1.getDirName().compareTo(fsd2.getDirName()));
      for (FileSinkDesc desc : acidSinks) {
        TableDesc tableInfo = desc.getTableInfo();
        long writeId = queryTxnMgr.getTableWriteId(Utilities.getDatabaseName(tableInfo.getTableName()),
            Utilities.getTableName(tableInfo.getTableName()));
        desc.setTableWriteId(writeId);
        // it's possible to have > 1 FileSink writing to the same table/partition
        // e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes
        desc.setStatementId(queryTxnMgr.getStmtIdAndIncrement());
      }
    }
    /* It's imperative that {@code acquireLocks()} is called for all commands so that
       HiveTxnManager can transition its state machine correctly. */
    queryTxnMgr.acquireLocks(plan, ctx, userFromUGI, lDrvState);
    if (queryTxnMgr.recordSnapshot(plan)) {
      recordValidTxns(queryTxnMgr);
    }
    if (plan.hasAcidResourcesInQuery()) {
      recordValidWriteIds(queryTxnMgr);
    }
  } catch (Exception e) {
    errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
    SQLState = ErrorMsg.findSQLState(e.getMessage());
    downstreamError = e;
    console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw createProcessorResponse(10);
  } finally {
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
  }
}
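Both Driver methods bracket lock acquisition with the same ACQUIRE_READ_WRITE_LOCKS key, and the finally block guarantees the end record is written even when lock acquisition fails. A hedged sketch of how the recorded timings could be read back afterwards follows; it assumes PerfLogger's getStartTimes()/getEndTimes() accessors return millisecond maps keyed by phase name, which may differ across Hive versions.

// Hedged sketch: compute the elapsed time for a phase from the per-session PerfLogger.
// The getStartTimes()/getEndTimes() behavior is an assumption; verify against your Hive version.
PerfLogger perfLogger = SessionState.getPerfLogger();
Long begin = perfLogger.getStartTimes().get(PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
Long end = perfLogger.getEndTimes().get(PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
if (begin != null && end != null) {
  LOG.info("Lock acquisition took " + (end - begin) + " ms");
}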