
Example 31 with PerfLogger

Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

From class PartitionPruner, method pruneBySequentialScan.

/**
 * Prunes partitions by first fetching the partition names and then evaluating the pruning
 * expression with the Hive expression evaluator on the client side.
 * @param tab the table containing the partitions.
 * @param partitions the list that receives the surviving partitions.
 * @param prunerExpr the SQL predicate that involves partition columns.
 * @param conf the Hive configuration object; must not be null.
 * @return true iff the partition pruning expression contains non-partition columns.
 */
private static boolean pruneBySequentialScan(Table tab, List<Partition> partitions, ExprNodeGenericFuncDesc prunerExpr, HiveConf conf) throws HiveException, MetaException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PRUNE_LISTING);
    // Fetch every partition name for the table ((short) -1 means no limit).
    List<String> partNames = Hive.get().getPartitionNames(tab.getDbName(), tab.getTableName(), (short) -1);
    String defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
    List<String> partCols = extractPartColNames(tab);
    List<PrimitiveTypeInfo> partColTypeInfos = extractPartColTypes(tab);
    // Evaluate the pruning expression against the names, filtering the list in place.
    boolean hasUnknownPartitions = prunePartitionNames(partCols, partColTypeInfos, prunerExpr, defaultPartitionName, partNames);
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PRUNE_LISTING);
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
    // Fetch full Partition objects only for the names that survived pruning.
    if (!partNames.isEmpty()) {
        partitions.addAll(Hive.get().getPartitionsByNames(tab, partNames));
    }
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
    return hasUnknownPartitions;
}
Also used: PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
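
A side note on the begin/end pattern: the snippet above emits perfLogEnd on the straight-line path only, so an exception thrown between the markers would leave the PRUNE_LISTING span open. Below is a minimal sketch of the same pattern wrapped in try/finally, reusing the PerfLogger API and constants shown in the snippet; the class and method names here are illustrative only.

import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.session.SessionState;

public class PerfLoggerPattern {
    private static final String CLASS_NAME = PerfLoggerPattern.class.getName();

    static void timedListing() {
        PerfLogger perfLogger = SessionState.getPerfLogger();
        perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PRUNE_LISTING);
        try {
            // ... the work being measured ...
        } finally {
            // Ending the span in finally keeps begin/end pairs balanced
            // even when the measured code throws.
            perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PRUNE_LISTING);
        }
    }
}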

Example 32 with PerfLogger

Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

From class SerializationUtilities, method serializePlan.

private static void serializePlan(Kryo kryo, Object plan, OutputStream out, boolean cloningPlan) {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SERIALIZE_PLAN);
    LOG.info("Serializing " + plan.getClass().getSimpleName() + " using kryo");
    // Note: both branches are identical today; Kryo is used whether or not the
    // plan is being cloned. The if/else appears to be left over from an earlier
    // non-Kryo serialization path.
    if (cloningPlan) {
        serializeObjectByKryo(kryo, plan, out);
    } else {
        serializeObjectByKryo(kryo, plan, out);
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SERIALIZE_PLAN);
}
Also used: PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger)
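
serializeObjectByKryo is a private helper not shown in this excerpt. For orientation, here is a minimal sketch of what such a helper could look like using the Kryo streaming API; the class name and the assumption that the plan objects are Kryo-serializable without explicit registration are mine, not Hive's.

import java.io.OutputStream;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Output;

final class KryoSerializeSketch {
    // Wrap the target stream in a Kryo Output, write the object graph,
    // and close the Output to flush its buffered bytes.
    static void serializeObjectByKryo(Kryo kryo, Object plan, OutputStream out) {
        Output output = new Output(out);
        kryo.writeObject(output, plan);
        output.close();
    }
}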

Example 33 with PerfLogger

Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

From class SerializationUtilities, method deserializePlan.

private static <T> T deserializePlan(Kryo kryo, InputStream in, Class<T> planClass, boolean cloningPlan) {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DESERIALIZE_PLAN);
    T plan;
    LOG.info("Deserializing " + planClass.getSimpleName() + " using kryo");
    // As in serializePlan, the two branches are identical; Kryo handles both
    // the cloning and the non-cloning case.
    if (cloningPlan) {
        plan = deserializeObjectByKryo(kryo, in, planClass);
    } else {
        plan = deserializeObjectByKryo(kryo, in, planClass);
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DESERIALIZE_PLAN);
    return plan;
}
Also used: PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger)
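
The deserialization counterpart reads the object graph back with the same Kryo instance. Again a hedged sketch of what the unshown deserializeObjectByKryo helper might do, not Hive's actual implementation:

import java.io.InputStream;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;

final class KryoDeserializeSketch {
    // Read a single object of the requested class from the stream.
    static <T> T deserializeObjectByKryo(Kryo kryo, InputStream in, Class<T> planClass) {
        Input input = new Input(in);
        T plan = kryo.readObject(input, planClass);
        input.close();
        return plan;
    }
}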

Example 34 with PerfLogger

Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

From class Driver, method acquireLocksAndOpenTxn.

/**
   * Acquires the read and write locks needed by the statement. The list of objects to be locked
   * is obtained from the inputs and outputs populated by the compiler. The lock acquisition
   * scheme is simple: if all the locks cannot be obtained, error out. Deadlock is avoided by
   * making sure that the locks are acquired in lexicographic order.
   *
   * This method also records the list of valid transactions. This must be done after any
   * transactions have been opened and locks acquired.
   * @param startTxnImplicitly when autoCommit=false, the first DML statement starts a transaction
   **/
private int acquireLocksAndOpenTxn(boolean startTxnImplicitly) {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
    SessionState ss = SessionState.get();
    HiveTxnManager txnMgr = ss.getTxnMgr();
    if (startTxnImplicitly) {
        assert !txnMgr.getAutoCommit();
    }
    try {
        // Don't use the userName member, as it may or may not have been set.  Get the value from
        // conf, which calls into getUGI to figure out who the process is running as.
        String userFromUGI;
        try {
            userFromUGI = conf.getUser();
        } catch (IOException e) {
            errorMessage = "FAILED: Error in determining user while acquiring locks: " + e.getMessage();
            SQLState = ErrorMsg.findSQLState(e.getMessage());
            downstreamError = e;
            console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
            return 10;
        }
        boolean initiatingTransaction = false;
        boolean readOnlyQueryInAutoCommit = false;
        if ((txnMgr.getAutoCommit() && haveAcidWrite()) || plan.getOperation() == HiveOperation.START_TRANSACTION || (!txnMgr.getAutoCommit() && startTxnImplicitly)) {
            if (txnMgr.isTxnOpen()) {
                throw new RuntimeException("Already have an open transaction txnid:" + txnMgr.getCurrentTxnId());
            }
            // We are writing to tables in an ACID compliant way, so we need to open a transaction
            txnMgr.openTxn(ctx, userFromUGI);
            initiatingTransaction = true;
        } else {
            readOnlyQueryInAutoCommit = txnMgr.getAutoCommit() && plan.getOperation() == HiveOperation.QUERY && !haveAcidWrite();
        }
        // Set the transaction id in all of the acid file sinks
        if (haveAcidWrite()) {
            for (FileSinkDesc desc : acidSinks) {
                desc.setTransactionId(txnMgr.getCurrentTxnId());
                //it's possible to have > 1 FileSink writing to the same table/partition
                //e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes
                desc.setStatementId(txnMgr.getWriteIdAndIncrement());
            }
        }
        /* Note: we have to record the snapshot after lock acquisition to prevent the lost-update
           problem. Consider two concurrent "update table T set x = x + 1" statements: the first
           gets the locks, and the second blocks until the first commits; only then does it lock
           in its snapshot, i.e. it sees the changes made by the first. This takes care of the
           autoCommit=true case. For multi-statement txns this is not sufficient and will be
           managed via WriteSet tracking in the lock manager. */
        txnMgr.acquireLocks(plan, ctx, userFromUGI, lDrvState);
        if (initiatingTransaction || (readOnlyQueryInAutoCommit && acidInQuery)) {
            //For multi-stmt txns we should record the snapshot when txn starts but
            // don't update it after that until txn completes.  Thus the check for {@code initiatingTransaction}
            //For autoCommit=true, Read-only statements, txn is implicit, i.e. lock in the snapshot
            //for each statement.
            recordValidTxns();
        }
        return 0;
    } catch (Exception e) {
        errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
        SQLState = ErrorMsg.findSQLState(e.getMessage());
        downstreamError = e;
        console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        return 10;
    } finally {
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
    }
}
Also used: SessionState (org.apache.hadoop.hive.ql.session.SessionState), FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc), PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger), HiveTxnManager (org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager), IOException (java.io.IOException), LockException (org.apache.hadoop.hive.ql.lockmgr.LockException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), AuthorizationException (org.apache.hadoop.hive.ql.metadata.AuthorizationException)
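
The javadoc's deadlock-avoidance claim relies on a classic technique: if every transaction acquires its locks in one agreed-upon global order (here, lexicographic), a circular wait can never form. A self-contained sketch of the idea follows; the types and the acquire method are illustrative, not Hive's lock manager API.

import java.util.List;

final class OrderedLocking {
    // Sorting first guarantees that two competing transactions request
    // overlapping locks in the same order, so neither can hold a lock
    // the other already owns while waiting for the reverse.
    static void acquireAll(List<String> lockNames) {
        lockNames.sort(String::compareTo);
        for (String name : lockNames) {
            acquire(name);
        }
    }

    // Hypothetical blocking acquire on a named lock.
    static void acquire(String name) { /* ... */ }
}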

Example 35 with PerfLogger

Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

From class Driver, method acquireLocks.

/**
 * Acquires the read and write locks needed by the statement. The list of objects to be locked
 * is obtained from the inputs and outputs populated by the compiler. The locking strategy
 * depends on the configured HiveTxnManager and HiveLockManager.
 *
 * This method also records the list of valid transactions. This must be done after any
 * transactions have been opened.
 * @throws CommandProcessorResponse
 */
private void acquireLocks() throws CommandProcessorResponse {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
    if (!queryTxnMgr.isTxnOpen() && queryTxnMgr.supportsAcid()) {
        /* Non-ACID txn managers don't support txns but forward lock requests to lock managers.
           The ACID txn manager requires all locks to be associated with a txn, so if we end up
           here without an open txn it's because we are processing something like "use <database>",
           which by definition needs no locks. */
        return;
    }
    try {
        String userFromUGI = getUserFromUGI();
        if (userFromUGI == null) {
            throw createProcessorResponse(10);
        }
        // Set the table write id in all of the acid file sinks
        if (haveAcidWrite()) {
            List<FileSinkDesc> acidSinks = new ArrayList<>(plan.getAcidSinks());
            // sorting makes tests easier to write since file names and ROW__IDs depend on statementId
            // so this makes (file name -> data) mapping stable
            acidSinks.sort((FileSinkDesc fsd1, FileSinkDesc fsd2) -> fsd1.getDirName().compareTo(fsd2.getDirName()));
            for (FileSinkDesc desc : acidSinks) {
                TableDesc tableInfo = desc.getTableInfo();
                long writeId = queryTxnMgr.getTableWriteId(Utilities.getDatabaseName(tableInfo.getTableName()), Utilities.getTableName(tableInfo.getTableName()));
                desc.setTableWriteId(writeId);
                // it's possible to have > 1 FileSink writing to the same table/partition
                // e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes
                desc.setStatementId(queryTxnMgr.getStmtIdAndIncrement());
            }
        }
        /*It's imperative that {@code acquireLocks()} is called for all commands so that
      HiveTxnManager can transition its state machine correctly*/
        queryTxnMgr.acquireLocks(plan, ctx, userFromUGI, lDrvState);
        if (queryTxnMgr.recordSnapshot(plan)) {
            recordValidTxns(queryTxnMgr);
        }
        if (plan.hasAcidResourcesInQuery()) {
            recordValidWriteIds(queryTxnMgr);
        }
    } catch (Exception e) {
        errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
        SQLState = ErrorMsg.findSQLState(e.getMessage());
        downstreamError = e;
        console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw createProcessorResponse(10);
    } finally {
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
    }
}
Also used: FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc), PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger), ArrayList (java.util.ArrayList), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), LockException (org.apache.hadoop.hive.ql.lockmgr.LockException), IOException (java.io.IOException), ParseException (org.apache.hadoop.hive.ql.parse.ParseException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), AuthorizationException (org.apache.hadoop.hive.ql.metadata.AuthorizationException)
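
The sort on acidSinks is what makes statement IDs, and therefore file names and ROW__IDs, deterministic across runs. A small self-contained illustration of that effect, using a stand-in record rather than Hive's FileSinkDesc:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

final class StatementIdSketch {
    // Illustrative stand-in for a file sink; not Hive's FileSinkDesc.
    record Sink(String dirName) {}

    // With the sinks sorted by directory name, the i-th directory always
    // receives statement ID i, regardless of the order in which the planner
    // produced the sinks.
    static List<String> assignIds(List<Sink> sinks) {
        sinks.sort(Comparator.comparing(Sink::dirName));
        List<String> out = new ArrayList<>();
        int stmtId = 0;
        for (Sink s : sinks) {
            out.add(s.dirName() + " -> statementId " + stmtId++);
        }
        return out;
    }
}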

Aggregations

PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger): 60 uses
ArrayList (java.util.ArrayList): 22 uses
IOException (java.io.IOException): 21 uses
LockException (org.apache.hadoop.hive.ql.lockmgr.LockException): 16 uses
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 13 uses
ExecutionException (java.util.concurrent.ExecutionException): 11 uses
Path (org.apache.hadoop.fs.Path): 11 uses
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 11 uses
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 11 uses
TException (org.apache.thrift.TException): 11 uses
HiveMetaException (org.apache.hadoop.hive.metastore.HiveMetaException): 10 uses
AlreadyExistsException (org.apache.hadoop.hive.metastore.api.AlreadyExistsException): 9 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 9 uses
FileNotFoundException (java.io.FileNotFoundException): 8 uses
UnknownHostException (java.net.UnknownHostException): 8 uses
LinkedList (java.util.LinkedList): 8 uses
JDODataStoreException (javax.jdo.JDODataStoreException): 8 uses
ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList): 8 uses
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 8 uses
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 8 uses