Example 1 with CompactionType

Use of org.apache.hadoop.hive.metastore.api.CompactionType in project hive by apache.

From the class Initiator, method checkForCompaction. This revision takes a ValidTxnList to decide whether a compaction is needed.

private CompactionType checkForCompaction(final CompactionInfo ci, final ValidTxnList txns, final StorageDescriptor sd, final Map<String, String> tblproperties, final String runAs) throws IOException, InterruptedException {
    // If it's marked as too many aborted, we already know we need to compact
    if (ci.tooManyAborts) {
        LOG.debug("Found too many aborted transactions for " + ci.getFullPartitionName() + ", " + "initiating major compaction");
        return CompactionType.MAJOR;
    }
    if (runJobAsSelf(runAs)) {
        return determineCompactionType(ci, txns, sd, tblproperties);
    } else {
        LOG.info("Going to initiate as user " + runAs);
        UserGroupInformation ugi = UserGroupInformation.createProxyUser(runAs, UserGroupInformation.getLoginUser());
        CompactionType compactionType = ugi.doAs(new PrivilegedExceptionAction<CompactionType>() {

            @Override
            public CompactionType run() throws Exception {
                return determineCompactionType(ci, txns, sd, tblproperties);
            }
        });
        try {
            FileSystem.closeAllForUGI(ugi);
        } catch (IOException exception) {
            LOG.error("Could not clean up file-system handles for UGI: " + ugi + " for " + ci.getFullPartitionName(), exception);
        }
        return compactionType;
    }
}
Also used : CompactionType(org.apache.hadoop.hive.metastore.api.CompactionType) IOException(java.io.IOException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)
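
On Java 8 and later, the anonymous PrivilegedExceptionAction above collapses into a lambda. Below is a minimal sketch of the same proxy-user pattern, assuming a caller-supplied action; the runAsUser helper is illustrative and not part of the Hive source:

import java.io.IOException;
import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.security.UserGroupInformation;

public class ProxyUserSketch {

    // Run an action as a proxy user, then release the cached FileSystem
    // handles for that user, mirroring checkForCompaction above.
    static <T> T runAsUser(String runAs, PrivilegedExceptionAction<T> action) throws IOException, InterruptedException {
        UserGroupInformation ugi = UserGroupInformation.createProxyUser(runAs, UserGroupInformation.getLoginUser());
        try {
            return ugi.doAs(action);
        } finally {
            try {
                FileSystem.closeAllForUGI(ugi);
            } catch (IOException e) {
                // Swallow the cleanup failure so it doesn't mask the action's
                // result; the Initiator logs it instead.
            }
        }
    }
}

A call site then shrinks to a single line, for example: CompactionType type = runAsUser(runAs, () -> determineCompactionType(ci, txns, sd, tblproperties));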

Example 2 with CompactionType

Use of org.apache.hadoop.hive.metastore.api.CompactionType in project hive by apache.

From the class Initiator, method checkForCompaction. This later revision takes a ValidWriteIdList instead of a ValidTxnList, and returns null for insert-only transactional tables.

private CompactionType checkForCompaction(final CompactionInfo ci, final ValidWriteIdList writeIds, final StorageDescriptor sd, final Map<String, String> tblproperties, final String runAs) throws IOException, InterruptedException {
    // If it's marked as too many aborted, we already know we need to compact
    if (ci.tooManyAborts) {
        LOG.debug("Found too many aborted transactions for " + ci.getFullPartitionName() + ", " + "initiating major compaction");
        return CompactionType.MAJOR;
    }
    // If it is for insert-only transactional table, return null.
    if (AcidUtils.isInsertOnlyTable(tblproperties)) {
        return null;
    }
    if (runJobAsSelf(runAs)) {
        return determineCompactionType(ci, writeIds, sd, tblproperties);
    } else {
        LOG.info("Going to initiate as user " + runAs);
        UserGroupInformation ugi = UserGroupInformation.createProxyUser(runAs, UserGroupInformation.getLoginUser());
        CompactionType compactionType = ugi.doAs(new PrivilegedExceptionAction<CompactionType>() {

            @Override
            public CompactionType run() throws Exception {
                return determineCompactionType(ci, writeIds, sd, tblproperties);
            }
        });
        try {
            FileSystem.closeAllForUGI(ugi);
        } catch (IOException exception) {
            LOG.error("Could not clean up file-system handles for UGI: " + ugi + " for " + ci.getFullPartitionName(), exception);
        }
        return compactionType;
    }
}
Also used : CompactionType(org.apache.hadoop.hive.metastore.api.CompactionType) IOException(java.io.IOException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)
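
The insert-only short-circuit near the top of this revision keys off the table parameters. Below is a minimal sketch of the property check that AcidUtils.isInsertOnlyTable performs, assuming the standard ACID table properties; the helper is an illustrative stand-in, not the Hive implementation:

import java.util.HashMap;
import java.util.Map;

public class InsertOnlyCheckSketch {

    // Illustrative stand-in: a transactional table is insert-only (MM)
    // when transactional_properties is set to "insert_only".
    static boolean isInsertOnly(Map<String, String> tblproperties) {
        return "insert_only".equalsIgnoreCase(tblproperties.get("transactional_properties"));
    }

    public static void main(String[] args) {
        Map<String, String> tblproperties = new HashMap<>();
        tblproperties.put("transactional", "true");
        tblproperties.put("transactional_properties", "insert_only");
        // Prints true; checkForCompaction would return null for such a table.
        System.out.println(isInsertOnly(tblproperties));
    }
}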

Example 3 with CompactionType

Use of org.apache.hadoop.hive.metastore.api.CompactionType in project hive by apache.

From the class Initiator, method run: the main loop that scans for potential compactions and calls checkForCompaction.

@Override
public void run() {
    // Make sure nothing escapes this run method and kills the metastore at large,
    // so wrap it in a big catch Throwable statement.
    try {
        recoverFailedCompactions(false);
        int abortedThreshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD);
        // Make sure we run through the loop once before checking to stop, as this makes
        // testing much easier. The stop value is only used in testing, not when called from
        // HiveMetaStore.
        do {
            long startedAt = -1;
            TxnStore.MutexAPI.LockHandle handle = null;
            // Wrap the inner parts of the loop in a catch Throwable so that errors on one pass
            // don't doom the entire thread.
            try {
                handle = txnHandler.getMutexAPI().acquireLock(TxnStore.MUTEX_KEY.Initiator.name());
                startedAt = System.currentTimeMillis();
                // todo: add method to only get current i.e. skip history - more efficient
                ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest());
                Set<CompactionInfo> potentials = txnHandler.findPotentialCompactions(abortedThreshold);
                LOG.debug("Found " + potentials.size() + " potential compactions, " + "checking to see if we should compact any of them");
                for (CompactionInfo ci : potentials) {
                    LOG.info("Checking to see if we should compact " + ci.getFullPartitionName());
                    try {
                        Table t = resolveTable(ci);
                        if (t == null) {
                            // Most likely this means it's a temp table
                            LOG.info("Can't find table " + ci.getFullTableName() + ", assuming it's a temp " + "table or has been dropped and moving on.");
                            continue;
                        }
                        // check if no compaction set for this table
                        if (noAutoCompactSet(t)) {
                            LOG.info("Table " + tableName(t) + " marked " + hive_metastoreConstants.TABLE_NO_AUTO_COMPACT + "=true so we will not compact it.");
                            continue;
                        }
                        // If the table is partitioned but this entry has no partition name,
                        // then it's a dynamic partitioning case and we shouldn't check the table itself.
                        if (t.getPartitionKeys() != null && t.getPartitionKeys().size() > 0 && ci.partName == null) {
                            LOG.debug("Skipping entry for " + ci.getFullTableName() + " as it is from dynamic" + " partitioning");
                            continue;
                        }
                        // Check for already initiated or working compactions. This check is
                        // best-effort, since a compaction could still be initiated between
                        // the time currentCompactions is generated and now.
                        if (lookForCurrentCompactions(currentCompactions, ci)) {
                            LOG.debug("Found currently initiated or working compaction for " + ci.getFullPartitionName() + " so we will not initiate another compaction");
                            continue;
                        }
                        if (txnHandler.checkFailedCompactions(ci)) {
                            LOG.warn("Will not initiate compaction for " + ci.getFullPartitionName() + " since last " + HiveConf.ConfVars.COMPACTOR_INITIATOR_FAILED_THRESHOLD + " attempts to compact it failed.");
                            txnHandler.markFailed(ci);
                            continue;
                        }
                        // Figure out who we should run the file operations as
                        Partition p = resolvePartition(ci);
                        if (p == null && ci.partName != null) {
                            LOG.info("Can't find partition " + ci.getFullPartitionName() + ", assuming it has been dropped and moving on.");
                            continue;
                        }
                        // Compaction doesn't work under a transaction and hence pass null for validTxnList
                        // The response will have one entry per table and hence we get only one ValidWriteIdList
                        String fullTableName = TxnUtils.getFullTableName(t.getDbName(), t.getTableName());
                        GetValidWriteIdsRequest rqst = new GetValidWriteIdsRequest(Collections.singletonList(fullTableName), null);
                        ValidWriteIdList tblValidWriteIds = TxnUtils.createValidCompactWriteIdList(txnHandler.getValidWriteIds(rqst).getTblValidWriteIds().get(0));
                        StorageDescriptor sd = resolveStorageDescriptor(t, p);
                        String runAs = findUserToRunAs(sd.getLocation(), t);
                        /* Future thought: checkForCompaction will check a lot of file metadata and may be
                         * expensive. Long term we should consider having a thread pool here and running
                         * checkForCompactions in parallel. */
                        CompactionType compactionNeeded = checkForCompaction(ci, tblValidWriteIds, sd, t.getParameters(), runAs);
                        if (compactionNeeded != null)
                            requestCompaction(ci, runAs, compactionNeeded);
                    } catch (Throwable t) {
                        LOG.error("Caught exception while trying to determine if we should compact " + ci + ".  Marking failed to avoid repeated failures, " + "" + StringUtils.stringifyException(t));
                        txnHandler.markFailed(ci);
                    }
                }
                // Check for timed out remote workers.
                recoverFailedCompactions(true);
                // Clean anything from the txns table that has no components left in txn_components.
                txnHandler.cleanEmptyAbortedTxns();
            } catch (Throwable t) {
                LOG.error("Initiator loop caught unexpected exception this time through the loop: " + StringUtils.stringifyException(t));
            } finally {
                if (handle != null) {
                    handle.releaseLocks();
                }
            }
            long elapsedTime = System.currentTimeMillis() - startedAt;
            if (elapsedTime >= checkInterval || stop.get())
                continue;
            else
                Thread.sleep(checkInterval - elapsedTime);
        } while (!stop.get());
    } catch (Throwable t) {
        LOG.error("Caught an exception in the main loop of compactor initiator, exiting " + StringUtils.stringifyException(t));
    }
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) GetValidWriteIdsRequest(org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest) CompactionType(org.apache.hadoop.hive.metastore.api.CompactionType) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo)
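
When checkForCompaction returns a non-null type, requestCompaction enqueues the work with the transaction handler. A client can enqueue an equivalent request through the metastore API; the sketch below builds a CompactionRequest directly, with placeholder database, table, and partition names. The submission via compact2 is an assumption about recent Hive releases, so verify it against your IMetaStoreClient version:

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.CompactionRequest;
import org.apache.hadoop.hive.metastore.api.CompactionType;

public class RequestCompactionSketch {

    public static void main(String[] args) throws Exception {
        // Placeholder names; substitute a real database, table, and partition.
        CompactionRequest rqst = new CompactionRequest("default", "acid_table", CompactionType.MAJOR);
        rqst.setPartitionname("ds=2018-01-01");
        rqst.setRunas("hive");

        HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
        try {
            // Assumed API: compact2 queues the request and returns a
            // CompactionResponse describing its state.
            client.compact2(rqst);
        } finally {
            client.close();
        }
    }
}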

Aggregations

CompactionType (org.apache.hadoop.hive.metastore.api.CompactionType): 3 uses
IOException (java.io.IOException): 2 uses
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 2 uses
UserGroupInformation (org.apache.hadoop.security.UserGroupInformation): 2 uses
ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList): 1 use
GetValidWriteIdsRequest (org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest): 1 use
Partition (org.apache.hadoop.hive.metastore.api.Partition): 1 use
ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest): 1 use
ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse): 1 use
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 1 use
Table (org.apache.hadoop.hive.metastore.api.Table): 1 use
CompactionInfo (org.apache.hadoop.hive.metastore.txn.CompactionInfo): 1 use