Use of org.apache.hadoop.hive.metastore.txn.CompactionInfo in project hive by apache.
The class TestCleaner, method blockedByLockTable.
@Test
public void blockedByLockTable() throws Exception {
Table t = newTable("default", "bblt", false);
addBaseFile(t, null, 20L, 20);
addDeltaFile(t, null, 21L, 22L, 2);
addDeltaFile(t, null, 23L, 24L, 2);
addDeltaFile(t, null, 21L, 24L, 4);
burnThroughTransactions(25);
CompactionRequest rqst = new CompactionRequest("default", "bblt", CompactionType.MINOR);
txnHandler.compact(rqst);
CompactionInfo ci = txnHandler.findNextToCompact("fred");
txnHandler.markCompacted(ci);
txnHandler.setRunAs(ci.id, System.getProperty("user.name"));
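// Simulate a concurrent reader: a table-level SHARED_READ lock on bblt that the cleaner must wait out before it may delete obsolete files.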
LockComponent comp = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "default");
comp.setTablename("bblt");
comp.setOperationType(DataOperationType.SELECT);
List<LockComponent> components = new ArrayList<LockComponent>(1);
components.add(comp);
LockRequest req = new LockRequest(components, "me", "localhost");
LockResponse res = txnHandler.lock(req);
startCleaner();
// The table-level read lock should block the cleaner, so the request must still be pending in the "ready for cleaning" state.
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
List<ShowCompactResponseElement> compacts = rsp.getCompacts();
Assert.assertEquals(1, compacts.size());
Assert.assertEquals("ready for cleaning", compacts.get(0).getState());
Assert.assertEquals("bblt", compacts.get(0).getTablename());
Assert.assertEquals(CompactionType.MINOR, compacts.get(0).getType());
}
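A natural follow-up (not part of the test above) is to confirm that the cleaner proceeds once the blocking lock goes away. Below is a minimal sketch, assuming the same CompactorTest helpers (startCleaner, txnHandler) and that UnlockRequest is available in this test; the exact post-clean bookkeeping ("succeeded" history entry vs. the entry being dropped) depends on the TxnStore version.
// Hypothetical continuation: release the reader's lock and run the cleaner again.
txnHandler.unlock(new UnlockRequest(res.getLockid()));
startCleaner();
ShowCompactResponse rsp2 = txnHandler.showCompact(new ShowCompactRequest());
for (ShowCompactResponseElement e : rsp2.getCompacts()) {
  // Whatever the version-specific bookkeeping, nothing should remain "ready for cleaning".
  Assert.assertFalse("ready for cleaning".equals(e.getState()));
}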
Use of org.apache.hadoop.hive.metastore.txn.CompactionInfo in project hive by apache.
The class TestCompactor, method testStatsAfterCompactionPartTbl.
/**
* After each major compaction, stats need to be updated on each column of the
* table/partition which previously had stats.
* 1. create a bucketed ORC backed table (Orc is currently required by ACID)
* 2. populate 2 partitions with data
* 3. compute stats
* 4. insert some data into the table using StreamingAPI
* 5. Trigger major compaction (which should update stats)
* 6. check that stats have been updated
* @throws Exception
* todo:
* 2. add non-partitioned test
* 4. add a test with sorted table?
*/
@Test
public void testStatsAfterCompactionPartTbl() throws Exception {
//as of (8/27/2014) Hive 0.14, ACID/Orc requires HiveInputFormat
String tblName = "compaction_test";
String tblNameStg = tblName + "_stg";
List<String> colNames = Arrays.asList("a", "b");
executeStatementOnDriver("drop table if exists " + tblName, driver);
executeStatementOnDriver("drop table if exists " + tblNameStg, driver);
executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(bkt INT)" + //currently ACID requires table to be bucketed
" CLUSTERED BY(a) INTO 4 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
executeStatementOnDriver("CREATE EXTERNAL TABLE " + tblNameStg + "(a INT, b STRING)" + " ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' LINES TERMINATED BY '\\n'" + " STORED AS TEXTFILE" + " LOCATION '" + stagingFolder.newFolder().toURI().getPath() + "'", driver);
executeStatementOnDriver("load data local inpath '" + BASIC_FILE_NAME + "' overwrite into table " + tblNameStg, driver);
execSelectAndDumpData("select * from " + tblNameStg, driver, "Dumping data for " + tblNameStg + " after load:");
executeStatementOnDriver("FROM " + tblNameStg + " INSERT INTO TABLE " + tblName + " PARTITION(bkt=0) " + "SELECT a, b where a < 2", driver);
executeStatementOnDriver("FROM " + tblNameStg + " INSERT INTO TABLE " + tblName + " PARTITION(bkt=1) " + "SELECT a, b where a >= 2", driver);
execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");
TxnStore txnHandler = TxnUtils.getTxnStore(conf);
CompactionInfo ci = new CompactionInfo("default", tblName, "bkt=0", CompactionType.MAJOR);
LOG.debug("List of stats columns before analyze Part1: " + txnHandler.findColumnsWithStats(ci));
Worker.StatsUpdater su = Worker.StatsUpdater.init(ci, colNames, conf, System.getProperty("user.name"));
//compute stats before compaction
su.gatherStats();
LOG.debug("List of stats columns after analyze Part1: " + txnHandler.findColumnsWithStats(ci));
CompactionInfo ciPart2 = new CompactionInfo("default", tblName, "bkt=1", CompactionType.MAJOR);
LOG.debug("List of stats columns before analyze Part2: " + txnHandler.findColumnsWithStats(ci));
su = Worker.StatsUpdater.init(ciPart2, colNames, conf, System.getProperty("user.name"));
//compute stats before compaction
su.gatherStats();
LOG.debug("List of stats columns after analyze Part2: " + txnHandler.findColumnsWithStats(ci));
//now make sure we get the stats we expect for partition we are going to add data to later
Map<String, List<ColumnStatisticsObj>> stats = msClient.getPartitionColumnStatistics(ci.dbname, ci.tableName, Arrays.asList(ci.partName), colNames);
List<ColumnStatisticsObj> colStats = stats.get(ci.partName);
Assert.assertNotNull("No stats found for partition " + ci.partName, colStats);
Assert.assertEquals("Expected column 'a' at index 0", "a", colStats.get(0).getColName());
Assert.assertEquals("Expected column 'b' at index 1", "b", colStats.get(1).getColName());
LongColumnStatsData colAStats = colStats.get(0).getStatsData().getLongStats();
Assert.assertEquals("lowValue a", 1, colAStats.getLowValue());
Assert.assertEquals("highValue a", 1, colAStats.getHighValue());
Assert.assertEquals("numNulls a", 0, colAStats.getNumNulls());
Assert.assertEquals("numNdv a", 1, colAStats.getNumDVs());
StringColumnStatsData colBStats = colStats.get(1).getStatsData().getStringStats();
Assert.assertEquals("maxColLen b", 3, colBStats.getMaxColLen());
Assert.assertEquals("avgColLen b", 3.0, colBStats.getAvgColLen(), 0.01);
Assert.assertEquals("numNulls b", 0, colBStats.getNumNulls());
Assert.assertEquals("nunDVs", 2, colBStats.getNumDVs());
//now save stats for partition we won't modify
stats = msClient.getPartitionColumnStatistics(ciPart2.dbname, ciPart2.tableName, Arrays.asList(ciPart2.partName), colNames);
colStats = stats.get(ciPart2.partName);
LongColumnStatsData colAStatsPart2 = colStats.get(0).getStatsData().getLongStats();
StringColumnStatsData colBStatsPart2 = colStats.get(1).getStatsData().getStringStats();
HiveEndPoint endPt = new HiveEndPoint(null, ci.dbname, ci.tableName, Arrays.asList("0"));
DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
/* The next call will eventually end up in HiveEndPoint.createPartitionIfNotExists(), which
 * runs an operation on the Driver and starts its own CliSessionState and then closes it,
 * which removes it from ThreadLocal; thus the session created in this class would be gone
 * after this. This was fixed in HiveEndPoint. */
StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
txnBatch.beginNextTransaction();
Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
txnBatch.write("50,Kiev".getBytes());
txnBatch.write("51,St. Petersburg".getBytes());
txnBatch.write("44,Boston".getBytes());
txnBatch.commit();
txnBatch.beginNextTransaction();
txnBatch.write("52,Tel Aviv".getBytes());
txnBatch.write("53,Atlantis".getBytes());
txnBatch.write("53,Boston".getBytes());
txnBatch.commit();
txnBatch.close();
connection.close();
execSelectAndDumpData("select * from " + ci.getFullTableName(), driver, ci.getFullTableName());
//so now we have written some new data to bkt=0 and it shows up
CompactionRequest rqst = new CompactionRequest(ci.dbname, ci.tableName, CompactionType.MAJOR);
rqst.setPartitionname(ci.partName);
txnHandler.compact(rqst);
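// Run a compaction worker inline: stop is pre-set to true, so run() performs exactly one pass.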
Worker t = new Worker();
t.setThreadId((int) t.getId());
t.setHiveConf(conf);
AtomicBoolean stop = new AtomicBoolean();
AtomicBoolean looped = new AtomicBoolean();
stop.set(true);
t.init(stop, looped);
t.run();
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
List<ShowCompactResponseElement> compacts = rsp.getCompacts();
if (1 != compacts.size()) {
Assert.fail("Expecting 1 file and found " + compacts.size() + " files " + compacts.toString());
}
Assert.assertEquals("ready for cleaning", compacts.get(0).getState());
stats = msClient.getPartitionColumnStatistics(ci.dbname, ci.tableName, Arrays.asList(ci.partName), colNames);
colStats = stats.get(ci.partName);
Assert.assertNotNull("No stats found for partition " + ci.partName, colStats);
Assert.assertEquals("Expected column 'a' at index 0", "a", colStats.get(0).getColName());
Assert.assertEquals("Expected column 'b' at index 1", "b", colStats.get(1).getColName());
colAStats = colStats.get(0).getStatsData().getLongStats();
Assert.assertEquals("lowValue a", 1, colAStats.getLowValue());
Assert.assertEquals("highValue a", 53, colAStats.getHighValue());
Assert.assertEquals("numNulls a", 0, colAStats.getNumNulls());
Assert.assertEquals("numNdv a", 6, colAStats.getNumDVs());
colBStats = colStats.get(1).getStatsData().getStringStats();
Assert.assertEquals("maxColLen b", 14, colBStats.getMaxColLen());
//cast to long to get rid of the repeating decimal
Assert.assertEquals("avgColLen b", (long) 6.1111111111, (long) colBStats.getAvgColLen());
Assert.assertEquals("numNulls b", 0, colBStats.getNumNulls());
Assert.assertEquals("nunDVs", 10, colBStats.getNumDVs());
//now check that stats for partition we didn't modify did not change
stats = msClient.getPartitionColumnStatistics(ciPart2.dbname, ciPart2.tableName, Arrays.asList(ciPart2.partName), colNames);
colStats = stats.get(ciPart2.partName);
Assert.assertEquals("Expected stats for " + ciPart2.partName + " to stay the same", colAStatsPart2, colStats.get(0).getStatsData().getLongStats());
Assert.assertEquals("Expected stats for " + ciPart2.partName + " to stay the same", colBStatsPart2, colStats.get(1).getStatsData().getStringStats());
}
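For orientation, StatsUpdater.gatherStats() builds and runs an ANALYZE statement through the Driver; the two gatherStats() calls above are roughly equivalent to issuing the statements below by hand. This is a hedged sketch of internals not shown on this page; quoting and column-list details may differ by Hive version.
// Rough manual equivalent of the two su.gatherStats() calls above (sketch only).
executeStatementOnDriver("analyze table default." + tblName
    + " partition(bkt=0) compute statistics for columns a,b", driver);
executeStatementOnDriver("analyze table default." + tblName
    + " partition(bkt=1) compute statistics for columns a,b", driver);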
Use of org.apache.hadoop.hive.metastore.txn.CompactionInfo in project hive by apache.
The class Cleaner, method run.
@Override
public void run() {
if (cleanerCheckInterval == 0) {
cleanerCheckInterval = conf.getTimeVar(HiveConf.ConfVars.HIVE_COMPACTOR_CLEANER_RUN_INTERVAL, TimeUnit.MILLISECONDS);
}
do {
// This is solely for testing. It checks if the test has set the looped value to false,
// and if so remembers that and then sets it to true at the end. We have to check here
// first to make sure we go through a complete iteration of the loop before resetting it.
boolean setLooped = !looped.get();
TxnStore.MutexAPI.LockHandle handle = null;
long startedAt = -1;
// Make sure nothing escapes this iteration and kills the metastore at large, so wrap it in a big catch Throwable statement.
try {
handle = txnHandler.getMutexAPI().acquireLock(TxnStore.MUTEX_KEY.Cleaner.name());
startedAt = System.currentTimeMillis();
// First look for all the compactions that are waiting to be cleaned. If we have not
// seen an entry before, look for all the locks held on that table or partition and
// record them. We will then only clean the partition once all of those locks have been
// released. This way we avoid removing the files while they are in use,
// while at the same time avoiding starving the cleaner as new readers come along.
// This works because we know that any reader who comes along after the worker thread has
// done the compaction will read the more up to date version of the data (either in a
// newer delta or in a newer base).
List<CompactionInfo> toClean = txnHandler.findReadyToClean();
{
/**
* Since there may be more than 1 instance of Cleaner running, we may have state info
* for items which were already cleaned by other instances. Here we remove them.
*
* In the long run if we add end_time to compaction_queue, then we can check that
* hive_locks.acquired_at > compaction_queue.end_time + safety_buffer in which case
* we know the lock owner is reading files created by this compaction or later.
* The advantage is that we don't have to store the locks.
*/
Set<Long> currentToCleanSet = new HashSet<>();
for (CompactionInfo ci : toClean) {
currentToCleanSet.add(ci.id);
}
Set<Long> cleanPerformedByOthers = new HashSet<>();
for (long id : compactId2CompactInfoMap.keySet()) {
if (!currentToCleanSet.contains(id)) {
cleanPerformedByOthers.add(id);
}
}
for (long id : cleanPerformedByOthers) {
compactId2CompactInfoMap.remove(id);
compactId2LockMap.remove(id);
}
}
if (toClean.size() > 0 || compactId2LockMap.size() > 0) {
ShowLocksResponse locksResponse = txnHandler.showLocks(new ShowLocksRequest());
for (CompactionInfo ci : toClean) {
// add it to our queue.
if (!compactId2LockMap.containsKey(ci.id)) {
compactId2LockMap.put(ci.id, findRelatedLocks(ci, locksResponse));
compactId2CompactInfoMap.put(ci.id, ci);
}
}
// Now, for each entry in the queue, see if all of the associated locks are clear so we
// can clean
Set<Long> currentLocks = buildCurrentLockSet(locksResponse);
List<Long> expiredLocks = new ArrayList<Long>();
List<Long> compactionsCleaned = new ArrayList<Long>();
try {
for (Map.Entry<Long, Set<Long>> queueEntry : compactId2LockMap.entrySet()) {
boolean sawLock = false;
for (Long lockId : queueEntry.getValue()) {
if (currentLocks.contains(lockId)) {
sawLock = true;
break;
} else {
expiredLocks.add(lockId);
}
}
if (!sawLock) {
// Remember to remove this when we're out of the loop,
// we can't do it in the loop or we'll get a concurrent modification exception.
compactionsCleaned.add(queueEntry.getKey());
//Future thought: this may be expensive so consider having a thread pool run in parallel
clean(compactId2CompactInfoMap.get(queueEntry.getKey()));
} else {
// Remove the locks we didn't see so we don't look for them again next time
for (Long lockId : expiredLocks) {
queueEntry.getValue().remove(lockId);
}
}
}
} finally {
if (compactionsCleaned.size() > 0) {
for (Long compactId : compactionsCleaned) {
compactId2LockMap.remove(compactId);
compactId2CompactInfoMap.remove(compactId);
}
}
}
}
} catch (Throwable t) {
LOG.error("Caught an exception in the main loop of compactor cleaner, " + StringUtils.stringifyException(t));
} finally {
if (handle != null) {
handle.releaseLocks();
}
}
if (setLooped) {
looped.set(true);
}
// Now, go back to bed until it's time to do this again
long elapsedTime = System.currentTimeMillis() - startedAt;
if (elapsedTime >= cleanerCheckInterval || stop.get()) {
continue;
} else {
try {
Thread.sleep(cleanerCheckInterval - elapsedTime);
} catch (InterruptedException ie) {
// What can I do about it?
}
}
} while (!stop.get());
}
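findRelatedLocks and buildCurrentLockSet are private Cleaner helpers that are not shown on this page. The following is a minimal sketch of what they plausibly do, inferred only from how the loop above uses them; the real implementations may differ, for example in case-sensitivity of name comparisons or in treating a table-level lock as related to a partition-level compaction.
// Sketch only: ids of the locks that touch the same db/table/partition as this compaction.
// Exact match on db/table/partition; the real version may be more permissive.
private Set<Long> findRelatedLocks(CompactionInfo ci, ShowLocksResponse locksResponse) {
  Set<Long> relatedLocks = new HashSet<>();
  for (ShowLocksResponseElement lock : locksResponse.getLocks()) {
    if (ci.dbname.equals(lock.getDbname())
        && (ci.tableName == null ? lock.getTablename() == null : ci.tableName.equals(lock.getTablename()))
        && (ci.partName == null ? lock.getPartname() == null : ci.partName.equals(lock.getPartname()))) {
      relatedLocks.add(lock.getLockid());
    }
  }
  return relatedLocks;
}

// Sketch only: the ids of all locks currently held, used to detect which remembered locks have expired.
private Set<Long> buildCurrentLockSet(ShowLocksResponse locksResponse) {
  Set<Long> currentLocks = new HashSet<>(locksResponse.getLocks().size());
  for (ShowLocksResponseElement lock : locksResponse.getLocks()) {
    currentLocks.add(lock.getLockid());
  }
  return currentLocks;
}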
Use of org.apache.hadoop.hive.metastore.txn.CompactionInfo in project hive by apache.
The class Initiator, method run.
@Override
public void run() {
// Make sure nothing escapes this run method and kills the metastore at large, so wrap it in a big catch Throwable statement.
try {
recoverFailedCompactions(false);
int abortedThreshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD);
// Make sure we run through the loop once before checking the stop flag, as this makes testing much easier.
// The stop value is only used in tests and not when the initiator is started from HiveMetaStore.
do {
long startedAt = -1;
TxnStore.MutexAPI.LockHandle handle = null;
// Wrap the inner parts of the loop in a catch Throwable so that errors in one iteration don't doom the entire thread.
try {
handle = txnHandler.getMutexAPI().acquireLock(TxnStore.MUTEX_KEY.Initiator.name());
startedAt = System.currentTimeMillis();
//todo: add method to only get current i.e. skip history - more efficient
ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest());
ValidTxnList txns = TxnUtils.createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
Set<CompactionInfo> potentials = txnHandler.findPotentialCompactions(abortedThreshold);
LOG.debug("Found " + potentials.size() + " potential compactions, " + "checking to see if we should compact any of them");
for (CompactionInfo ci : potentials) {
LOG.info("Checking to see if we should compact " + ci.getFullPartitionName());
try {
Table t = resolveTable(ci);
if (t == null) {
// Most likely this means it's a temp table
LOG.info("Can't find table " + ci.getFullTableName() + ", assuming it's a temp " + "table or has been dropped and moving on.");
continue;
}
// Check whether automatic compaction has been turned off for this table.
if (noAutoCompactSet(t)) {
LOG.info("Table " + tableName(t) + " marked " + hive_metastoreConstants.TABLE_NO_AUTO_COMPACT + "=true so we will not compact it.");
continue;
}
// If the table is partitioned but no partition is named in the request, then it's a dynamic partitioning case and we shouldn't check the table itself.
if (t.getPartitionKeys() != null && t.getPartitionKeys().size() > 0 && ci.partName == null) {
LOG.debug("Skipping entry for " + ci.getFullTableName() + " as it is from dynamic" + " partitioning");
continue;
}
// Note that this check is racy: another compaction may have been initiated between the time currentCompactions was generated and now.
if (lookForCurrentCompactions(currentCompactions, ci)) {
LOG.debug("Found currently initiated or working compaction for " + ci.getFullPartitionName() + " so we will not initiate another compaction");
continue;
}
if (txnHandler.checkFailedCompactions(ci)) {
LOG.warn("Will not initiate compaction for " + ci.getFullPartitionName() + " since last " + HiveConf.ConfVars.COMPACTOR_INITIATOR_FAILED_THRESHOLD + " attempts to compact it failed.");
txnHandler.markFailed(ci);
continue;
}
// Figure out who we should run the file operations as
Partition p = resolvePartition(ci);
if (p == null && ci.partName != null) {
LOG.info("Can't find partition " + ci.getFullPartitionName() + ", assuming it has been dropped and moving on.");
continue;
}
StorageDescriptor sd = resolveStorageDescriptor(t, p);
String runAs = findUserToRunAs(sd.getLocation(), t);
/* Future thought: checkForCompaction will check a lot of file metadata and may be expensive.
 * Long term we should consider having a thread pool here and running the checkForCompaction
 * calls in parallel. */
CompactionType compactionNeeded = checkForCompaction(ci, txns, sd, t.getParameters(), runAs);
if (compactionNeeded != null)
requestCompaction(ci, runAs, compactionNeeded);
} catch (Throwable t) {
LOG.error("Caught exception while trying to determine if we should compact " + ci + ". Marking failed to avoid repeated failures, " + "" + StringUtils.stringifyException(t));
txnHandler.markFailed(ci);
}
}
// Check for timed out remote workers.
recoverFailedCompactions(true);
// Clean anything from the txns table that has no components left in txn_components.
txnHandler.cleanEmptyAbortedTxns();
} catch (Throwable t) {
LOG.error("Initiator loop caught unexpected exception this time through the loop: " + StringUtils.stringifyException(t));
} finally {
if (handle != null) {
handle.releaseLocks();
}
}
long elapsedTime = System.currentTimeMillis() - startedAt;
if (elapsedTime >= checkInterval || stop.get())
continue;
else
Thread.sleep(checkInterval - elapsedTime);
} while (!stop.get());
} catch (Throwable t) {
LOG.error("Caught an exception in the main loop of compactor initiator, exiting " + StringUtils.stringifyException(t));
}
}
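noAutoCompactSet is a small private helper that is not shown on this page; here is a plausible sketch, assuming it just inspects the table property referenced in the log message above (the real version may also accept an upper-cased key for backward compatibility).
// Sketch only: true when the table opted out of automatic compaction via
// the "no_auto_compaction"="true" table property.
private boolean noAutoCompactSet(Table t) {
  String noAutoCompact = t.getParameters().get(hive_metastoreConstants.TABLE_NO_AUTO_COMPACT);
  return noAutoCompact != null && noAutoCompact.equalsIgnoreCase("true");
}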
Use of org.apache.hadoop.hive.metastore.txn.CompactionInfo in project hive by apache.
The class Worker, method run.
//todo: this doesn't check whether a compaction is already running (the Initiator does check,
// but we don't go through the Initiator for user-initiated compactions)
@Override
public void run() {
do {
boolean launchedJob = false;
// Make sure nothing escapes this run method and kills the metastore at large, so wrap it in a big catch Throwable statement.
try {
final CompactionInfo ci = txnHandler.findNextToCompact(name);
if (ci == null && !stop.get()) {
try {
Thread.sleep(SLEEP_TIME);
continue;
} catch (InterruptedException e) {
LOG.warn("Worker thread sleep interrupted " + e.getMessage());
continue;
}
}
// Find the table we will be working with.
Table t1 = null;
try {
t1 = resolveTable(ci);
if (t1 == null) {
LOG.info("Unable to find table " + ci.getFullTableName() + ", assuming it was dropped and moving on.");
txnHandler.markCleaned(ci);
continue;
}
} catch (MetaException e) {
txnHandler.markCleaned(ci);
continue;
}
// This chicanery is to get around the fact that the table needs to be final in order to
// go into the doAs below.
final Table t = t1;
// Find the partition we will be working with, if there is one.
Partition p = null;
try {
p = resolvePartition(ci);
if (p == null && ci.partName != null) {
LOG.info("Unable to find partition " + ci.getFullPartitionName() + ", assuming it was dropped and moving on.");
txnHandler.markCleaned(ci);
continue;
}
} catch (Exception e) {
txnHandler.markCleaned(ci);
continue;
}
// Find the appropriate storage descriptor
final StorageDescriptor sd = resolveStorageDescriptor(t, p);
// Check that the table or partition isn't sorted, as we don't yet support that.
if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
LOG.error("Attempt to compact sorted table, which is not yet supported!");
txnHandler.markCleaned(ci);
continue;
}
final boolean isMajor = ci.isMajorCompaction();
final ValidTxnList txns = TxnUtils.createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
LOG.debug("ValidCompactTxnList: " + txns.writeToString());
txnHandler.setCompactionHighestTxnId(ci, txns.getHighWatermark());
final StringBuilder jobName = new StringBuilder(name);
jobName.append("-compactor-");
jobName.append(ci.getFullPartitionName());
// Determine who to run as
String runAs;
if (ci.runAs == null) {
runAs = findUserToRunAs(sd.getLocation(), t);
txnHandler.setRunAs(ci.id, runAs);
} else {
runAs = ci.runAs;
}
LOG.info("Starting " + ci.type.toString() + " compaction for " + ci.getFullPartitionName());
final StatsUpdater su = StatsUpdater.init(ci, txnHandler.findColumnsWithStats(ci), conf, runJobAsSelf(runAs) ? runAs : t.getOwner());
final CompactorMR mr = new CompactorMR();
launchedJob = true;
try {
if (runJobAsSelf(runAs)) {
mr.run(conf, jobName.toString(), t, sd, txns, ci, su, txnHandler);
} else {
UserGroupInformation ugi = UserGroupInformation.createProxyUser(t.getOwner(), UserGroupInformation.getLoginUser());
ugi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws Exception {
mr.run(conf, jobName.toString(), t, sd, txns, ci, su, txnHandler);
return null;
}
});
try {
FileSystem.closeAllForUGI(ugi);
} catch (IOException exception) {
LOG.error("Could not clean up file-system handles for UGI: " + ugi + " for " + ci.getFullPartitionName(), exception);
}
}
txnHandler.markCompacted(ci);
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST)) {
mrJob = mr.getMrJob();
}
} catch (Exception e) {
LOG.error("Caught exception while trying to compact " + ci + ". Marking failed to avoid repeated failures, " + StringUtils.stringifyException(e));
txnHandler.markFailed(ci);
}
} catch (Throwable t) {
LOG.error("Caught an exception in the main loop of compactor worker " + name + ", " + StringUtils.stringifyException(t));
}
// If we didn't launch a job this time through the loop, sleep a bit before we restart the loop.
if (!launchedJob && !stop.get()) {
try {
Thread.sleep(SLEEP_TIME);
} catch (InterruptedException e) {
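// Ignore the interruption; the loop condition below re-checks the stop flag.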
}
}
} while (!stop.get());
}
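runJobAsSelf comes from the compactor thread base class and is not shown here; a plausible sketch follows, assuming it simply compares the resolved owner against the user this process runs as (hypothetical; the real check may differ).
// Sketch only: run the job directly when the owner matches the process user;
// otherwise Worker.run() above falls back to the UserGroupInformation.doAs() path.
protected boolean runJobAsSelf(String runAs) {
  return runAs.equals(System.getProperty("user.name"));
}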