
Example 66 with ShowCompactRequest

use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.

the class TestCompactor method dynamicPartitioningDelete.

@Test
public void dynamicPartitioningDelete() throws Exception {
    String tblName = "ddpct";
    List<String> colNames = Arrays.asList("a", "b");
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)" + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
    executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + "'today'), (2, 'wilma', 'yesterday')", driver);
    executeStatementOnDriver("update " + tblName + " set b = 'fred' where a = 1", driver);
    executeStatementOnDriver("delete from " + tblName + " where b = 'fred'", driver);
    Initiator initiator = new Initiator();
    initiator.setThreadId((int) initiator.getId());
    // Set to 2 so insert and update don't set it off but delete does
    conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 2);
    initiator.setHiveConf(conf);
    AtomicBoolean stop = new AtomicBoolean();
    stop.set(true);
    initiator.init(stop, new AtomicBoolean());
    initiator.run();
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
    Assert.assertEquals(1, compacts.size());
    SortedSet<String> partNames = new TreeSet<String>();
    for (int i = 0; i < compacts.size(); i++) {
        Assert.assertEquals("default", compacts.get(i).getDbname());
        Assert.assertEquals(tblName, compacts.get(i).getTablename());
        Assert.assertEquals("initiated", compacts.get(i).getState());
        partNames.add(compacts.get(i).getPartitionname());
    }
    List<String> names = new ArrayList<String>(partNames);
    Assert.assertEquals("ds=today", names.get(0));
}
Also used : ArrayList(java.util.ArrayList) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) TreeSet(java.util.TreeSet) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) Test(org.junit.Test)
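
All of these examples reduce to the same probe against the transaction store. A minimal, self-contained sketch of that shared pattern, distilled from the test above (it assumes only a configured HiveConf instance named conf):

TxnStore txnHandler = TxnUtils.getTxnStore(conf);
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
for (ShowCompactResponseElement e : rsp.getCompacts()) {
    // Each element describes one queued, running, or completed compaction.
    System.out.println(e.getDbname() + "." + e.getTablename()
        + (e.getPartitionname() == null ? "" : "/" + e.getPartitionname())
        + " [" + e.getType() + ", " + e.getState() + "]");
}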

Example 67 with ShowCompactRequest

use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.

the class TestCompactor method testTableProperties.

/**
   * Users have the choice of specifying compaction related tblproperties either in CREATE TABLE
   * statement or in ALTER TABLE .. COMPACT statement. This tests both cases.
   * @throws Exception
   */
@Test
public void testTableProperties() throws Exception {
    // plain acid table
    String tblName1 = "ttp1";
    // acid table with customized tblproperties
    String tblName2 = "ttp2";
    executeStatementOnDriver("drop table if exists " + tblName1, driver);
    executeStatementOnDriver("drop table if exists " + tblName2, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName1 + "(a INT, b STRING) " + " CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
    executeStatementOnDriver("CREATE TABLE " + tblName2 + "(a INT, b STRING) " + " CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES (" + "'transactional'='true'," + // 2048 MB memory for compaction map job
    "'compactor.mapreduce.map.memory.mb'='2048'," + // minor compaction if more than 4 delta dirs
    "'compactorthreshold.hive.compactor.delta.num.threshold'='4'," + // major compaction if more than 49%
    "'compactorthreshold.hive.compactor.delta.pct.threshold'='0.49'" + ")", driver);
    // Insert 5 rows to both tables
    executeStatementOnDriver("insert into " + tblName1 + " values (1, 'a')", driver);
    executeStatementOnDriver("insert into " + tblName1 + " values (2, 'b')", driver);
    executeStatementOnDriver("insert into " + tblName1 + " values (3, 'c')", driver);
    executeStatementOnDriver("insert into " + tblName1 + " values (4, 'd')", driver);
    executeStatementOnDriver("insert into " + tblName1 + " values (5, 'e')", driver);
    executeStatementOnDriver("insert into " + tblName2 + " values (1, 'a')", driver);
    executeStatementOnDriver("insert into " + tblName2 + " values (2, 'b')", driver);
    executeStatementOnDriver("insert into " + tblName2 + " values (3, 'c')", driver);
    executeStatementOnDriver("insert into " + tblName2 + " values (4, 'd')", driver);
    executeStatementOnDriver("insert into " + tblName2 + " values (5, 'e')", driver);
    runInitiator(conf);
    // Compactor should only schedule compaction for ttp2 (delta.num.threshold=4), not ttp1
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals(1, rsp.getCompacts().size());
    Assert.assertEquals(TxnStore.INITIATED_RESPONSE, rsp.getCompacts().get(0).getState());
    Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
    // type is MAJOR since there's no base yet
    Assert.assertEquals(CompactionType.MAJOR, rsp.getCompacts().get(0).getType());
    // Finish the scheduled compaction for ttp2, and manually compact ttp1, to make them comparable again
    executeStatementOnDriver("alter table " + tblName1 + " compact 'major'", driver);
    rsp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals(2, rsp.getCompacts().size());
    Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
    Assert.assertEquals(TxnStore.INITIATED_RESPONSE, rsp.getCompacts().get(0).getState());
    Assert.assertEquals("ttp1", rsp.getCompacts().get(1).getTablename());
    Assert.assertEquals(TxnStore.INITIATED_RESPONSE, rsp.getCompacts().get(1).getState());
    // Compact ttp2 by running the Worker explicitly, in order to get a reference to the compactor MR job
    AtomicBoolean stop = new AtomicBoolean(true);
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setHiveConf(conf);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
    JobConf job = t.getMrJob();
    // 2048 comes from tblproperties
    Assert.assertEquals("2048", job.get("mapreduce.map.memory.mb"));
    // Compact ttp1
    stop = new AtomicBoolean(true);
    t = new Worker();
    t.setThreadId((int) t.getId());
    t.setHiveConf(conf);
    looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
    job = t.getMrJob();
    // 1024 is the default value
    Assert.assertEquals("1024", job.get("mapreduce.map.memory.mb"));
    // Clean up
    runCleaner(conf);
    rsp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals(2, rsp.getCompacts().size());
    Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
    Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
    Assert.assertEquals("ttp1", rsp.getCompacts().get(1).getTablename());
    Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(1).getState());
    // Insert one more row - this should trigger hive.compactor.delta.pct.threshold to be reached for ttp2
    executeStatementOnDriver("insert into " + tblName1 + " values (6, 'f')", driver);
    executeStatementOnDriver("insert into " + tblName2 + " values (6, 'f')", driver);
    // Intentionally set the global threshold high so that it does not trigger major compaction for ttp1.
    // Only ttp2 should major-compact, via its table-level override (delta.pct.threshold=0.49), because of the newly inserted row (actual pct: ~0.66)
    conf.setFloatVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_PCT_THRESHOLD, 0.8f);
    runInitiator(conf);
    rsp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals(3, rsp.getCompacts().size());
    Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
    Assert.assertEquals(TxnStore.INITIATED_RESPONSE, rsp.getCompacts().get(0).getState());
    // Finish the scheduled compaction for ttp2
    runWorker(conf);
    runCleaner(conf);
    rsp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals(3, rsp.getCompacts().size());
    Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
    Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
    // Now test tblproperties specified on ALTER TABLE .. COMPACT .. statement
    executeStatementOnDriver("insert into " + tblName2 + " values (7, 'g')", driver);
    executeStatementOnDriver("alter table " + tblName2 + " compact 'major'" + " with overwrite tblproperties (" + "'compactor.mapreduce.map.memory.mb'='3072'," + "'tblprops.orc.compress.size'='8192')", driver);
    rsp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals(4, rsp.getCompacts().size());
    Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
    Assert.assertEquals(TxnStore.INITIATED_RESPONSE, rsp.getCompacts().get(0).getState());
    // Run the Worker explicitly, in order to get the reference to the compactor MR job
    stop = new AtomicBoolean(true);
    t = new Worker();
    t.setThreadId((int) t.getId());
    t.setHiveConf(conf);
    looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
    job = t.getMrJob();
    Assert.assertEquals("3072", job.get("mapreduce.map.memory.mb"));
    Assert.assertTrue(job.get("hive.compactor.table.props").contains("orc.compress.size4:8192"));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)
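
The Javadoc's two options look like this side by side, lifted from the statements the test issues through the driver (table name ttp2 as above):

// Option 1: compactor overrides baked into the table at creation time.
executeStatementOnDriver("CREATE TABLE ttp2(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS "
    + "STORED AS ORC TBLPROPERTIES ('transactional'='true', "
    + "'compactor.mapreduce.map.memory.mb'='2048')", driver);
// Option 2: per-request overrides on a manually triggered compaction.
executeStatementOnDriver("alter table ttp2 compact 'major' with overwrite tblproperties ("
    + "'compactor.mapreduce.map.memory.mb'='3072')", driver);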

Example 68 with ShowCompactRequest

use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.

the class TestTxnCommands2 method testCompactWithDelete.

@Test
public void testCompactWithDelete() throws Exception {
    int[][] tableData = { { 1, 2 }, { 3, 4 } };
    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData));
    runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setHiveConf(hiveConf);
    AtomicBoolean stop = new AtomicBoolean();
    AtomicBoolean looped = new AtomicBoolean();
    stop.set(true);
    t.init(stop, looped);
    t.run();
    runStatementOnDriver("delete from " + Table.ACIDTBL + " where b = 4");
    runStatementOnDriver("update " + Table.ACIDTBL + " set b = -2 where b = 2");
    runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MINOR'");
    t.run();
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 2, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    Assert.assertEquals("Unexpected 1 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(1).getState());
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Test(org.junit.Test)

Example 69 with ShowCompactRequest

use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.

the class Initiator method run.

@Override
public void run() {
    // Make sure nothing escapes this run method and kills the metastore at large,
    // so wrap it in a big catch Throwable statement.
    try {
        recoverFailedCompactions(false);
        int abortedThreshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD);
        // Make sure we run through the loop once before checking to stop as this makes testing
        // much easier.  The stop value is only for testing anyway and not used when called from
        // HiveMetaStore.
        do {
            long startedAt = -1;
            TxnStore.MutexAPI.LockHandle handle = null;
            // Wrap the inner parts of the loop in a catch throwable so that any errors in the loop
            // don't doom the entire thread.
            try {
                handle = txnHandler.getMutexAPI().acquireLock(TxnStore.MUTEX_KEY.Initiator.name());
                startedAt = System.currentTimeMillis();
                //todo: add method to only get current i.e. skip history - more efficient
                ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest());
                ValidTxnList txns = TxnUtils.createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
                Set<CompactionInfo> potentials = txnHandler.findPotentialCompactions(abortedThreshold);
                LOG.debug("Found " + potentials.size() + " potential compactions, " + "checking to see if we should compact any of them");
                for (CompactionInfo ci : potentials) {
                    LOG.info("Checking to see if we should compact " + ci.getFullPartitionName());
                    try {
                        Table t = resolveTable(ci);
                        if (t == null) {
                            // Most likely this means it's a temp table
                            LOG.info("Can't find table " + ci.getFullTableName() + ", assuming it's a temp " + "table or has been dropped and moving on.");
                            continue;
                        }
                        // check if no compaction set for this table
                        if (noAutoCompactSet(t)) {
                            LOG.info("Table " + tableName(t) + " marked " + hive_metastoreConstants.TABLE_NO_AUTO_COMPACT + "=true so we will not compact it.");
                            continue;
                        }
                        // If the table is partitioned but this entry has no partition name,
                        // then it's a dynamic partitioning case and we shouldn't check the table itself.
                        if (t.getPartitionKeys() != null && t.getPartitionKeys().size() > 0 && ci.partName == null) {
                            LOG.debug("Skipping entry for " + ci.getFullTableName() + " as it is from dynamic" + " partitioning");
                            continue;
                        }
                        // Note: this check can miss a compaction initiated in the window between
                        // the time currentCompactions is generated and now
                        if (lookForCurrentCompactions(currentCompactions, ci)) {
                            LOG.debug("Found currently initiated or working compaction for " + ci.getFullPartitionName() + " so we will not initiate another compaction");
                            continue;
                        }
                        if (txnHandler.checkFailedCompactions(ci)) {
                            LOG.warn("Will not initiate compaction for " + ci.getFullPartitionName() + " since last " + HiveConf.ConfVars.COMPACTOR_INITIATOR_FAILED_THRESHOLD + " attempts to compact it failed.");
                            txnHandler.markFailed(ci);
                            continue;
                        }
                        // Figure out who we should run the file operations as
                        Partition p = resolvePartition(ci);
                        if (p == null && ci.partName != null) {
                            LOG.info("Can't find partition " + ci.getFullPartitionName() + ", assuming it has been dropped and moving on.");
                            continue;
                        }
                        StorageDescriptor sd = resolveStorageDescriptor(t, p);
                        String runAs = findUserToRunAs(sd.getLocation(), t);
                        /* Future thought: checkForCompaction will check a lot of file metadata and
                         * may be expensive.  Long term we should consider having a thread pool here
                         * and running checkForCompaction calls in parallel. */
                        CompactionType compactionNeeded = checkForCompaction(ci, txns, sd, t.getParameters(), runAs);
                        if (compactionNeeded != null)
                            requestCompaction(ci, runAs, compactionNeeded);
                    } catch (Throwable t) {
                        LOG.error("Caught exception while trying to determine if we should compact " + ci + ".  Marking failed to avoid repeated failures, " + "" + StringUtils.stringifyException(t));
                        txnHandler.markFailed(ci);
                    }
                }
                // Check for timed out remote workers.
                recoverFailedCompactions(true);
                // Clean anything from the txns table that has no components left in txn_components.
                txnHandler.cleanEmptyAbortedTxns();
            } catch (Throwable t) {
                LOG.error("Initiator loop caught unexpected exception this time through the loop: " + StringUtils.stringifyException(t));
            } finally {
                if (handle != null) {
                    handle.releaseLocks();
                }
            }
            long elapsedTime = System.currentTimeMillis() - startedAt;
            if (elapsedTime >= checkInterval || stop.get())
                continue;
            else
                Thread.sleep(checkInterval - elapsedTime);
        } while (!stop.get());
    } catch (Throwable t) {
        LOG.error("Caught an exception in the main loop of compactor initiator, exiting " + StringUtils.stringifyException(t));
    }
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) CompactionType(org.apache.hadoop.hive.metastore.api.CompactionType) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo)
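
One detail worth noting for the tests earlier on this page: because run() is a do { ... } while (!stop.get()) loop, pre-setting the stop flag before init() yields exactly one pass through the loop. That is the one-shot harness the TestCompactor examples use; a minimal sketch, assuming a configured HiveConf instance named conf:

// Run the Initiator exactly once: the stop flag is already true, so the
// do/while loop body executes a single time and then exits.
Initiator initiator = new Initiator();
initiator.setThreadId((int) initiator.getId());
initiator.setHiveConf(conf);
initiator.init(new AtomicBoolean(true), new AtomicBoolean());
initiator.run();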

Aggregations

ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest): 69 uses
ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse): 69 uses
Test (org.junit.Test): 65 uses
ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement): 51 uses
Table (org.apache.hadoop.hive.metastore.api.Table): 48 uses
CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest): 41 uses
ArrayList (java.util.ArrayList): 28 uses
LockComponent (org.apache.hadoop.hive.metastore.api.LockComponent): 22 uses
LockRequest (org.apache.hadoop.hive.metastore.api.LockRequest): 22 uses
LockResponse (org.apache.hadoop.hive.metastore.api.LockResponse): 22 uses
Partition (org.apache.hadoop.hive.metastore.api.Partition): 20 uses
Path (org.apache.hadoop.fs.Path): 19 uses
FileStatus (org.apache.hadoop.fs.FileStatus): 14 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 14 uses
CompactionInfo (org.apache.hadoop.hive.metastore.txn.CompactionInfo): 13 uses
CommitTxnRequest (org.apache.hadoop.hive.metastore.api.CommitTxnRequest): 12 uses
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 10 uses
TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore): 10 uses
AbortTxnRequest (org.apache.hadoop.hive.metastore.api.AbortTxnRequest): 6 uses
HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint): 6 uses