Use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.
From the class TestCompactor, method dynamicPartitioningDelete.
@Test
public void dynamicPartitioningDelete() throws Exception {
  String tblName = "ddpct";
  List<String> colNames = Arrays.asList("a", "b");
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
      " PARTITIONED BY(ds string)" + // currently ACID requires table to be bucketed
      " CLUSTERED BY(a) INTO 2 BUCKETS" +
      " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " +
      "'today'), (2, 'wilma', 'yesterday')", driver);
  executeStatementOnDriver("update " + tblName + " set b = 'fred' where a = 1", driver);
  executeStatementOnDriver("delete from " + tblName + " where b = 'fred'", driver);
  Initiator initiator = new Initiator();
  initiator.setThreadId((int) initiator.getId());
  // Set to 2 so insert and update don't set it off but delete does
  conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 2);
  initiator.setHiveConf(conf);
  AtomicBoolean stop = new AtomicBoolean();
  stop.set(true);
  initiator.init(stop, new AtomicBoolean());
  initiator.run();
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  Assert.assertEquals(1, compacts.size());
  SortedSet<String> partNames = new TreeSet<String>();
  for (int i = 0; i < compacts.size(); i++) {
    Assert.assertEquals("default", compacts.get(i).getDbname());
    Assert.assertEquals(tblName, compacts.get(i).getTablename());
    Assert.assertEquals("initiated", compacts.get(i).getState());
    partNames.add(compacts.get(i).getPartitionname());
  }
  List<String> names = new ArrayList<String>(partNames);
  Assert.assertEquals("ds=today", names.get(0));
}
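The query pattern here, repeated in every example below, is always the same: build an empty ShowCompactRequest, fetch the whole compaction history from the TxnStore, and filter or assert on the ShowCompactResponseElement entries on the client side. A minimal sketch of that pattern factored into a standalone helper; the class and method names are hypothetical, not part of the Hive API:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;

public class CompactionHistoryHelper {
  // Returns all compaction-history entries currently in the given state, e.g. "initiated".
  static List<ShowCompactResponseElement> findCompactionsInState(HiveConf conf, String state)
      throws Exception {
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    // An empty request returns the whole history; any filtering happens on the client.
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> matches = new ArrayList<>();
    for (ShowCompactResponseElement e : rsp.getCompacts()) {
      if (state.equals(e.getState())) {
        matches.add(e);
      }
    }
    return matches;
  }
}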
Use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.
From the class TestCompactor, method testTableProperties.
/**
 * Users have the choice of specifying compaction related tblproperties either in CREATE TABLE
 * statement or in ALTER TABLE .. COMPACT statement. This tests both cases.
 * @throws Exception
 */
@Test
public void testTableProperties() throws Exception {
  // plain acid table
  String tblName1 = "ttp1";
  // acid table with customized tblproperties
  String tblName2 = "ttp2";
  executeStatementOnDriver("drop table if exists " + tblName1, driver);
  executeStatementOnDriver("drop table if exists " + tblName2, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName1 + "(a INT, b STRING) " +
      " CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  executeStatementOnDriver("CREATE TABLE " + tblName2 + "(a INT, b STRING) " +
      " CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES (" +
      "'transactional'='true'," +
      // 2048 MB memory for compaction map job
      "'compactor.mapreduce.map.memory.mb'='2048'," +
      // minor compaction if more than 4 delta dirs
      "'compactorthreshold.hive.compactor.delta.num.threshold'='4'," +
      // major compaction if more than 49%
      "'compactorthreshold.hive.compactor.delta.pct.threshold'='0.49'" +
      ")", driver);
  // Insert 5 rows to both tables
  executeStatementOnDriver("insert into " + tblName1 + " values (1, 'a')", driver);
  executeStatementOnDriver("insert into " + tblName1 + " values (2, 'b')", driver);
  executeStatementOnDriver("insert into " + tblName1 + " values (3, 'c')", driver);
  executeStatementOnDriver("insert into " + tblName1 + " values (4, 'd')", driver);
  executeStatementOnDriver("insert into " + tblName1 + " values (5, 'e')", driver);
  executeStatementOnDriver("insert into " + tblName2 + " values (1, 'a')", driver);
  executeStatementOnDriver("insert into " + tblName2 + " values (2, 'b')", driver);
  executeStatementOnDriver("insert into " + tblName2 + " values (3, 'c')", driver);
  executeStatementOnDriver("insert into " + tblName2 + " values (4, 'd')", driver);
  executeStatementOnDriver("insert into " + tblName2 + " values (5, 'e')", driver);
  runInitiator(conf);
  // Compactor should only schedule compaction for ttp2 (delta.num.threshold=4), not ttp1
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  Assert.assertEquals(1, rsp.getCompacts().size());
  Assert.assertEquals(TxnStore.INITIATED_RESPONSE, rsp.getCompacts().get(0).getState());
  Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
  // type is MAJOR since there's no base yet
  Assert.assertEquals(CompactionType.MAJOR, rsp.getCompacts().get(0).getType());
  // Finish the scheduled compaction for ttp2, and manually compact ttp1, to make them comparable again
  executeStatementOnDriver("alter table " + tblName1 + " compact 'major'", driver);
  rsp = txnHandler.showCompact(new ShowCompactRequest());
  Assert.assertEquals(2, rsp.getCompacts().size());
  Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
  Assert.assertEquals(TxnStore.INITIATED_RESPONSE, rsp.getCompacts().get(0).getState());
  Assert.assertEquals("ttp1", rsp.getCompacts().get(1).getTablename());
  Assert.assertEquals(TxnStore.INITIATED_RESPONSE, rsp.getCompacts().get(1).getState());
  // compact ttp2, by running the Worker explicitly, in order to get the reference to the compactor MR job
  AtomicBoolean stop = new AtomicBoolean(true);
  Worker t = new Worker();
  t.setThreadId((int) t.getId());
  t.setHiveConf(conf);
  AtomicBoolean looped = new AtomicBoolean();
  t.init(stop, looped);
  t.run();
  JobConf job = t.getMrJob();
  // 2048 comes from tblproperties
  Assert.assertEquals("2048", job.get("mapreduce.map.memory.mb"));
  // Compact ttp1
  stop = new AtomicBoolean(true);
  t = new Worker();
  t.setThreadId((int) t.getId());
  t.setHiveConf(conf);
  looped = new AtomicBoolean();
  t.init(stop, looped);
  t.run();
  job = t.getMrJob();
  // 1024 is the default value
  Assert.assertEquals("1024", job.get("mapreduce.map.memory.mb"));
  // Clean up
  runCleaner(conf);
  rsp = txnHandler.showCompact(new ShowCompactRequest());
  Assert.assertEquals(2, rsp.getCompacts().size());
  Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
  Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
  Assert.assertEquals("ttp1", rsp.getCompacts().get(1).getTablename());
  Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(1).getState());
  // Insert one more row - this should trigger hive.compactor.delta.pct.threshold to be reached for ttp2
  executeStatementOnDriver("insert into " + tblName1 + " values (6, 'f')", driver);
  executeStatementOnDriver("insert into " + tblName2 + " values (6, 'f')", driver);
  // Intentionally set this high so that it will not trigger major compaction for ttp1.
  // Only trigger major compaction for ttp2 (delta.pct.threshold=0.49) because of the newly inserted row (actual pct: 0.66)
  conf.setFloatVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_PCT_THRESHOLD, 0.8f);
  runInitiator(conf);
  rsp = txnHandler.showCompact(new ShowCompactRequest());
  Assert.assertEquals(3, rsp.getCompacts().size());
  Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
  Assert.assertEquals(TxnStore.INITIATED_RESPONSE, rsp.getCompacts().get(0).getState());
  // Finish the scheduled compaction for ttp2
  runWorker(conf);
  runCleaner(conf);
  rsp = txnHandler.showCompact(new ShowCompactRequest());
  Assert.assertEquals(3, rsp.getCompacts().size());
  Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
  Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
  // Now test tblproperties specified on ALTER TABLE .. COMPACT .. statement
  executeStatementOnDriver("insert into " + tblName2 + " values (7, 'g')", driver);
  executeStatementOnDriver("alter table " + tblName2 + " compact 'major'" +
      " with overwrite tblproperties (" +
      "'compactor.mapreduce.map.memory.mb'='3072'," +
      "'tblprops.orc.compress.size'='8192')", driver);
  rsp = txnHandler.showCompact(new ShowCompactRequest());
  Assert.assertEquals(4, rsp.getCompacts().size());
  Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
  Assert.assertEquals(TxnStore.INITIATED_RESPONSE, rsp.getCompacts().get(0).getState());
  // Run the Worker explicitly, in order to get the reference to the compactor MR job
  stop = new AtomicBoolean(true);
  t = new Worker();
  t.setThreadId((int) t.getId());
  t.setHiveConf(conf);
  looped = new AtomicBoolean();
  t.init(stop, looped);
  t.run();
  job = t.getMrJob();
  Assert.assertEquals("3072", job.get("mapreduce.map.memory.mb"));
  Assert.assertTrue(job.get("hive.compactor.table.props").contains("orc.compress.size4:8192"));
}
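The single-cycle recipe for driving a compactor thread (set the stop flag to true before init() so that run() performs exactly one pass and returns) appears three times in this test and again in the next example. A sketch of that recipe factored into a helper, assuming the test harness's own runWorker(conf) does something similar; CompactorCycleHelper and runSingleWorkerCycle are hypothetical names:

import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.txn.compactor.Worker;
import org.apache.hadoop.mapred.JobConf;

public class CompactorCycleHelper {
  // Runs one Worker pass synchronously and returns the MR job it configured,
  // so callers can assert on the effective job settings.
  static JobConf runSingleWorkerCycle(HiveConf conf) throws Exception {
    AtomicBoolean stop = new AtomicBoolean(true); // true before init(): one pass, then exit
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setHiveConf(conf);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    // run() (not start()) keeps the work on the calling thread, which the assertions rely on.
    t.run();
    return t.getMrJob();
  }
}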
Use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.
From the class TestTxnCommands2, method testCompactWithDelete.
@Test
public void testCompactWithDelete() throws Exception {
  int[][] tableData = { { 1, 2 }, { 3, 4 } };
  runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData));
  runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");
  Worker t = new Worker();
  t.setThreadId((int) t.getId());
  t.setHiveConf(hiveConf);
  AtomicBoolean stop = new AtomicBoolean();
  AtomicBoolean looped = new AtomicBoolean();
  stop.set(true);
  t.init(stop, looped);
  t.run();
  runStatementOnDriver("delete from " + Table.ACIDTBL + " where b = 4");
  runStatementOnDriver("update " + Table.ACIDTBL + " set b = -2 where b = 2");
  runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MINOR'");
  t.run();
  TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
  ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
  Assert.assertEquals("Unexpected number of compactions in history", 2, resp.getCompactsSize());
  Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
  Assert.assertEquals("Unexpected 1 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(1).getState());
}
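The closing assertions check each history entry by index. A hedged sketch of the same check written as a reusable assertion over the whole ShowCompactResponse; CompactionStateAsserts and assertAllCompactionsInState are hypothetical names, not part of the test harness:

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
import org.junit.Assert;

public class CompactionStateAsserts {
  // Asserts the history holds exactly expectedCount entries, all in expectedState
  // (e.g. TxnStore.CLEANING_RESPONSE).
  static void assertAllCompactionsInState(HiveConf conf, String expectedState, int expectedCount)
      throws Exception {
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history",
        expectedCount, resp.getCompactsSize());
    for (ShowCompactResponseElement e : resp.getCompacts()) {
      Assert.assertEquals("Unexpected state for " + e.getTablename(),
          expectedState, e.getState());
    }
  }
}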
Use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.
From the class Initiator, method run.
@Override
public void run() {
  // Make sure nothing escapes this run method and kills the metastore at large,
  // so wrap it in a big catch Throwable statement.
  try {
    recoverFailedCompactions(false);
    int abortedThreshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD);
    // HiveMetaStore.
    do {
      long startedAt = -1;
      TxnStore.MutexAPI.LockHandle handle = null;
      // Wrap the inner parts of the loop in a catch Throwable so that any errors in the loop
      // don't doom the entire thread.
      try {
        handle = txnHandler.getMutexAPI().acquireLock(TxnStore.MUTEX_KEY.Initiator.name());
        startedAt = System.currentTimeMillis();
        //todo: add method to only get current i.e. skip history - more efficient
        ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest());
        ValidTxnList txns = TxnUtils.createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
        Set<CompactionInfo> potentials = txnHandler.findPotentialCompactions(abortedThreshold);
        LOG.debug("Found " + potentials.size() + " potential compactions, " +
            "checking to see if we should compact any of them");
        for (CompactionInfo ci : potentials) {
          LOG.info("Checking to see if we should compact " + ci.getFullPartitionName());
          try {
            Table t = resolveTable(ci);
            if (t == null) {
              // Most likely this means it's a temp table
              LOG.info("Can't find table " + ci.getFullTableName() + ", assuming it's a temp " +
                  "table or has been dropped and moving on.");
              continue;
            }
            // check if no compaction set for this table
            if (noAutoCompactSet(t)) {
              LOG.info("Table " + tableName(t) + " marked " + hive_metastoreConstants.TABLE_NO_AUTO_COMPACT +
                  "=true so we will not compact it.");
              continue;
            }
            // Check to see if this is a table-level request on a partitioned table. If so,
            // then it's a dynamic partitioning case and we shouldn't check the table itself.
            if (t.getPartitionKeys() != null && t.getPartitionKeys().size() > 0 && ci.partName == null) {
              LOG.debug("Skipping entry for " + ci.getFullTableName() + " as it is from dynamic" +
                  " partitioning");
              continue;
            }
            // Check whether a compaction for this entry is already initiated or working. This
            // is best effort: another compaction could still be requested between
            // the time currentCompactions is generated and now.
            if (lookForCurrentCompactions(currentCompactions, ci)) {
              LOG.debug("Found currently initiated or working compaction for " +
                  ci.getFullPartitionName() + " so we will not initiate another compaction");
              continue;
            }
            if (txnHandler.checkFailedCompactions(ci)) {
              LOG.warn("Will not initiate compaction for " + ci.getFullPartitionName() + " since last " +
                  HiveConf.ConfVars.COMPACTOR_INITIATOR_FAILED_THRESHOLD + " attempts to compact it failed.");
              txnHandler.markFailed(ci);
              continue;
            }
            // Figure out who we should run the file operations as
            Partition p = resolvePartition(ci);
            if (p == null && ci.partName != null) {
              LOG.info("Can't find partition " + ci.getFullPartitionName() +
                  ", assuming it has been dropped and moving on.");
              continue;
            }
            StorageDescriptor sd = resolveStorageDescriptor(t, p);
            String runAs = findUserToRunAs(sd.getLocation(), t);
            /* Future thought: checkForCompaction will check a lot of file metadata and may be expensive.
             * Long term we should consider having a thread pool here and running checkForCompactionS
             * in parallel */
            CompactionType compactionNeeded = checkForCompaction(ci, txns, sd, t.getParameters(), runAs);
            if (compactionNeeded != null)
              requestCompaction(ci, runAs, compactionNeeded);
          } catch (Throwable t) {
            LOG.error("Caught exception while trying to determine if we should compact " + ci +
                ". Marking failed to avoid repeated failures, " + StringUtils.stringifyException(t));
            txnHandler.markFailed(ci);
          }
        }
        // Check for timed out remote workers.
        recoverFailedCompactions(true);
        // Clean anything from the txns table that has no components left in txn_components.
        txnHandler.cleanEmptyAbortedTxns();
      } catch (Throwable t) {
        LOG.error("Initiator loop caught unexpected exception this time through the loop: " +
            StringUtils.stringifyException(t));
      } finally {
        if (handle != null) {
          handle.releaseLocks();
        }
      }
      long elapsedTime = System.currentTimeMillis() - startedAt;
      if (elapsedTime >= checkInterval || stop.get())
        continue;
      else
        Thread.sleep(checkInterval - elapsedTime);
    } while (!stop.get());
  } catch (Throwable t) {
    LOG.error("Caught an exception in the main loop of compactor initiator, exiting " +
        StringUtils.stringifyException(t));
  }
}
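lookForCurrentCompactions, called against the snapshot above, is not shown in this excerpt. A plausible sketch under the assumption that it simply scans the ShowCompactResponse for an entry in the initiated or working state matching the candidate's database, table, and partition (the real implementation may differ):

import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
import org.apache.hadoop.hive.metastore.txn.TxnStore;

public class CurrentCompactionCheck {
  // Returns true if the snapshot already contains an active (initiated or working)
  // compaction for the same db/table/partition as the candidate ci.
  static boolean lookForCurrentCompactions(ShowCompactResponse compactions, CompactionInfo ci) {
    if (compactions.getCompacts() == null) {
      return false;
    }
    for (ShowCompactResponseElement e : compactions.getCompacts()) {
      boolean active = TxnStore.INITIATED_RESPONSE.equals(e.getState())
          || TxnStore.WORKING_RESPONSE.equals(e.getState());
      boolean samePartition = (e.getPartitionname() == null)
          ? ci.partName == null
          : e.getPartitionname().equals(ci.partName);
      if (active && e.getDbname().equals(ci.dbname)
          && e.getTablename().equals(ci.tableName) && samePartition) {
        return true;
      }
    }
    return false;
  }
}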