Use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.
From the class TestTxnCommands2, method testCompactWithDelete.
@Test
public void testCompactWithDelete() throws Exception {
int[][] tableData = { { 1, 2 }, { 3, 4 } };
runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData));
runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");
Worker t = new Worker();
t.setThreadId((int) t.getId());
t.setConf(hiveConf);
AtomicBoolean stop = new AtomicBoolean();
AtomicBoolean looped = new AtomicBoolean();
stop.set(true);
t.init(stop, looped);
t.run();
runStatementOnDriver("delete from " + Table.ACIDTBL + " where b = 4");
runStatementOnDriver("update " + Table.ACIDTBL + " set b = -2 where b = 2");
runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MINOR'");
t.run();
TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
Assert.assertEquals("Unexpected number of compactions in history", 2, resp.getCompactsSize());
Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
Assert.assertEquals("Unexpected 1 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(1).getState());
}
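The Worker setup in testCompactWithDelete (create a Worker, set its thread id and conf, pre-set the stop flag to true, init, run) executes exactly one compaction pass inline; it is the same pattern the later examples invoke via runWorker / TestTxnCommands2.runWorker. A minimal sketch of that sequence pulled out into a reusable helper; the name runWorkerOnce is hypothetical and not the actual TestTxnCommands2.runWorker implementation:
static void runWorkerOnce(HiveConf hiveConf) throws Exception {
  // Same sequence as above: with stop already true, the Worker's loop runs once and exits.
  Worker t = new Worker();
  t.setThreadId((int) t.getId());
  t.setConf(hiveConf);
  AtomicBoolean stop = new AtomicBoolean(true);
  AtomicBoolean looped = new AtomicBoolean();
  t.init(stop, looped);
  t.run();
}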
Use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.
From the class TestTxnCommands2, method testEmptyInTblproperties.
/**
* https://issues.apache.org/jira/browse/HIVE-17391
*/
@Test
public void testEmptyInTblproperties() throws Exception {
runStatementOnDriver("create table t1 " + "(a int, b int) stored as orc TBLPROPERTIES ('serialization.null.format'='', 'transactional'='true')");
runStatementOnDriver("insert into t1 " + "(a,b) values(1,7),(3,7)");
runStatementOnDriver("update t1" + " set b = -2 where b = 2");
runStatementOnDriver("alter table t1 " + " compact 'MAJOR'");
runWorker(hiveConf);
TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
}
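Each test example in this listing ends with the same verification: call TxnStore.showCompact with an empty ShowCompactRequest (no filter, so the full compaction history comes back) and assert the number of entries and their state. A sketch of that pattern as a helper; assertCompactionHistory is a hypothetical name, not part of the Hive test code:
private static void assertCompactionHistory(HiveConf hiveConf, int expectedCount, String expectedState) throws Exception {
  TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
  // An empty ShowCompactRequest returns current and completed compactions alike.
  ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
  Assert.assertEquals("Unexpected number of compactions in history", expectedCount, resp.getCompactsSize());
  for (ShowCompactResponseElement e : resp.getCompacts()) {
    Assert.assertEquals("Unexpected compaction state", expectedState, e.getState());
  }
}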
Use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.
From the class TestTxnNoBuckets, method testEmptyCompactionResult.
/**
* see HIVE-18429
*/
@Test
public void testEmptyCompactionResult() throws Exception {
hiveConf.set(MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID.getVarname(), "true");
runStatementOnDriver("drop table if exists T");
runStatementOnDriver("create table T (a int, b int) stored as orc");
int[][] data = { { 1, 2 }, { 3, 4 } };
runStatementOnDriver("insert into T" + makeValuesClause(data));
runStatementOnDriver("insert into T" + makeValuesClause(data));
// delete the bucket files so now we have empty delta dirs
List<String> rs = runStatementOnDriver("select distinct INPUT__FILE__NAME from T");
FileSystem fs = FileSystem.get(hiveConf);
for (String path : rs) {
fs.delete(new Path(path), true);
}
runStatementOnDriver("alter table T compact 'major'");
TestTxnCommands2.runWorker(hiveConf);
// check status of compaction job
TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
// now run another compaction make sure empty dirs don't cause issues
runStatementOnDriver("insert into T" + makeValuesClause(data));
runStatementOnDriver("alter table T compact 'major'");
TestTxnCommands2.runWorker(hiveConf);
// check status of compaction job
resp = txnHandler.showCompact(new ShowCompactRequest());
Assert.assertEquals("Unexpected number of compactions in history", 2, resp.getCompactsSize());
for (int i = 0; i < 2; i++) {
Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(i).getState());
Assert.assertTrue(resp.getCompacts().get(i).getHadoopJobId().startsWith("job_local"));
}
rs = runStatementOnDriver("select a, b from T order by a, b");
Assert.assertEquals(stringifyValues(data), rs);
}
Use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.
From the class TestTxnNoBuckets, method testNonAcidToAcidVectorzied.
/**
* Tests to check that we are able to use vectorized acid reader,
* VectorizedOrcAcidRowBatchReader, when reading "original" files,
* i.e. those that were written before the table was converted to acid.
* See also acid_vectorization_original*.q
*/
@Test
public void testNonAcidToAcidVectorzied() throws Exception {
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "none");
// this enables vectorization of ROW__ID
// HIVE-12631
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED, true);
runStatementOnDriver("drop table if exists T");
runStatementOnDriver("create table T(a int, b int) stored as orc tblproperties('transactional'='false')");
int[][] values = { { 1, 2 }, { 2, 4 }, { 5, 6 }, { 6, 8 }, { 9, 10 } };
runStatementOnDriver("insert into T(a, b) " + makeValuesClause(values));
// , 'transactional_properties'='default'
runStatementOnDriver("alter table T SET TBLPROPERTIES ('transactional'='true')");
// Execution mode: vectorized
// this uses VectorizedOrcAcidRowBatchReader
String query = "select a from T where b > 6 order by a";
List<String> rs = runStatementOnDriver(query);
String[][] expected = { { "6", "" }, { "9", "" } };
checkExpected(rs, expected, "After conversion");
Assert.assertEquals(Integer.toString(6), rs.get(0));
Assert.assertEquals(Integer.toString(9), rs.get(1));
assertVectorized(shouldVectorize(), query);
// Why isn't PPD working? It is working, but the storage layer doesn't do row-level filtering, only row-group-level filtering.
// this uses VectorizedOrcAcidRowBatchReader
query = "select ROW__ID, a from T where b > 6 order by a";
rs = runStatementOnDriver(query);
String[][] expected1 = { { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "6" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9" } };
checkExpected(rs, expected1, "After conversion with VC1");
assertVectorized(shouldVectorize(), query);
// this uses VectorizedOrcAcidRowBatchReader
query = "select ROW__ID, a from T where b > 0 order by a";
rs = runStatementOnDriver(query);
String[][] expected2 = { { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}", "1" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}", "2" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}", "5" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "6" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9" } };
checkExpected(rs, expected2, "After conversion with VC2");
assertVectorized(shouldVectorize(), query);
// doesn't vectorize (uses neither of the Vectorized Acid readers)
query = "select ROW__ID, a, INPUT__FILE__NAME from T where b > 6 order by a";
rs = runStatementOnDriver(query);
Assert.assertEquals("", 2, rs.size());
String[][] expected3 = { { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6", "warehouse/t/000000_0" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9", "warehouse/t/000000_0" } };
checkExpected(rs, expected3, "After non-vectorized read");
Assert.assertEquals(0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
// not vectorized because INPUT__FILE__NAME is selected
assertVectorized(false, query);
runStatementOnDriver("update T set b = 17 where a = 1");
// this should use VectorizedOrcAcidRowReader
query = "select ROW__ID, b from T where b > 0 order by a";
rs = runStatementOnDriver(query);
String[][] expected4 = { { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}", "17" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}", "4" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}", "6" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "8" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "10" } };
checkExpected(rs, expected4, "After conversion with VC4");
assertVectorized(shouldVectorize(), query);
runStatementOnDriver("alter table T compact 'major'");
TestTxnCommands2.runWorker(hiveConf);
TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
// this should not vectorize at all
query = "select ROW__ID, a, b, INPUT__FILE__NAME from T where b > 0 order by a, b";
rs = runStatementOnDriver(query);
String[][] expected5 = { // the row__ids are the same after compaction
{ "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "warehouse/t/base_0000001/bucket_00000" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4", "warehouse/t/base_0000001/bucket_00000" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6", "warehouse/t/base_0000001/bucket_00000" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8", "warehouse/t/base_0000001/bucket_00000" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10", "warehouse/t/base_0000001/bucket_00000" } };
checkExpected(rs, expected5, "After major compaction");
// not vectorized because INPUT__FILE__NAME is selected
assertVectorized(false, query);
}
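The expected tables in this test pair a prefix of each output row (ROW__ID and the selected columns, tab-separated) with a suffix (the tail of INPUT__FILE__NAME, or an empty string when no file name is selected), which is why the file-name columns omit the warehouse root. A sketch of a checker matching that contract; this is an assumed reconstruction, not the actual checkExpected helper from the test base class:
private static void checkExpectedSketch(List<String> actual, String[][] expected, String msg) {
  Assert.assertEquals(msg + ": row count", expected.length, actual.size());
  for (int i = 0; i < expected.length; i++) {
    // expected[i][0] must start the row, expected[i][1] must end it; an empty suffix always matches
    Assert.assertTrue(msg + ": row " + i + " was " + actual.get(i), actual.get(i).startsWith(expected[i][0]));
    Assert.assertTrue(msg + ": row " + i + " was " + actual.get(i), actual.get(i).endsWith(expected[i][1]));
  }
}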
Use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in project hive by apache.
From the class Initiator, method run.
@Override
public void run() {
// Make sure nothing escapes this run method and kills the metastore at large, so wrap it in a big catch Throwable statement.
try {
recoverFailedCompactions(false);
int abortedThreshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD);
// HiveMetaStore.
do {
long startedAt = -1;
TxnStore.MutexAPI.LockHandle handle = null;
// Wrap the inner parts of the loop in a catch Throwable so that errors in one iteration don't doom the entire thread.
try {
handle = txnHandler.getMutexAPI().acquireLock(TxnStore.MUTEX_KEY.Initiator.name());
startedAt = System.currentTimeMillis();
// todo: add method to only get current i.e. skip history - more efficient
ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest());
Set<CompactionInfo> potentials = txnHandler.findPotentialCompactions(abortedThreshold);
LOG.debug("Found " + potentials.size() + " potential compactions, " + "checking to see if we should compact any of them");
for (CompactionInfo ci : potentials) {
LOG.info("Checking to see if we should compact " + ci.getFullPartitionName());
try {
Table t = resolveTable(ci);
if (t == null) {
// Most likely this means it's a temp table
LOG.info("Can't find table " + ci.getFullTableName() + ", assuming it's a temp " + "table or has been dropped and moving on.");
continue;
}
// check if no compaction set for this table
if (noAutoCompactSet(t)) {
LOG.info("Table " + tableName(t) + " marked " + hive_metastoreConstants.TABLE_NO_AUTO_COMPACT + "=true so we will not compact it.");
continue;
}
// If the table is partitioned but the request carries no partition name, then it's a dynamic partitioning case and we shouldn't check the table itself.
if (t.getPartitionKeys() != null && t.getPartitionKeys().size() > 0 && ci.partName == null) {
LOG.debug("Skipping entry for " + ci.getFullTableName() + " as it is from dynamic" + " partitioning");
continue;
}
// Skip entities that already have a compaction initiated or working; note that a compaction could have been requested between the time currentCompactions was generated and now.
if (lookForCurrentCompactions(currentCompactions, ci)) {
LOG.debug("Found currently initiated or working compaction for " + ci.getFullPartitionName() + " so we will not initiate another compaction");
continue;
}
if (txnHandler.checkFailedCompactions(ci)) {
LOG.warn("Will not initiate compaction for " + ci.getFullPartitionName() + " since last " + HiveConf.ConfVars.COMPACTOR_INITIATOR_FAILED_THRESHOLD + " attempts to compact it failed.");
txnHandler.markFailed(ci);
continue;
}
// Figure out who we should run the file operations as
Partition p = resolvePartition(ci);
if (p == null && ci.partName != null) {
LOG.info("Can't find partition " + ci.getFullPartitionName() + ", assuming it has been dropped and moving on.");
continue;
}
// Compaction doesn't work under a transaction and hence pass null for validTxnList
// The response will have one entry per table and hence we get only one ValidWriteIdList
String fullTableName = TxnUtils.getFullTableName(t.getDbName(), t.getTableName());
GetValidWriteIdsRequest rqst = new GetValidWriteIdsRequest(Collections.singletonList(fullTableName), null);
ValidWriteIdList tblValidWriteIds = TxnUtils.createValidCompactWriteIdList(txnHandler.getValidWriteIds(rqst).getTblValidWriteIds().get(0));
StorageDescriptor sd = resolveStorageDescriptor(t, p);
String runAs = findUserToRunAs(sd.getLocation(), t);
/*Future thought: checkForCompaction will check a lot of file metadata and may be expensive.
* Long term we should consider having a thread pool here and running checkForCompactionS
* in parallel*/
CompactionType compactionNeeded = checkForCompaction(ci, tblValidWriteIds, sd, t.getParameters(), runAs);
if (compactionNeeded != null)
requestCompaction(ci, runAs, compactionNeeded);
} catch (Throwable t) {
LOG.error("Caught exception while trying to determine if we should compact " + ci + ". Marking failed to avoid repeated failures, " + "" + StringUtils.stringifyException(t));
txnHandler.markFailed(ci);
}
}
// Check for timed out remote workers.
recoverFailedCompactions(true);
// Clean anything from the txns table that has no components left in txn_components.
txnHandler.cleanEmptyAbortedTxns();
} catch (Throwable t) {
LOG.error("Initiator loop caught unexpected exception this time through the loop: " + StringUtils.stringifyException(t));
} finally {
if (handle != null) {
handle.releaseLocks();
}
}
long elapsedTime = System.currentTimeMillis() - startedAt;
if (elapsedTime >= checkInterval || stop.get())
continue;
else
Thread.sleep(checkInterval - elapsedTime);
} while (!stop.get());
} catch (Throwable t) {
LOG.error("Caught an exception in the main loop of compactor initiator, exiting " + StringUtils.stringifyException(t));
}
}
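For context, lookForCurrentCompactions above consults the ShowCompactResponse fetched once at the top of the loop. A hedged sketch of what such a check can look like, not the actual Initiator implementation: an entry counts as current when it targets the same db/table/partition and is still in the initiated or working state.
private boolean hasCurrentCompaction(ShowCompactResponse compactions, CompactionInfo ci) {
  if (compactions.getCompacts() == null) {
    return false;
  }
  for (ShowCompactResponseElement e : compactions.getCompacts()) {
    boolean sameTarget = e.getDbname().equals(ci.dbname) && e.getTablename().equals(ci.tableName) && (e.getPartitionname() == null ? ci.partName == null : e.getPartitionname().equals(ci.partName));
    boolean inFlight = TxnStore.INITIATED_RESPONSE.equals(e.getState()) || TxnStore.WORKING_RESPONSE.equals(e.getState());
    if (sameTarget && inFlight) {
      return true;
    }
  }
  return false;
}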