Example use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in the Apache Hive project.
From the class TestCompactor, method dynamicPartitioningInsert.
/**
 * Checks that, after a dynamic-partition insert, a single Initiator run queues one
 * compaction request per partition that was written.
 *
 * <p>The delta-count threshold is set to 0 before the Initiator runs; the test then expects
 * exactly two requests in state "initiated", one for {@code ds=today} and one for
 * {@code ds=yesterday}.
 */
@Test
public void dynamicPartitioningInsert() throws Exception {
  String tblName = "dpct";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)"
      // currently ACID requires table to be bucketed
      + " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  // One statement that writes into two partitions through dynamic partitioning.
  executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', "
      + "'today'), (2, 'wilma', 'yesterday')", driver);

  Initiator initiator = new Initiator();
  initiator.setThreadId((int) initiator.getId());
  conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0);
  initiator.setHiveConf(conf);
  // The stop flag is already true when handed over; presumably this makes run() do a single
  // pass and return -- confirm against Initiator.
  AtomicBoolean stop = new AtomicBoolean(true);
  initiator.init(stop, new AtomicBoolean());
  initiator.run();

  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  Assert.assertEquals(2, compacts.size());

  // Collect the partition names sorted so the check does not depend on response order.
  SortedSet<String> partNames = new TreeSet<String>();
  for (ShowCompactResponseElement compact : compacts) {
    Assert.assertEquals("default", compact.getDbname());
    Assert.assertEquals(tblName, compact.getTablename());
    Assert.assertEquals("initiated", compact.getState());
    partNames.add(compact.getPartitionname());
  }
  // Compare the whole list at once: a duplicated or missing partition then fails with a
  // readable assertion message rather than an IndexOutOfBoundsException.
  Assert.assertEquals(Arrays.asList("ds=today", "ds=yesterday"), new ArrayList<String>(partNames));
}
Example use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in the Apache Hive project.
From the class TestCompactor, method dynamicPartitioningUpdate.
/**
 * Checks that, after a dynamic-partition insert followed by an update of every row, a single
 * Initiator run queues one compaction request per partition.
 *
 * <p>The test expects exactly two requests in state "initiated", one for {@code ds=today} and
 * one for {@code ds=yesterday}.
 */
@Test
public void dynamicPartitioningUpdate() throws Exception {
  String tblName = "udpct";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)"
      // currently ACID requires table to be bucketed
      + " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', "
      + "'today'), (2, 'wilma', 'yesterday')", driver);
  // No WHERE clause, so the update touches the rows in both partitions.
  executeStatementOnDriver("update " + tblName + " set b = 'barney'", driver);

  Initiator initiator = new Initiator();
  initiator.setThreadId((int) initiator.getId());
  // Set to 1 so insert doesn't set it off but update does
  conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
  initiator.setHiveConf(conf);
  // The stop flag is already true when handed over; presumably this makes run() do a single
  // pass and return -- confirm against Initiator.
  AtomicBoolean stop = new AtomicBoolean(true);
  initiator.init(stop, new AtomicBoolean());
  initiator.run();

  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  Assert.assertEquals(2, compacts.size());

  // Collect the partition names sorted so the check does not depend on response order.
  SortedSet<String> partNames = new TreeSet<String>();
  for (ShowCompactResponseElement compact : compacts) {
    Assert.assertEquals("default", compact.getDbname());
    Assert.assertEquals(tblName, compact.getTablename());
    Assert.assertEquals("initiated", compact.getState());
    partNames.add(compact.getPartitionname());
  }
  // Compare the whole list at once: a duplicated or missing partition then fails with a
  // readable assertion message rather than an IndexOutOfBoundsException.
  Assert.assertEquals(Arrays.asList("ds=today", "ds=yesterday"), new ArrayList<String>(partNames));
}
Example use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in the Apache Hive project.
From the class TestCompactor, method schemaEvolutionAddColDynamicPartitioningUpdate.
/**
 * Exercises ADD COLUMNS on a partitioned transactional table together with dynamic-partition
 * inserts and updates, then checks that the Initiator queues a compaction request for every
 * partition.
 *
 * <p>Sequence: insert two rows, update column b, add column c, insert three more rows (two of
 * them into new partitions), update column c on all rows, run the Initiator once. The table
 * contents are verified after each data-changing step and once more after the requests have
 * been queued.
 */
@Test
public void schemaEvolutionAddColDynamicPartitioningUpdate() throws Exception {
  String tblName = "udpct";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)"
      // currently ACID requires table to be bucketed
      + " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', "
      + "'today'), (2, 'wilma', 'yesterday')", driver);
  executeStatementOnDriver("update " + tblName + " set b = 'barney'", driver);

  // Validate the update.
  assertRowsOrderedByA(tblName,
      "1\tbarney\ttoday",
      "2\tbarney\tyesterday");

  // ALTER TABLE ... ADD COLUMNS
  executeStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(c int)", driver);

  // Validate that existing rows read NULL for the added column c.
  assertRowsOrderedByA(tblName,
      "1\tbarney\tNULL\ttoday",
      "2\tbarney\tNULL\tyesterday");

  // Second INSERT round: two new partitions plus one row into the existing partition 'yesterday'.
  executeStatementOnDriver("insert into " + tblName + " partition (ds) values "
      + "(3, 'mark', 1900, 'soon'), (4, 'douglas', 1901, 'last_century'), "
      + "(5, 'doc', 1902, 'yesterday')", driver);

  // Validate that the new rows carry their values for column c.
  assertRowsOrderedByA(tblName,
      "1\tbarney\tNULL\ttoday",
      "2\tbarney\tNULL\tyesterday",
      "3\tmark\t1900\tsoon",
      "4\tdouglas\t1901\tlast_century",
      "5\tdoc\t1902\tyesterday");

  executeStatementOnDriver("update " + tblName + " set c = 2000", driver);

  // Validate the update of new column c, even in old rows.
  assertRowsOrderedByA(tblName,
      "1\tbarney\t2000\ttoday",
      "2\tbarney\t2000\tyesterday",
      "3\tmark\t2000\tsoon",
      "4\tdouglas\t2000\tlast_century",
      "5\tdoc\t2000\tyesterday");

  Initiator initiator = new Initiator();
  initiator.setThreadId((int) initiator.getId());
  // Set to 1 so insert doesn't set it off but update does
  conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
  initiator.setHiveConf(conf);
  // The stop flag is already true when handed over; presumably this makes run() do a single
  // pass and return -- confirm against Initiator.
  AtomicBoolean stop = new AtomicBoolean(true);
  initiator.init(stop, new AtomicBoolean());
  initiator.run();

  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  Assert.assertEquals(4, compacts.size());

  // Collect the partition names sorted so the check does not depend on response order.
  SortedSet<String> partNames = new TreeSet<String>();
  for (ShowCompactResponseElement compact : compacts) {
    Assert.assertEquals("default", compact.getDbname());
    Assert.assertEquals(tblName, compact.getTablename());
    Assert.assertEquals("initiated", compact.getState());
    partNames.add(compact.getPartitionname());
  }
  // Compare the whole list at once: a duplicated or missing partition then fails with a
  // readable assertion message rather than an IndexOutOfBoundsException.
  Assert.assertEquals(
      Arrays.asList("ds=last_century", "ds=soon", "ds=today", "ds=yesterday"),
      new ArrayList<String>(partNames));

  // Re-validate the data once the compaction requests are queued. Only the Initiator is run
  // in this method and the requests are asserted to be in state "initiated", so this confirms
  // reads are unchanged at that point; it does not by itself read compacted files.
  assertRowsOrderedByA(tblName,
      "1\tbarney\t2000\ttoday",
      "2\tbarney\t2000\tyesterday",
      "3\tmark\t2000\tsoon",
      "4\tdouglas\t2000\tlast_century",
      "5\tdoc\t2000\tyesterday");
}

/**
 * Runs {@code SELECT * ... ORDER BY a} against the table and asserts that the driver returns
 * exactly the given rows, in order.
 *
 * @param tblName table to query
 * @param expectedRows expected result lines, tab-separated, in ascending order of column a
 */
private void assertRowsOrderedByA(String tblName, String... expectedRows) throws Exception {
  executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
  ArrayList<String> valuesReadFromHiveDriver = new ArrayList<String>();
  driver.getResults(valuesReadFromHiveDriver);
  Assert.assertEquals(expectedRows.length, valuesReadFromHiveDriver.size());
  for (int i = 0; i < expectedRows.length; i++) {
    Assert.assertEquals(expectedRows[i], valuesReadFromHiveDriver.get(i));
  }
}
Example use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in the Apache Hive project.
From the class TestAcidOnTez, method testJoin.
/**
 * Runs the join queries, requests a MAJOR compaction of {@code Table.ACIDTBL}, and runs the
 * same queries again after the worker and the cleaner have been invoked.
 *
 * <p>Ideally a test like this should be a qfile test. However, the explain output from qfile
 * is always slightly different depending on where the test is run, specifically due to file
 * size estimation.
 *
 * @param engine execution engine name; the Tez-specific setup is applied only for "tez"
 * @param joinType join flavour; the map-join setup is applied only for "MapJoin"
 */
private void testJoin(String engine, String joinType) throws Exception {
  // Each engine gets its own copy of the existing hive conf.
  HiveConf tezConf = new HiveConf(hiveConf);
  HiveConf mrConf = new HiveConf(hiveConf);

  if (engine.equals("tez")) {
    // one-time setup to make query able to run with Tez
    setupTez(tezConf);
  }
  if (joinType.equals("MapJoin")) {
    setupMapJoin(tezConf);
    setupMapJoin(mrConf);
  }

  runQueries(engine, joinType, tezConf, mrConf);

  // Perform compaction. Join result after compaction should still be the same
  runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");
  TestTxnCommands2.runWorker(hiveConf);

  // After the worker has run, exactly one compaction entry is expected, in the cleaning state.
  ShowCompactResponse history =
      TxnUtils.getTxnStore(hiveConf).showCompact(new ShowCompactRequest());
  Assert.assertEquals("Unexpected number of compactions in history", 1,
      history.getCompactsSize());
  Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE,
      history.getCompacts().get(0).getState());

  TestTxnCommands2.runCleaner(hiveConf);
  runQueries(engine, joinType, tezConf, mrConf);
}
Example use of org.apache.hadoop.hive.metastore.api.ShowCompactRequest in the Apache Hive project.
From the class TestCompactionTxnHandler, method testFindNextToCompact.
/**
 * Submits one MINOR compaction request, has worker "fred" pick it up through
 * {@code findNextToCompact}, and checks both the returned {@code CompactionInfo} and what
 * {@code showCompact} reports afterwards.
 */
@Test
public void testFindNextToCompact() throws Exception {
  CompactionRequest request = new CompactionRequest("foo", "bar", CompactionType.MINOR);
  request.setPartitionname("ds=today");
  txnHandler.compact(request);

  // Reference timestamp for the start-time window check at the end.
  long now = System.currentTimeMillis();

  CompactionInfo picked = txnHandler.findNextToCompact("fred");
  assertNotNull(picked);
  assertEquals("foo", picked.dbname);
  assertEquals("bar", picked.tableName);
  assertEquals("ds=today", picked.partName);
  assertEquals(CompactionType.MINOR, picked.type);
  assertNull(picked.runAs);

  // The only request has been handed out, so a second lookup must come back empty.
  assertNull(txnHandler.findNextToCompact("fred"));

  txnHandler.setRunAs(picked.id, "bob");

  List<ShowCompactResponseElement> shown =
      txnHandler.showCompact(new ShowCompactRequest()).getCompacts();
  assertEquals(1, shown.size());

  ShowCompactResponseElement entry = shown.get(0);
  assertEquals("foo", entry.getDbname());
  assertEquals("bar", entry.getTablename());
  assertEquals("ds=today", entry.getPartitionname());
  assertEquals(CompactionType.MINOR, entry.getType());
  assertEquals("working", entry.getState());

  // The recorded start must lie strictly within 5 seconds of the reference timestamp.
  boolean startsBeforeUpperBound = entry.getStart() - 5000 < now;
  boolean startsAfterLowerBound = entry.getStart() + 5000 > now;
  assertTrue(startsBeforeUpperBound && startsAfterLowerBound);

  assertEquals("fred", entry.getWorkerid());
  assertEquals("bob", entry.getRunAs());
}
Aggregations