Example use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in the Apache Hive project.
From class TestStatsUpdaterThread, method testPartitionsWithDifferentColsAll.
/**
 * Creates a partitioned table whose two partitions have column statistics for
 * different column subsets, runs one updater iteration, and expects that all three
 * columns are reported up to date on both partitions afterwards.
 *
 * @throws Exception if a query, a metastore call or the updater fails
 */
@Test(timeout = 80000)
public void testPartitionsWithDifferentColsAll() throws Exception {
  StatsUpdaterThread su = createUpdater();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  // Release the metastore client even when a query or assertion below fails.
  try {
    // Turn the auto-gather flags off; the checks below expect only the explicitly
    // analyzed columns to have up-to-date stats.
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
    executeQuery("create table simple_stats (s string, t string, u string) partitioned by (i int)");
    executeQuery("insert into simple_stats partition(i=0) values ('test', '0', 'foo')");
    executeQuery("insert into simple_stats partition(i=1) values ('test', '1', 'bar')");
    // i=0 gets stats for s only; i=1 gets stats for s and u.
    executeQuery("analyze table simple_stats partition(i=0) compute statistics for columns s");
    executeQuery("analyze table simple_stats partition(i=1) compute statistics for columns s, u");
    verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s"), msClient, true);
    verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("t", "u"), msClient, false);
    verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("s", "u"), msClient, true);
    verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("t"), msClient, false);

    assertTrue(su.runOneIteration());
    // Different columns means different commands have to be run.
    drainWorkQueue(su, 2);
    verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s", "t", "u"), msClient, true);
    verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("s", "t", "u"), msClient, true);

    assertFalse(su.runOneIteration());
    // Nothing else is updated after the first update.
    drainWorkQueue(su, 0);
  } finally {
    msClient.close();
  }
}
Example use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in the Apache Hive project.
From class TestStatsUpdaterThread, method testMultipleTables.
/**
 * Runs the updater against two unpartitioned tables. After one iteration the column
 * stats of both tables are verified (and unset again); then the skip property is set
 * on {@code simple_stats} and a second iteration is run, after which only
 * {@code simple_stats2} is verified through {@code verifyAndUnsetColStats}.
 *
 * @throws Exception if a query, a metastore call or the updater fails
 */
@Test(timeout = 80000)
public void testMultipleTables() throws Exception {
  StatsUpdaterThread su = createUpdater();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  // Release the metastore client even when a query or assertion below fails.
  try {
    executeQuery("create table simple_stats (s string)");
    executeQuery("insert into simple_stats (s) values ('test')");
    executeQuery("create table simple_stats2 (s string)");
    executeQuery("insert into simple_stats2 (s) values ('test2')");
    verifyAndUnsetColStats("simple_stats", Lists.newArrayList("s"), msClient);
    verifyAndUnsetColStats("simple_stats2", Lists.newArrayList("s"), msClient);

    // First pass: both tables are processed.
    assertTrue(su.runOneIteration());
    drainWorkQueue(su);
    verifyAndUnsetColStats("simple_stats", Lists.newArrayList("s"), msClient);
    verifyAndUnsetColStats("simple_stats2", Lists.newArrayList("s"), msClient);

    // Second pass: simple_stats carries the skip property set to "true".
    setTableSkipProperty(msClient, "simple_stats", "true");
    assertTrue(su.runOneIteration());
    drainWorkQueue(su);
    // NOTE(review): simple_stats only declares column "s"; checking "i" here looks
    // like a typo that weakens this assertion - confirm and switch to "s" if so.
    verifyStatsUpToDate("simple_stats", Lists.newArrayList("i"), msClient, false);
    verifyAndUnsetColStats("simple_stats2", Lists.newArrayList("s"), msClient);
  } finally {
    msClient.close();
  }
}
Example use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in the Apache Hive project.
From class TestStatsUpdaterThread, method testTxnPartitions.
/**
 * Exercises the updater on an insert-only transactional partitioned table. Two of the
 * three partitions are rewritten to carry a write ID allocated by an aborted
 * transaction; the test then checks how many partitions return column statistics
 * before and after the updater runs, for both the old and a fresh write ID list.
 *
 * @throws Exception if a query, a metastore call or the updater fails
 */
@Test
public void testTxnPartitions() throws Exception {
  StatsUpdaterThread su = createUpdater();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  // Release the metastore client even when a query or assertion below fails.
  try {
    executeQuery("create table simple_stats (s string) partitioned by (p int) TBLPROPERTIES "
        + "(\"transactional\"=\"true\", \"transactional_properties\"=\"insert_only\")");
    executeQuery("insert into simple_stats partition(p=1) values ('test')");
    executeQuery("insert into simple_stats partition(p=2) values ('test2')");
    executeQuery("insert into simple_stats partition(p=3) values ('test3')");
    // The updater is expected to find nothing to do after plain inserts.
    assertFalse(su.runOneIteration());
    drainWorkQueue(su, 0);

    executeQuery("insert overwrite table simple_stats partition(p=1) values ('test2')");
    executeQuery("insert overwrite table simple_stats partition(p=2) values ('test3')");
    // ... and likewise after the overwrites.
    assertFalse(su.runOneIteration());
    drainWorkQueue(su, 0);

    // Overwrite the txn state to refer to an aborted txn on some partitions.
    String dbName = ss.getCurrentDatabase();
    String tblName = "simple_stats";
    String fqName = dbName + "." + tblName;
    long badTxnId = msClient.openTxn("moo");
    long badWriteId = msClient.allocateTableWriteId(badTxnId, dbName, tblName);
    msClient.abortTxns(Lists.newArrayList(badTxnId));
    Partition part1 = msClient.getPartition(dbName, tblName, "p=1");
    Partition part2 = msClient.getPartition(dbName, tblName, "p=2");
    part1.setWriteId(badWriteId);
    part2.setWriteId(badWriteId);
    String currentWriteIds = msClient.getValidWriteIds(fqName).toString();
    // To update write ID we need to specify the write ID list to validate concurrent writes.
    msClient.alter_partitions(
        dbName, tblName, Lists.newArrayList(part1), null, currentWriteIds, badWriteId);
    msClient.alter_partitions(
        dbName, tblName, Lists.newArrayList(part2), null, currentWriteIds, badWriteId);

    // Only one of the three partitions is expected to return stats now; the updater
    // is then expected to queue work for the other two.
    Map<String, List<ColumnStatisticsObj>> stats = msClient.getPartitionColumnStatistics(
        dbName, tblName, Lists.newArrayList("p=1", "p=2", "p=3"), Lists.newArrayList("s"),
        Constants.HIVE_ENGINE, currentWriteIds);
    assertEquals(1, stats.size());

    assertTrue(su.runOneIteration());
    drainWorkQueue(su, 2);
    // Analyze treats stats like data (new write ID), so stats still should not be valid.
    stats = msClient.getPartitionColumnStatistics(
        dbName, tblName, Lists.newArrayList("p=1", "p=2", "p=3"), Lists.newArrayList("s"),
        Constants.HIVE_ENGINE, currentWriteIds);
    assertEquals(1, stats.size());

    // Test with null list of partNames
    stats = msClient.getPartitionColumnStatistics(
        dbName, tblName, null, Lists.newArrayList("s"), Constants.HIVE_ENGINE, currentWriteIds);
    assertEquals(0, stats.size());

    // New reader.
    currentWriteIds = msClient.getValidWriteIds(fqName).toString();
    stats = msClient.getPartitionColumnStatistics(
        dbName, tblName, Lists.newArrayList("p=1", "p=2", "p=3"), Lists.newArrayList("s"),
        Constants.HIVE_ENGINE, currentWriteIds);
    assertEquals(3, stats.size());
  } finally {
    msClient.close();
  }
}
Example use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in the Apache Hive project.
From class TestStatsUpdaterThread, method testPartitionsWithDifferentColsExistingOnly.
/**
 * With the stats auto-update setting configured to {@code "existing"}, unsets one
 * column's stats on each of two partitions, runs the updater, and expects the
 * per-partition stats state to be exactly what it was before the unset.
 *
 * @throws Exception if a query, a metastore call or the updater fails
 */
@Test(timeout = 80000)
public void testPartitionsWithDifferentColsExistingOnly() throws Exception {
  // Must be set before the updater is created.
  hiveConf.set(MetastoreConf.ConfVars.STATS_AUTO_UPDATE.getVarname(), "existing");
  StatsUpdaterThread su = createUpdater();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  // Release the metastore client even when a query or assertion below fails.
  try {
    // Turn the auto-gather flags off; the checks below expect only the explicitly
    // analyzed columns to have up-to-date stats.
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
    executeQuery("create table simple_stats (s string, t string, u string) partitioned by (i int)");
    executeQuery("insert into simple_stats partition(i=0) values ('test', '0', 'foo')");
    executeQuery("insert into simple_stats partition(i=1) values ('test', '1', 'bar')");
    executeQuery("insert into simple_stats partition(i=2) values ('test', '2', 'baz')");
    // i=0: s and t; i=1: no column list given; i=2: s only.
    executeQuery("analyze table simple_stats partition(i=0) compute statistics for columns s, t");
    executeQuery("analyze table simple_stats partition(i=1) compute statistics for columns");
    executeQuery("analyze table simple_stats partition(i=2) compute statistics for columns s");
    verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s", "t"), msClient, true);
    verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("u"), msClient, false);
    verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("s", "t", "u"), msClient, true);
    verifyStatsUpToDate("simple_stats", "i=2", Lists.newArrayList("s"), msClient, true);
    verifyStatsUpToDate("simple_stats", "i=2", Lists.newArrayList("u", "t"), msClient, false);

    // We will unset s on i=0, and t on i=1. Only these should be updated; and nothing for 2.
    verifyAndUnsetColStats("simple_stats", "i=0", Lists.newArrayList("s"), msClient);
    verifyAndUnsetColStats("simple_stats", "i=1", Lists.newArrayList("t"), msClient);

    assertTrue(su.runOneIteration());
    drainWorkQueue(su, 2);
    // Exact same state as above.
    verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s", "t"), msClient, true);
    verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("u"), msClient, false);
    verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("s", "t", "u"), msClient, true);
    verifyStatsUpToDate("simple_stats", "i=2", Lists.newArrayList("s"), msClient, true);
    verifyStatsUpToDate("simple_stats", "i=2", Lists.newArrayList("u", "t"), msClient, false);
  } finally {
    msClient.close();
  }
}
Example use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in the Apache Hive project.
From class TestStatsUpdaterThread, method testParallelOps.
/**
 * Checks the updater's work queue across several iterations: three items are
 * expected after the first iteration, none are added by a second iteration while
 * those are still queued, and a fourth is expected once a new partition without
 * stats appears. After draining, all tables and partitions are expected to be up to
 * date.
 *
 * @throws Exception if a query, a metastore call or the updater fails
 */
@Test(timeout = 80000)
public void testParallelOps() throws Exception {
  // Set high worker count so we get a longer queue.
  hiveConf.setInt(MetastoreConf.ConfVars.STATS_AUTO_UPDATE_WORKER_COUNT.getVarname(), 4);
  StatsUpdaterThread su = createUpdater();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  // Release the metastore client even when a query or assertion below fails.
  try {
    // Auto-gather flags off for the initial data.
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
    executeQuery("create table simple_stats (s string)");
    executeQuery("create table simple_stats2 (s string) partitioned by (i int)");
    executeQuery("create table simple_stats3 (s string) partitioned by (i int)");
    executeQuery("insert into simple_stats values ('test')");
    executeQuery("insert into simple_stats2 partition(i=0) values ('test')");
    executeQuery("insert into simple_stats3 partition(i=0) values ('test')");
    // Auto-gather flags on for one insert; simple_stats3 i=1 is expected to be up to
    // date below without the updater having processed anything.
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, true);
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, true);
    executeQuery("insert into simple_stats3 partition(i=1) values ('test')");

    assertTrue(su.runOneIteration());
    assertEquals(3, su.getQueueLength());
    // Nothing updated yet.
    verifyStatsUpToDate("simple_stats", Lists.newArrayList("s"), msClient, false);
    verifyPartStatsUpToDate(1, 0, msClient, "simple_stats2", false);
    verifyStatsUpToDate("simple_stats3", "i=0", Lists.newArrayList("s"), msClient, false);
    verifyStatsUpToDate("simple_stats3", "i=1", Lists.newArrayList("s"), msClient, true);

    assertFalse(su.runOneIteration());
    // Nothing new added to the queue while analyze runs.
    assertEquals(3, su.getQueueLength());

    // Add another partition without stats.
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
    executeQuery("insert into simple_stats3 partition(i=2) values ('test')");

    assertTrue(su.runOneIteration());
    // An item for new partition is queued now.
    assertEquals(4, su.getQueueLength());

    drainWorkQueue(su, 4);
    verifyStatsUpToDate("simple_stats", Lists.newArrayList("s"), msClient, true);
    verifyPartStatsUpToDate(1, 0, msClient, "simple_stats2", true);
    verifyPartStatsUpToDate(3, 0, msClient, "simple_stats3", true);

    assertFalse(su.runOneIteration());
    // Nothing else is updated after the first update.
    drainWorkQueue(su, 0);
  } finally {
    msClient.close();
  }
}
Aggregations