Search in sources :

Example 56 with HiveMetaStoreClient

use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.

the class TestStatsUpdaterThread method testPartitionsWithDifferentColsAll.

/**
 * Exercises the stats updater on a partitioned table whose partitions have column
 * statistics for different subsets of columns (i=0 has "s"; i=1 has "s" and "u").
 * After one updater iteration and draining the work queue, all three columns are
 * expected to be up to date on both partitions, and a second iteration is expected
 * to find nothing to do.
 *
 * @throws Exception if any query, metastore call or verification step fails
 */
@Test(timeout = 80000)
public void testPartitionsWithDifferentColsAll() throws Exception {
    StatsUpdaterThread su = createUpdater();
    IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
    try {
        // Disable automatic stats gathering so the inserts below do not produce stats.
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
        executeQuery("create table simple_stats (s string, t string, u string) partitioned by (i int)");
        executeQuery("insert into simple_stats partition(i=0) values ('test', '0', 'foo')");
        executeQuery("insert into simple_stats partition(i=1) values ('test', '1', 'bar')");
        executeQuery("analyze table simple_stats partition(i=0) compute statistics for columns s");
        executeQuery("analyze table simple_stats partition(i=1) compute statistics for columns s, u");
        // Only the explicitly analyzed columns are up to date at this point.
        verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s"), msClient, true);
        verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("t", "u"), msClient, false);
        verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("s", "u"), msClient, true);
        verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("t"), msClient, false);
        assertTrue(su.runOneIteration());
        // Different columns means different commands have to be run.
        drainWorkQueue(su, 2);
        verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s", "t", "u"), msClient, true);
        verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("s", "t", "u"), msClient, true);
        assertFalse(su.runOneIteration());
        // Nothing else is updated after the first update.
        drainWorkQueue(su, 0);
    } finally {
        // Release the metastore client even when an assertion above fails.
        msClient.close();
    }
}
Also used : HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)

Example 57 with HiveMetaStoreClient

use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.

the class TestStatsUpdaterThread method testMultipleTables.

/**
 * Exercises the stats updater against two unpartitioned tables. Column stats are
 * verified and unset on both tables, regenerated by one updater iteration, and
 * unset again; then simple_stats is given the skip property and another iteration
 * is run, after which only simple_stats2 is expected to have up-to-date stats.
 *
 * @throws Exception if any query, metastore call or verification step fails
 */
@Test(timeout = 80000)
public void testMultipleTables() throws Exception {
    StatsUpdaterThread su = createUpdater();
    IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
    try {
        executeQuery("create table simple_stats (s string)");
        executeQuery("insert into simple_stats (s) values ('test')");
        executeQuery("create table simple_stats2 (s string)");
        executeQuery("insert into simple_stats2 (s) values ('test2')");
        verifyAndUnsetColStats("simple_stats", Lists.newArrayList("s"), msClient);
        verifyAndUnsetColStats("simple_stats2", Lists.newArrayList("s"), msClient);
        assertTrue(su.runOneIteration());
        drainWorkQueue(su);
        verifyAndUnsetColStats("simple_stats", Lists.newArrayList("s"), msClient);
        verifyAndUnsetColStats("simple_stats2", Lists.newArrayList("s"), msClient);
        setTableSkipProperty(msClient, "simple_stats", "true");
        assertTrue(su.runOneIteration());
        drainWorkQueue(su);
        // simple_stats only has column "s"; checking a non-existent column would make
        // this assertion vacuous. The skipped table's stats must remain out of date.
        verifyStatsUpToDate("simple_stats", Lists.newArrayList("s"), msClient, false);
        verifyAndUnsetColStats("simple_stats2", Lists.newArrayList("s"), msClient);
    } finally {
        // Release the metastore client even when an assertion above fails.
        msClient.close();
    }
}
Also used : HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)

Example 58 with HiveMetaStoreClient

use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.

the class TestStatsUpdaterThread method testTxnPartitions.

/**
 * Exercises the stats updater on an insert-only transactional, partitioned table.
 * The write ID of partitions p=1 and p=2 is overwritten with one allocated by an
 * aborted transaction, the updater is run, and partition column statistics are then
 * read back both with the write ID list captured before the update and with a
 * freshly obtained one.
 *
 * @throws Exception if any query, metastore call or verification step fails
 */
@Test
public void testTxnPartitions() throws Exception {
    StatsUpdaterThread su = createUpdater();
    IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
    try {
        executeQuery("create table simple_stats (s string) partitioned by (p int) TBLPROPERTIES " + "(\"transactional\"=\"true\", \"transactional_properties\"=\"insert_only\")");
        executeQuery("insert into simple_stats partition(p=1) values ('test')");
        executeQuery("insert into simple_stats partition(p=2) values ('test2')");
        executeQuery("insert into simple_stats partition(p=3) values ('test3')");
        assertFalse(su.runOneIteration());
        drainWorkQueue(su, 0);
        executeQuery("insert overwrite table simple_stats partition(p=1) values ('test2')");
        executeQuery("insert overwrite table simple_stats partition(p=2) values ('test3')");
        assertFalse(su.runOneIteration());
        drainWorkQueue(su, 0);
        // Overwrite the txn state to refer to an aborted txn on some partitions.
        String dbName = ss.getCurrentDatabase(), tblName = "simple_stats", fqName = dbName + "." + tblName;
        long badTxnId = msClient.openTxn("moo");
        long badWriteId = msClient.allocateTableWriteId(badTxnId, dbName, tblName);
        msClient.abortTxns(Lists.newArrayList(badTxnId));
        Partition part1 = msClient.getPartition(dbName, tblName, "p=1");
        Partition part2 = msClient.getPartition(dbName, tblName, "p=2");
        part1.setWriteId(badWriteId);
        part2.setWriteId(badWriteId);
        String currentWriteIds = msClient.getValidWriteIds(fqName).toString();
        // To update write ID we need to specify the write ID list to validate concurrent writes.
        msClient.alter_partitions(dbName, tblName, Lists.newArrayList(part1), null, currentWriteIds, badWriteId);
        msClient.alter_partitions(dbName, tblName, Lists.newArrayList(part2), null, currentWriteIds, badWriteId);
        // We expect two partitions to be updated.
        Map<String, List<ColumnStatisticsObj>> stats = msClient.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList("p=1", "p=2", "p=3"), Lists.newArrayList("s"), Constants.HIVE_ENGINE, currentWriteIds);
        assertEquals(1, stats.size());
        assertTrue(su.runOneIteration());
        drainWorkQueue(su, 2);
        // Analyze treats stats like data (new write ID), so stats still should not be valid.
        stats = msClient.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList("p=1", "p=2", "p=3"), Lists.newArrayList("s"), Constants.HIVE_ENGINE, currentWriteIds);
        assertEquals(1, stats.size());
        // Test with null list of partNames
        stats = msClient.getPartitionColumnStatistics(dbName, tblName, null, Lists.newArrayList("s"), Constants.HIVE_ENGINE, currentWriteIds);
        assertEquals(0, stats.size());
        // New reader.
        currentWriteIds = msClient.getValidWriteIds(fqName).toString();
        stats = msClient.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList("p=1", "p=2", "p=3"), Lists.newArrayList("s"), Constants.HIVE_ENGINE, currentWriteIds);
        assertEquals(3, stats.size());
    } finally {
        // Release the metastore client even when an assertion above fails.
        msClient.close();
    }
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) ArrayList(java.util.ArrayList) List(java.util.List) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)

Example 59 with HiveMetaStoreClient

use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.

the class TestStatsUpdaterThread method testPartitionsWithDifferentColsExistingOnly.

/**
 * Exercises the stats updater with STATS_AUTO_UPDATE set to "existing" on a
 * partitioned table whose partitions have stats for different column subsets.
 * Stats are unset for "s" on i=0 and "t" on i=1; after one updater iteration the
 * per-partition state is expected to match the state before the unset, with no
 * additional columns gaining stats.
 *
 * @throws Exception if any query, metastore call or verification step fails
 */
@Test(timeout = 80000)
public void testPartitionsWithDifferentColsExistingOnly() throws Exception {
    hiveConf.set(MetastoreConf.ConfVars.STATS_AUTO_UPDATE.getVarname(), "existing");
    StatsUpdaterThread su = createUpdater();
    IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
    try {
        // Disable automatic stats gathering so the inserts below do not produce stats.
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
        executeQuery("create table simple_stats (s string, t string, u string) partitioned by (i int)");
        executeQuery("insert into simple_stats partition(i=0) values ('test', '0', 'foo')");
        executeQuery("insert into simple_stats partition(i=1) values ('test', '1', 'bar')");
        executeQuery("insert into simple_stats partition(i=2) values ('test', '2', 'baz')");
        executeQuery("analyze table simple_stats partition(i=0) compute statistics for columns s, t");
        executeQuery("analyze table simple_stats partition(i=1) compute statistics for columns");
        executeQuery("analyze table simple_stats partition(i=2) compute statistics for columns s");
        verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s", "t"), msClient, true);
        verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("u"), msClient, false);
        verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("s", "t", "u"), msClient, true);
        verifyStatsUpToDate("simple_stats", "i=2", Lists.newArrayList("s"), msClient, true);
        verifyStatsUpToDate("simple_stats", "i=2", Lists.newArrayList("u", "t"), msClient, false);
        // We will unset s on i=0, and t on i=1. Only these should be updated; and nothing for 2.
        verifyAndUnsetColStats("simple_stats", "i=0", Lists.newArrayList("s"), msClient);
        verifyAndUnsetColStats("simple_stats", "i=1", Lists.newArrayList("t"), msClient);
        assertTrue(su.runOneIteration());
        drainWorkQueue(su, 2);
        // Exact same state as above.
        verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s", "t"), msClient, true);
        verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("u"), msClient, false);
        verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("s", "t", "u"), msClient, true);
        verifyStatsUpToDate("simple_stats", "i=2", Lists.newArrayList("s"), msClient, true);
        verifyStatsUpToDate("simple_stats", "i=2", Lists.newArrayList("u", "t"), msClient, false);
    } finally {
        // Release the metastore client even when an assertion above fails.
        msClient.close();
    }
}
Also used : HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)

Example 60 with HiveMetaStoreClient

use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.

the class TestStatsUpdaterThread method testParallelOps.

/**
 * Exercises the stats updater's work queue with the worker count set to 4. Three
 * tables are populated with stats gathering disabled (plus one partition with it
 * enabled), and the queue length is checked after successive iterations: three
 * items after the first, unchanged after the second, and four once another
 * partition without stats has been added. After draining, all tables and
 * partitions are expected to have up-to-date stats.
 *
 * @throws Exception if any query, metastore call or verification step fails
 */
@Test(timeout = 80000)
public void testParallelOps() throws Exception {
    // Set high worker count so we get a longer queue.
    hiveConf.setInt(MetastoreConf.ConfVars.STATS_AUTO_UPDATE_WORKER_COUNT.getVarname(), 4);
    StatsUpdaterThread su = createUpdater();
    IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
    try {
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
        executeQuery("create table simple_stats (s string)");
        executeQuery("create table simple_stats2 (s string) partitioned by (i int)");
        executeQuery("create table simple_stats3 (s string) partitioned by (i int)");
        executeQuery("insert into simple_stats values ('test')");
        executeQuery("insert into simple_stats2 partition(i=0) values ('test')");
        executeQuery("insert into simple_stats3 partition(i=0) values ('test')");
        // Re-enable auto gathering so that partition i=1 is inserted with stats.
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, true);
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, true);
        executeQuery("insert into simple_stats3 partition(i=1) values ('test')");
        assertTrue(su.runOneIteration());
        assertEquals(3, su.getQueueLength());
        // Nothing updated yet.
        verifyStatsUpToDate("simple_stats", Lists.newArrayList("s"), msClient, false);
        verifyPartStatsUpToDate(1, 0, msClient, "simple_stats2", false);
        verifyStatsUpToDate("simple_stats3", "i=0", Lists.newArrayList("s"), msClient, false);
        verifyStatsUpToDate("simple_stats3", "i=1", Lists.newArrayList("s"), msClient, true);
        assertFalse(su.runOneIteration());
        // Nothing new added to the queue while analyze runs.
        assertEquals(3, su.getQueueLength());
        // Add another partition without stats.
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
        executeQuery("insert into simple_stats3 partition(i=2) values ('test')");
        assertTrue(su.runOneIteration());
        // An item for new partition is queued now.
        assertEquals(4, su.getQueueLength());
        drainWorkQueue(su, 4);
        verifyStatsUpToDate("simple_stats", Lists.newArrayList("s"), msClient, true);
        verifyPartStatsUpToDate(1, 0, msClient, "simple_stats2", true);
        verifyPartStatsUpToDate(3, 0, msClient, "simple_stats3", true);
        assertFalse(su.runOneIteration());
        // Nothing else is updated after the first update.
        drainWorkQueue(su, 0);
    } finally {
        // Release the metastore client even when an assertion above fails.
        msClient.close();
    }
}
Also used : HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)

Aggregations

HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient)141 IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient)81 Test (org.junit.Test)78 Table (org.apache.hadoop.hive.metastore.api.Table)60 FileSystem (org.apache.hadoop.fs.FileSystem)57 Path (org.apache.hadoop.fs.Path)45 HiveConf (org.apache.hadoop.hive.conf.HiveConf)31 Before (org.junit.Before)23 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)18 FileStatus (org.apache.hadoop.fs.FileStatus)17 CliSessionState (org.apache.hadoop.hive.cli.CliSessionState)16 File (java.io.File)12 IOException (java.io.IOException)12 HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection)12 ArrayList (java.util.ArrayList)11 TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore)10 StreamingConnection (org.apache.hive.streaming.StreamingConnection)10 List (java.util.List)9 HashMap (java.util.HashMap)8 CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest)8