Example 11 with ShowCompactResponse

use of org.apache.hadoop.hive.metastore.api.ShowCompactResponse in project hive by apache.

the class TestCompactor method schemaEvolutionAddColDynamicPartitioningUpdate.

@Test
public void schemaEvolutionAddColDynamicPartitioningUpdate() throws Exception {
    String tblName = "udpct";
    List<String> colNames = Arrays.asList("a", "b");
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)" + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
    executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + "'today'), (2, 'wilma', 'yesterday')", driver);
    executeStatementOnDriver("update " + tblName + " set b = 'barney'", driver);
    // Validate the update.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    ArrayList<String> valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(2, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\tyesterday", valuesReadFromHiveDriver.get(1));
    // ALTER TABLE ... ADD COLUMNS
    executeStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(c int)", driver);
    // Validate there is an added NULL for column c.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(2, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\tNULL\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\tNULL\tyesterday", valuesReadFromHiveDriver.get(1));
    // Second INSERT round with new inserts into previously existing partition 'yesterday'.
    executeStatementOnDriver("insert into " + tblName + " partition (ds) values " + "(3, 'mark', 1900, 'soon'), (4, 'douglas', 1901, 'last_century'), " + "(5, 'doc', 1902, 'yesterday')", driver);
    // Validate the new insertions for column c.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(5, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\tNULL\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\tNULL\tyesterday", valuesReadFromHiveDriver.get(1));
    Assert.assertEquals("3\tmark\t1900\tsoon", valuesReadFromHiveDriver.get(2));
    Assert.assertEquals("4\tdouglas\t1901\tlast_century", valuesReadFromHiveDriver.get(3));
    Assert.assertEquals("5\tdoc\t1902\tyesterday", valuesReadFromHiveDriver.get(4));
    executeStatementOnDriver("update " + tblName + " set c = 2000", driver);
    // Validate the update of new column c, even in old rows.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(5, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\t2000\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\t2000\tyesterday", valuesReadFromHiveDriver.get(1));
    Assert.assertEquals("3\tmark\t2000\tsoon", valuesReadFromHiveDriver.get(2));
    Assert.assertEquals("4\tdouglas\t2000\tlast_century", valuesReadFromHiveDriver.get(3));
    Assert.assertEquals("5\tdoc\t2000\tyesterday", valuesReadFromHiveDriver.get(4));
    Initiator initiator = new Initiator();
    initiator.setThreadId((int) initiator.getId());
    // Set the threshold to 1 so the insert alone doesn't trigger compaction but the update does
    conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
    initiator.setConf(conf);
    AtomicBoolean stop = new AtomicBoolean();
    stop.set(true);
    initiator.init(stop, new AtomicBoolean());
    initiator.run();
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
    Assert.assertEquals(4, compacts.size());
    SortedSet<String> partNames = new TreeSet<String>();
    for (int i = 0; i < compacts.size(); i++) {
        Assert.assertEquals("default", compacts.get(i).getDbname());
        Assert.assertEquals(tblName, compacts.get(i).getTablename());
        Assert.assertEquals("initiated", compacts.get(i).getState());
        partNames.add(compacts.get(i).getPartitionname());
    }
    List<String> names = new ArrayList<String>(partNames);
    Assert.assertEquals("ds=last_century", names.get(0));
    Assert.assertEquals("ds=soon", names.get(1));
    Assert.assertEquals("ds=today", names.get(2));
    Assert.assertEquals("ds=yesterday", names.get(3));
    // Validate after compaction.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(5, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\t2000\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\t2000\tyesterday", valuesReadFromHiveDriver.get(1));
    Assert.assertEquals("3\tmark\t2000\tsoon", valuesReadFromHiveDriver.get(2));
    Assert.assertEquals("4\tdouglas\t2000\tlast_century", valuesReadFromHiveDriver.get(3));
    Assert.assertEquals("5\tdoc\t2000\tyesterday", valuesReadFromHiveDriver.get(4));
}
Also used : ArrayList(java.util.ArrayList) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) TreeSet(java.util.TreeSet) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) Test(org.junit.Test)
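
The partition-name check in the loop above recurs throughout these tests. Here is a minimal sketch of how it could be factored into one reusable assertion, assuming the same TxnStore setup the test already has; the helper name assertCompactionsInitiatedFor is hypothetical, not part of the Hive test suite:

import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.junit.Assert;

// Hypothetical helper: verifies that exactly the expected partitions of a table
// have compactions in the "initiated" state.
static void assertCompactionsInitiatedFor(TxnStore txnHandler, String tblName,
        String... expectedPartNames) throws Exception {
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    SortedSet<String> partNames = new TreeSet<>();
    for (ShowCompactResponseElement e : rsp.getCompacts()) {
        Assert.assertEquals("default", e.getDbname());
        Assert.assertEquals(tblName, e.getTablename());
        Assert.assertEquals("initiated", e.getState());
        partNames.add(e.getPartitionname());
    }
    // TreeSet keeps the names sorted, so a fixed expected order compares directly.
    Assert.assertArrayEquals(expectedPartNames, partNames.toArray(new String[0]));
}

With this helper, the four assertions on names above reduce to a single call: assertCompactionsInitiatedFor(txnHandler, tblName, "ds=last_century", "ds=soon", "ds=today", "ds=yesterday").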

Example 12 with ShowCompactResponse

use of org.apache.hadoop.hive.metastore.api.ShowCompactResponse in project hive by apache.

the class TestCompactor method dynamicPartitioningInsert.

@Test
public void dynamicPartitioningInsert() throws Exception {
    String tblName = "dpct";
    List<String> colNames = Arrays.asList("a", "b");
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)" + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
    executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + "'today'), (2, 'wilma', 'yesterday')", driver);
    Initiator initiator = new Initiator();
    initiator.setThreadId((int) initiator.getId());
    conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0);
    initiator.setConf(conf);
    AtomicBoolean stop = new AtomicBoolean();
    stop.set(true);
    initiator.init(stop, new AtomicBoolean());
    initiator.run();
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
    Assert.assertEquals(2, compacts.size());
    SortedSet<String> partNames = new TreeSet<String>();
    for (int i = 0; i < compacts.size(); i++) {
        Assert.assertEquals("default", compacts.get(i).getDbname());
        Assert.assertEquals(tblName, compacts.get(i).getTablename());
        Assert.assertEquals("initiated", compacts.get(i).getState());
        partNames.add(compacts.get(i).getPartitionname());
    }
    List<String> names = new ArrayList<String>(partNames);
    Assert.assertEquals("ds=today", names.get(0));
    Assert.assertEquals("ds=yesterday", names.get(1));
}
Also used : ArrayList(java.util.ArrayList) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) TreeSet(java.util.TreeSet) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) Test(org.junit.Test)
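
Both examples drive the Initiator with the same pre-set stop flag idiom: the flag is already true when init is called, so run() performs exactly one compaction-discovery pass and returns instead of looping. A minimal sketch of that idiom, using only the calls shown in the tests; the helper name runInitiatorOnce is our own, and the import path for Initiator (org.apache.hadoop.hive.ql.txn.compactor) is an assumption:

import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.txn.compactor.Initiator;

// Hypothetical helper: runs a single Initiator pass synchronously on the calling thread.
static void runInitiatorOnce(HiveConf conf) throws Exception {
    Initiator initiator = new Initiator();
    initiator.setThreadId((int) initiator.getId());
    initiator.setConf(conf);
    // Setting the stop flag before run() makes the compactor exit after one pass.
    AtomicBoolean stop = new AtomicBoolean(true);
    initiator.init(stop, new AtomicBoolean());
    initiator.run();
}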

Example 13 with ShowCompactResponse

use of org.apache.hadoop.hive.metastore.api.ShowCompactResponse in project hive by apache.

the class TestTxnHandler method testCompactWhenAlreadyCompacting.

/**
 * Once a Compaction for a given resource is scheduled/working, we should not
 * schedule another one to prevent concurrent compactions for the same resource.
 * @throws Exception
 */
@Test
public void testCompactWhenAlreadyCompacting() throws Exception {
    CompactionRequest rqst = new CompactionRequest("foo", "bar", CompactionType.MAJOR);
    rqst.setPartitionname("ds=today");
    CompactionResponse resp = txnHandler.compact(rqst);
    Assert.assertEquals(resp, new CompactionResponse(1, TxnStore.INITIATED_RESPONSE, true));
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
    assertEquals(1, compacts.size());
    rqst.setType(CompactionType.MINOR);
    resp = txnHandler.compact(rqst);
    Assert.assertEquals(resp, new CompactionResponse(1, TxnStore.INITIATED_RESPONSE, false));
    rsp = txnHandler.showCompact(new ShowCompactRequest());
    compacts = rsp.getCompacts();
    assertEquals(1, compacts.size());
    ShowCompactResponseElement c = compacts.get(0);
    assertEquals("foo", c.getDbname());
    assertEquals("bar", c.getTablename());
    assertEquals("ds=today", c.getPartitionname());
    assertEquals(CompactionType.MAJOR, c.getType());
    assertEquals("initiated", c.getState());
    assertEquals(0L, c.getStart());
}
Also used : ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) CompactionResponse(org.apache.hadoop.hive.metastore.api.CompactionResponse) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) Test(org.junit.Test)
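
The key behavior here is the third CompactionResponse field: the metastore reports whether the request was actually enqueued or folded into the already-scheduled compaction. A minimal sketch of a caller branching on that flag, assuming the thrift-generated isAccepted()/getId()/getState() accessors implied by the constructor used above; requestOrSkip is a hypothetical name:

import org.apache.hadoop.hive.metastore.api.CompactionRequest;
import org.apache.hadoop.hive.metastore.api.CompactionResponse;
import org.apache.hadoop.hive.metastore.api.CompactionType;
import org.apache.hadoop.hive.metastore.txn.TxnStore;

// Hypothetical helper: requests a major compaction and reports when the metastore
// declines because one is already scheduled or running for the same resource.
static void requestOrSkip(TxnStore txnHandler, String db, String table, String partition)
        throws Exception {
    CompactionRequest rqst = new CompactionRequest(db, table, CompactionType.MAJOR);
    rqst.setPartitionname(partition);
    CompactionResponse resp = txnHandler.compact(rqst);
    if (!resp.isAccepted()) {
        // An equivalent compaction already exists; resp carries its id and state.
        System.out.println("Skipped: compaction " + resp.getId() + " already " + resp.getState());
    }
}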

Example 14 with ShowCompactResponse

use of org.apache.hadoop.hive.metastore.api.ShowCompactResponse in project hive by apache.

the class TestTxnCommands2 method testOriginalFileReaderWhenNonAcidConvertedToAcid.

@Test
public void testOriginalFileReaderWhenNonAcidConvertedToAcid() throws Exception {
    // 1. Insert five rows to Non-ACID table.
    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2),(3,4),(5,6),(7,8),(9,10)");
    // 2. Convert NONACIDORCTBL to ACID table.  //todo: remove trans_prop after HIVE-17089
    runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')");
    runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b = b*2 where b in (4,10)");
    runStatementOnDriver("delete from " + Table.NONACIDORCTBL + " where a = 7");
    List<String> rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL + " order by a,b");
    int[][] resultData = new int[][] { { 1, 2 }, { 3, 8 }, { 5, 6 }, { 9, 20 } };
    Assert.assertEquals(stringifyValues(resultData), rs);
    // 3. Perform a major compaction.
    runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " compact 'MAJOR'");
    runWorker(hiveConf);
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
    // 4. Perform a delete.
    runStatementOnDriver("delete from " + Table.NONACIDORCTBL + " where a = 1");
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL + " order by a,b");
    resultData = new int[][] { { 3, 8 }, { 5, 6 }, { 9, 20 } };
    Assert.assertEquals(stringifyValues(resultData), rs);
}
Also used : ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Test(org.junit.Test)
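
The post-compaction assertions above (history size, CLEANING_RESPONSE state, a local Hadoop job id) are how these tests confirm the Worker actually ran. A minimal sketch bundling them, assuming a single-entry compaction history as in this test; assertWorkerRan is a hypothetical name:

import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.junit.Assert;

// Hypothetical helper: checks that exactly one compaction ran and is awaiting cleanup.
static void assertWorkerRan(TxnStore txnHandler) throws Exception {
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    ShowCompactResponseElement e = resp.getCompacts().get(0);
    // After the Worker finishes, the entry moves to the "ready for cleaning" state.
    Assert.assertEquals(TxnStore.CLEANING_RESPONSE, e.getState());
    // These tests run MapReduce locally, so job ids carry the "job_local" prefix.
    Assert.assertTrue(e.getHadoopJobId().startsWith("job_local"));
}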

Example 15 with ShowCompactResponse

use of org.apache.hadoop.hive.metastore.api.ShowCompactResponse in project hive by apache.

the class TestTxnNoBuckets method testCompactStatsGather.

/**
 * HIVE-17900
 */
@Test
public void testCompactStatsGather() throws Exception {
    hiveConf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
    runStatementOnDriver("drop table if exists T");
    runStatementOnDriver("create table T(a int, b int) partitioned by (p int, q int) " + "stored as orc TBLPROPERTIES ('transactional'='true')");
    int[][] targetVals = { { 4, 1, 1 }, { 4, 2, 2 }, { 4, 3, 1 }, { 4, 4, 2 } };
    // we only recompute stats after major compaction if they existed before
    runStatementOnDriver("insert into T partition(p=1,q) " + makeValuesClause(targetVals));
    runStatementOnDriver("analyze table T  partition(p=1) compute statistics for columns");
    IMetaStoreClient hms = Hive.get().getMSC();
    List<String> partNames = new ArrayList<>();
    partNames.add("p=1/q=2");
    List<String> colNames = new ArrayList<>();
    colNames.add("a");
    Map<String, List<ColumnStatisticsObj>> map = hms.getPartitionColumnStatistics("default", "T", partNames, colNames);
    Assert.assertEquals(4, map.get(partNames.get(0)).get(0).getStatsData().getLongStats().getHighValue());
    int[][] targetVals2 = { { 5, 1, 1 }, { 5, 2, 2 }, { 5, 3, 1 }, { 5, 4, 2 } };
    runStatementOnDriver("insert into T partition(p=1,q) " + makeValuesClause(targetVals2));
    String query = "select ROW__ID, p, q, a, b, INPUT__FILE__NAME from T order by p, q, a, b";
    List<String> rs = runStatementOnDriver(query);
    String[][] expected = {
        { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000" },
        { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000" },
        { "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000" },
        { "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000" },
        { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/delta_0000001_0000001_0000/bucket_00000" },
        { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/delta_0000001_0000001_0000/bucket_00000" },
        { "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/delta_0000002_0000002_0000/bucket_00000" },
        { "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/delta_0000002_0000002_0000/bucket_00000" } };
    checkExpected(rs, expected, "insert data");
    // run major compaction
    runStatementOnDriver("alter table T partition(p=1,q=2) compact 'major'");
    TestTxnCommands2.runWorker(hiveConf);
    query = "select ROW__ID, p, q, a, b, INPUT__FILE__NAME from T order by p, q, a, b";
    rs = runStatementOnDriver(query);
    String[][] expected2 = {
        { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000" },
        { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000" },
        { "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000" },
        { "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000" },
        { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/base_0000002/bucket_00000" },
        { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/base_0000002/bucket_00000" },
        { "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/base_0000002/bucket_00000" },
        { "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/base_0000002/bucket_00000" } };
    checkExpected(rs, expected2, "after major compaction");
    // check status of compaction job
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
    // now check that stats were updated
    map = hms.getPartitionColumnStatistics("default", "T", partNames, colNames);
    Assert.assertEquals("", 5, map.get(partNames.get(0)).get(0).getStatsData().getLongStats().getHighValue());
}
Also used : ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) ArrayList(java.util.ArrayList) List(java.util.List) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Test(org.junit.Test)
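
The stats lookup at the start and end of this test is the same three-step drill: build the partition and column name lists, call getPartitionColumnStatistics, and drill into the long-stats high value. A minimal sketch under the assumption that the column is integral (so getLongStats() applies); readHighValue is a hypothetical name:

import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;

// Hypothetical helper: fetches the high-value statistic of one integral column
// in one partition, e.g. readHighValue(hms, "default", "T", "p=1/q=2", "a").
static long readHighValue(IMetaStoreClient hms, String db, String table,
        String partName, String colName) throws Exception {
    Map<String, List<ColumnStatisticsObj>> map = hms.getPartitionColumnStatistics(
            db, table, Collections.singletonList(partName), Collections.singletonList(colName));
    // getLongStats() assumes the column statistics are of the long/integral kind.
    return map.get(partName).get(0).getStatsData().getLongStats().getHighValue();
}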

Aggregations

ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse): 78 uses
ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest): 75 uses
Test (org.junit.Test): 70 uses
ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement): 54 uses
Table (org.apache.hadoop.hive.metastore.api.Table): 48 uses
CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest): 41 uses
ArrayList (java.util.ArrayList): 29 uses
Path (org.apache.hadoop.fs.Path): 22 uses
LockComponent (org.apache.hadoop.hive.metastore.api.LockComponent): 22 uses
LockRequest (org.apache.hadoop.hive.metastore.api.LockRequest): 22 uses
LockResponse (org.apache.hadoop.hive.metastore.api.LockResponse): 22 uses
Partition (org.apache.hadoop.hive.metastore.api.Partition): 20 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 16 uses
TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore): 15 uses
FileStatus (org.apache.hadoop.fs.FileStatus): 14 uses
CompactionInfo (org.apache.hadoop.hive.metastore.txn.CompactionInfo): 13 uses
CommitTxnRequest (org.apache.hadoop.hive.metastore.api.CommitTxnRequest): 12 uses
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 10 uses
AbortTxnRequest (org.apache.hadoop.hive.metastore.api.AbortTxnRequest): 6 uses
HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint): 6 uses