Search in sources :

Example 46 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCompactor method testCompactorGatherStats.

/**
 * Checks how major compaction interacts with column statistics: it must not create column
 * statistics for a table that has none, and it must leave already computed column statistics
 * unchanged even though newer rows exist.
 */
@Test
public void testCompactorGatherStats() throws Exception {
    String dbName = "default";
    String tableName = "stats_comp_test";
    String qualifiedName = dbName + "." + tableName;
    List<String> colNames = Arrays.asList("a");
    executeStatementOnDriver("drop table if exists " + qualifiedName, driver);
    executeStatementOnDriver("create table " + qualifiedName + " (a INT) STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
    executeStatementOnDriver("insert into " + qualifiedName + " values(1)", driver);
    executeStatementOnDriver("insert into " + qualifiedName + " values(1)", driver);
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    txnHandler.compact(new CompactionRequest(dbName, tableName, CompactionType.MAJOR));
    runWorker(conf);
    // The table has no column statistics yet, and compaction must not create them:
    // it only regenerates statistics that already exist.
    List<ColumnStatisticsObj> colStats = msClient.getTableColumnStatistics(dbName, tableName, colNames, Constants.HIVE_ENGINE);
    assertEquals("No stats should be there for the table", 0, colStats.size());
    // Compute column statistics while every row holds 1, then add a row with a larger value
    // so that the stored statistics become stale.
    executeStatementOnDriver("analyze table " + qualifiedName + " compute statistics for columns", driver);
    executeStatementOnDriver("insert into " + qualifiedName + " values(2)", driver);
    // Make sure we have old statistics for the table
    colStats = msClient.getTableColumnStatistics(dbName, tableName, colNames, Constants.HIVE_ENGINE);
    assertEquals("Stats should be there", 1, colStats.size());
    assertEquals("Value should contain old data", 1, colStats.get(0).getStatsData().getLongStats().getHighValue());
    assertEquals("Value should contain old data", 1, colStats.get(0).getStatsData().getLongStats().getLowValue());
    txnHandler.compact(new CompactionRequest(dbName, tableName, CompactionType.MAJOR));
    runWorker(conf);
    // Make sure the column statistics are NOT updated for the table (compaction triggers only a basic stats gathering),
    // so the high/low values must still be the ones computed before the insert of 2.
    colStats = msClient.getTableColumnStatistics(dbName, tableName, colNames, Constants.HIVE_ENGINE);
    assertEquals("Stats should be there", 1, colStats.size());
    assertEquals("Value should still contain old data", 1, colStats.get(0).getStatsData().getLongStats().getHighValue());
    assertEquals("Value should still contain old data", 1, colStats.get(0).getStatsData().getLongStats().getLowValue());
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)

Example 47 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCompactor method minorCompactAfterAbort.

/**
 * Requests a minor compaction on a table filled through {@code processStreamingAPI} and verifies
 * both the delta directories left on disk and the contents of the compacted delta.
 */
@Test
public void minorCompactAfterAbort() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    processStreamingAPI(dbName, tblName);
    // Now, compact
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
    runWorker(conf);
    // Find the location of the table. The client is created only for this lookup,
    // so release it as soon as the location is known instead of leaking the connection.
    String tableLocation;
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    try {
        tableLocation = msClient.getTable(dbName, tblName).getSd().getLocation();
    } finally {
        msClient.close();
    }
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] stat = fs.listStatus(new Path(tableLocation), AcidUtils.deltaFileFilter);
    String[] names = new String[stat.length];
    Path resultDelta = null;
    for (int i = 0; i < names.length; i++) {
        names[i] = stat[i].getPath().getName();
        // Remember the directory written by the compactor; its contents are checked below.
        if (names[i].equals("delta_0000001_0000004_v0000009")) {
            resultDelta = stat[i].getPath();
        }
    }
    // Listing order is not relied upon, so compare the sorted names.
    Arrays.sort(names);
    String[] expected = new String[] { "delta_0000001_0000002", "delta_0000001_0000004_v0000009", "delta_0000003_0000004" };
    if (!Arrays.deepEquals(expected, names)) {
        Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names));
    }
    CompactorTestUtil.checkExpectedTxnsPresent(null, new Path[] { resultDelta }, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, Lists.newArrayList(5, 6), 1);
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)

Example 48 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCompactor method testMinorCompactionForSplitUpdateWithInsertsAndDeletes.

/**
 * Runs a minor compaction over two inserts and one delete on a split-update ACID table and
 * verifies that one compacted delta and one compacted delete_delta are produced, each spanning
 * the whole write-id range, next to the original directories.
 */
@Test
public void testMinorCompactionForSplitUpdateWithInsertsAndDeletes() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true'," + "'transactional_properties'='default')", driver);
    // Insert some data -> this will generate only insert deltas and no delete deltas: delta_1_1
    executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(1, 'foo')", driver);
    // Insert some data -> this will again generate only insert deltas and no delete deltas: delta_2_2
    executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(2, 'bar')", driver);
    // Delete some data -> this will generate only delete deltas and no insert deltas: delete_delta_3_3
    executeStatementOnDriver("DELETE FROM " + tblName + " WHERE a = 2", driver);
    // Now, compact -> Compaction produces a single range for both delta and delete delta
    // That is, both delta and delete_deltas would be compacted into delta_1_3 and delete_delta_1_3
    // even though there are only two deltas (delta_1_1, delta_2_2) and one delete_delta_3_3.
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
    runWorker(conf);
    // Find the location of the table. The client is created only for this lookup,
    // so release it as soon as the location is known instead of leaking the connection.
    Path tableDir;
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    try {
        tableDir = new Path(msClient.getTable(dbName, tblName).getSd().getLocation());
    } finally {
        msClient.close();
    }
    FileSystem fs = FileSystem.get(conf);
    // Verify that we have got correct set of deltas.
    FileStatus[] stat = fs.listStatus(tableDir, AcidUtils.deltaFileFilter);
    String[] deltas = new String[stat.length];
    Path minorCompactedDelta = null;
    for (int i = 0; i < deltas.length; i++) {
        deltas[i] = stat[i].getPath().getName();
        // Remember the directory written by the compactor; its contents are checked below.
        if (deltas[i].equals("delta_0000001_0000003_v0000006")) {
            minorCompactedDelta = stat[i].getPath();
        }
    }
    Arrays.sort(deltas);
    String[] expectedDeltas = new String[] { "delta_0000001_0000001_0000", "delta_0000001_0000003_v0000006", "delta_0000002_0000002_0000" };
    if (!Arrays.deepEquals(expectedDeltas, deltas)) {
        Assert.fail("Expected: " + Arrays.toString(expectedDeltas) + ", found: " + Arrays.toString(deltas));
    }
    CompactorTestUtil.checkExpectedTxnsPresent(null, new Path[] { minorCompactedDelta }, columnNamesProperty, columnTypesProperty, 0, 1L, 2L, null, 1);
    // Verify that we have got correct set of delete_deltas.
    FileStatus[] deleteDeltaStat = fs.listStatus(tableDir, AcidUtils.deleteEventDeltaDirFilter);
    String[] deleteDeltas = new String[deleteDeltaStat.length];
    Path minorCompactedDeleteDelta = null;
    for (int i = 0; i < deleteDeltas.length; i++) {
        deleteDeltas[i] = deleteDeltaStat[i].getPath().getName();
        if (deleteDeltas[i].equals("delete_delta_0000001_0000003_v0000006")) {
            minorCompactedDeleteDelta = deleteDeltaStat[i].getPath();
        }
    }
    Arrays.sort(deleteDeltas);
    String[] expectedDeleteDeltas = new String[] { "delete_delta_0000001_0000003_v0000006", "delete_delta_0000003_0000003_0000" };
    if (!Arrays.deepEquals(expectedDeleteDeltas, deleteDeltas)) {
        Assert.fail("Expected: " + Arrays.toString(expectedDeleteDeltas) + ", found: " + Arrays.toString(deleteDeltas));
    }
    CompactorTestUtil.checkExpectedTxnsPresent(null, new Path[] { minorCompactedDeleteDelta }, columnNamesProperty, columnTypesProperty, 0, 2L, 2L, null, 1);
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)

Example 49 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCompactor method assertAndCompactCleanAbort.

/**
 * Drives the initiator, worker and cleaner over a table and asserts the state of
 * TXN_COMPONENTS, COMPACTION_QUEUE, the compaction history and the table directory
 * after each step.
 *
 * @param dbName database of the table under test
 * @param tblName table under test; the compaction assertions below expect it to be "cws"
 * @param partialAbort selects the expected TXN_COMPONENTS row counts and whether files may
 *                     remain under the table directory after cleaning
 * @param singleSession together with {@code partialAbort}, selects whether one TXN_COMPONENTS
 *                      row is expected to survive the cleaner
 * @throws Exception if any metastore, file system or compactor call fails
 */
private void assertAndCompactCleanAbort(String dbName, String tblName, boolean partialAbort, boolean singleSession) throws Exception {
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    // The metastore client is needed only to resolve the table location,
    // so release it right away instead of leaking the connection.
    Path tableDir;
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    try {
        tableDir = new Path(msClient.getTable(dbName, tblName).getSd().getLocation());
    } finally {
        msClient.close();
    }
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] stat = fs.listStatus(tableDir);
    if (3 != stat.length) {
        Assert.fail("Expecting three directories corresponding to three partitions, FileStatus[] stat " + Arrays.toString(stat));
    }
    int count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS where TC_OPERATION_TYPE='i'");
    // We should have two rows corresponding to the two aborted transactions,
    // or a single row when partialAbort is set
    Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from TXN_COMPONENTS"), partialAbort ? 1 : 2, count);
    runInitiator(conf);
    count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from COMPACTION_QUEUE");
    // Only one job is added to the queue per table. This job corresponds to all the entries for a particular table
    // with rows in TXN_COMPONENTS
    Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from COMPACTION_QUEUE"), 1, count);
    runWorker(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals(1, rsp.getCompacts().size());
    Assert.assertEquals(TxnStore.CLEANING_RESPONSE, rsp.getCompacts().get(0).getState());
    Assert.assertEquals("cws", rsp.getCompacts().get(0).getTablename());
    Assert.assertEquals(CompactionType.MINOR, rsp.getCompacts().get(0).getType());
    runCleaner(conf);
    // After the cleaner runs COMPACTION_QUEUE should have zero rows and TXN_COMPONENTS should have zero rows
    // (one when both singleSession and partialAbort are set); also the folders should have been deleted.
    count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS");
    Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from TXN_COMPONENTS"), (singleSession && partialAbort) ? 1 : 0, count);
    count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from COMPACTION_QUEUE");
    Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from COMPACTION_QUEUE"), 0, count);
    RemoteIterator<?> it = fs.listFiles(tableDir, true);
    if (it.hasNext() && !partialAbort) {
        // NOTE: 'stat' is the directory listing captured before the worker and cleaner ran.
        Assert.fail("Expected cleaner to drop aborted delta & base directories, FileStatus[] stat " + Arrays.toString(stat));
    }
    // The finished compaction must still be listed, now in the succeeded state.
    rsp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals(1, rsp.getCompacts().size());
    Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
    Assert.assertEquals("cws", rsp.getCompacts().get(0).getTablename());
    Assert.assertEquals(CompactionType.MINOR, rsp.getCompacts().get(0).getType());
}
Also used : Path(org.apache.hadoop.fs.Path) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) FileSystem(org.apache.hadoop.fs.FileSystem) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore)

Example 50 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCompactor method runMajorCompaction.

/**
 * Requests major compaction and executes it synchronously with a locally created {@code Worker}.
 *
 * @param dbName database of the table to compact
 * @param tblName table to compact
 * @param partNames partitions to compact one after another; when none are given a single
 *                  request without a partition name is issued for the table
 * @throws Exception if setting up the worker or submitting a request fails
 */
private void runMajorCompaction(String dbName, String tblName, String... partNames) throws Exception {
    TxnStore store = TxnUtils.getTxnStore(conf);
    Worker worker = new Worker();
    worker.setThreadId((int) worker.getId());
    worker.setConf(conf);
    worker.init(new AtomicBoolean(true));

    // No partitions requested: submit one request for the table itself.
    if (partNames.length == 0) {
        store.compact(new CompactionRequest(dbName, tblName, CompactionType.MAJOR));
        worker.run();
        return;
    }

    // Otherwise submit and process one request per partition, in the given order.
    for (String partition : partNames) {
        CompactionRequest request = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
        request.setPartitionname(partition);
        store.compact(request);
        worker.run();
    }
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TestTxnCommands2.runWorker(org.apache.hadoop.hive.ql.TestTxnCommands2.runWorker) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest)

Aggregations

TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore) 61, Test (org.junit.Test) 52, ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest) 36, ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse) 36, Path (org.apache.hadoop.fs.Path) 26, FileSystem (org.apache.hadoop.fs.FileSystem) 24, HiveConf (org.apache.hadoop.hive.conf.HiveConf) 22, FileStatus (org.apache.hadoop.fs.FileStatus) 16, IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient) 16, Table (org.apache.hadoop.hive.metastore.api.Table) 16, ArrayList (java.util.ArrayList) 15, HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient) 15, CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest) 15, ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) 14, HashMap (java.util.HashMap) 13, IOException (java.io.IOException) 12, List (java.util.List) 11, Map (java.util.Map) 11, AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean) 10, TxnUtils (org.apache.hadoop.hive.metastore.txn.TxnUtils) 10