Search in sources :

Example 11 with HiveStreamingConnection

Use of org.apache.hive.streaming.HiveStreamingConnection in the Apache Hive project (hive by apache).

From the class TestCompactor: the method testCleanAbortCompactSeveralTables.

/**
 * Verifies end-to-end cleanup of aborted streaming writes across several tables:
 * aborted transactions leave delta directories on disk and rows in TXN_COMPONENTS;
 * the initiator schedules one compaction per table; after the workers and the
 * cleaner run, both the metastore bookkeeping rows and the on-disk directories
 * for BOTH tables must be gone.
 */
@Test
public void testCleanAbortCompactSeveralTables() throws Exception {
    String dbName = "default";
    String tblName1 = "cws1";
    String tblName2 = "cws2";
    HiveStreamingConnection connection1 = prepareTableAndConnection(dbName, tblName1, 1);
    HiveStreamingConnection connection2 = prepareTableAndConnection(dbName, tblName2, 1);
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    // Close connections/client even when an assertion fails mid-test, so a failing
    // run does not leak streaming connections into subsequent tests.
    try {
        // Write to both tables and abort, leaving orphaned delta directories behind.
        connection1.beginTransaction();
        connection1.write("1,1".getBytes());
        connection1.write("2,2".getBytes());
        connection1.abortTransaction();
        connection2.beginTransaction();
        connection2.write("1,1".getBytes());
        connection2.write("2,2".getBytes());
        connection2.abortTransaction();
        FileSystem fs = FileSystem.get(conf);
        Table table1 = msClient.getTable(dbName, tblName1);
        FileStatus[] stat = fs.listStatus(new Path(table1.getSd().getLocation()));
        if (2 != stat.length) {
            Assert.fail("Expecting two directories corresponding to two partitions, FileStatus[] stat " + Arrays.toString(stat));
        }
        Table table2 = msClient.getTable(dbName, tblName2);
        stat = fs.listStatus(new Path(table2.getSd().getLocation()));
        if (2 != stat.length) {
            Assert.fail("Expecting two directories corresponding to two partitions, FileStatus[] stat " + Arrays.toString(stat));
        }
        int count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS where TC_OPERATION_TYPE='i'");
        // We should have two rows corresponding to the two aborted transactions
        Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from TXN_COMPONENTS"), 2, count);
        runInitiator(conf);
        count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from COMPACTION_QUEUE");
        // Only one job is added to the queue per table. This job corresponds to all the entries for a particular table
        // with rows in TXN_COMPONENTS
        Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from COMPACTION_QUEUE"), 2, count);
        // One worker pass per queued compaction (one per table).
        runWorker(conf);
        runWorker(conf);
        runCleaner(conf);
        // After the cleaner runs TXN_COMPONENTS and COMPACTION_QUEUE should have zero rows, also the folders should have been deleted.
        count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS");
        Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from TXN_COMPONENTS"), 0, count);
        count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from COMPACTION_QUEUE");
        Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from COMPACTION_QUEUE"), 0, count);
        // Both table locations must be empty after cleaning, not just the first one.
        RemoteIterator<?> it = fs.listFiles(new Path(table1.getSd().getLocation()), true);
        if (it.hasNext()) {
            Assert.fail("Expected cleaner to drop aborted delta & base directories under " + table1.getSd().getLocation());
        }
        it = fs.listFiles(new Path(table2.getSd().getLocation()), true);
        if (it.hasNext()) {
            Assert.fail("Expected cleaner to drop aborted delta & base directories under " + table2.getSd().getLocation());
        }
    } finally {
        msClient.close();
        connection1.close();
        connection2.close();
    }
}
Also used : HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) Path(org.apache.hadoop.fs.Path) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)

Aggregations

HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection)11 Test (org.junit.Test)11 FileSystem (org.apache.hadoop.fs.FileSystem)4 HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient)4 IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient)4 ArrayList (java.util.ArrayList)3 List (java.util.List)3 FileStatus (org.apache.hadoop.fs.FileStatus)3 Path (org.apache.hadoop.fs.Path)3 RemoteIterator (org.apache.hadoop.fs.RemoteIterator)3 Partition (org.apache.hadoop.hive.metastore.api.Partition)3 Table (org.apache.hadoop.hive.metastore.api.Table)3 TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore)3 StrictDelimitedInputWriter (org.apache.hive.streaming.StrictDelimitedInputWriter)3 Lists (com.google.common.collect.Lists)2 File (java.io.File)2 FileWriter (java.io.FileWriter)2 IOException (java.io.IOException)2 Arrays (java.util.Arrays)2 Collections (java.util.Collections)2