Search in sources :

Example 1 with CompactorMR

Use of org.apache.hadoop.hive.ql.txn.compactor.CompactorMR in the Apache Hive project.

From the class TestTxnCommands3, method testMinorCompactionAbortLeftoverFiles.

/**
 * Requests a minor compaction on an ACID table, forces the {@link CompactorMR} run to throw
 * after its real implementation has executed, and then checks that the compaction is
 * recorded as failed, that the single remaining open-txn entry is flagged aborted and holds
 * no locks, and that the table directory lists only the expected delta directories.
 */
@Test
public void testMinorCompactionAbortLeftoverFiles() throws Exception {
    MetastoreConf.setBoolVar(hiveConf, MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID, true);
    dropTable(new String[] { "T" });

    // T1, T2, ... used in the comments below are logical labels; real txn ids will differ
    runStatementOnDriver("create table T (a int, b int) stored as orc");
    // T1 -> delta_1_1
    runStatementOnDriver("insert into T values(0,2)");
    // T2 -> delta_2_2
    runStatementOnDriver("insert into T values(1,4)");
    // T3 -> delta_3_3 plus delete_delta_3_3
    runStatementOnDriver("update T set a=3 where b=2");
    runStatementOnDriver("alter table T compact 'minor'");

    // Let the real CompactorMR.run() execute, then throw so that the compactor txn gets aborted
    Answer<Void> runThenFail = invocation -> {
        invocation.callRealMethod();
        throw new RuntimeException("Will cause CompactorMR to fail all opening txn and creating directories for compaction.");
    };
    CompactorMR failingCompactor = Mockito.spy(new CompactorMR());
    Mockito.doAnswer(runThenFail)
        .when(failingCompactor)
        .run(any(), any(), any(), any(), any(), any(), any(), any(), any());

    // Worker spy that hands out the failing compactor
    Worker compactionWorker = Mockito.spy(new Worker());
    compactionWorker.setConf(hiveConf);
    compactionWorker.init(new AtomicBoolean(true));
    Mockito.doReturn(failingCompactor).when(compactionWorker).getMrCompactor();
    compactionWorker.run();

    // Exactly one compaction in history, and it must be in the failed state
    TxnStore store = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse history = store.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, history.getCompactsSize());
    Assert.assertEquals("Unexpected 0th compaction state", TxnStore.FAILED_RESPONSE,
        history.getCompacts().get(0).getState());

    // The only entry in the open-txn list must be marked aborted (the compactor txn)
    GetOpenTxnsResponse openTxns = store.getOpenTxns();
    Assert.assertEquals(openTxns.toString(), 1, openTxns.getOpen_txnsSize());
    BitSet abortedFlags = BitSet.valueOf(openTxns.getAbortedBits());
    Assert.assertTrue(openTxns.toString(), abortedFlags.get(0));

    // ... and it must not have left any rows in hive_locks
    String lockCountQuery = "SELECT count(*) FROM hive_locks WHERE hl_txnid=" + openTxns.getOpen_txns().get(0);
    Assert.assertEquals(0, TestTxnDbUtil.countQueryAgent(hiveConf, lockCountQuery));

    // Only the directories written by the two inserts and the update are expected under t/
    FileSystem fileSystem = FileSystem.get(hiveConf);
    Path warehouseRoot = new Path(getWarehouseDir());
    Path tableDir = new Path(warehouseRoot + "/t");
    FileStatus[] found = fileSystem.listStatus(tableDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
    String[] expected = {
        "/t/delta_0000001_0000001_0000",
        "/t/delta_0000002_0000002_0000",
        "/t/delete_delta_0000003_0000003_0000",
        "/t/delta_0000003_0000003_0000"
    };
    checkExpectedFiles(found, expected, warehouseRoot.toString());

    // Cleaner removes metadata about the aborted txn from txn_components, and files if any
    runCleaner(hiveConf);
}
Also used : HMSMetricsListener(org.apache.hadoop.hive.metastore.HMSMetricsListener) TestTxnDbUtil(org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil) FileSystem(org.apache.hadoop.fs.FileSystem) LoggerFactory(org.slf4j.LoggerFactory) TestVectorizedOrcAcidRowBatchReader(org.apache.hadoop.hive.ql.io.orc.TestVectorizedOrcAcidRowBatchReader) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FileStatus(org.apache.hadoop.fs.FileStatus) HashSet(java.util.HashSet) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) Answer(org.mockito.stubbing.Answer) GetOpenTxnsResponse(org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse) Path(org.apache.hadoop.fs.Path) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) TxnManagerFactory(org.apache.hadoop.hive.ql.lockmgr.TxnManagerFactory) TestDbTxnManager2.swapTxnManager(org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.swapTxnManager) Logger(org.slf4j.Logger) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Set(java.util.Set) CompactorMR(org.apache.hadoop.hive.ql.txn.compactor.CompactorMR) Test(org.junit.Test) Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker) File(java.io.File) Matchers.any(org.mockito.Matchers.any) TxnUtils(org.apache.hadoop.hive.metastore.txn.TxnUtils) Mockito(org.mockito.Mockito) List(java.util.List) MetastoreConf(org.apache.hadoop.hive.metastore.conf.MetastoreConf) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) FileUtils(org.apache.hadoop.hive.common.FileUtils) BitSet(java.util.BitSet) Assert(org.junit.Assert) Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) CompactorMR(org.apache.hadoop.hive.ql.txn.compactor.CompactorMR) 
FileSystem(org.apache.hadoop.fs.FileSystem) Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker) GetOpenTxnsResponse(org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Test(org.junit.Test)

Example 2 with CompactorMR

Use of org.apache.hadoop.hive.ql.txn.compactor.CompactorMR in the Apache Hive project.

From the class TestTxnCommands3, method testMajorCompactionAbortLeftoverFiles.

/**
 * Requests a major compaction on an ACID table, forces the {@link CompactorMR} run to throw
 * after its real implementation has executed, and then checks that the compaction is
 * recorded as failed, that the single remaining open-txn entry is flagged aborted and holds
 * no locks, and that the table directory lists only the two original insert deltas.
 *
 * <p>The compaction type requested here is {@code 'major'} so that this test exercises the
 * major-compaction path named in the method, rather than repeating the minor one.
 */
@Test
public void testMajorCompactionAbortLeftoverFiles() throws Exception {
    MetastoreConf.setBoolVar(hiveConf, MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID, true);
    dropTable(new String[] { "T" });
    // note: transaction names T1, T2, etc below, are logical, the actual txnid will be different
    runStatementOnDriver("create table T (a int, b int) stored as orc");
    // makes delta_1_1 in T1
    runStatementOnDriver("insert into T values(0,2)");
    // makes delta_2_2 in T2
    runStatementOnDriver("insert into T values(1,4)");
    // request a MAJOR compaction, matching the scenario this test is named for
    runStatementOnDriver("alter table T compact 'major'");
    // create failed compaction attempt so that compactor txn is aborted:
    // the spy runs the real CompactorMR.run() first and only then throws
    CompactorMR compactorMr = Mockito.spy(new CompactorMR());
    Mockito.doAnswer((Answer<Void>) invocationOnMock -> {
        invocationOnMock.callRealMethod();
        throw new RuntimeException("Will cause CompactorMR to fail all opening txn and creating directories for compaction.");
    }).when(compactorMr).run(any(), any(), any(), any(), any(), any(), any(), any(), any());
    // Worker spy that hands out the failing compactor
    Worker worker = Mockito.spy(new Worker());
    worker.setConf(hiveConf);
    worker.init(new AtomicBoolean(true));
    Mockito.doReturn(compactorMr).when(worker).getMrCompactor();
    worker.run();
    // exactly one compaction in history, and it must be in the failed state
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0th compaction state", TxnStore.FAILED_RESPONSE, resp.getCompacts().get(0).getState());
    GetOpenTxnsResponse openResp = txnHandler.getOpenTxns();
    Assert.assertEquals(openResp.toString(), 1, openResp.getOpen_txnsSize());
    // check that the compactor txn is aborted
    Assert.assertTrue(openResp.toString(), BitSet.valueOf(openResp.getAbortedBits()).get(0));
    // the aborted txn must not have left any rows in hive_locks
    Assert.assertEquals(0, TestTxnDbUtil.countQueryAgent(hiveConf, "SELECT count(*) FROM hive_locks WHERE hl_txnid=" + openResp.getOpen_txns().get(0)));
    FileSystem fs = FileSystem.get(hiveConf);
    Path warehousePath = new Path(getWarehouseDir());
    FileStatus[] actualList = fs.listStatus(new Path(warehousePath + "/t"), FileUtils.HIDDEN_FILES_PATH_FILTER);
    // we expect all the t/base_* files to be removed by the compactor failure
    String[] expectedList = new String[] { "/t/delta_0000001_0000001_0000", "/t/delta_0000002_0000002_0000" };
    checkExpectedFiles(actualList, expectedList, warehousePath.toString());
    // delete metadata about aborted txn from txn_components and files (if any)
    runCleaner(hiveConf);
}
Also used : HMSMetricsListener(org.apache.hadoop.hive.metastore.HMSMetricsListener) TestTxnDbUtil(org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil) FileSystem(org.apache.hadoop.fs.FileSystem) LoggerFactory(org.slf4j.LoggerFactory) TestVectorizedOrcAcidRowBatchReader(org.apache.hadoop.hive.ql.io.orc.TestVectorizedOrcAcidRowBatchReader) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FileStatus(org.apache.hadoop.fs.FileStatus) HashSet(java.util.HashSet) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) Answer(org.mockito.stubbing.Answer) GetOpenTxnsResponse(org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse) Path(org.apache.hadoop.fs.Path) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) TxnManagerFactory(org.apache.hadoop.hive.ql.lockmgr.TxnManagerFactory) TestDbTxnManager2.swapTxnManager(org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.swapTxnManager) Logger(org.slf4j.Logger) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Set(java.util.Set) CompactorMR(org.apache.hadoop.hive.ql.txn.compactor.CompactorMR) Test(org.junit.Test) Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker) File(java.io.File) Matchers.any(org.mockito.Matchers.any) TxnUtils(org.apache.hadoop.hive.metastore.txn.TxnUtils) Mockito(org.mockito.Mockito) List(java.util.List) MetastoreConf(org.apache.hadoop.hive.metastore.conf.MetastoreConf) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) FileUtils(org.apache.hadoop.hive.common.FileUtils) BitSet(java.util.BitSet) Assert(org.junit.Assert) Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) CompactorMR(org.apache.hadoop.hive.ql.txn.compactor.CompactorMR) 
FileSystem(org.apache.hadoop.fs.FileSystem) Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker) GetOpenTxnsResponse(org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Test(org.junit.Test)

Example 3 with CompactorMR

Use of org.apache.hadoop.hive.ql.txn.compactor.CompactorMR in the Apache Hive project.

From the class TestTxnCommands2, method execDDLOpAndCompactionConcurrently.

/**
 * Starts a major compaction on a background thread (with the job's mapper class replaced by
 * {@code SlowCompactorMap}) and, after a one-second pause, runs the DDL operation selected
 * by {@code opType} against the same table. Once the compaction thread finishes, checks the
 * compaction history size and state, that no transactions are open, and that the table (or
 * partition) directory contains no visible files.
 *
 * @param opType      one of {@code "DROP_TABLE"}, {@code "TRUNCATE_TABLE"},
 *                    {@code "DROP_PARTITION"} or {@code "TRUNCATE_PARTITION"}; any other
 *                    value runs no DDL statement
 * @param isPartioned whether the table is created with a partition column {@code p} and the
 *                    data/compaction target the partition {@code p=test}
 * @throws Exception if any statement, the compaction, or a file-system call fails
 */
private void execDDLOpAndCompactionConcurrently(String opType, boolean isPartioned) throws Exception {
    final String tblName = "hive12352";
    final String partName = "test";
    // SQL fragments that are only present for the partitioned variant
    final String partitionedByClause = isPartioned ? "partitioned by (p STRING)" : "";
    final String insertPartitionSpec = isPartioned ? " PARTITION (p='" + partName + "')" : "";

    runStatementOnDriver("DROP TABLE if exists " + tblName);
    runStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING)" + partitionedByClause
        + " STORED AS ORC  TBLPROPERTIES ( 'transactional'='true' )");
    // seed the table with a few rows and one update
    runStatementOnDriver("INSERT INTO " + tblName + insertPartitionSpec + " VALUES (1, 'foo'),(2, 'bar'),(3, 'baz')");
    runStatementOnDriver("UPDATE " + tblName + " SET b = 'blah' WHERE a = 3");

    // enqueue a major compaction for the table (or its single partition)
    CompactionRequest compactionRequest = new CompactionRequest("default", tblName, CompactionType.MAJOR);
    if (isPartioned) {
        compactionRequest.setPartitionname("p=" + partName);
    }
    txnHandler.compact(compactionRequest);

    // build the real job conf, then swap in SlowCompactorMap as the mapper
    Answer<JobConf> useSlowMapper = invocation -> {
        JobConf conf = (JobConf) invocation.callRealMethod();
        conf.setMapperClass(SlowCompactorMap.class);
        return conf;
    };
    CompactorMR slowCompactor = Mockito.spy(new CompactorMR());
    Mockito.doAnswer(useSlowMapper)
        .when(slowCompactor)
        .createBaseJobConf(any(), any(), any(), any(), any(), any());

    Worker compactionWorker = Mockito.spy(new Worker());
    compactionWorker.setConf(hiveConf);
    compactionWorker.init(new AtomicBoolean(true));
    Mockito.doReturn(slowCompactor).when(compactionWorker).getMrCompactor();

    // run the worker in the background, wait a second, then issue the DDL
    CompletableFuture<Void> backgroundCompaction = CompletableFuture.runAsync(compactionWorker);
    Thread.sleep(1000);

    // number of compaction history entries expected after the DDL has run
    int expectedHistory = 0;
    switch (opType) {
        case "DROP_TABLE":
            runStatementOnDriver("DROP TABLE " + tblName);
            runStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) "
                + " STORED AS ORC  TBLPROPERTIES ( 'transactional'='true' )");
            break;
        case "TRUNCATE_TABLE":
            runStatementOnDriver("TRUNCATE TABLE " + tblName);
            expectedHistory = 1;
            break;
        case "DROP_PARTITION":
            runStatementOnDriver("ALTER TABLE " + tblName + " DROP PARTITION (p='" + partName + "')");
            runStatementOnDriver("ALTER TABLE " + tblName + " ADD PARTITION (p='" + partName + "')");
            break;
        case "TRUNCATE_PARTITION":
            runStatementOnDriver("TRUNCATE TABLE " + tblName + " PARTITION (p='" + partName + "')");
            expectedHistory = 1;
            break;
    }
    backgroundCompaction.join();

    ShowCompactResponse history = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", expectedHistory, history.getCompactsSize());
    if (expectedHistory != 0) {
        Assert.assertEquals("Unexpected 0th compaction state", TxnStore.CLEANING_RESPONSE,
            history.getCompacts().get(0).getState());
    }

    GetOpenTxnsResponse openTxns = txnHandler.getOpenTxns();
    Assert.assertEquals(openTxns.toString(), 0, openTxns.getOpen_txnsSize());

    // the table (or partition) directory must contain no visible files
    FileSystem fileSystem = FileSystem.get(hiveConf);
    String dataDir = getWarehouseDir() + "/" + tblName + (isPartioned ? "/p=" + partName : "");
    FileStatus[] remaining = fileSystem.listStatus(new Path(dataDir), FileUtils.HIDDEN_FILES_PATH_FILTER);
    Assert.assertEquals(0, remaining.length);
}
Also used : CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Arrays(java.util.Arrays) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) TestTxnDbUtil(org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil) FileSystem(org.apache.hadoop.fs.FileSystem) LoggerFactory(org.slf4j.LoggerFactory) OpenTxnsResponse(org.apache.hadoop.hive.metastore.api.OpenTxnsResponse) Writable(org.apache.hadoop.io.Writable) FileStatus(org.apache.hadoop.fs.FileStatus) CompactionType(org.apache.hadoop.hive.metastore.api.CompactionType) BucketCodec(org.apache.hadoop.hive.ql.io.BucketCodec) CommitTxnRequest(org.apache.hadoop.hive.metastore.api.CommitTxnRequest) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) Map(java.util.Map) GetOpenTxnsResponse(org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse) Path(org.apache.hadoop.fs.Path) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) OpenTxnRequest(org.apache.hadoop.hive.metastore.api.OpenTxnRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Set(java.util.Set) CompactorMR(org.apache.hadoop.hive.ql.txn.compactor.CompactorMR) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) List(java.util.List) MetastoreConf(org.apache.hadoop.hive.metastore.conf.MetastoreConf) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) CommandProcessorException(org.apache.hadoop.hive.ql.processors.CommandProcessorException) FileUtils(org.apache.hadoop.hive.common.FileUtils) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) OrcFile(org.apache.orc.OrcFile) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) AcidOpenTxnsCounterService(org.apache.hadoop.hive.metastore.txn.AcidOpenTxnsCounterService) 
Reader(org.apache.orc.Reader) Answer(org.mockito.stubbing.Answer) AcidHouseKeeperService(org.apache.hadoop.hive.metastore.txn.AcidHouseKeeperService) ExpectedException(org.junit.rules.ExpectedException) TxnManagerFactory(org.apache.hadoop.hive.ql.lockmgr.TxnManagerFactory) Logger(org.slf4j.Logger) HiveConf(org.apache.hadoop.hive.conf.HiveConf) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) TypeDescription(org.apache.orc.TypeDescription) IOException(java.io.IOException) Test(org.junit.Test) Field(java.lang.reflect.Field) Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) JobConf(org.apache.hadoop.mapred.JobConf) Mockito(org.mockito.Mockito) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) Rule(org.junit.Rule) Ignore(org.junit.Ignore) Assert(org.junit.Assert) MetastoreTaskThread(org.apache.hadoop.hive.metastore.MetastoreTaskThread) Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) CompactorMR(org.apache.hadoop.hive.ql.txn.compactor.CompactorMR) FileSystem(org.apache.hadoop.fs.FileSystem) Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker) GetOpenTxnsResponse(org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) JobConf(org.apache.hadoop.mapred.JobConf)

Aggregations

File (java.io.File)3 HashSet (java.util.HashSet)3 List (java.util.List)3 Set (java.util.Set)3 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)3 FileStatus (org.apache.hadoop.fs.FileStatus)3 FileSystem (org.apache.hadoop.fs.FileSystem)3 Path (org.apache.hadoop.fs.Path)3 FileUtils (org.apache.hadoop.hive.common.FileUtils)3 HiveConf (org.apache.hadoop.hive.conf.HiveConf)3 GetOpenTxnsResponse (org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse)3 ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest)3 ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse)3 MetastoreConf (org.apache.hadoop.hive.metastore.conf.MetastoreConf)3 TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore)3 TestTxnDbUtil (org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil)3 HiveTxnManager (org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager)3 TxnManagerFactory (org.apache.hadoop.hive.ql.lockmgr.TxnManagerFactory)3 BitSet (java.util.BitSet)2 HMSMetricsListener (org.apache.hadoop.hive.metastore.HMSMetricsListener)2