Example 41 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCrudCompactorOnTez method testMinorCompactionShouldBeRefusedOnTablesWithRawData.

@Test
public void testMinorCompactionShouldBeRefusedOnTablesWithRawData() throws Exception {
    conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true);
    // Set the delta number threshold to 2 to avoid skipping compaction because of too few deltas
    conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 2);
    // Set the delta percentage threshold high so major compaction is not selected based on that ratio
    conf.setFloatVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_PCT_THRESHOLD, 1000f);
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    final String dbName = "default";
    final String origTableName = "compaction_test";
    final String testTableName = "imported";
    executeStatementOnDriver("drop table if exists " + origTableName, driver);
    executeStatementOnDriver("drop table if exists " + testTableName, driver);
    executeStatementOnDriver("CREATE TABLE " + origTableName + "(id string, value string) CLUSTERED BY(id) " + "INTO 10 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true')", driver);
    executeStatementOnDriver("INSERT INTO TABLE " + origTableName + " values ('1','one'),('2','two'),('3','three')," + "('4','four'),('5','five'),('6','six'),('7','seven'),('8','eight'),('9','nine'),('10','ten')," + "('11','eleven'),('12','twelve'),('13','thirteen'),('14','fourteen'),('15','fifteen'),('16','sixteen')," + "('17','seventeen'),('18','eighteen'),('19','nineteen'),('20','twenty')", driver);
    execSelectAndDumpData("select * from " + origTableName, driver, "Dumping data for " + origTableName + " after load:");
    executeStatementOnDriver("export table " + origTableName + " to '/tmp/temp_acid'", driver);
    executeStatementOnDriver("import table " + testTableName + " from '/tmp/temp_acid'", driver);
    executeStatementOnDriver("insert into " + testTableName + " values ('21', 'value21'),('84', 'value84')," + "('66', 'value66'),('54', 'value54')", driver);
    executeStatementOnDriver("insert into " + testTableName + " values ('22', 'value22'),('34', 'value34')," + "('35', 'value35')", driver);
    executeStatementOnDriver("insert into " + testTableName + " values ('75', 'value75'),('99', 'value99')", driver);
    // Prevent initiator from submitting the compaction requests
    TxnStore mockedHandler = spy(txnHandler);
    doThrow(new RuntimeException("")).when(mockedHandler).compact(nullable(CompactionRequest.class));
    Initiator initiator = new Initiator();
    initiator.setConf(conf);
    initiator.init(new AtomicBoolean(true));
    FieldSetter.setField(initiator, MetaStoreCompactorThread.class.getDeclaredField("txnHandler"), mockedHandler);
    // Run initiator and capture compaction requests
    initiator.run();
    // Check the captured compaction requests: the request for the table should be of type MAJOR
    ArgumentCaptor<CompactionRequest> requests = ArgumentCaptor.forClass(CompactionRequest.class);
    verify(mockedHandler).compact(requests.capture());
    Assert.assertTrue(requests.getAllValues().stream().anyMatch(r -> r.getTablename().equals(testTableName) && r.getType().equals(CompactionType.MAJOR)));
    // Try to do a minor compaction directly
    CompactionRequest rqst = new CompactionRequest(dbName, testTableName, CompactionType.MINOR);
    txnHandler.compact(rqst);
    runWorker(conf);
    // Check that both compactions failed with the expected error messages
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
    if (2 != compacts.size()) {
        Assert.fail("Expecting 2 rows and found " + compacts.size() + " files " + compacts);
    }
    Assert.assertEquals("did not initiate", compacts.get(0).getState());
    Assert.assertTrue(compacts.get(0).getErrorMessage().startsWith("Caught exception while trying to determine if we should compact"));
    Assert.assertEquals("refused", compacts.get(1).getState());
    Assert.assertTrue(compacts.get(1).getErrorMessage().startsWith("Query based Minor compaction is not possible for full acid tables having raw format (non-acid) data in them."));
}
Also used : HiveConf(org.apache.hadoop.hive.conf.HiveConf) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) TxnUtils(org.apache.hadoop.hive.metastore.txn.TxnUtils) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) CompactionType(org.apache.hadoop.hive.metastore.api.CompactionType) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) List(java.util.List) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ArgumentCaptor(org.mockito.ArgumentCaptor) FieldSetter(org.mockito.internal.util.reflection.FieldSetter) Mockito(org.mockito.Mockito) Assert(org.junit.Assert) Test(org.junit.Test) TestCompactor.executeStatementOnDriver(org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver) TestCompactor.execSelectAndDumpData(org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.execSelectAndDumpData) TxnCommandsBaseForTests.runWorker(org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.runWorker)
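
The interesting move in this test is the interception of the Initiator: a Mockito spy of the real TxnStore is injected by reflection, and compact() is stubbed to throw, so requests are captured instead of queued. A stripped-down sketch of that capture pattern on a plain interface (the CompactionQueue type and its submit() method are illustrative stand-ins, not Hive APIs):

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.*;

import org.mockito.ArgumentCaptor;

public class CaptureSketch {
    interface CompactionQueue {
        void submit(String request);
    }

    public static void main(String[] args) {
        // The mock records calls; the stubbed throw prevents any real side effect
        CompactionQueue mocked = mock(CompactionQueue.class);
        doThrow(new RuntimeException("blocked")).when(mocked).submit(any());
        try {
            mocked.submit("default.imported -> MAJOR");
        } catch (RuntimeException expected) {
            // Swallowed here, just as the Initiator survives the failing compact() call
        }
        // Capture the submitted argument, as the test captures CompactionRequests
        ArgumentCaptor<String> captor = ArgumentCaptor.forClass(String.class);
        verify(mocked).submit(captor.capture());
        System.out.println(captor.getValue());
    }
}

Mockito records the invocation before the stubbed exception is thrown, which is why verify() and the captor still see the call.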

Example 42 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestMaterializedViewRebuild method testWhenCompactInsertOnlySourceTableThenIncrementalMVRebuildIsNotAvailable.

@Test
public void testWhenCompactInsertOnlySourceTableThenIncrementalMVRebuildIsNotAvailable() throws Exception {
    createTestSchema(", 'transactional_properties'='insert_only'");
    executeStatementOnDriver("insert into " + TABLE1 + "(a,b,c) values (3, 'three', 3.3)", driver);
    CompactorTestUtil.runCompaction(conf, "default", TABLE1, CompactionType.MAJOR, true);
    CompactorTestUtil.runCleaner(conf);
    verifySuccessfulCompaction(1);
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    txnHandler.cleanTxnToWriteIdTable();
    List<String> result = execSelectAndDumpData("explain cbo alter materialized view " + MV1 + " rebuild", driver, "");
    Assert.assertEquals(FULL_REBUILD_PLAN, result);
    executeStatementOnDriver("alter materialized view " + MV1 + " rebuild", driver);
    result = execSelectAndDumpData("select * from " + MV1, driver, "");
    assertResult(EXPECTED_RESULT, result);
    result = execSelectAndDumpData("explain cbo select a,b,c from " + TABLE1 + " where a > 0 or a is null", driver, "");
    Assert.assertEquals(Arrays.asList("CBO PLAN:", "HiveTableScan(table=[[default, " + MV1 + "]], table:alias=[default." + MV1 + "])", ""), result);
}
Also used : TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Test(org.junit.Test)

Example 43 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCompactor method testHeartbeatShutdownOnFailedCompaction.

@Test
public void testHeartbeatShutdownOnFailedCompaction() throws Exception {
    String dbName = "default";
    String tblName = "compaction_test";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(bkt INT)" + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 4 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName).withTable(tblName).withStaticPartitionValues(Arrays.asList("0")).withAgentInfo("UT_" + Thread.currentThread().getName()).withHiveConf(conf).withRecordWriter(writer).connect();
    connection.beginTransaction();
    connection.write("55, 'London'".getBytes());
    connection.commitTransaction();
    connection.beginTransaction();
    connection.write("56, 'Paris'".getBytes());
    connection.commitTransaction();
    connection.close();
    executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" + " values(57, 'Budapest')", driver);
    executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" + " values(58, 'Milano')", driver);
    execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    // Commit will throw an exception
    IMetaStoreClient mockedClient = Mockito.spy(new HiveMetaStoreClient(conf));
    doThrow(new RuntimeException("Simulating RuntimeException from CompactionTxn.commit")).when(mockedClient).commitTxn(Mockito.anyLong());
    doAnswer(invocation -> {
        Object o = invocation.callRealMethod();
        // Check that the heartbeat thread is running
        Assert.assertTrue(Thread.getAllStackTraces().keySet().stream().anyMatch(k -> k.getName().contains("CompactionTxn Heartbeater")));
        return o;
    }).when(mockedClient).openTxn(any(), any());
    // Do a major compaction
    CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
    rqst.setPartitionname("bkt=0");
    txnHandler.compact(rqst);
    Worker worker = Mockito.spy(new Worker());
    worker.setThreadId((int) worker.getId());
    worker.setConf(conf);
    worker.init(new AtomicBoolean(true));
    FieldSetter.setField(worker, RemoteCompactorThread.class.getDeclaredField("msc"), mockedClient);
    worker.run();
    // Check if the transaction was opened
    verify(mockedClient, times(1)).openTxn(any(), any());
    // Check that the heartbeat thread terminated properly
    Assert.assertTrue(Thread.getAllStackTraces().keySet().stream().noneMatch(k -> k.getName().contains("CompactionTxn Heartbeater")));
}
Also used : HiveConf(org.apache.hadoop.hive.conf.HiveConf) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) TxnUtils(org.apache.hadoop.hive.metastore.txn.TxnUtils) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) CompactionType(org.apache.hadoop.hive.metastore.api.CompactionType) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) StrictDelimitedInputWriter(org.apache.hive.streaming.StrictDelimitedInputWriter) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Arrays(java.util.Arrays) Mockito(org.mockito.Mockito) Mockito.doThrow(org.mockito.Mockito.doThrow) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Mockito.times(org.mockito.Mockito.times) Mockito.verify(org.mockito.Mockito.verify) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) FieldSetter(org.mockito.internal.util.reflection.FieldSetter) Assert(org.junit.Assert) Test(org.junit.Test)
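
The heartbeat assertions above rely on a simple trick: scan Thread.getAllStackTraces() for a live thread whose name contains a known fragment. A minimal, self-contained sketch of that check outside Hive (the thread name is borrowed from the test; the latch just keeps the stand-in heartbeater alive):

import java.util.concurrent.CountDownLatch;

public class ThreadAliveSketch {
    // True if any live thread's name contains the given fragment
    static boolean threadRunning(String nameFragment) {
        return Thread.getAllStackTraces().keySet().stream()
                .anyMatch(t -> t.getName().contains(nameFragment));
    }

    public static void main(String[] args) throws InterruptedException {
        CountDownLatch stop = new CountDownLatch(1);
        Thread heartbeater = new Thread(() -> {
            try {
                stop.await();
            } catch (InterruptedException ignored) {
                Thread.currentThread().interrupt();
            }
        }, "CompactionTxn Heartbeater");
        heartbeater.start();
        // true while the thread is alive
        System.out.println(threadRunning("Heartbeater"));
        stop.countDown();
        heartbeater.join();
        // false once it has terminated
        System.out.println(threadRunning("Heartbeater"));
    }
}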

Example 44 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCompactor method testStatsAfterCompactionTbl.

/**
 * After each major compaction, stats need to be updated on the table
 * 1. create an ORC backed table (Orc is currently required by ACID)
 * 2. populate with data
 * 3. compute stats
 * 4. Trigger major compaction (which should update stats)
 * 5. check that stats have been updated
 *
 * @throws Exception
 * todo: add a test with a sorted table?
 */
@Test
public void testStatsAfterCompactionTbl() throws Exception {
    // as of (8/27/2014) Hive 0.14, ACID/Orc requires HiveInputFormat
    String dbName = "default";
    String tblName = "compaction_test";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 4 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    executeStatementOnDriver("INSERT INTO TABLE " + tblName + " values(55, 'London')", driver);
    executeStatementOnDriver("INSERT INTO TABLE " + tblName + " values(56, 'Paris')", driver);
    execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    Table table = msClient.getTable(dbName, tblName);
    // compute stats before compaction
    CompactionInfo ci = new CompactionInfo(dbName, tblName, null, CompactionType.MAJOR);
    Worker.StatsUpdater.gatherStats(ci, conf, System.getProperty("user.name"), CompactorUtil.getCompactorJobQueueName(conf, ci, table));
    // Check basic stats are collected
    Map<String, String> parameters = Hive.get().getTable(tblName).getParameters();
    Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
    Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
    Assert.assertEquals("The total table size is differing from the expected", "1434", parameters.get("totalSize"));
    // Do a major compaction
    CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
    txnHandler.compact(rqst);
    runWorker(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
    if (1 != compacts.size()) {
        Assert.fail("Expecting 1 file and found " + compacts.size() + " files " + compacts);
    }
    Assert.assertEquals("ready for cleaning", compacts.get(0).getState());
    // Check basic stats are updated
    parameters = Hive.get().getTable(tblName).getParameters();
    Assert.assertEquals("The number of files is differing from the expected", "1", parameters.get("numFiles"));
    Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
    Assert.assertEquals("The total table size is differing from the expected", "776", parameters.get("totalSize"));
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) Test(org.junit.Test)

Example 45 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCompactor method autoCompactOnStreamingIngestWithDynamicPartition.

@Test
public void autoCompactOnStreamingIngestWithDynamicPartition() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "string:int";
    String agentInfo = "UT_" + Thread.currentThread().getName();
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a STRING) " + // currently ACID requires table to be bucketed
    " PARTITIONED BY (b INT)" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    StrictDelimitedInputWriter writer1 = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    StrictDelimitedInputWriter writer2 = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    StreamingConnection connection1 = HiveStreamingConnection.newBuilder().withDatabase(dbName).withTable(tblName).withAgentInfo(agentInfo).withHiveConf(conf).withRecordWriter(writer1).withStreamingOptimizations(true).withTransactionBatchSize(1).connect();
    StreamingConnection connection2 = HiveStreamingConnection.newBuilder().withDatabase(dbName).withTable(tblName).withAgentInfo(agentInfo).withHiveConf(conf).withRecordWriter(writer2).withStreamingOptimizations(true).withTransactionBatchSize(1).connect();
    try {
        connection1.beginTransaction();
        connection1.write("1,1".getBytes());
        connection1.commitTransaction();
        connection1.beginTransaction();
        connection1.write("1,1".getBytes());
        connection1.commitTransaction();
        connection1.close();
        conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
        runInitiator(conf);
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
        List<ShowCompactResponseElement> compacts1 = rsp.getCompacts();
        Assert.assertEquals(1, compacts1.size());
        SortedSet<String> partNames1 = new TreeSet<String>();
        verifyCompactions(compacts1, partNames1, tblName);
        List<String> names1 = new ArrayList<String>(partNames1);
        Assert.assertEquals("b=1", names1.get(0));
        runWorker(conf);
        runCleaner(conf);
        connection2.beginTransaction();
        connection2.write("1,1".getBytes());
        connection2.commitTransaction();
        connection2.beginTransaction();
        connection2.write("1,1".getBytes());
        connection2.commitTransaction();
        connection2.close();
        runInitiator(conf);
        rsp = txnHandler.showCompact(new ShowCompactRequest());
        List<ShowCompactResponseElement> compacts2 = rsp.getCompacts();
        Assert.assertEquals(1, compacts2.size());
        SortedSet<String> partNames2 = new TreeSet<String>();
        verifyCompactions(compacts2, partNames2, tblName);
        List<String> names2 = new ArrayList<String>(partNames2);
        Assert.assertEquals("b=1", names2.get(0));
        runWorker(conf);
        runCleaner(conf);
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        String tablePath = table.getSd().getLocation();
        String partName = "b=1";
        Path partPath = new Path(tablePath, partName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(partPath, AcidUtils.baseFileFilter);
        if (1 != stat.length) {
            Assert.fail("Expecting 1 file \"base_0000004\" and found " + stat.length + " files " + Arrays.toString(stat));
        }
        String name = stat[0].getPath().getName();
        Assert.assertEquals("base_0000005_v0000009", name);
        CompactorTestUtil.checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, null, 1);
    } finally {
        if (connection1 != null) {
            connection1.close();
        }
        if (connection2 != null) {
            connection2.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) StrictDelimitedInputWriter(org.apache.hive.streaming.StrictDelimitedInputWriter) StreamingConnection(org.apache.hive.streaming.StreamingConnection) HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) ArrayList(java.util.ArrayList) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) TreeSet(java.util.TreeSet) FileSystem(org.apache.hadoop.fs.FileSystem) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) Test(org.junit.Test)

Aggregations

TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore): 61 uses
Test (org.junit.Test): 52 uses
ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest): 36 uses
ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse): 36 uses
Path (org.apache.hadoop.fs.Path): 26 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 24 uses
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 22 uses
FileStatus (org.apache.hadoop.fs.FileStatus): 16 uses
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient): 16 uses
Table (org.apache.hadoop.hive.metastore.api.Table): 16 uses
ArrayList (java.util.ArrayList): 15 uses
HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient): 15 uses
CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest): 15 uses
ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement): 14 uses
HashMap (java.util.HashMap): 13 uses
IOException (java.io.IOException): 12 uses
List (java.util.List): 11 uses
Map (java.util.Map): 11 uses
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 10 uses
TxnUtils (org.apache.hadoop.hive.metastore.txn.TxnUtils): 10 uses
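
Read together, these counts trace the skeleton most of the examples share: obtain a TxnStore from the conf via TxnUtils, enqueue a CompactionRequest, let a Worker process it, and inspect the queue through showCompact. A minimal sketch of that flow, assuming a HiveConf already pointed at a test metastore (running the worker itself is elided; in the tests above it is runWorker(conf)):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.CompactionRequest;
import org.apache.hadoop.hive.metastore.api.CompactionType;
import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;

public class CompactAndCheck {
    static void compactAndCheck(HiveConf conf, String db, String table) throws Exception {
        // Resolve the metastore-backed transaction handler from the conf
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        // Enqueue a major compaction request for the table
        CompactionRequest rqst = new CompactionRequest(db, table, CompactionType.MAJOR);
        txnHandler.compact(rqst);
        // ... a Worker would process the request here (runWorker(conf) in the tests) ...
        // Dump the state of every entry in the compaction queue
        ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
        for (ShowCompactResponseElement e : rsp.getCompacts()) {
            System.out.println(e.getDbname() + "." + e.getTablename() + " -> " + e.getState());
        }
    }
}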