
Example 1 with TestTxnCommands2.runCleaner

Use of org.apache.hadoop.hive.ql.TestTxnCommands2.runCleaner in project hive by apache.

From the class TestCompactor, the method testCompactionOnDataLoadedInPath:

/**
 * Tests compaction of tables that were populated by LOAD DATA INPATH statements.
 *
 * In this scenario the original ORC files are structured in the following way:
 * comp3
 * |--delta_0000001_0000001_0000
 *    |--000000_0
 * |--delta_0000002_0000002_0000
 *    |--000000_0
 *    |--000001_0
 *
 * ...where the comp3 table is not bucketed.
 *
 * @throws Exception
 */
@Test
public void testCompactionOnDataLoadedInPath() throws Exception {
    // Set up the LOAD DATA INPATH scenario.
    executeStatementOnDriver("drop table if exists comp0", driver);
    executeStatementOnDriver("drop table if exists comp1", driver);
    executeStatementOnDriver("drop table if exists comp3", driver);
    executeStatementOnDriver("create external table comp0 (a string)", driver);
    executeStatementOnDriver("insert into comp0 values ('1111111111111')", driver);
    executeStatementOnDriver("insert into comp0 values ('2222222222222')", driver);
    executeStatementOnDriver("insert into comp0 values ('3333333333333')", driver);
    executeStatementOnDriver("create external table comp1 stored as orc as select * from comp0", driver);
    executeStatementOnDriver("create table comp3 (a string) stored as orc " + "TBLPROPERTIES ('transactional'='true')", driver);
    IMetaStoreClient hmsClient = new HiveMetaStoreClient(conf);
    Table table = hmsClient.getTable("default", "comp1");
    FileSystem fs = FileSystem.get(conf);
    Path path000 = fs.listStatus(new Path(table.getSd().getLocation()))[0].getPath();
    Path path001 = new Path(path000.toString().replace("000000", "000001"));
    Path path002 = new Path(path000.toString().replace("000000", "000002"));
    fs.copyFromLocalFile(path000, path001);
    fs.copyFromLocalFile(path000, path002);
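    // Note: the test file system is local, so copyFromLocalFile simply duplicates the ORC file,
    // leaving three identical copies (000000_0, 000001_0, 000002_0) under comp1's location.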
    executeStatementOnDriver("load data inpath '" + path002.toString() + "' into table comp3", driver);
    executeStatementOnDriver("load data inpath '" + path002.getParent().toString() + "' into table comp3", driver);
    // Run compaction.
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    CompactionRequest rqst = new CompactionRequest("default", "comp3", CompactionType.MAJOR);
    txnHandler.compact(rqst);
    runWorker(conf);
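    // The Worker performs the MAJOR compaction; the request then sits in 'ready for cleaning'
    // state until the Cleaner removes the obsolete delta directories.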
    ShowCompactRequest scRqst = new ShowCompactRequest();
    List<ShowCompactResponseElement> compacts = txnHandler.showCompact(scRqst).getCompacts();
    assertEquals(1, compacts.size());
    assertEquals(TxnStore.CLEANING_RESPONSE, compacts.get(0).getState());
    runCleaner(conf);
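    // Once the Cleaner has deleted the obsolete deltas, the request is marked as succeeded.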
    compacts = txnHandler.showCompact(scRqst).getCompacts();
    assertEquals(1, compacts.size());
    assertEquals(TxnStore.SUCCEEDED_RESPONSE, compacts.get(0).getState());
    // Check compacted content and file structure.
    table = hmsClient.getTable("default", "comp3");
    List<String> rs = execSelectAndDumpData("select * from comp3", driver, "select");
    assertEquals(9, rs.size());
    assertEquals(3, rs.stream().filter(p -> "1111111111111".equals(p)).count());
    assertEquals(3, rs.stream().filter(p -> "2222222222222".equals(p)).count());
    assertEquals(3, rs.stream().filter(p -> "3333333333333".equals(p)).count());
    FileStatus[] files = fs.listStatus(new Path(table.getSd().getLocation()));
    // base dir
    assertEquals(1, files.length);
    assertEquals("base_0000002_v0000012", files[0].getPath().getName());
    files = fs.listStatus(files[0].getPath(), AcidUtils.bucketFileFilter);
    // two bucket files inside the base dir
    assertEquals(2, files.length);
    assertEquals(1, Arrays.stream(files).filter(p -> "bucket_00000".equals(p.getPath().getName())).count());
    assertEquals(1, Arrays.stream(files).filter(p -> "bucket_00001".equals(p.getPath().getName())).count());
    // Another insert into the newly compacted table.
    executeStatementOnDriver("insert into comp3 values ('4444444444444')", driver);
    // Compact again, now with the extra row included.
    txnHandler.compact(rqst);
    runWorker(conf);
    compacts = txnHandler.showCompact(scRqst).getCompacts();
    assertEquals(2, compacts.size());
    assertEquals(TxnStore.CLEANING_RESPONSE, compacts.get(0).getState());
    runCleaner(conf);
    compacts = txnHandler.showCompact(scRqst).getCompacts();
    assertEquals(2, compacts.size());
    assertEquals(TxnStore.SUCCEEDED_RESPONSE, compacts.get(0).getState());
    // Check compacted content and file structure.
    rs = execSelectAndDumpData("select * from comp3", driver, "select");
    assertEquals(10, rs.size());
    assertEquals(3, rs.stream().filter(p -> "1111111111111".equals(p)).count());
    assertEquals(3, rs.stream().filter(p -> "2222222222222".equals(p)).count());
    assertEquals(3, rs.stream().filter(p -> "3333333333333".equals(p)).count());
    assertEquals(1, rs.stream().filter(p -> "4444444444444".equals(p)).count());
    files = fs.listStatus(new Path(table.getSd().getLocation()));
    // base dir
    assertEquals(1, files.length);
    assertEquals("base_0000004_v0000016", files[0].getPath().getName());
    files = fs.listStatus(files[0].getPath(), AcidUtils.bucketFileFilter);
    // two bucket files inside the base dir
    assertEquals(2, files.length);
    assertEquals(1, Arrays.stream(files).filter(p -> "bucket_00000".equals(p.getPath().getName())).count());
    assertEquals(1, Arrays.stream(files).filter(p -> "bucket_00001".equals(p.getPath().getName())).count());
}
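
The queue-compact-verify-clean cycle appears twice in this test, so it can be factored into a small helper. The sketch below is distilled purely from the calls visible above (runWorker and runCleaner are the static helpers imported from TestTxnCommands2); the helper name compactAndClean and its parameter list are hypothetical, not part of the Hive test suite.

private void compactAndClean(HiveConf conf, String dbName, String tableName,
        CompactionType type, int expectedCompactCount) throws Exception {
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    // Queue the compaction request and let the Worker carry it out.
    txnHandler.compact(new CompactionRequest(dbName, tableName, type));
    runWorker(conf);
    // The compacted request should now be waiting for the Cleaner.
    List<ShowCompactResponseElement> compacts =
            txnHandler.showCompact(new ShowCompactRequest()).getCompacts();
    assertEquals(expectedCompactCount, compacts.size());
    assertEquals(TxnStore.CLEANING_RESPONSE, compacts.get(0).getState());
    // The Cleaner drops the obsolete deltas and marks the request as succeeded.
    runCleaner(conf);
    compacts = txnHandler.showCompact(new ShowCompactRequest()).getCompacts();
    assertEquals(TxnStore.SUCCEEDED_RESPONSE, compacts.get(0).getState());
}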
Also used : Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), FileStatus (org.apache.hadoop.fs.FileStatus), HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient), IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient), Table (org.apache.hadoop.hive.metastore.api.Table), CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest), CompactionType (org.apache.hadoop.hive.metastore.api.CompactionType), ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest), ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement), TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore), TxnUtils (org.apache.hadoop.hive.metastore.txn.TxnUtils), AcidUtils (org.apache.hadoop.hive.ql.io.AcidUtils), Arrays (java.util.Arrays), List (java.util.List), Test (org.junit.Test)
