Example 91 with HiveMetaStoreClient

Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in the Apache Hive project.

From the class MsckOperation, the execute method:

@Override
public int execute() throws HiveException, IOException, TException {
    try {
        Msck msck = new Msck(false, false);
        msck.init(Msck.getMsckConf(context.getDb().getConf()));
        msck.updateExpressionProxy(getProxyClass(context.getDb().getConf()));
        TableName tableName = HiveTableName.of(desc.getTableName());
        long partitionExpirySeconds = -1L;
        try (HiveMetaStoreClient msc = new HiveMetaStoreClient(context.getConf())) {
            boolean msckEnablePartitionRetention = MetastoreConf.getBoolVar(context.getConf(), MetastoreConf.ConfVars.MSCK_REPAIR_ENABLE_PARTITION_RETENTION);
            if (msckEnablePartitionRetention) {
                Table table = msc.getTable(SessionState.get().getCurrentCatalog(), tableName.getDb(), tableName.getTable());
                String qualifiedTableName = Warehouse.getCatalogQualifiedTableName(table);
                partitionExpirySeconds = PartitionManagementTask.getRetentionPeriodInSeconds(table);
                LOG.info("{} - Retention period ({}s) for partition is enabled for MSCK REPAIR..", qualifiedTableName, partitionExpirySeconds);
            }
        }
        MsckInfo msckInfo = new MsckInfo(SessionState.get().getCurrentCatalog(), tableName.getDb(), tableName.getTable(), desc.getFilterExp(), desc.getResFile(), desc.isRepairPartitions(), desc.isAddPartitions(), desc.isDropPartitions(), partitionExpirySeconds);
        return msck.repair(msckInfo);
    } catch (MetaException e) {
        LOG.error("Unable to create msck instance.", e);
        return 1;
    } catch (SemanticException e) {
        LOG.error("Msck failed.", e);
        return 1;
    }
}
Also used : HiveTableName(org.apache.hadoop.hive.ql.parse.HiveTableName) TableName(org.apache.hadoop.hive.common.TableName) MsckInfo(org.apache.hadoop.hive.metastore.MsckInfo) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) Msck(org.apache.hadoop.hive.metastore.Msck) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
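
A minimal, distilled sketch of the client pattern used above: open a HiveMetaStoreClient over the metastore Thrift API, fetch a table, and let try-with-resources close the connection. The database name, table name, and the retention property key below are placeholders assumed for illustration; they are not taken from the Hive source.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Table;

public class MetastoreLookupSketch {
    public static void main(String[] args) throws Exception {
        // HiveConf picks up hive-site.xml from the classpath
        HiveConf conf = new HiveConf();
        try (HiveMetaStoreClient client = new HiveMetaStoreClient(conf)) {
            // getTable(dbName, tableName) returns the Thrift Table object,
            // including its parameters map (TBLPROPERTIES)
            Table table = client.getTable("default", "some_table");
            // "partition.retention.period" is an assumed property key for illustration
            String retention = table.getParameters().get("partition.retention.period");
            System.out.println("retention=" + retention);
        } // client.close() runs here, releasing the metastore connection
    }
}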

Example 92 with HiveMetaStoreClient

Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in the Apache Hive project.

From the class TestCrudCompactorOnTez, the testIowMinorMajor method:

/**
 * After running insert overwrite, followed by a minor compaction, major compaction was failing because minor
 * compaction was resulting in deltas named delta_1_y.
 */
@Test
public void testIowMinorMajor() throws Exception {
    String dbName = "default";
    String tableName = "testIowMinorMajor";
    // Create test table
    executeStatementOnDriver("CREATE TABLE " + tableName + " (id string, value string)" + "CLUSTERED BY(id) INTO 2 BUCKETS " + "STORED AS ORC TBLPROPERTIES('transactional'='true')", driver);
    // Find the location of the table
    IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
    Table table = metaStoreClient.getTable(dbName, tableName);
    FileSystem fs = FileSystem.get(conf);
    // Insert test data into test table
    executeStatementOnDriver("insert overwrite table " + tableName + " values ('1','one'),('2','two'),('3','three')," + "('4','four'),('5','five'),('6','six'),('7','seven'),('8','eight'),('9','nine'),('10','ten'),('11','eleven')," + "('12','twelve'),('13','thirteen'),('14','fourteen'),('15','fifteen'),('16','sixteen'),('17','seventeen')," + "('18','eighteen'),('19','nineteen'),('20','twenty')", driver);
    executeStatementOnDriver("delete from " + tableName + " where id in ('2', '4', '12', '15')", driver);
    executeStatementOnDriver("delete from " + tableName + " where id in ('11', '10', '14', '5')", driver);
    executeStatementOnDriver("insert into " + tableName + " values ('21', 'value21'),('84', 'value84'),('66', 'value66'),('54', 'value54')", driver);
    executeStatementOnDriver("insert into " + tableName + " values ('22', 'value22'),('34', 'value34'),('35', 'value35')", driver);
    executeStatementOnDriver("insert into " + tableName + " values ('75', 'value75'),('99', 'value99')", driver);
    // Verify deltas
    Assert.assertEquals("Delta directories does not match", Arrays.asList("delta_0000004_0000004_0000", "delta_0000005_0000005_0000", "delta_0000006_0000006_0000"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
    // Verify delete delta
    Assert.assertEquals("Delete directories does not match", Arrays.asList("delete_delta_0000002_0000002_0000", "delete_delta_0000003_0000003_0000"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null));
    // Get all data before compaction is run
    TestDataProvider dataProvider = new TestDataProvider();
    List<String> expectedData = dataProvider.getAllData(tableName);
    // Run a compaction
    CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MINOR, true);
    // Clean up resources
    CompactorTestUtil.runCleaner(conf);
    // Only 1 compaction should be in the response queue with succeeded state
    verifySuccessfulCompaction(1);
    // Verify deltas
    Assert.assertEquals("Delta directories does not match", Collections.singletonList("delta_0000002_0000006_v0000009"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
    // Verify delete delta
    Assert.assertEquals("Delete directories does not match", Collections.singletonList("delete_delta_0000002_0000006_v0000009"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null));
    // Verify all contents
    List<String> actualData = dataProvider.getAllData(tableName);
    Assert.assertEquals(expectedData, actualData);
    // Run a compaction
    CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MAJOR, true);
    // Clean up resources
    CompactorTestUtil.runCleaner(conf);
    // 2 compactions should be in the response queue with succeeded state
    verifySuccessfulCompaction(2);
    // Should contain only one base directory now
    String expectedBase = "base_0000006_v0000023";
    Assert.assertEquals("Base directory does not match after major compaction", Collections.singletonList(expectedBase), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.baseFileFilter, table, null));
    // Check base dir contents
    List<String> expectedBucketFiles = Arrays.asList("bucket_00000", "bucket_00001");
    Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles, CompactorTestUtil.getBucketFileNames(fs, table, null, expectedBase));
    // Check bucket file contents
    checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 0);
    checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 1);
    // Verify all contents
    actualData = dataProvider.getAllData(tableName);
    Assert.assertEquals(expectedData, actualData);
    // Clean up
    dataProvider.dropTable(tableName);
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileSystem(org.apache.hadoop.fs.FileSystem) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)
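
The assertions above compare directory names returned by CompactorTestUtil.getBaseOrDeltaNames. A rough sketch of what such a listing presumably amounts to is shown below: list the first-level entries under the table location and keep those matching AcidUtils.deltaFileFilter. This is an assumption about what the helper does, not its actual implementation.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.io.AcidUtils;

public class DeltaListingSketch {
    static List<String> listDeltaDirNames(FileSystem fs, Table table) throws Exception {
        Path tableLocation = new Path(table.getSd().getLocation());
        List<String> names = new ArrayList<>();
        // deltaFileFilter matches directories named delta_x_y(_stmt); a similar call with
        // deleteEventDeltaDirFilter or baseFileFilter would cover the other assertions
        for (FileStatus status : fs.listStatus(tableLocation, AcidUtils.deltaFileFilter)) {
            names.add(status.getPath().getName()); // e.g. delta_0000004_0000004_0000
        }
        Collections.sort(names); // sorted so the order is stable for comparison
        return names;
    }
}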

Example 93 with HiveMetaStoreClient

Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in the Apache Hive project.

From the class TestCrudCompactorOnTez, the testMinorCompactionNotPartitionedWithoutBuckets method:

@Test
public void testMinorCompactionNotPartitionedWithoutBuckets() throws Exception {
    String dbName = "default";
    String tableName = "testMinorCompaction";
    // Create test table
    TestDataProvider dataProvider = new TestDataProvider();
    dataProvider.createFullAcidTable(tableName, false, false);
    // Find the location of the table
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tableName);
    FileSystem fs = FileSystem.get(conf);
    // Insert test data into test table
    dataProvider.insertTestData(tableName);
    // Get all data before compaction is run
    List<String> expectedData = dataProvider.getAllData(tableName);
    // Verify deltas
    Assert.assertEquals("Delta directories does not match", Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000004_0000004_0000"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
    // Verify delete delta
    Assert.assertEquals("Delete directories does not match", Arrays.asList("delete_delta_0000003_0000003_0000", "delete_delta_0000005_0000005_0000"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null));
    // Run a compaction
    CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MINOR, true);
    // Clean up resources
    CompactorTestUtil.runCleaner(conf);
    // Only 1 compaction should be in the response queue with succeeded state
    verifySuccessfulCompaction(1);
    // Verify delta directories after compaction
    List<String> actualDeltasAfterComp = CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null);
    Assert.assertEquals("Delta directories does not match after compaction", Collections.singletonList("delta_0000001_0000005_v0000009"), actualDeltasAfterComp);
    List<String> actualDeleteDeltasAfterComp = CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null);
    Assert.assertEquals("Delete delta directories does not match after compaction", Collections.singletonList("delete_delta_0000001_0000005_v0000009"), actualDeleteDeltasAfterComp);
    // Verify bucket files in delta dirs
    List<String> expectedBucketFiles = Collections.singletonList("bucket_00000");
    Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles, CompactorTestUtil.getBucketFileNames(fs, table, null, actualDeltasAfterComp.get(0)));
    Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles, CompactorTestUtil.getBucketFileNames(fs, table, null, actualDeleteDeltasAfterComp.get(0)));
    // Verify contents of bucket files.
    // Bucket 0
    List<String> expectedRsBucket0 = Arrays.asList("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":4}\t2\t3", "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":5}\t2\t4", "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t3", "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":2}\t3\t4", "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":4}\t4\t3", "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":5}\t4\t4", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t2", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t5\t3", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":2}\t5\t4", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":3}\t6\t2", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":4}\t6\t3", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":5}\t6\t4");
    List<String> rsBucket0 = dataProvider.getBucketData(tableName, "536870912");
    Assert.assertEquals(expectedRsBucket0, rsBucket0);
    // Verify all contents
    List<String> actualData = dataProvider.getAllData(tableName);
    Assert.assertEquals(expectedData, actualData);
    CompactorTestUtilities.checkAcidVersion(fs.listFiles(new Path(table.getSd().getLocation()), true), fs, conf.getBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE), new String[] { AcidUtils.DELTA_PREFIX, AcidUtils.DELETE_DELTA_PREFIX });
    // Clean up
    dataProvider.dropTable(tableName);
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileSystem(org.apache.hadoop.fs.FileSystem) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)
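
The expected rows above are prefixed with the serialized ROW__ID virtual column that ACID tables expose, e.g. {"writeid":1,"bucketid":536870912,"rowid":4}. A hypothetical sketch of what a per-bucket fetch like getBucketData(tableName, "536870912") might boil down to follows; the query shape and the column names a and b are assumptions for illustration, not the TestDataProvider implementation.

public class BucketDataQuerySketch {
    // ROW__ID is a struct of writeid, bucketid and rowid; filtering on its bucketid field
    // selects the rows written to one physical bucket, identified by its encoded id.
    static String bucketDataQuery(String tableName, long encodedBucketId) {
        return "SELECT ROW__ID, a, b FROM " + tableName
            + " WHERE ROW__ID.bucketid = " + encodedBucketId
            + " ORDER BY ROW__ID.writeid, ROW__ID.rowid";
    }
}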

Example 94 with HiveMetaStoreClient

Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in the Apache Hive project.

From the class TestCrudCompactorOnTez, the testMajorCompactionNotPartitioned4Buckets method:

/**
 * TestDataProvider uses 2 buckets, I want to test 4 buckets here.
 * @throws Exception
 */
@Test
public void testMajorCompactionNotPartitioned4Buckets() throws Exception {
    boolean originalEnableVersionFile = conf.getBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE);
    conf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, false);
    String dbName = "default";
    String tblName = "testMajorCompaction";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("create transactional table " + tblName + " (a int, b int) clustered" + " by (a) into 4 buckets" + " stored as ORC TBLPROPERTIES('bucketing_version'='2', 'transactional'='true'," + " 'transactional_properties'='default')", driver);
    executeStatementOnDriver("insert into " + tblName + " values(1,2),(1,3),(1,4),(2,2),(2,3),(2,4)", driver);
    executeStatementOnDriver("insert into " + tblName + " values(3,2),(3,3),(3,4),(4,2),(4,3),(4,4)", driver);
    executeStatementOnDriver("delete from " + tblName + " where b = 2", driver);
    // Find the location of the table
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tblName);
    FileSystem fs = FileSystem.get(conf);
    // Verify deltas (delta_0000001_0000001_0000, delta_0000002_0000002_0000) are present
    Assert.assertEquals("Delta directories does not match before compaction", Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
    // Verify that delete delta (delete_delta_0000003_0000003_0000) is present
    Assert.assertEquals("Delete directories does not match", Arrays.asList("delete_delta_0000003_0000003_0000"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null));
    List<String> expectedRsBucket0 = new ArrayList<>(Arrays.asList("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t2\t3", "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t2\t4"));
    List<String> expectedRsBucket1 = new ArrayList<>(Arrays.asList("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t1\t3", "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":2}\t1\t4", "{\"writeid\":2,\"bucketid\":536936448,\"rowid\":1}\t4\t3", "{\"writeid\":2,\"bucketid\":536936448,\"rowid\":2}\t4\t4"));
    List<String> expectedRsBucket2 = new ArrayList<>(Arrays.asList("{\"writeid\":2,\"bucketid\":537001984,\"rowid\":1}\t3\t3", "{\"writeid\":2,\"bucketid\":537001984,\"rowid\":2}\t3\t4"));
    TestDataProvider testDataProvider = new TestDataProvider();
    List<String> preCompactionRsBucket0 = testDataProvider.getBucketData(tblName, "536870912");
    List<String> preCompactionRsBucket1 = testDataProvider.getBucketData(tblName, "536936448");
    List<String> preCompactionRsBucket2 = testDataProvider.getBucketData(tblName, "537001984");
    Assert.assertEquals("pre-compaction bucket 0", expectedRsBucket0, preCompactionRsBucket0);
    Assert.assertEquals("pre-compaction bucket 1", expectedRsBucket1, preCompactionRsBucket1);
    Assert.assertEquals("pre-compaction bucket 2", expectedRsBucket2, preCompactionRsBucket2);
    // Run major compaction and cleaner
    CompactorTestUtil.runCompaction(conf, dbName, tblName, CompactionType.MAJOR, true);
    CompactorTestUtil.runCleaner(conf);
    verifySuccessfulCompaction(1);
    // Should contain only one base directory now
    String expectedBase = "base_0000003_v0000009";
    Assert.assertEquals("Base directory does not match after major compaction", Collections.singletonList(expectedBase), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.baseFileFilter, table, null));
    // Check files in base
    List<String> expectedBucketFiles = Arrays.asList("bucket_00000", "bucket_00001", "bucket_00002");
    Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles, CompactorTestUtil.getBucketFileNames(fs, table, null, "base_0000003_v0000009"));
    // Check buckets contents
    Assert.assertEquals("post-compaction bucket 0", expectedRsBucket0, testDataProvider.getBucketData(tblName, "536870912"));
    Assert.assertEquals("post-compaction bucket 1", expectedRsBucket1, testDataProvider.getBucketData(tblName, "536936448"));
    Assert.assertEquals("post-compaction bucket 2", expectedRsBucket2, testDataProvider.getBucketData(tblName, "537001984"));
    // Check bucket file contents
    checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 0);
    checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 1);
    checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 2);
    CompactorTestUtilities.checkAcidVersion(fs.listFiles(new Path(table.getSd().getLocation()), true), fs, false, new String[] { AcidUtils.BASE_PREFIX });
    conf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, originalEnableVersionFile);
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)
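
The constants 536870912, 536936448 and 537001984 in the expected rows are the encoded ACID bucket ids for buckets 0, 1 and 2. Assuming the V1 bucket codec layout (codec version in the top three bits, bucket number starting at bit 16, statement id in the low bits), the values fall out of simple bit arithmetic, sketched below as an assumption rather than a quote of BucketCodec.

public class BucketIdSketch {
    // version 1 in the top three bits, bucket number shifted to bit 16, statement id 0
    static int encode(int bucket) {
        return (1 << 29) | (bucket << 16);
    }

    public static void main(String[] args) {
        System.out.println(encode(0)); // 536870912 -> rows of bucket_00000
        System.out.println(encode(1)); // 536936448 -> rows of bucket_00001
        System.out.println(encode(2)); // 537001984 -> rows of bucket_00002
    }
}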

Example 95 with HiveMetaStoreClient

Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in the Apache Hive project.

From the class TestCrudCompactorOnTez, the testMajorCompactionWithBloomFilter method:

/**
 * Query based compaction should respect the orc.bloom.filter properties
 * @throws Exception
 */
@Test
public void testMajorCompactionWithBloomFilter() throws Exception {
    String dbName = "default";
    String tblName = "testMajorCompaction";
    TestDataProvider testDataProvider = new TestDataProvider();
    Map<String, String> additionalTblProperties = new HashMap<>();
    additionalTblProperties.put("orc.bloom.filter.columns", "b");
    additionalTblProperties.put("orc.bloom.filter.fpp", "0.02");
    testDataProvider.createFullAcidTable(dbName, tblName, false, false, additionalTblProperties);
    testDataProvider.insertTestData(tblName);
    // Find the location of the table
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tblName);
    FileSystem fs = FileSystem.get(conf);
    // Verify deltas are present
    Assert.assertEquals("Delta directories does not match before compaction", Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000004_0000004_0000"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
    // Check bucket file contains the bloomFilter
    checkBloomFilterInAcidFile(fs, new Path(table.getSd().getLocation(), "delta_0000001_0000001_0000/bucket_00000_0"));
    // Run major compaction and cleaner
    CompactorTestUtil.runCompaction(conf, dbName, tblName, CompactionType.MAJOR, true);
    CompactorTestUtil.runCleaner(conf);
    verifySuccessfulCompaction(1);
    // Should contain only one base directory now
    String expectedBase = "base_0000005_v0000008";
    Assert.assertEquals("Base directory does not match after major compaction", Collections.singletonList(expectedBase), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.baseFileFilter, table, null));
    // Check base dir contents
    List<String> expectedBucketFiles = Arrays.asList("bucket_00000");
    Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles, CompactorTestUtil.getBucketFileNames(fs, table, null, expectedBase));
    // Check bucket file contents
    checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 0);
    checkBloomFilterInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase + "/bucket_00000"));
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) HashMap(java.util.HashMap) FileSystem(org.apache.hadoop.fs.FileSystem) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)
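
The bloom filter settings enter the test through additionalTblProperties, so the table DDL produced by createFullAcidTable presumably ends up carrying them in TBLPROPERTIES. The sketch below shows such a definition; the column list and the exact shape of the generated DDL are assumptions, and only the two orc.bloom.filter.* properties and the transactional flag come from the example.

public class BloomFilterDdlSketch {
    static String createTableDdl(String tblName) {
        return "CREATE TABLE " + tblName + " (a int, b int) STORED AS ORC "
            + "TBLPROPERTIES ("
            + "'transactional'='true', "
            + "'orc.bloom.filter.columns'='b', "  // build bloom filters on column b
            + "'orc.bloom.filter.fpp'='0.02')";   // target false positive probability
    }
}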

Aggregations

HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient): 141 usages
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient): 81 usages
Test (org.junit.Test): 78 usages
Table (org.apache.hadoop.hive.metastore.api.Table): 60 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 57 usages
Path (org.apache.hadoop.fs.Path): 45 usages
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 31 usages
Before (org.junit.Before): 23 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 18 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 17 usages
CliSessionState (org.apache.hadoop.hive.cli.CliSessionState): 16 usages
File (java.io.File): 12 usages
IOException (java.io.IOException): 12 usages
HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection): 12 usages
ArrayList (java.util.ArrayList): 11 usages
TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore): 10 usages
StreamingConnection (org.apache.hive.streaming.StreamingConnection): 10 usages
List (java.util.List): 9 usages
HashMap (java.util.HashMap): 8 usages
CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest): 8 usages