Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.
The class MsckOperation, method execute:
@Override
public int execute() throws HiveException, IOException, TException {
  try {
    Msck msck = new Msck(false, false);
    msck.init(Msck.getMsckConf(context.getDb().getConf()));
    msck.updateExpressionProxy(getProxyClass(context.getDb().getConf()));
    TableName tableName = HiveTableName.of(desc.getTableName());
    long partitionExpirySeconds = -1L;
    try (HiveMetaStoreClient msc = new HiveMetaStoreClient(context.getConf())) {
      boolean msckEnablePartitionRetention = MetastoreConf.getBoolVar(context.getConf(),
          MetastoreConf.ConfVars.MSCK_REPAIR_ENABLE_PARTITION_RETENTION);
      if (msckEnablePartitionRetention) {
        Table table = msc.getTable(SessionState.get().getCurrentCatalog(), tableName.getDb(), tableName.getTable());
        String qualifiedTableName = Warehouse.getCatalogQualifiedTableName(table);
        partitionExpirySeconds = PartitionManagementTask.getRetentionPeriodInSeconds(table);
        LOG.info("{} - Retention period ({}s) for partition is enabled for MSCK REPAIR..",
            qualifiedTableName, partitionExpirySeconds);
      }
    }
    MsckInfo msckInfo = new MsckInfo(SessionState.get().getCurrentCatalog(), tableName.getDb(),
        tableName.getTable(), desc.getFilterExp(), desc.getResFile(), desc.isRepairPartitions(),
        desc.isAddPartitions(), desc.isDropPartitions(), partitionExpirySeconds);
    return msck.repair(msckInfo);
  } catch (MetaException e) {
    LOG.error("Unable to create msck instance.", e);
    return 1;
  } catch (SemanticException e) {
    LOG.error("Msck failed.", e);
    return 1;
  }
}
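The method above opens the HiveMetaStoreClient in a try-with-resources block, so the underlying Thrift connection is closed even when the retention lookup throws. A minimal standalone sketch of that usage pattern follows; it assumes metastore connection settings are available on the classpath (e.g. via hive-site.xml), and the database and table names are made up for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.thrift.TException;

public class MetastoreLookupSketch {
  public static void main(String[] args) throws TException {
    // Picks up metastore settings from the environment; adjust as needed.
    Configuration conf = MetastoreConf.newMetastoreConf();
    // try-with-resources closes the Thrift connection, as in MsckOperation above.
    try (HiveMetaStoreClient client = new HiveMetaStoreClient(conf)) {
      // "default" and "web_logs" are hypothetical names.
      Table t = client.getTable("default", "web_logs");
      System.out.println(t.getSd().getLocation());
    }
  }
}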
Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.
The class TestCrudCompactorOnTez, method testIowMinorMajor:
/**
 * After running insert overwrite, followed by a minor compaction, major compaction was failing because minor
 * compaction was resulting in deltas named delta_1_y.
 */
@Test
public void testIowMinorMajor() throws Exception {
  String dbName = "default";
  String tableName = "testIowMinorMajor";
  // Create test table
  executeStatementOnDriver("CREATE TABLE " + tableName + " (id string, value string) "
      + "CLUSTERED BY(id) INTO 2 BUCKETS "
      + "STORED AS ORC TBLPROPERTIES('transactional'='true')", driver);
  // Find the location of the table
  IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
  Table table = metaStoreClient.getTable(dbName, tableName);
  FileSystem fs = FileSystem.get(conf);
  // Insert test data into test table
  executeStatementOnDriver("insert overwrite table " + tableName
      + " values ('1','one'),('2','two'),('3','three'),"
      + "('4','four'),('5','five'),('6','six'),('7','seven'),('8','eight'),('9','nine'),('10','ten'),('11','eleven'),"
      + "('12','twelve'),('13','thirteen'),('14','fourteen'),('15','fifteen'),('16','sixteen'),('17','seventeen'),"
      + "('18','eighteen'),('19','nineteen'),('20','twenty')", driver);
  executeStatementOnDriver("delete from " + tableName + " where id in ('2', '4', '12', '15')", driver);
  executeStatementOnDriver("delete from " + tableName + " where id in ('11', '10', '14', '5')", driver);
  executeStatementOnDriver("insert into " + tableName
      + " values ('21', 'value21'),('84', 'value84'),('66', 'value66'),('54', 'value54')", driver);
  executeStatementOnDriver("insert into " + tableName
      + " values ('22', 'value22'),('34', 'value34'),('35', 'value35')", driver);
  executeStatementOnDriver("insert into " + tableName + " values ('75', 'value75'),('99', 'value99')", driver);
  // Verify deltas
  Assert.assertEquals("Delta directories does not match",
      Arrays.asList("delta_0000004_0000004_0000", "delta_0000005_0000005_0000", "delta_0000006_0000006_0000"),
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
  // Verify delete deltas
  Assert.assertEquals("Delete directories does not match",
      Arrays.asList("delete_delta_0000002_0000002_0000", "delete_delta_0000003_0000003_0000"),
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null));
  // Get all data before compaction is run
  TestDataProvider dataProvider = new TestDataProvider();
  List<String> expectedData = dataProvider.getAllData(tableName);
  // Run a minor compaction
  CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MINOR, true);
  // Clean up resources
  CompactorTestUtil.runCleaner(conf);
  // Only 1 compaction should be in the response queue with succeeded state
  verifySuccessfulCompaction(1);
  // Verify deltas
  Assert.assertEquals("Delta directories does not match",
      Collections.singletonList("delta_0000002_0000006_v0000009"),
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
  // Verify delete delta
  Assert.assertEquals("Delete directories does not match",
      Collections.singletonList("delete_delta_0000002_0000006_v0000009"),
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null));
  // Verify all contents
  List<String> actualData = dataProvider.getAllData(tableName);
  Assert.assertEquals(expectedData, actualData);
  // Run a major compaction
  CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MAJOR, true);
  // Clean up resources
  CompactorTestUtil.runCleaner(conf);
  // 2 compactions should be in the response queue with succeeded state
  verifySuccessfulCompaction(2);
  // Should contain only one base directory now
  String expectedBase = "base_0000006_v0000023";
  Assert.assertEquals("Base directory does not match after major compaction",
      Collections.singletonList(expectedBase),
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.baseFileFilter, table, null));
  // Check base dir contents
  List<String> expectedBucketFiles = Arrays.asList("bucket_00000", "bucket_00001");
  Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles,
      CompactorTestUtil.getBucketFileNames(fs, table, null, expectedBase));
  // Check bucket file contents
  checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 0);
  checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 1);
  // Verify all contents
  actualData = dataProvider.getAllData(tableName);
  Assert.assertEquals(expectedData, actualData);
  // Clean up
  dataProvider.dropTable(tableName);
}
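The directory names asserted above encode the ACID write-id range: delta_<min>_<max> with a _v<visibilityTxnId> suffix for compacted deltas (e.g. delta_0000002_0000006_v0000009 covers write ids 2 through 6), while deltas written by individual transactions carry a trailing statement id instead (delta_0000004_0000004_0000). As a rough sketch assuming that naming scheme, a compacted name can be pulled apart with a regex; this helper is illustrative only and is not part of Hive's AcidUtils.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DeltaNameSketch {
  // Matches compacted delta/delete_delta names such as "delta_0000002_0000006_v0000009".
  private static final Pattern COMPACTED_DELTA =
      Pattern.compile("(delete_)?delta_(\\d+)_(\\d+)(?:_v(\\d+))?");

  public static void main(String[] args) {
    Matcher m = COMPACTED_DELTA.matcher("delta_0000002_0000006_v0000009");
    if (m.matches()) {
      System.out.println("minWriteId = " + Long.parseLong(m.group(2)));      // 2
      System.out.println("maxWriteId = " + Long.parseLong(m.group(3)));      // 6
      System.out.println("visibilityTxnId = " + Long.parseLong(m.group(4))); // 9
    }
  }
}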
Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.
The class TestCrudCompactorOnTez, method testMinorCompactionNotPartitionedWithoutBuckets:
@Test
public void testMinorCompactionNotPartitionedWithoutBuckets() throws Exception {
  String dbName = "default";
  String tableName = "testMinorCompaction";
  // Create test table
  TestDataProvider dataProvider = new TestDataProvider();
  dataProvider.createFullAcidTable(tableName, false, false);
  // Find the location of the table
  IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
  Table table = msClient.getTable(dbName, tableName);
  FileSystem fs = FileSystem.get(conf);
  // Insert test data into test table
  dataProvider.insertTestData(tableName);
  // Get all data before compaction is run
  List<String> expectedData = dataProvider.getAllData(tableName);
  // Verify deltas
  Assert.assertEquals("Delta directories does not match",
      Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000004_0000004_0000"),
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
  // Verify delete deltas
  Assert.assertEquals("Delete directories does not match",
      Arrays.asList("delete_delta_0000003_0000003_0000", "delete_delta_0000005_0000005_0000"),
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null));
  // Run a compaction
  CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MINOR, true);
  // Clean up resources
  CompactorTestUtil.runCleaner(conf);
  // Only 1 compaction should be in the response queue with succeeded state
  verifySuccessfulCompaction(1);
  // Verify delta directories after compaction
  List<String> actualDeltasAfterComp =
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null);
  Assert.assertEquals("Delta directories does not match after compaction",
      Collections.singletonList("delta_0000001_0000005_v0000009"), actualDeltasAfterComp);
  List<String> actualDeleteDeltasAfterComp =
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null);
  Assert.assertEquals("Delete delta directories does not match after compaction",
      Collections.singletonList("delete_delta_0000001_0000005_v0000009"), actualDeleteDeltasAfterComp);
  // Verify bucket files in delta dirs
  List<String> expectedBucketFiles = Collections.singletonList("bucket_00000");
  Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles,
      CompactorTestUtil.getBucketFileNames(fs, table, null, actualDeltasAfterComp.get(0)));
  Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles,
      CompactorTestUtil.getBucketFileNames(fs, table, null, actualDeleteDeltasAfterComp.get(0)));
  // Verify contents of bucket files.
  // Bucket 0
  List<String> expectedRsBucket0 = Arrays.asList(
      "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":4}\t2\t3",
      "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":5}\t2\t4",
      "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t3",
      "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":2}\t3\t4",
      "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":4}\t4\t3",
      "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":5}\t4\t4",
      "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t2",
      "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t5\t3",
      "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":2}\t5\t4",
      "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":3}\t6\t2",
      "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":4}\t6\t3",
      "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":5}\t6\t4");
  List<String> rsBucket0 = dataProvider.getBucketData(tableName, "536870912");
  Assert.assertEquals(expectedRsBucket0, rsBucket0);
  // Verify all contents
  List<String> actualData = dataProvider.getAllData(tableName);
  Assert.assertEquals(expectedData, actualData);
  CompactorTestUtilities.checkAcidVersion(fs.listFiles(new Path(table.getSd().getLocation()), true), fs,
      conf.getBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE),
      new String[] { AcidUtils.DELTA_PREFIX, AcidUtils.DELETE_DELTA_PREFIX });
  // Clean up
  dataProvider.dropTable(tableName);
}
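The keys passed to getBucketData above ("536870912") are not plain bucket numbers but the encoded bucket property stored in each row's ROW__ID, which is also the "bucketid" value in the expected result strings. In Hive's BucketCodec V1 layout the codec version occupies the top bits and the bucket id sits at bit 16, so bucket 0 encodes to 536870912 (1 << 29) and bucket 1 to 536936448. The following is a simplified re-derivation of the decoding under that assumed layout, not a call into the real BucketCodec class.

public class BucketPropertySketch {
  public static void main(String[] args) {
    // 536870912, 536936448, 537001984 are the "bucketid" values seen in these tests.
    for (int encoded : new int[] { 536870912, 536936448, 537001984 }) {
      int version = encoded >>> 29;           // codec version: 1 for V1
      int bucketId = (encoded >> 16) & 0xFFF; // bucket id held in bits 16..27
      System.out.printf("%d -> version=%d, bucketId=%d%n", encoded, version, bucketId);
    }
  }
}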
Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.
The class TestCrudCompactorOnTez, method testMajorCompactionNotPartitioned4Buckets:
/**
 * TestDataProvider uses 2 buckets; this test uses 4 buckets.
 * @throws Exception
 */
@Test
public void testMajorCompactionNotPartitioned4Buckets() throws Exception {
  boolean originalEnableVersionFile = conf.getBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE);
  conf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, false);
  String dbName = "default";
  String tblName = "testMajorCompaction";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("create transactional table " + tblName + " (a int, b int) clustered"
      + " by (a) into 4 buckets"
      + " stored as ORC TBLPROPERTIES('bucketing_version'='2', 'transactional'='true',"
      + " 'transactional_properties'='default')", driver);
  executeStatementOnDriver("insert into " + tblName + " values(1,2),(1,3),(1,4),(2,2),(2,3),(2,4)", driver);
  executeStatementOnDriver("insert into " + tblName + " values(3,2),(3,3),(3,4),(4,2),(4,3),(4,4)", driver);
  executeStatementOnDriver("delete from " + tblName + " where b = 2", driver);
  // Find the location of the table
  IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
  Table table = msClient.getTable(dbName, tblName);
  FileSystem fs = FileSystem.get(conf);
  // Verify deltas (delta_0000001_0000001_0000, delta_0000002_0000002_0000) are present
  Assert.assertEquals("Delta directories does not match before compaction",
      Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000"),
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
  // Verify that delete delta (delete_delta_0000003_0000003_0000) is present
  Assert.assertEquals("Delete directories does not match",
      Arrays.asList("delete_delta_0000003_0000003_0000"),
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null));
  List<String> expectedRsBucket0 = new ArrayList<>(Arrays.asList(
      "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t2\t3",
      "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t2\t4"));
  List<String> expectedRsBucket1 = new ArrayList<>(Arrays.asList(
      "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t1\t3",
      "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":2}\t1\t4",
      "{\"writeid\":2,\"bucketid\":536936448,\"rowid\":1}\t4\t3",
      "{\"writeid\":2,\"bucketid\":536936448,\"rowid\":2}\t4\t4"));
  List<String> expectedRsBucket2 = new ArrayList<>(Arrays.asList(
      "{\"writeid\":2,\"bucketid\":537001984,\"rowid\":1}\t3\t3",
      "{\"writeid\":2,\"bucketid\":537001984,\"rowid\":2}\t3\t4"));
  TestDataProvider testDataProvider = new TestDataProvider();
  List<String> preCompactionRsBucket0 = testDataProvider.getBucketData(tblName, "536870912");
  List<String> preCompactionRsBucket1 = testDataProvider.getBucketData(tblName, "536936448");
  List<String> preCompactionRsBucket2 = testDataProvider.getBucketData(tblName, "537001984");
  Assert.assertEquals("pre-compaction bucket 0", expectedRsBucket0, preCompactionRsBucket0);
  Assert.assertEquals("pre-compaction bucket 1", expectedRsBucket1, preCompactionRsBucket1);
  Assert.assertEquals("pre-compaction bucket 2", expectedRsBucket2, preCompactionRsBucket2);
  // Run major compaction and cleaner
  CompactorTestUtil.runCompaction(conf, dbName, tblName, CompactionType.MAJOR, true);
  CompactorTestUtil.runCleaner(conf);
  verifySuccessfulCompaction(1);
  // Should contain only one base directory now
  String expectedBase = "base_0000003_v0000009";
  Assert.assertEquals("Base directory does not match after major compaction",
      Collections.singletonList(expectedBase),
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.baseFileFilter, table, null));
  // Check files in base
  List<String> expectedBucketFiles = Arrays.asList("bucket_00000", "bucket_00001", "bucket_00002");
  Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles,
      CompactorTestUtil.getBucketFileNames(fs, table, null, expectedBase));
  // Check buckets contents
  Assert.assertEquals("post-compaction bucket 0", expectedRsBucket0,
      testDataProvider.getBucketData(tblName, "536870912"));
  Assert.assertEquals("post-compaction bucket 1", expectedRsBucket1,
      testDataProvider.getBucketData(tblName, "536936448"));
  Assert.assertEquals("post-compaction bucket 2", expectedRsBucket2,
      testDataProvider.getBucketData(tblName, "537001984"));
  // Check bucket file contents
  checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 0);
  checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 1);
  checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 2);
  CompactorTestUtilities.checkAcidVersion(fs.listFiles(new Path(table.getSd().getLocation()), true), fs, false,
      new String[] { AcidUtils.BASE_PREFIX });
  conf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, originalEnableVersionFile);
}
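One detail worth noting in the test above: it saves HIVE_WRITE_ACID_VERSION_FILE at the start and restores it on the last line, but if any assertion fails in between, the restore never runs and the flipped setting leaks into later tests. A more defensive sketch of the same save/restore, reusing the exact config calls from the test body:

boolean original = conf.getBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE);
conf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, false);
try {
  // ... statements, compaction and assertions go here ...
} finally {
  // Runs even when an assertion fails, so later tests see the original value.
  conf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, original);
}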
Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.
The class TestCrudCompactorOnTez, method testMajorCompactionWithBloomFilter:
/**
 * Query-based compaction should respect the orc.bloom.filter table properties.
 * @throws Exception
 */
@Test
public void testMajorCompactionWithBloomFilter() throws Exception {
  String dbName = "default";
  String tblName = "testMajorCompaction";
  TestDataProvider testDataProvider = new TestDataProvider();
  Map<String, String> additionalTblProperties = new HashMap<>();
  additionalTblProperties.put("orc.bloom.filter.columns", "b");
  additionalTblProperties.put("orc.bloom.filter.fpp", "0.02");
  testDataProvider.createFullAcidTable(dbName, tblName, false, false, additionalTblProperties);
  testDataProvider.insertTestData(tblName);
  // Find the location of the table
  IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
  Table table = msClient.getTable(dbName, tblName);
  FileSystem fs = FileSystem.get(conf);
  // Verify deltas are present
  Assert.assertEquals("Delta directories does not match before compaction",
      Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000004_0000004_0000"),
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
  // Check that a bucket file contains the bloom filter
  checkBloomFilterInAcidFile(fs, new Path(table.getSd().getLocation(), "delta_0000001_0000001_0000/bucket_00000_0"));
  // Run major compaction and cleaner
  CompactorTestUtil.runCompaction(conf, dbName, tblName, CompactionType.MAJOR, true);
  CompactorTestUtil.runCleaner(conf);
  verifySuccessfulCompaction(1);
  // Should contain only one base directory now
  String expectedBase = "base_0000005_v0000008";
  Assert.assertEquals("Base directory does not match after major compaction",
      Collections.singletonList(expectedBase),
      CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.baseFileFilter, table, null));
  // Check base dir contents
  List<String> expectedBucketFiles = Arrays.asList("bucket_00000");
  Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles,
      CompactorTestUtil.getBucketFileNames(fs, table, null, expectedBase));
  // Check bucket file contents
  checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 0);
  checkBloomFilterInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase + "/bucket_00000"));
}
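Here createFullAcidTable forwards the extra map entries into the table's TBLPROPERTIES; since its column list is not shown in this excerpt, the equivalent DDL is sketched below with a hypothetical two-column schema. The orc.bloom.filter.columns and orc.bloom.filter.fpp keys are standard ORC writer properties, and the point of the test is that query-based major compaction carries them through to the compacted base files, which is what the final checkBloomFilterInAcidFile call verifies.

// Hypothetical equivalent of createFullAcidTable(dbName, tblName, false, false, additionalTblProperties);
// the real column list lives in TestDataProvider.
executeStatementOnDriver("CREATE TABLE " + tblName + " (a int, b int) STORED AS ORC "
    + "TBLPROPERTIES('transactional'='true', "
    + "'orc.bloom.filter.columns'='b', 'orc.bloom.filter.fpp'='0.02')", driver);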