Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.
From the class TestCrudCompactorOnTez, method testMinorCompactionNotPartitionedWithBuckets.
@Test
public void testMinorCompactionNotPartitionedWithBuckets() throws Exception {
String dbName = "default";
String tableName = "testMinorCompaction";
// Create test table
TestDataProvider dataProvider = new TestDataProvider();
dataProvider.createFullAcidTable(tableName, false, true);
// Find the location of the table
IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
Table table = metaStoreClient.getTable(dbName, tableName);
FileSystem fs = FileSystem.get(conf);
// Insert test data into test table
dataProvider.insertTestData(tableName);
// Get all data before compaction is run
List<String> expectedData = dataProvider.getAllData(tableName);
// Verify deltas
Assert.assertEquals("Delta directories does not match", Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000004_0000004_0000"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
// Verify delete delta
Assert.assertEquals("Delete directories does not match", Arrays.asList("delete_delta_0000003_0000003_0000", "delete_delta_0000005_0000005_0000"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null));
// Run a compaction
CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MINOR, true);
// Clean up resources
CompactorTestUtil.runCleaner(conf);
// Only 1 compaction should be in the response queue with succeeded state
verifySuccessfulCompaction(1);
// Verify delta directories after compaction
List<String> actualDeltasAfterComp = CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null);
Assert.assertEquals("Delta directories does not match after compaction", Collections.singletonList("delta_0000001_0000005_v0000009"), actualDeltasAfterComp);
List<String> actualDeleteDeltasAfterComp = CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null);
Assert.assertEquals("Delete delta directories does not match after compaction", Collections.singletonList("delete_delta_0000001_0000005_v0000009"), actualDeleteDeltasAfterComp);
// Verify bucket files in delta dirs
List<String> expectedBucketFiles = Arrays.asList("bucket_00000", "bucket_00001");
Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles, CompactorTestUtil.getBucketFileNames(fs, table, null, actualDeltasAfterComp.get(0)));
Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles, CompactorTestUtil.getBucketFileNames(fs, table, null, actualDeleteDeltasAfterComp.get(0)));
// Verify contents of bucket files.
// Bucket 0
List<String> expectedRsBucket0 = Arrays.asList("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t3", "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":2}\t3\t4");
List<String> rsBucket0 = dataProvider.getBucketData(tableName, "536870912");
Assert.assertEquals(expectedRsBucket0, rsBucket0);
// Bucket 1
List<String> expectedRs1Bucket = Arrays.asList("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t2\t3", "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":2}\t2\t4", "{\"writeid\":2,\"bucketid\":536936448,\"rowid\":1}\t4\t3", "{\"writeid\":2,\"bucketid\":536936448,\"rowid\":2}\t4\t4", "{\"writeid\":4,\"bucketid\":536936448,\"rowid\":0}\t5\t2", "{\"writeid\":4,\"bucketid\":536936448,\"rowid\":1}\t5\t3", "{\"writeid\":4,\"bucketid\":536936448,\"rowid\":2}\t5\t4", "{\"writeid\":4,\"bucketid\":536936448,\"rowid\":3}\t6\t2", "{\"writeid\":4,\"bucketid\":536936448,\"rowid\":4}\t6\t3", "{\"writeid\":4,\"bucketid\":536936448,\"rowid\":5}\t6\t4");
List<String> rsBucket1 = dataProvider.getBucketData(tableName, "536936448");
Assert.assertEquals(expectedRs1Bucket, rsBucket1);
// Verify all contents
List<String> actualData = dataProvider.getAllData(tableName);
Assert.assertEquals(expectedData, actualData);
CompactorTestUtilities.checkAcidVersion(fs.listFiles(new Path(table.getSd().getLocation()), true), fs, conf.getBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE), new String[] { AcidUtils.DELTA_PREFIX, AcidUtils.DELETE_DELTA_PREFIX });
// Clean up
dataProvider.dropTable(tableName);
}
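The directory names and ROW__ID values asserted above follow Hive's ACID naming scheme: uncompacted deltas are written as delta_<minWriteId>_<maxWriteId>_<statementId>, and minor compaction replaces them with delta_<minWriteId>_<maxWriteId>_v<visibilityTxnId> (plus a matching delete_delta). The bucketid fields such as 536870912 and 536936448 are encoded bucket properties rather than plain bucket numbers. Below is a small stand-alone sketch of how those pieces can be decoded; the bit layout used for the bucket property assumes bucket codec version 1 and is an illustration only, not code taken from the test.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class AcidNamingSketch {

    // Matches compacted (delete) delta dirs such as "delta_0000001_0000005_v0000009".
    private static final Pattern COMPACTED_DELTA =
            Pattern.compile("(delete_)?delta_(\\d+)_(\\d+)_v(\\d+)");

    public static void main(String[] args) {
        Matcher m = COMPACTED_DELTA.matcher("delta_0000001_0000005_v0000009");
        if (m.matches()) {
            long minWriteId = Long.parseLong(m.group(2));      // 1
            long maxWriteId = Long.parseLong(m.group(3));      // 5
            long visibilityTxnId = Long.parseLong(m.group(4)); // 9
            System.out.printf("writeIds %d..%d, visible from txn %d%n",
                    minWriteId, maxWriteId, visibilityTxnId);
        }

        // Assumed bucket-property layout (codec version 1): top 3 bits = codec
        // version, 1 reserved bit, then 12 bits of bucket id. Under that
        // assumption 536870912 maps to bucket 0 and 536936448 to bucket 1,
        // matching the two bucket files asserted in the test above.
        for (int bucketProperty : new int[] {536870912, 536936448}) {
            int bucketId = (bucketProperty >>> 16) & 0xFFF;
            System.out.println(bucketProperty + " -> bucket " + bucketId);
        }
    }
}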
Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.
From the class TestCrudCompactorOnTez, method testMajorCompactionNotPartitionedWithoutBuckets.
@Test
public void testMajorCompactionNotPartitionedWithoutBuckets() throws Exception {
boolean originalEnableVersionFile = conf.getBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE);
conf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, true);
conf.setVar(HiveConf.ConfVars.COMPACTOR_JOB_QUEUE, CUSTOM_COMPACTION_QUEUE);
String tmpFolder = folder.newFolder().getAbsolutePath();
conf.setVar(HiveConf.ConfVars.HIVE_PROTO_EVENTS_BASE_PATH, tmpFolder);
String dbName = "default";
String tblName = "testMajorCompaction";
TestDataProvider testDataProvider = new TestDataProvider();
testDataProvider.createFullAcidTable(tblName, false, false);
testDataProvider.insertTestData(tblName);
// Find the location of the table
IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
Table table = msClient.getTable(dbName, tblName);
FileSystem fs = FileSystem.get(conf);
// Verify deltas (delta_0000001_0000001_0000, delta_0000002_0000002_0000, delta_0000004_0000004_0000) are present
Assert.assertEquals("Delta directories does not match before compaction", Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000004_0000004_0000"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
// Verify that delete deltas (delete_delta_0000003_0000003_0000, delete_delta_0000005_0000005_0000) are present
Assert.assertEquals("Delete directories does not match", Arrays.asList("delete_delta_0000003_0000003_0000", "delete_delta_0000005_0000005_0000"), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null));
List<String> expectedRsBucket0 = new ArrayList<>(Arrays.asList("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":4}\t2\t3", "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":5}\t2\t4", "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t3", "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":2}\t3\t4", "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":4}\t4\t3", "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":5}\t4\t4", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t2", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t5\t3", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":2}\t5\t4", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":3}\t6\t2", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":4}\t6\t3", "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":5}\t6\t4"));
// Check bucket contents
Assert.assertEquals("pre-compaction bucket 0", expectedRsBucket0, testDataProvider.getBucketData(tblName, "536870912"));
conf.setVar(HiveConf.ConfVars.PREEXECHOOKS, HiveProtoLoggingHook.class.getName());
// Run major compaction and cleaner
CompactorTestUtil.runCompaction(conf, dbName, tblName, CompactionType.MAJOR, true);
conf.setVar(HiveConf.ConfVars.PREEXECHOOKS, StringUtils.EMPTY);
CompactorTestUtil.runCleaner(conf);
verifySuccessfulCompaction(1);
// Should contain only one base directory now
String expectedBase = "base_0000005_v0000009";
Assert.assertEquals("Base directory does not match after major compaction", Collections.singletonList(expectedBase), CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.baseFileFilter, table, null));
// Check base dir contents
List<String> expectedBucketFiles = Arrays.asList("bucket_00000");
Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles, CompactorTestUtil.getBucketFileNames(fs, table, null, expectedBase));
// Check bucket contents
Assert.assertEquals("post-compaction bucket 0", expectedRsBucket0, testDataProvider.getBucketData(tblName, "536870912"));
// Check bucket file contents
checkBucketIdAndRowIdInAcidFile(fs, new Path(table.getSd().getLocation(), expectedBase), 0);
CompactorTestUtilities.checkAcidVersion(fs.listFiles(new Path(table.getSd().getLocation()), true), fs, true, new String[] { AcidUtils.BASE_PREFIX });
conf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, originalEnableVersionFile);
ProtoMessageReader<HiveHookEvents.HiveHookEventProto> reader = TestHiveProtoLoggingHook.getTestReader(conf, tmpFolder);
HiveHookEvents.HiveHookEventProto event = reader.readEvent();
while (ExecutionMode.TEZ != ExecutionMode.valueOf(event.getExecutionMode())) {
event = reader.readEvent();
}
Assert.assertNotNull(event);
Assert.assertEquals(CUSTOM_COMPACTION_QUEUE, event.getQueue());
}
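The test above mutates several HiveConf settings (the ACID version-file flag, the compactor job queue, the proto-events base path and the pre-execution hook) and restores some of them inline, so a failed assertion in the middle would leave the configuration changed for later tests. Below is a minimal sketch of the save-and-restore-in-finally pattern, using only HiveConf accessors that already appear in the test; the method name, the conf parameter and the CUSTOM_COMPACTION_QUEUE constant are assumed to come from the surrounding test class.

// Sketch only: wraps the compaction steps so mutated settings are always restored.
private void runMajorCompactionWithRestoredConf(HiveConf conf) throws Exception {
    boolean originalVersionFile = conf.getBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE);
    String originalQueue = conf.getVar(HiveConf.ConfVars.COMPACTOR_JOB_QUEUE);
    String originalHooks = conf.getVar(HiveConf.ConfVars.PREEXECHOOKS);
    try {
        conf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, true);
        conf.setVar(HiveConf.ConfVars.COMPACTOR_JOB_QUEUE, CUSTOM_COMPACTION_QUEUE);
        conf.setVar(HiveConf.ConfVars.PREEXECHOOKS, HiveProtoLoggingHook.class.getName());
        // ... create the table, run CompactorTestUtil.runCompaction and the assertions here ...
    } finally {
        // Restore the original values even if an assertion above fails.
        conf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, originalVersionFile);
        conf.setVar(HiveConf.ConfVars.COMPACTOR_JOB_QUEUE, originalQueue);
        conf.setVar(HiveConf.ConfVars.PREEXECHOOKS, originalHooks);
    }
}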
Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.
From the class TestCompactor, method testHeartbeatShutdownOnFailedCompaction.
@Test
public void testHeartbeatShutdownOnFailedCompaction() throws Exception {
String dbName = "default";
String tblName = "compaction_test";
executeStatementOnDriver("drop table if exists " + tblName, driver);
executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(bkt INT)" + // currently ACID requires table to be bucketed
" CLUSTERED BY(a) INTO 4 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName).withTable(tblName).withStaticPartitionValues(Arrays.asList("0")).withAgentInfo("UT_" + Thread.currentThread().getName()).withHiveConf(conf).withRecordWriter(writer).connect();
connection.beginTransaction();
connection.write("55, 'London'".getBytes());
connection.commitTransaction();
connection.beginTransaction();
connection.write("56, 'Paris'".getBytes());
connection.commitTransaction();
connection.close();
executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" + " values(57, 'Budapest')", driver);
executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" + " values(58, 'Milano')", driver);
execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");
TxnStore txnHandler = TxnUtils.getTxnStore(conf);
// Commit will throw an exception
IMetaStoreClient mockedClient = Mockito.spy(new HiveMetaStoreClient(conf));
doThrow(new RuntimeException("Simulating RuntimeException from CompactionTxn.commit")).when(mockedClient).commitTxn(Mockito.anyLong());
doAnswer(invocation -> {
Object o = invocation.callRealMethod();
// Check if the heartbeating is running
Assert.assertTrue(Thread.getAllStackTraces().keySet().stream().anyMatch(k -> k.getName().contains("CompactionTxn Heartbeater")));
return o;
}).when(mockedClient).openTxn(any(), any());
// Do a major compaction
CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
rqst.setPartitionname("bkt=0");
txnHandler.compact(rqst);
Worker worker = Mockito.spy(new Worker());
worker.setThreadId((int) worker.getId());
worker.setConf(conf);
worker.init(new AtomicBoolean(true));
FieldSetter.setField(worker, RemoteCompactorThread.class.getDeclaredField("msc"), mockedClient);
worker.run();
// Check if the transaction was opened
verify(mockedClient, times(1)).openTxn(any(), any());
// Check if the heartbeating is properly terminated
Assert.assertTrue(Thread.getAllStackTraces().keySet().stream().noneMatch(k -> k.getName().contains("CompactionTxn Heartbeater")));
}
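The interesting part of this test is how a misbehaving metastore client is injected: a Mockito spy wraps a real HiveMetaStoreClient, commitTxn is stubbed to throw, openTxn is wrapped with doAnswer to check the heartbeater thread, and the spy is pushed into the Worker via reflection. The following is a tiny self-contained illustration of the same spy/stub/verify pattern on a plain java.util.List; it is unrelated to the Hive classes and only shows the Mockito mechanics.

import static org.mockito.Mockito.*;

import java.util.ArrayList;
import java.util.List;

public class SpyPatternSketch {
    public static void main(String[] args) {
        // A spy delegates to the real object unless a method is stubbed.
        List<String> spyList = spy(new ArrayList<>());

        // Stub one method to fail, mirroring the stubbed commitTxn above.
        doThrow(new RuntimeException("simulated failure")).when(spyList).clear();

        // Wrap another method to run a side check, mirroring the doAnswer on openTxn.
        doAnswer(invocation -> {
            Object result = invocation.callRealMethod();
            System.out.println("real add() ran, returned " + result);
            return result;
        }).when(spyList).add(anyString());

        spyList.add("row");
        try {
            spyList.clear();
        } catch (RuntimeException expected) {
            System.out.println("caught: " + expected.getMessage());
        }

        // Verify the interaction count, as the test does with openTxn.
        verify(spyList, times(1)).add("row");
    }
}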
Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.
From the class TestCompactor, method testCleanDynPartAbortNoDataLoss.
@Test
public void testCleanDynPartAbortNoDataLoss() throws Exception {
String dbName = "default";
String tblName = "cws";
HiveStreamingConnection connection = prepareTableAndConnection(dbName, tblName, 1);
executeStatementOnDriver("insert into " + tblName + " partition (a) values (1, '1')", driver);
executeStatementOnDriver("update " + tblName + " set b='2' where a=1", driver);
executeStatementOnDriver("insert into " + tblName + " partition (a) values (2, '2')", driver);
executeStatementOnDriver("update " + tblName + " set b='3' where a=2", driver);
connection.beginTransaction();
connection.write("1,1".getBytes());
connection.write("2,2".getBytes());
connection.abortTransaction();
executeStatementOnDriver("insert into " + tblName + " partition (a) values (3, '3')", driver);
executeStatementOnDriver("update " + tblName + " set b='4' where a=3", driver);
conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0);
runInitiator(conf);
int count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from COMPACTION_QUEUE");
Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from COMPACTION_QUEUE"), 4, count);
runWorker(conf);
runWorker(conf);
runWorker(conf);
runWorker(conf);
// Cleaning should happen in threads concurrently for the minor compaction and the clean abort one.
runCleaner(conf);
count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS");
Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from TXN_COMPONENTS"), 0, count);
IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
Partition p1 = msClient.getPartition(dbName, tblName, "a=1"), p2 = msClient.getPartition(dbName, tblName, "a=2"), p3 = msClient.getPartition(dbName, tblName, "a=3");
msClient.close();
FileSystem fs = FileSystem.get(conf);
verifyDeltaCount(p1.getSd(), fs, 0);
verifyHasBase(p1.getSd(), fs, "base_0000002_v0000010");
verifyDeltaCount(p2.getSd(), fs, 0);
verifyHasBase(p2.getSd(), fs, "base_0000004_v0000012");
verifyDeltaCount(p3.getSd(), fs, 0);
verifyHasBase(p3.getSd(), fs, "base_0000007_v0000014");
}
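The helpers verifyDeltaCount and verifyHasBase belong to the test class and are not shown in this listing. As a rough, hedged sketch of what such checks could look like (the method names and exact assertions here are assumptions, not the project's code), one can list a partition's directory with the ACID path filters already used elsewhere in these examples:

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.junit.Assert;

public class AcidDirAssertSketch {

    // Assert that the partition location contains a base directory with the given name.
    static void assertHasBase(StorageDescriptor sd, FileSystem fs, String expectedBase) throws IOException {
        FileStatus[] bases = fs.listStatus(new Path(sd.getLocation()), AcidUtils.baseFileFilter);
        boolean found = Arrays.stream(bases).anyMatch(s -> s.getPath().getName().equals(expectedBase));
        Assert.assertTrue("expected " + expectedBase + " under " + sd.getLocation(), found);
    }

    // Assert that the partition location contains exactly `expected` delta directories.
    static void assertDeltaCount(StorageDescriptor sd, FileSystem fs, int expected) throws IOException {
        FileStatus[] deltas = fs.listStatus(new Path(sd.getLocation()), AcidUtils.deltaFileFilter);
        Assert.assertEquals("delta directories under " + sd.getLocation(), expected, deltas.length);
    }
}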
Use of org.apache.hadoop.hive.metastore.HiveMetaStoreClient in project hive by apache.
From the class TestCompactor, method autoCompactOnStreamingIngestWithDynamicPartition.
@Test
public void autoCompactOnStreamingIngestWithDynamicPartition() throws Exception {
String dbName = "default";
String tblName = "cws";
String columnNamesProperty = "a,b";
String columnTypesProperty = "string:int";
String agentInfo = "UT_" + Thread.currentThread().getName();
executeStatementOnDriver("drop table if exists " + tblName, driver);
executeStatementOnDriver("CREATE TABLE " + tblName + "(a STRING) " + // currently ACID requires table to be bucketed
" PARTITIONED BY (b INT)" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
StrictDelimitedInputWriter writer1 = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
StrictDelimitedInputWriter writer2 = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
StreamingConnection connection1 = HiveStreamingConnection.newBuilder().withDatabase(dbName).withTable(tblName).withAgentInfo(agentInfo).withHiveConf(conf).withRecordWriter(writer1).withStreamingOptimizations(true).withTransactionBatchSize(1).connect();
StreamingConnection connection2 = HiveStreamingConnection.newBuilder().withDatabase(dbName).withTable(tblName).withAgentInfo(agentInfo).withHiveConf(conf).withRecordWriter(writer2).withStreamingOptimizations(true).withTransactionBatchSize(1).connect();
try {
connection1.beginTransaction();
connection1.write("1,1".getBytes());
connection1.commitTransaction();
connection1.beginTransaction();
connection1.write("1,1".getBytes());
connection1.commitTransaction();
connection1.close();
conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
runInitiator(conf);
TxnStore txnHandler = TxnUtils.getTxnStore(conf);
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
List<ShowCompactResponseElement> compacts1 = rsp.getCompacts();
Assert.assertEquals(1, compacts1.size());
SortedSet<String> partNames1 = new TreeSet<String>();
verifyCompactions(compacts1, partNames1, tblName);
List<String> names1 = new ArrayList<String>(partNames1);
Assert.assertEquals("b=1", names1.get(0));
runWorker(conf);
runCleaner(conf);
connection2.beginTransaction();
connection2.write("1,1".getBytes());
connection2.commitTransaction();
connection2.beginTransaction();
connection2.write("1,1".getBytes());
connection2.commitTransaction();
connection2.close();
runInitiator(conf);
List<ShowCompactResponseElement> compacts2 = rsp.getCompacts();
Assert.assertEquals(1, compacts2.size());
SortedSet<String> partNames2 = new TreeSet<String>();
verifyCompactions(compacts2, partNames2, tblName);
List<String> names2 = new ArrayList<String>(partNames2);
Assert.assertEquals("b=1", names2.get(0));
runWorker(conf);
runCleaner(conf);
// Find the location of the table
IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
Table table = msClient.getTable(dbName, tblName);
String tablePath = table.getSd().getLocation();
String partName = "b=1";
Path partPath = new Path(tablePath, partName);
FileSystem fs = FileSystem.get(conf);
FileStatus[] stat = fs.listStatus(partPath, AcidUtils.baseFileFilter);
if (1 != stat.length) {
Assert.fail("Expecting 1 file \"base_0000004\" and found " + stat.length + " files " + Arrays.toString(stat));
}
String name = stat[0].getPath().getName();
Assert.assertEquals("base_0000005_v0000009", name);
CompactorTestUtil.checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, null, 1);
} finally {
if (connection1 != null) {
connection1.close();
}
if (connection2 != null) {
connection2.close();
}
}
}
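The verifyCompactions helper used above (which fills partNames from the ShowCompactResponse) is not shown in this listing. Below is a hedged sketch of how compaction entries for one table could be collected from the transaction handler, using the same thrift API the test already calls; the getters on ShowCompactResponseElement are the standard generated accessors, and the helper shape itself is an assumption rather than the project's code.

import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.txn.TxnStore;

public class CompactionQueueSketch {

    // Collect the partition names of all compaction entries recorded for a table.
    static SortedSet<String> partitionsWithCompactions(TxnStore txnHandler, String tableName) throws Exception {
        ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
        SortedSet<String> partNames = new TreeSet<>();
        for (ShowCompactResponseElement e : rsp.getCompacts()) {
            if (tableName.equalsIgnoreCase(e.getTablename()) && e.getPartitionname() != null) {
                partNames.add(e.getPartitionname());
            }
        }
        return partNames;
    }
}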