Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
From the class TestCompactor, method schemaEvolutionAddColDynamicPartitioningUpdate:
@Test
public void schemaEvolutionAddColDynamicPartitioningUpdate() throws Exception {
String tblName = "udpct";
executeStatementOnDriver("drop table if exists " + tblName, driver);
executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)" + // currently ACID requires table to be bucketed
" CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + "'today'), (2, 'wilma', 'yesterday')", driver);
executeStatementOnDriver("update " + tblName + " set b = 'barney'", driver);
// Validate the update.
executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
ArrayList<String> valuesReadFromHiveDriver = new ArrayList<String>();
driver.getResults(valuesReadFromHiveDriver);
Assert.assertEquals(2, valuesReadFromHiveDriver.size());
Assert.assertEquals("1\tbarney\ttoday", valuesReadFromHiveDriver.get(0));
Assert.assertEquals("2\tbarney\tyesterday", valuesReadFromHiveDriver.get(1));
// ALTER TABLE ... ADD COLUMNS
executeStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(c int)", driver);
// Validate there is an added NULL for column c.
executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
valuesReadFromHiveDriver = new ArrayList<String>();
driver.getResults(valuesReadFromHiveDriver);
Assert.assertEquals(2, valuesReadFromHiveDriver.size());
Assert.assertEquals("1\tbarney\tNULL\ttoday", valuesReadFromHiveDriver.get(0));
Assert.assertEquals("2\tbarney\tNULL\tyesterday", valuesReadFromHiveDriver.get(1));
// Second INSERT round with new inserts into previously existing partition 'yesterday'.
executeStatementOnDriver("insert into " + tblName + " partition (ds) values " + "(3, 'mark', 1900, 'soon'), (4, 'douglas', 1901, 'last_century'), " + "(5, 'doc', 1902, 'yesterday')", driver);
// Validate the new insertions for column c.
executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
valuesReadFromHiveDriver = new ArrayList<String>();
driver.getResults(valuesReadFromHiveDriver);
Assert.assertEquals(5, valuesReadFromHiveDriver.size());
Assert.assertEquals("1\tbarney\tNULL\ttoday", valuesReadFromHiveDriver.get(0));
Assert.assertEquals("2\tbarney\tNULL\tyesterday", valuesReadFromHiveDriver.get(1));
Assert.assertEquals("3\tmark\t1900\tsoon", valuesReadFromHiveDriver.get(2));
Assert.assertEquals("4\tdouglas\t1901\tlast_century", valuesReadFromHiveDriver.get(3));
Assert.assertEquals("5\tdoc\t1902\tyesterday", valuesReadFromHiveDriver.get(4));
executeStatementOnDriver("update " + tblName + " set c = 2000", driver);
// Validate the update of new column c, even in old rows.
executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
valuesReadFromHiveDriver = new ArrayList<String>();
driver.getResults(valuesReadFromHiveDriver);
Assert.assertEquals(5, valuesReadFromHiveDriver.size());
Assert.assertEquals("1\tbarney\t2000\ttoday", valuesReadFromHiveDriver.get(0));
Assert.assertEquals("2\tbarney\t2000\tyesterday", valuesReadFromHiveDriver.get(1));
Assert.assertEquals("3\tmark\t2000\tsoon", valuesReadFromHiveDriver.get(2));
Assert.assertEquals("4\tdouglas\t2000\tlast_century", valuesReadFromHiveDriver.get(3));
Assert.assertEquals("5\tdoc\t2000\tyesterday", valuesReadFromHiveDriver.get(4));
// Set the delta number threshold to 1 so the insert alone doesn't trigger compaction but the update does
conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
runInitiator(conf);
TxnStore txnHandler = TxnUtils.getTxnStore(conf);
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
List<ShowCompactResponseElement> compacts = rsp.getCompacts();
Assert.assertEquals(4, compacts.size());
SortedSet<String> partNames = new TreeSet<String>();
verifyCompactions(compacts, partNames, tblName);
List<String> names = new ArrayList<String>(partNames);
Assert.assertEquals("ds=last_century", names.get(0));
Assert.assertEquals("ds=soon", names.get(1));
Assert.assertEquals("ds=today", names.get(2));
Assert.assertEquals("ds=yesterday", names.get(3));
// Validate after compaction.
executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
valuesReadFromHiveDriver = new ArrayList<String>();
driver.getResults(valuesReadFromHiveDriver);
Assert.assertEquals(5, valuesReadFromHiveDriver.size());
Assert.assertEquals("1\tbarney\t2000\ttoday", valuesReadFromHiveDriver.get(0));
Assert.assertEquals("2\tbarney\t2000\tyesterday", valuesReadFromHiveDriver.get(1));
Assert.assertEquals("3\tmark\t2000\tsoon", valuesReadFromHiveDriver.get(2));
Assert.assertEquals("4\tdouglas\t2000\tlast_century", valuesReadFromHiveDriver.get(3));
Assert.assertEquals("5\tdoc\t2000\tyesterday", valuesReadFromHiveDriver.get(4));
}
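The verifyCompactions helper called above is not part of this excerpt. Below is a minimal sketch of what it plausibly checks, assuming it only verifies the database, table, and state of each queue entry and collects partition names (getDbname, getTablename, getState, and getPartitionname are the thrift-generated accessors on ShowCompactResponseElement); the real helper in TestCompactor may assert more:

private void verifyCompactions(List<ShowCompactResponseElement> compacts, SortedSet<String> partNames, String tblName) {
  for (ShowCompactResponseElement compact : compacts) {
    // Every queue entry should belong to the test table and still be in the
    // "initiated" state, since only the Initiator has run (no Worker yet).
    Assert.assertEquals("default", compact.getDbname());
    Assert.assertEquals(tblName, compact.getTablename());
    Assert.assertEquals("initiated", compact.getState());
    partNames.add(compact.getPartitionname());
  }
}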
Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
From the class TestCompactor, method dynamicPartitioningUpdate:
@Test
public void dynamicPartitioningUpdate() throws Exception {
String tblName = "udpct";
executeStatementOnDriver("drop table if exists " + tblName, driver);
executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)" + // currently ACID requires table to be bucketed
" CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + "'today'), (2, 'wilma', 'yesterday')", driver);
executeStatementOnDriver("update " + tblName + " set b = 'barney'", driver);
// Set the delta number threshold to 1 so the insert alone doesn't trigger compaction but the update does
conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
runInitiator(conf);
TxnStore txnHandler = TxnUtils.getTxnStore(conf);
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
List<ShowCompactResponseElement> compacts = rsp.getCompacts();
Assert.assertEquals(2, compacts.size());
SortedSet<String> partNames = new TreeSet<String>();
verifyCompactions(compacts, partNames, tblName);
List<String> names = new ArrayList<String>(partNames);
Assert.assertEquals("ds=today", names.get(0));
Assert.assertEquals("ds=yesterday", names.get(1));
}
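When an assertion on the compaction queue fails, it helps to dump the response elements. An illustrative loop (not from the test) that uses only getters known to exist on ShowCompactResponseElement:

for (ShowCompactResponseElement e : compacts) {
  // One line per queued compaction: db.table [partition], type, and state.
  System.out.printf("%s.%s [%s] type=%s state=%s%n",
      e.getDbname(), e.getTablename(), e.getPartitionname(), e.getType(), e.getState());
}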
Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
From the class TestCompactor, method dynamicPartitioningInsert:
@Test
public void dynamicPartitioningInsert() throws Exception {
String tblName = "dpct";
executeStatementOnDriver("drop table if exists " + tblName, driver);
executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)" + // currently ACID requires table to be bucketed
" CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + "'today'), (2, 'wilma', 'yesterday')", driver);
conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0);
runInitiator(conf);
TxnStore txnHandler = TxnUtils.getTxnStore(conf);
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
List<ShowCompactResponseElement> compacts = rsp.getCompacts();
Assert.assertEquals(2, compacts.size());
SortedSet<String> partNames = new TreeSet<String>();
verifyCompactions(compacts, partNames, tblName);
List<String> names = new ArrayList<String>(partNames);
Assert.assertEquals("ds=today", names.get(0));
Assert.assertEquals("ds=yesterday", names.get(1));
}
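The runInitiator(conf) helper is also outside this excerpt. Judging from the explicit Initiator usage in the testMinorCompactionShouldBeRefusedOnTablesWithOriginalFiles snippet below, a plausible minimal version looks like this (a hypothetical reconstruction; the actual helper in Hive's compactor tests may differ):

private static void runInitiator(HiveConf conf) throws Exception {
  Initiator initiator = new Initiator();
  initiator.setConf(conf);
  // Presetting the stop flag to true is the test convention for making the
  // compactor thread perform a single pass and then exit.
  initiator.init(new AtomicBoolean(true));
  initiator.run();
}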
Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
From the class TestCrudCompactorOnTez, method testMinorCompactionShouldBeRefusedOnTablesWithOriginalFiles:
@Test
public void testMinorCompactionShouldBeRefusedOnTablesWithOriginalFiles() throws Exception {
conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true);
// Set the delta number threshold to 2 to avoid skipping compaction because of too few deltas
conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 2);
// Set the delta percentage threshold to a high value so that major compaction is not selected based on it
conf.setFloatVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_PCT_THRESHOLD, 1000f);
final String dbName = "default";
final String tableName = "compaction_test";
executeStatementOnDriver("drop table if exists " + tableName, driver);
executeStatementOnDriver("CREATE TABLE " + tableName + "(id string, value string) CLUSTERED BY(id) " + "INTO 10 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='false')", driver);
executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('1','one'),('2','two'),('3','three')," + "('4','four'),('5','five'),('6','six'),('7','seven'),('8','eight'),('9','nine'),('10','ten')," + "('11','eleven'),('12','twelve'),('13','thirteen'),('14','fourteen'),('15','fifteen'),('16','sixteen')," + "('17','seventeen'),('18','eighteen'),('19','nineteen'),('20','twenty')", driver);
executeStatementOnDriver("alter table " + tableName + " set TBLPROPERTIES('transactional'='true')", driver);
executeStatementOnDriver("insert into " + tableName + " values ('21', 'value21'),('84', 'value84')," + "('66', 'value66'),('54', 'value54')", driver);
executeStatementOnDriver("insert into " + tableName + " values ('22', 'value22'),('34', 'value34')," + "('35', 'value35')", driver);
executeStatementOnDriver("insert into " + tableName + " values ('75', 'value75'),('99', 'value99')", driver);
execSelectAndDumpData("select * from " + tableName, driver, "Dumping data for " + tableName + " after load:");
TxnStore txnHandler = TxnUtils.getTxnStore(conf);
// Prevent the initiator from submitting compaction requests
TxnStore mockedHandler = spy(txnHandler);
doThrow(new RuntimeException("")).when(mockedHandler).compact(nullable(CompactionRequest.class));
Initiator initiator = new Initiator();
initiator.setConf(conf);
initiator.init(new AtomicBoolean(true));
FieldSetter.setField(initiator, MetaStoreCompactorThread.class.getDeclaredField("txnHandler"), mockedHandler);
// Run initiator and capture compaction requests
initiator.run();
// Check the captured compaction request and verify that the type chosen for the table was MAJOR
ArgumentCaptor<CompactionRequest> requests = ArgumentCaptor.forClass(CompactionRequest.class);
verify(mockedHandler).compact(requests.capture());
Assert.assertTrue(requests.getAllValues().stream().anyMatch(r -> r.getTablename().equals(tableName) && r.getType().equals(CompactionType.MAJOR)));
// Try to do a minor compaction directly
CompactionRequest rqst = new CompactionRequest(dbName, tableName, CompactionType.MINOR);
txnHandler.compact(rqst);
runWorker(conf);
// Check that both compactions failed with the expected error messages
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
List<ShowCompactResponseElement> compacts = rsp.getCompacts();
if (2 != compacts.size()) {
Assert.fail("Expecting 2 rows and found " + compacts.size() + ": " + compacts);
}
Assert.assertEquals("did not initiate", compacts.get(0).getState());
Assert.assertTrue(compacts.get(0).getErrorMessage().startsWith("Caught exception while trying to determine if we should compact"));
Assert.assertEquals("refused", compacts.get(1).getState());
Assert.assertTrue(compacts.get(1).getErrorMessage().startsWith("Query based Minor compaction is not possible for full acid tables having raw format (non-acid) data in them."));
}
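This test leans on Mockito; the imports implied by the spy/doThrow/verify/nullable/ArgumentCaptor/FieldSetter calls above would be roughly the following, assuming a Mockito 2.x/3.x line that still ships FieldSetter in its internal package:

import static org.mockito.ArgumentMatchers.nullable;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.verify;
import org.mockito.ArgumentCaptor;
import org.mockito.internal.util.reflection.FieldSetter;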
Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
From the class TestCrudCompactorOnTez, method testMinorAndMajorCompactionWithoutBuckets:
@Test
public void testMinorAndMajorCompactionWithoutBuckets() throws Exception {
String dbName = "default";
String tableName = "testMinorCompaction_wobuckets_5";
String tempTableName = "tmp_txt_table_5";
TestDataProvider dataProvider = new TestDataProvider();
dataProvider.createTableWithoutBucketWithMultipleSplits(dbName, tableName, tempTableName, true, true, false);
FileSystem fs = FileSystem.get(conf);
Table table = msClient.getTable(dbName, tableName);
List<String> expectedData = dataProvider.getAllData(tableName);
// Verify deltas
List<String> expectedDeltas = new ArrayList<>();
expectedDeltas.add("delta_0000001_0000001_0000");
expectedDeltas.add("delta_0000006_0000006_0000");
expectedDeltas.add("delta_0000007_0000007_0000");
expectedDeltas.add("delta_0000008_0000008_0000");
Assert.assertEquals("Delta directories does not match", expectedDeltas, CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null));
// Verify delete delta
List<String> expectedDeleteDeltas = new ArrayList<>();
expectedDeleteDeltas.add("delete_delta_0000002_0000002_0000");
expectedDeleteDeltas.add("delete_delta_0000003_0000003_0000");
expectedDeleteDeltas.add("delete_delta_0000004_0000004_0000");
expectedDeleteDeltas.add("delete_delta_0000005_0000005_0000");
Assert.assertEquals("Delete directories does not match", expectedDeleteDeltas, CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null));
List<String> expectedBucketFiles = CompactorTestUtil.getBucketFileNamesWithoutAttemptId(fs, table, null, expectedDeltas);
List<String> expectedDeleteBucketFiles = CompactorTestUtil.getBucketFileNamesWithoutAttemptId(fs, table, null, expectedDeleteDeltas);
CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MINOR, true);
CompactorTestUtil.runCleaner(conf);
// Only 1 compaction should be in the response queue with succeeded state
List<ShowCompactResponseElement> compacts = TxnUtils.getTxnStore(conf).showCompact(new ShowCompactRequest()).getCompacts();
Assert.assertEquals("Completed compaction queue must contain one element", 1, compacts.size());
Assert.assertEquals("Compaction state is not succeeded", "succeeded", compacts.get(0).getState());
// Verify delta directories after compaction
List<String> actualDeltasAfterComp = CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null);
Assert.assertEquals("Delta directories does not match after compaction", Collections.singletonList("delta_0000001_0000008_v0000024"), actualDeltasAfterComp);
List<String> actualDeleteDeltasAfterComp = CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null);
Assert.assertEquals("Delete delta directories does not match after compaction", Collections.singletonList("delete_delta_0000001_0000008_v0000024"), actualDeleteDeltasAfterComp);
// Verify bucket files in delta dirs
List<String> actualData = dataProvider.getAllData(tableName);
Assert.assertEquals("Bucket names are not matching after compaction", expectedBucketFiles, CompactorTestUtil.getBucketFileNames(fs, table, null, actualDeltasAfterComp.get(0)));
Assert.assertEquals("Bucket names in delete delta are not matching after compaction", expectedDeleteBucketFiles, CompactorTestUtil.getBucketFileNames(fs, table, null, actualDeleteDeltasAfterComp.get(0)));
// Verify all contents
Assert.assertEquals(expectedData, actualData);
CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MAJOR, true);
// Clean up resources
CompactorTestUtil.runCleaner(conf);
// Now both compactions should be in the response queue with succeeded state
compacts = TxnUtils.getTxnStore(conf).showCompact(new ShowCompactRequest()).getCompacts();
Assert.assertEquals("Completed compaction queue must contain two elements", 2, compacts.size());
Assert.assertEquals("Compaction state is not succeeded", "succeeded", compacts.get(0).getState());
// Verify base directories after compaction
List<String> actualBasesAfterComp = CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.baseFileFilter, table, null);
Assert.assertEquals("Base directory does not match after compaction", Collections.singletonList("base_0000008_v0000038"), actualBasesAfterComp);
// Verify bucket files in the base dir
Assert.assertEquals("Bucket names do not match after compaction", expectedBucketFiles, CompactorTestUtil.getBucketFileNames(fs, table, null, actualBasesAfterComp.get(0)));
// Verify all contents
actualData = dataProvider.getAllData(tableName);
Assert.assertEquals(expectedData, actualData);
dataProvider.dropTable(tableName);
}
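The directory names asserted throughout this test encode ACID metadata: delta_<minWriteId>_<maxWriteId>_<stmtId> for deltas written by a single transaction, delete_delta_... for delete events, and a _v<txnId> suffix marking directories produced by a compaction with that visibility transaction. A small hypothetical decoder (illustrative only, not part of the test):

// Example: describeDelta("delta_0000001_0000008_v0000024")
// prints "insert delta covering write ids 1..8, compactor visibility txn 24".
static void describeDelta(String name) {
  boolean delete = name.startsWith("delete_delta_");
  String rest = name.substring(delete ? "delete_delta_".length() : "delta_".length());
  String[] parts = rest.split("_");
  long minWriteId = Long.parseLong(parts[0]);
  long maxWriteId = Long.parseLong(parts[1]);
  String tail = parts.length > 2 ? parts[2] : "";
  String detail = tail.startsWith("v")
      ? ", compactor visibility txn " + Long.parseLong(tail.substring(1))
      : tail.isEmpty() ? "" : ", statement id " + Long.parseLong(tail);
  System.out.printf("%s delta covering write ids %d..%d%s%n",
      delete ? "delete" : "insert", minWriteId, maxWriteId, detail);
}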