Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
The class TestCrudCompactorOnTez, method testMinorCompactionShouldBeRefusedOnTablesWithRawData.
@Test
public void testMinorCompactionShouldBeRefusedOnTablesWithRawData() throws Exception {
  conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true);
  // Set the delta number threshold to 2 to avoid skipping compaction because of too few deltas
  conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 2);
  // Set the delta percentage to a high value to suppress selecting major compaction based on that
  conf.setFloatVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_PCT_THRESHOLD, 1000f);
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  final String dbName = "default";
  final String origTableName = "compaction_test";
  final String testTableName = "imported";
  executeStatementOnDriver("drop table if exists " + origTableName, driver);
  executeStatementOnDriver("drop table if exists " + testTableName, driver);
  executeStatementOnDriver("CREATE TABLE " + origTableName + "(id string, value string) CLUSTERED BY(id) "
      + "INTO 10 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true')", driver);
  executeStatementOnDriver("INSERT INTO TABLE " + origTableName + " values ('1','one'),('2','two'),('3','three'),"
      + "('4','four'),('5','five'),('6','six'),('7','seven'),('8','eight'),('9','nine'),('10','ten'),"
      + "('11','eleven'),('12','twelve'),('13','thirteen'),('14','fourteen'),('15','fifteen'),('16','sixteen'),"
      + "('17','seventeen'),('18','eighteen'),('19','nineteen'),('20','twenty')", driver);
  execSelectAndDumpData("select * from " + origTableName, driver, "Dumping data for " + origTableName + " after load:");
  executeStatementOnDriver("export table " + origTableName + " to '/tmp/temp_acid'", driver);
  executeStatementOnDriver("import table " + testTableName + " from '/tmp/temp_acid'", driver);
  executeStatementOnDriver("insert into " + testTableName + " values ('21', 'value21'),('84', 'value84'),"
      + "('66', 'value66'),('54', 'value54')", driver);
  executeStatementOnDriver("insert into " + testTableName + " values ('22', 'value22'),('34', 'value34'),"
      + "('35', 'value35')", driver);
  executeStatementOnDriver("insert into " + testTableName + " values ('75', 'value75'),('99', 'value99')", driver);
  // Prevent the initiator from actually submitting the compaction requests
  TxnStore mockedHandler = spy(txnHandler);
  doThrow(new RuntimeException("")).when(mockedHandler).compact(nullable(CompactionRequest.class));
  Initiator initiator = new Initiator();
  initiator.setConf(conf);
  initiator.init(new AtomicBoolean(true));
  FieldSetter.setField(initiator, MetaStoreCompactorThread.class.getDeclaredField("txnHandler"), mockedHandler);
  // Run the initiator and capture the compaction requests
  initiator.run();
  // Check the captured compaction request and verify that the type chosen for the table was MAJOR
  ArgumentCaptor<CompactionRequest> requests = ArgumentCaptor.forClass(CompactionRequest.class);
  verify(mockedHandler).compact(requests.capture());
  Assert.assertTrue(requests.getAllValues().stream()
      .anyMatch(r -> r.getTablename().equals(testTableName) && r.getType().equals(CompactionType.MAJOR)));
  // Try to do a minor compaction directly
  CompactionRequest rqst = new CompactionRequest(dbName, testTableName, CompactionType.MINOR);
  txnHandler.compact(rqst);
  runWorker(conf);
  // Check that both compactions failed with the expected error messages
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  if (2 != compacts.size()) {
    Assert.fail("Expecting 2 compactions and found " + compacts.size() + ": " + compacts);
  }
  Assert.assertEquals("did not initiate", compacts.get(0).getState());
  Assert.assertTrue(compacts.get(0).getErrorMessage()
      .startsWith("Caught exception while trying to determine if we should compact"));
  Assert.assertEquals("refused", compacts.get(1).getState());
  Assert.assertTrue(compacts.get(1).getErrorMessage()
      .startsWith("Query based Minor compaction is not possible for full acid tables having raw format (non-acid) data in them."));
}
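The test above reads ShowCompactResponseElement entries by list position. A minimal sketch of a lookup helper follows; the name findCompaction is hypothetical (it is not part of the test), it assumes java.util.Optional is imported, and it uses only the Thrift-generated getters getDbname() and getTablename() that the surrounding tests also rely on:

// Hypothetical helper (sketch only): find the compaction queue entry for a
// given table, independent of its position in the response list.
private static Optional<ShowCompactResponseElement> findCompaction(TxnStore txnHandler,
    String dbName, String tblName) throws Exception {
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  return rsp.getCompacts().stream()
      .filter(e -> dbName.equals(e.getDbname()) && tblName.equals(e.getTablename()))
      .findFirst();
}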
Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
The class TestCompactor, method testStatsAfterCompactionTbl.
/**
 * After each major compaction, stats need to be updated on the table:
 * 1. create an ORC backed table (ORC is currently required by ACID)
 * 2. populate it with data
 * 3. compute stats
 * 4. trigger major compaction (which should update stats)
 * 5. check that stats have been updated
 *
 * @throws Exception
 * todo: add a test with a sorted table
 */
@Test
public void testStatsAfterCompactionTbl() throws Exception {
  // as of (8/27/2014) Hive 0.14, ACID/Orc requires HiveInputFormat
  String dbName = "default";
  String tblName = "compaction_test";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) "
      // currently ACID requires the table to be bucketed
      + " CLUSTERED BY(a) INTO 4 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  executeStatementOnDriver("INSERT INTO TABLE " + tblName + " values(55, 'London')", driver);
  executeStatementOnDriver("INSERT INTO TABLE " + tblName + " values(56, 'Paris')", driver);
  execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  Table table = msClient.getTable(dbName, tblName);
  // compute stats before compaction
  CompactionInfo ci = new CompactionInfo(dbName, tblName, null, CompactionType.MAJOR);
  Worker.StatsUpdater.gatherStats(ci, conf, System.getProperty("user.name"),
      CompactorUtil.getCompactorJobQueueName(conf, ci, table));
  // Check that basic stats are collected
  Map<String, String> parameters = Hive.get().getTable(tblName).getParameters();
  Assert.assertEquals("The number of files differs from the expected", "2", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows differs from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total table size differs from the expected", "1434", parameters.get("totalSize"));
  // Do a major compaction
  CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
  txnHandler.compact(rqst);
  runWorker(conf);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  if (1 != compacts.size()) {
    Assert.fail("Expecting 1 compaction and found " + compacts.size() + ": " + compacts);
  }
  Assert.assertEquals("ready for cleaning", compacts.get(0).getState());
  // Check that basic stats are updated
  parameters = Hive.get().getTable(tblName).getParameters();
  Assert.assertEquals("The number of files differs from the expected", "1", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows differs from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total table size differs from the expected", "776", parameters.get("totalSize"));
}
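The numFiles/numRows/totalSize assertions repeat at every checkpoint of this test and of the partitioned variant further down. A sketch of a helper that would factor them out (assertBasicStats is a hypothetical name, not part of TestCompactor); note that hard-coded totalSize values such as "1434" depend on ORC encoding details and may vary between Hive versions:

// Hypothetical helper (sketch only): the repeated basic-stats assertions in one place.
private static void assertBasicStats(Map<String, String> parameters,
    String expectedNumFiles, String expectedNumRows, String expectedTotalSize) {
  Assert.assertEquals("The number of files differs from the expected", expectedNumFiles, parameters.get("numFiles"));
  Assert.assertEquals("The number of rows differs from the expected", expectedNumRows, parameters.get("numRows"));
  Assert.assertEquals("The total size differs from the expected", expectedTotalSize, parameters.get("totalSize"));
}

With this helper, the post-compaction check above would read assertBasicStats(parameters, "1", "2", "776").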
Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
The class TestCompactor, method autoCompactOnStreamingIngestWithDynamicPartition.
@Test
public void autoCompactOnStreamingIngestWithDynamicPartition() throws Exception {
  String dbName = "default";
  String tblName = "cws";
  String columnNamesProperty = "a,b";
  String columnTypesProperty = "string:int";
  String agentInfo = "UT_" + Thread.currentThread().getName();
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a STRING) "
      + " PARTITIONED BY (b INT)" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  StrictDelimitedInputWriter writer1 = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
  StrictDelimitedInputWriter writer2 = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
  StreamingConnection connection1 = HiveStreamingConnection.newBuilder()
      .withDatabase(dbName)
      .withTable(tblName)
      .withAgentInfo(agentInfo)
      .withHiveConf(conf)
      .withRecordWriter(writer1)
      .withStreamingOptimizations(true)
      .withTransactionBatchSize(1)
      .connect();
  StreamingConnection connection2 = HiveStreamingConnection.newBuilder()
      .withDatabase(dbName)
      .withTable(tblName)
      .withAgentInfo(agentInfo)
      .withHiveConf(conf)
      .withRecordWriter(writer2)
      .withStreamingOptimizations(true)
      .withTransactionBatchSize(1)
      .connect();
  try {
    connection1.beginTransaction();
    connection1.write("1,1".getBytes());
    connection1.commitTransaction();
    connection1.beginTransaction();
    connection1.write("1,1".getBytes());
    connection1.commitTransaction();
    connection1.close();
    conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
    runInitiator(conf);
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts1 = rsp.getCompacts();
    Assert.assertEquals(1, compacts1.size());
    SortedSet<String> partNames1 = new TreeSet<String>();
    verifyCompactions(compacts1, partNames1, tblName);
    List<String> names1 = new ArrayList<String>(partNames1);
    Assert.assertEquals("b=1", names1.get(0));
    runWorker(conf);
    runCleaner(conf);
    connection2.beginTransaction();
    connection2.write("1,1".getBytes());
    connection2.commitTransaction();
    connection2.beginTransaction();
    connection2.write("1,1".getBytes());
    connection2.commitTransaction();
    connection2.close();
    runInitiator(conf);
    // Note: this reuses the response object fetched before the second initiator run
    List<ShowCompactResponseElement> compacts2 = rsp.getCompacts();
    Assert.assertEquals(1, compacts2.size());
    SortedSet<String> partNames2 = new TreeSet<String>();
    verifyCompactions(compacts2, partNames2, tblName);
    List<String> names2 = new ArrayList<String>(partNames2);
    Assert.assertEquals("b=1", names2.get(0));
    runWorker(conf);
    runCleaner(conf);
    // Find the location of the table
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tblName);
    String tablePath = table.getSd().getLocation();
    String partName = "b=1";
    Path partPath = new Path(tablePath, partName);
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] stat = fs.listStatus(partPath, AcidUtils.baseFileFilter);
    if (1 != stat.length) {
      Assert.fail("Expecting 1 base directory and found " + stat.length + ": " + Arrays.toString(stat));
    }
    String name = stat[0].getPath().getName();
    Assert.assertEquals("base_0000005_v0000009", name);
    CompactorTestUtil.checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, null, 1);
  } finally {
    if (connection1 != null) {
      connection1.close();
    }
    if (connection2 != null) {
      connection2.close();
    }
  }
}
Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
The class TestCompactor, method dynamicPartitioningDelete.
@Test
public void dynamicPartitioningDelete() throws Exception {
  String tblName = "ddpct";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)"
      // currently ACID requires the table to be bucketed
      + " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', "
      + "'today'), (2, 'wilma', 'yesterday')", driver);
  executeStatementOnDriver("update " + tblName + " set b = 'fred' where a = 1", driver);
  executeStatementOnDriver("delete from " + tblName + " where b = 'fred'", driver);
  // Set the threshold to 2 so the insert and update alone don't trigger compaction, but the delete does
  conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 2);
  runInitiator(conf);
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  Assert.assertEquals(1, compacts.size());
  SortedSet<String> partNames = new TreeSet<String>();
  verifyCompactions(compacts, partNames, tblName);
  List<String> names = new ArrayList<String>(partNames);
  Assert.assertEquals("ds=today", names.get(0));
}
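The verifyCompactions helper called in the last two tests is not shown on this page. A plausible sketch, inferred only from its call sites (a list of ShowCompactResponseElement entries, a sorted set that receives partition names, and the table name); the actual implementation in TestCompactor may differ:

// Sketch inferred from the call sites above: every entry should belong to the
// expected table and be in "initiated" state after the initiator run;
// partition names are collected into the caller's sorted set.
private void verifyCompactions(List<ShowCompactResponseElement> compacts,
    SortedSet<String> partNames, String tblName) {
  for (ShowCompactResponseElement compact : compacts) {
    Assert.assertEquals(tblName, compact.getTablename());
    Assert.assertEquals("initiated", compact.getState());
    partNames.add(compact.getPartitionname());
  }
}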
Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
The class TestCompactor, method testStatsAfterCompactionPartTbl.
/**
 * After each major compaction, stats need to be updated on the table:
 * 1. create a partitioned ORC backed table (ORC is currently required by ACID)
 * 2. populate it with data
 * 3. compute stats
 * 4. trigger major compaction on one of the partitions (which should update stats)
 * 5. check that stats have been updated for that partition only
 *
 * @throws Exception
 * todo: add a test with a sorted table
 */
@Test
public void testStatsAfterCompactionPartTbl() throws Exception {
  // as of (8/27/2014) Hive 0.14, ACID/Orc requires HiveInputFormat
  String dbName = "default";
  String tblName = "compaction_test";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(bkt INT)"
      // currently ACID requires the table to be bucketed
      + " CLUSTERED BY(a) INTO 4 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
  HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
      .withDatabase(dbName)
      .withTable(tblName)
      .withStaticPartitionValues(Arrays.asList("0"))
      .withAgentInfo("UT_" + Thread.currentThread().getName())
      .withHiveConf(conf)
      .withRecordWriter(writer)
      .connect();
  connection.beginTransaction();
  connection.write("55, 'London'".getBytes());
  connection.commitTransaction();
  connection.beginTransaction();
  connection.write("56, 'Paris'".getBytes());
  connection.commitTransaction();
  connection.close();
  executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" + " values(57, 'Budapest')", driver);
  executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" + " values(58, 'Milano')", driver);
  execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  Table table = msClient.getTable(dbName, tblName);
  // compute stats before compaction
  CompactionInfo ci = new CompactionInfo(dbName, tblName, "bkt=0", CompactionType.MAJOR);
  Worker.StatsUpdater.gatherStats(ci, conf, System.getProperty("user.name"),
      CompactorUtil.getCompactorJobQueueName(conf, ci, table));
  ci = new CompactionInfo(dbName, tblName, "bkt=1", CompactionType.MAJOR);
  Worker.StatsUpdater.gatherStats(ci, conf, System.getProperty("user.name"),
      CompactorUtil.getCompactorJobQueueName(conf, ci, table));
  // Check that basic stats are collected
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = Hive.get().getTable(tblName);
  List<org.apache.hadoop.hive.ql.metadata.Partition> partitions = Hive.get().getPartitions(hiveTable);
  Map<String, String> parameters = partitions.stream()
      .filter(p -> p.getName().equals("bkt=0"))
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Could not get Partition"))
      .getParameters();
  Assert.assertEquals("The number of files differs from the expected", "2", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows differs from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total partition size differs from the expected", "1373", parameters.get("totalSize"));
  parameters = partitions.stream()
      .filter(p -> p.getName().equals("bkt=1"))
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Could not get Partition"))
      .getParameters();
  Assert.assertEquals("The number of files differs from the expected", "2", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows differs from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total partition size differs from the expected", "1442", parameters.get("totalSize"));
  // Do a major compaction
  CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
  rqst.setPartitionname("bkt=0");
  txnHandler.compact(rqst);
  runWorker(conf);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  if (1 != compacts.size()) {
    Assert.fail("Expecting 1 compaction and found " + compacts.size() + ": " + compacts);
  }
  Assert.assertEquals("ready for cleaning", compacts.get(0).getState());
  // Check that basic stats are updated for partition bkt=0, but not for partition bkt=1
  partitions = Hive.get().getPartitions(hiveTable);
  parameters = partitions.stream()
      .filter(p -> p.getName().equals("bkt=0"))
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Could not get Partition"))
      .getParameters();
  Assert.assertEquals("The number of files differs from the expected", "1", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows differs from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total partition size differs from the expected", "801", parameters.get("totalSize"));
  parameters = partitions.stream()
      .filter(p -> p.getName().equals("bkt=1"))
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Could not get Partition"))
      .getParameters();
  Assert.assertEquals("The number of files differs from the expected", "2", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows differs from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total partition size differs from the expected", "1442", parameters.get("totalSize"));
}
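The partition-level pattern above (build a CompactionRequest, set the partition name, run the worker, then inspect ShowCompactResponse) suggests one more small sketch: looking up the queue state for a specific partition. compactionState is a hypothetical name; the getters used are the same ones exercised by the tests on this page:

// Hypothetical helper (sketch only): return the compaction queue state
// ("initiated", "working", "ready for cleaning", ...) for a table/partition,
// or null if no entry exists.
private static String compactionState(TxnStore txnHandler, String tblName, String partName)
    throws Exception {
  for (ShowCompactResponseElement e : txnHandler.showCompact(new ShowCompactRequest()).getCompacts()) {
    if (tblName.equals(e.getTablename()) && partName.equals(e.getPartitionname())) {
      return e.getState();
    }
  }
  return null;
}

For the test above, Assert.assertEquals("ready for cleaning", compactionState(txnHandler, tblName, "bkt=0")) would express the same check.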