Use of org.apache.hadoop.hive.metastore.txn.CompactionInfo in project hive by apache.
From the class TestCompactor, method testStatsAfterCompactionPartTbl:
/**
 * After each major compaction, stats need to be updated on the table
 * 1. create a partitioned ORC backed table (Orc is currently required by ACID)
 * 2. populate with data
 * 3. compute stats
 * 4. Trigger major compaction on one of the partitions (which should update stats)
 * 5. check that stats have been updated for that partition only
 *
 * @throws Exception todo:
 * 4. add a test with sorted table?
 */
@Test
public void testStatsAfterCompactionPartTbl() throws Exception {
  // as of (8/27/2014) Hive 0.14, ACID/Orc requires HiveInputFormat
  String dbName = "default";
  String tblName = "compaction_test";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
      " PARTITIONED BY(bkt INT)" + // currently ACID requires table to be bucketed
      " CLUSTERED BY(a) INTO 4 BUCKETS" +
      " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
      .withFieldDelimiter(',')
      .build();
  HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
      .withDatabase(dbName)
      .withTable(tblName)
      .withStaticPartitionValues(Arrays.asList("0"))
      .withAgentInfo("UT_" + Thread.currentThread().getName())
      .withHiveConf(conf)
      .withRecordWriter(writer)
      .connect();
  connection.beginTransaction();
  connection.write("55, 'London'".getBytes());
  connection.commitTransaction();
  connection.beginTransaction();
  connection.write("56, 'Paris'".getBytes());
  connection.commitTransaction();
  connection.close();
  executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" +
      " values(57, 'Budapest')", driver);
  executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" +
      " values(58, 'Milano')", driver);
  execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  Table table = msClient.getTable(dbName, tblName);
  // compute stats before compaction
  CompactionInfo ci = new CompactionInfo(dbName, tblName, "bkt=0", CompactionType.MAJOR);
  Worker.StatsUpdater.gatherStats(ci, conf, System.getProperty("user.name"),
      CompactorUtil.getCompactorJobQueueName(conf, ci, table));
  ci = new CompactionInfo(dbName, tblName, "bkt=1", CompactionType.MAJOR);
  Worker.StatsUpdater.gatherStats(ci, conf, System.getProperty("user.name"),
      CompactorUtil.getCompactorJobQueueName(conf, ci, table));
  // Check basic stats are collected
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = Hive.get().getTable(tblName);
  List<org.apache.hadoop.hive.ql.metadata.Partition> partitions = Hive.get().getPartitions(hiveTable);
  Map<String, String> parameters = partitions.stream()
      .filter(p -> p.getName().equals("bkt=0"))
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Could not get Partition"))
      .getParameters();
  Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total table size is differing from the expected", "1373", parameters.get("totalSize"));
  parameters = partitions.stream()
      .filter(p -> p.getName().equals("bkt=1"))
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Could not get Partition"))
      .getParameters();
  Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total table size is differing from the expected", "1442", parameters.get("totalSize"));
  // Do a major compaction
  CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
  rqst.setPartitionname("bkt=0");
  txnHandler.compact(rqst);
  runWorker(conf);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  if (1 != compacts.size()) {
    Assert.fail("Expecting 1 file and found " + compacts.size() + " files " + compacts);
  }
  Assert.assertEquals("ready for cleaning", compacts.get(0).getState());
  // Check basic stats are updated for partition bkt=0, but not updated for partition bkt=1
  partitions = Hive.get().getPartitions(hiveTable);
  parameters = partitions.stream()
      .filter(p -> p.getName().equals("bkt=0"))
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Could not get Partition"))
      .getParameters();
  Assert.assertEquals("The number of files is differing from the expected", "1", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total table size is differing from the expected", "801", parameters.get("totalSize"));
  parameters = partitions.stream()
      .filter(p -> p.getName().equals("bkt=1"))
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Could not get Partition"))
      .getParameters();
  Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total table size is differing from the expected", "1442", parameters.get("totalSize"));
}
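The showCompact verification at the end of this test (fetch the list, assert a single entry, assert its state) recurs verbatim in the next example. A small helper could centralize it; this is a hypothetical sketch, not part of the Hive source, built only from the TxnStore calls already shown above:

// Hypothetical helper: assert that exactly one compaction is registered and
// that it has reached the expected state, e.g. "ready for cleaning".
private static void assertSingleCompactionInState(TxnStore txnHandler, String expectedState) throws Exception {
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  Assert.assertEquals("Unexpected compactions: " + compacts, 1, compacts.size());
  Assert.assertEquals(expectedState, compacts.get(0).getState());
}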
Use of org.apache.hadoop.hive.metastore.txn.CompactionInfo in project hive by apache.
From the class TestCrudCompactorOnTez, method testStatsAfterQueryCompactionOnTez:
/**
 * After each major compaction, stats need to be updated on the table
 * 1. create an ORC backed table (Orc is currently required by ACID)
 * 2. populate with data
 * 3. compute stats
 * 4. Trigger major compaction (which should update stats)
 * 5. check that stats have been updated
 */
@Test
public void testStatsAfterQueryCompactionOnTez() throws Exception {
  // as of (8/27/2014) Hive 0.14, ACID/Orc requires HiveInputFormat
  String dbName = "default";
  String tblName = "compaction_test";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
      " CLUSTERED BY(a) INTO 4 BUCKETS" +
      " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  executeStatementOnDriver("INSERT INTO TABLE " + tblName + " values(55, 'London')", driver);
  executeStatementOnDriver("INSERT INTO TABLE " + tblName + " values(56, 'Paris')", driver);
  execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  Table table = msClient.getTable(dbName, tblName);
  // compute stats before compaction
  CompactionInfo ci = new CompactionInfo(dbName, tblName, null, CompactionType.MAJOR);
  Worker.StatsUpdater.gatherStats(ci, conf, System.getProperty("user.name"),
      CompactorUtil.getCompactorJobQueueName(conf, ci, table));
  // Check basic stats are collected
  Map<String, String> parameters = Hive.get().getTable(tblName).getParameters();
  Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total table size is differing from the expected", "1434", parameters.get("totalSize"));
  // Do a major compaction
  CompactorTestUtil.runCompaction(conf, dbName, tblName, CompactionType.MAJOR, true);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  if (1 != compacts.size()) {
    Assert.fail("Expecting 1 file and found " + compacts.size() + " files " + compacts);
  }
  Assert.assertEquals("ready for cleaning", compacts.get(0).getState());
  // Check basic stats are updated
  parameters = Hive.get().getTable(tblName).getParameters();
  Assert.assertEquals("The number of files is differing from the expected", "1", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total table size is differing from the expected", "727", parameters.get("totalSize"));
}
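CompactorTestUtil.runCompaction above bundles the request-and-run cycle that the previous test performs by hand. For an unpartitioned table, the manual equivalent is the following sketch, built only from the APIs used in the first example (for a partitioned table, add rqst.setPartitionname as shown there):

// Sketch: manually trigger and run a major compaction on an unpartitioned
// table, mirroring what CompactorTestUtil.runCompaction does for this test.
CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
txnHandler.compact(rqst);  // enqueue the request in the compaction queue
runWorker(conf);           // process the queue with a compactor Worker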
Use of org.apache.hadoop.hive.metastore.txn.CompactionInfo in project hive by apache.
From the class HMSHandler, method commit_txn:
@Override
public void commit_txn(CommitTxnRequest rqst) throws TException {
  boolean isReplayedReplTxn = TxnType.REPL_CREATED.equals(rqst.getTxn_type());
  boolean isHiveReplTxn = rqst.isSetReplPolicy() && TxnType.DEFAULT.equals(rqst.getTxn_type());
  // in replication flow, the write notification log table will be updated here.
  if (rqst.isSetWriteEventInfos() && isReplayedReplTxn) {
    assert (rqst.isSetReplPolicy());
    long targetTxnId = getTxnHandler().getTargetTxnId(rqst.getReplPolicy(), rqst.getTxnid());
    if (targetTxnId < 0) {
      // looks like a retry
      return;
    }
    for (WriteEventInfo writeEventInfo : rqst.getWriteEventInfos()) {
      String[] filesAdded = ReplChangeManager.getListFromSeparatedString(writeEventInfo.getFiles());
      List<String> partitionValue = null;
      Partition ptnObj = null;
      String root;
      Table tbl = getTblObject(writeEventInfo.getDatabase(), writeEventInfo.getTable(), null);
      if (writeEventInfo.getPartition() != null && !writeEventInfo.getPartition().isEmpty()) {
        partitionValue = Warehouse.getPartValuesFromPartName(writeEventInfo.getPartition());
        ptnObj = getPartitionObj(writeEventInfo.getDatabase(), writeEventInfo.getTable(), partitionValue, tbl);
        root = ptnObj.getSd().getLocation();
      } else {
        root = tbl.getSd().getLocation();
      }
      InsertEventRequestData insertData = new InsertEventRequestData();
      insertData.setReplace(true);
      // The added file paths refer to the source warehouse. Need to transform them to the
      // target warehouse using the table or partition object location.
      for (String file : filesAdded) {
        String[] decodedPath = ReplChangeManager.decodeFileUri(file);
        String name = (new Path(decodedPath[0])).getName();
        Path newPath = FileUtils.getTransformedPath(name, decodedPath[3], root);
        insertData.addToFilesAdded(newPath.toUri().toString());
        insertData.addToSubDirectoryList(decodedPath[3]);
        try {
          insertData.addToFilesAddedChecksum(ReplChangeManager.checksumFor(newPath, newPath.getFileSystem(conf)));
        } catch (IOException e) {
          LOG.error("failed to get checksum for the file " + newPath + " with error: " + e.getMessage());
          throw new TException(e.getMessage());
        }
      }
      WriteNotificationLogRequest wnRqst = new WriteNotificationLogRequest(targetTxnId,
          writeEventInfo.getWriteId(), writeEventInfo.getDatabase(), writeEventInfo.getTable(), insertData);
      if (partitionValue != null) {
        wnRqst.setPartitionVals(partitionValue);
      }
      addTxnWriteNotificationLog(tbl, ptnObj, wnRqst);
    }
  }
  getTxnHandler().commitTxn(rqst);
  if (listeners != null && !listeners.isEmpty() && !isHiveReplTxn) {
    MetaStoreListenerNotifier.notifyEvent(listeners, EventType.COMMIT_TXN, new CommitTxnEvent(rqst.getTxnid(), this));
    Optional<CompactionInfo> compactionInfo = getTxnHandler().getCompactionByTxnId(rqst.getTxnid());
    if (compactionInfo.isPresent()) {
      MetaStoreListenerNotifier.notifyEvent(listeners, EventType.COMMIT_COMPACTION,
          new CommitCompactionEvent(rqst.getTxnid(), compactionInfo.get(), this));
    }
  }
}
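The replication branch at the top of commit_txn only fires for transactions marked REPL_CREATED that carry write event info. The following is a minimal sketch of a request that would reach it, assuming the Thrift-generated constructors and setters for CommitTxnRequest and WriteEventInfo; all values and the handler variable are illustrative, not from the source:

// Sketch (illustrative values): a commit request from a replication flow that
// exercises the write-notification branch of commit_txn above.
long sourceTxnId = 42L;  // txn id as opened on the source cluster (illustrative)
CommitTxnRequest commitRqst = new CommitTxnRequest(sourceTxnId);
commitRqst.setTxn_type(TxnType.REPL_CREATED);  // marks this as a replayed repl txn
commitRqst.setReplPolicy("srcdb.*");           // used to map the source txn id to the target one
// One WriteEventInfo per table (or partition) written under the source txn.
WriteEventInfo writeInfo = new WriteEventInfo(1L, "srcdb", "t1",
    "hdfs://source/warehouse/srcdb.db/t1/p=1/000000_0");
writeInfo.setPartition("p=1");                 // optional; absent for unpartitioned tables
commitRqst.setWriteEventInfos(Arrays.asList(writeInfo));
handler.commit_txn(commitRqst);                // handler: an HMSHandler instance (assumed)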