Use of org.apache.hadoop.hive.ql.TestTxnCommands2.runWorker in project hive by apache.
From the class TestCompactor, method testCompactionOnDataLoadedInPath.
/**
 * Tests compaction of tables that were populated by LOAD DATA INPATH statements.
 *
 * In this scenario the original ORC files are structured in the following way:
 * comp3
 * |--delta_0000001_0000001_0000
 * |   |--000000_0
 * |--delta_0000002_0000002_0000
 *     |--000000_0
 *     |--000001_0
 *
 * ...where the comp3 table is not bucketed.
 *
 * @throws Exception
 */
@Test
public void testCompactionOnDataLoadedInPath() throws Exception {
  // Setup of the LOAD DATA INPATH scenario.
  executeStatementOnDriver("drop table if exists comp0", driver);
  executeStatementOnDriver("drop table if exists comp1", driver);
  executeStatementOnDriver("drop table if exists comp3", driver);
  executeStatementOnDriver("create external table comp0 (a string)", driver);
  executeStatementOnDriver("insert into comp0 values ('1111111111111')", driver);
  executeStatementOnDriver("insert into comp0 values ('2222222222222')", driver);
  executeStatementOnDriver("insert into comp0 values ('3333333333333')", driver);
  executeStatementOnDriver("create external table comp1 stored as orc as select * from comp0", driver);
  executeStatementOnDriver("create table comp3 (a string) stored as orc "
      + "TBLPROPERTIES ('transactional'='true')", driver);

  IMetaStoreClient hmsClient = new HiveMetaStoreClient(conf);
  Table table = hmsClient.getTable("default", "comp1");
  FileSystem fs = FileSystem.get(conf);
  // Clone comp1's single ORC file twice so that both a file and a directory can be loaded.
  Path path000 = fs.listStatus(new Path(table.getSd().getLocation()))[0].getPath();
  Path path001 = new Path(path000.toString().replace("000000", "000001"));
  Path path002 = new Path(path000.toString().replace("000000", "000002"));
  fs.copyFromLocalFile(path000, path001);
  fs.copyFromLocalFile(path000, path002);
  // Loading the single file creates delta_0000001_0000001_0000 (one file); loading its
  // parent directory, which still holds the other two copies after the first LOAD moved
  // path002 away, creates delta_0000002_0000002_0000 (two files).
  executeStatementOnDriver("load data inpath '" + path002.toString() + "' into table comp3", driver);
  executeStatementOnDriver("load data inpath '" + path002.getParent().toString() + "' into table comp3", driver);

  // Run major compaction.
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  CompactionRequest rqst = new CompactionRequest("default", "comp3", CompactionType.MAJOR);
  txnHandler.compact(rqst);
  runWorker(conf);
  ShowCompactRequest scRqst = new ShowCompactRequest();
  List<ShowCompactResponseElement> compacts = txnHandler.showCompact(scRqst).getCompacts();
  assertEquals(1, compacts.size());
  assertEquals(TxnStore.CLEANING_RESPONSE, compacts.get(0).getState());
  runCleaner(conf);
  compacts = txnHandler.showCompact(scRqst).getCompacts();
  assertEquals(1, compacts.size());
  assertEquals(TxnStore.SUCCEEDED_RESPONSE, compacts.get(0).getState());

  // Check compacted content and file structure.
  table = hmsClient.getTable("default", "comp3");
  List<String> rs = execSelectAndDumpData("select * from comp3", driver, "select");
  assertEquals(9, rs.size());
  assertEquals(3, rs.stream().filter(p -> "1111111111111".equals(p)).count());
  assertEquals(3, rs.stream().filter(p -> "2222222222222".equals(p)).count());
  assertEquals(3, rs.stream().filter(p -> "3333333333333".equals(p)).count());

  FileStatus[] files = fs.listStatus(new Path(table.getSd().getLocation()));
  // base dir
  assertEquals(1, files.length);
  assertEquals("base_0000002_v0000012", files[0].getPath().getName());
  files = fs.listStatus(files[0].getPath(), AcidUtils.bucketFileFilter);
  // bucket files
  assertEquals(2, files.length);
  // The original snippet discarded these counts; assert them so the check has effect.
  assertEquals(1, Arrays.stream(files).filter(p -> "bucket_00000".equals(p.getPath().getName())).count());
  assertEquals(1, Arrays.stream(files).filter(p -> "bucket_00001".equals(p.getPath().getName())).count());
  // Another insert into the newly compacted table.
  executeStatementOnDriver("insert into comp3 values ('4444444444444')", driver);

  // Compact again, now with the extra row.
  txnHandler.compact(rqst);
  runWorker(conf);
  compacts = txnHandler.showCompact(scRqst).getCompacts();
  assertEquals(2, compacts.size());
  assertEquals(TxnStore.CLEANING_RESPONSE, compacts.get(0).getState());
  runCleaner(conf);
  compacts = txnHandler.showCompact(scRqst).getCompacts();
  assertEquals(2, compacts.size());
  assertEquals(TxnStore.SUCCEEDED_RESPONSE, compacts.get(0).getState());

  // Check compacted content and file structure.
  rs = execSelectAndDumpData("select * from comp3", driver, "select");
  assertEquals(10, rs.size());
  assertEquals(3, rs.stream().filter(p -> "1111111111111".equals(p)).count());
  assertEquals(3, rs.stream().filter(p -> "2222222222222".equals(p)).count());
  assertEquals(3, rs.stream().filter(p -> "3333333333333".equals(p)).count());
  assertEquals(1, rs.stream().filter(p -> "4444444444444".equals(p)).count());

  files = fs.listStatus(new Path(table.getSd().getLocation()));
  // base dir
  assertEquals(1, files.length);
  assertEquals("base_0000004_v0000016", files[0].getPath().getName());
  files = fs.listStatus(files[0].getPath(), AcidUtils.bucketFileFilter);
  // bucket files
  assertEquals(2, files.length);
  // As above, assert the counts instead of discarding them.
  assertEquals(1, Arrays.stream(files).filter(p -> "bucket_00000".equals(p.getPath().getName())).count());
  assertEquals(1, Arrays.stream(files).filter(p -> "bucket_00001".equals(p.getPath().getName())).count());
}
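For reference, the runWorker and runCleaner helpers used above come from TestTxnCommands2. The sketch below outlines what they do; the exact setter and init signatures vary across Hive releases, so treat this as an assumption-laden outline rather than the authoritative source.

import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.txn.compactor.Cleaner;
import org.apache.hadoop.hive.ql.txn.compactor.Worker;

// Sketch only: runs one pass of the compactor Worker, which picks up the
// queued CompactionRequest and rewrites the deltas into a new base directory.
// Passing stop=true makes the thread exit after a single iteration.
static void runWorker(HiveConf hiveConf) throws Exception {
  Worker worker = new Worker();
  worker.setConf(hiveConf);
  worker.init(new AtomicBoolean(true));
  worker.run();
}

// Sketch only: runs one pass of the Cleaner, which deletes the obsolete
// delta files once the compacted base is visible to all readers.
static void runCleaner(HiveConf hiveConf) throws Exception {
  Cleaner cleaner = new Cleaner();
  cleaner.setConf(hiveConf);
  cleaner.init(new AtomicBoolean(true));
  cleaner.run();
}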
Use of org.apache.hadoop.hive.ql.TestTxnCommands2.runWorker in project hive by apache.
From the class TestCompactor, method testStatsAfterCompactionPartTbl.
/**
 * After each major compaction, stats need to be updated on the table:
 * 1. create a partitioned ORC-backed table (ORC is currently required by ACID)
 * 2. populate it with data
 * 3. compute stats
 * 4. trigger major compaction on one of the partitions (which should update stats)
 * 5. check that stats have been updated for that partition only
 *
 * TODO: add a test with a sorted table?
 *
 * @throws Exception
 */
@Test
public void testStatsAfterCompactionPartTbl() throws Exception {
  // As of Hive 0.14 (8/27/2014), ACID/ORC requires HiveInputFormat.
  String dbName = "default";
  String tblName = "compaction_test";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  // Currently ACID requires the table to be bucketed.
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) "
      + " PARTITIONED BY(bkt INT)"
      + " CLUSTERED BY(a) INTO 4 BUCKETS"
      + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);

  // Stream two rows into partition bkt=0, one transaction each.
  StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
      .withFieldDelimiter(',')
      .build();
  HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
      .withDatabase(dbName)
      .withTable(tblName)
      .withStaticPartitionValues(Arrays.asList("0"))
      .withAgentInfo("UT_" + Thread.currentThread().getName())
      .withHiveConf(conf)
      .withRecordWriter(writer)
      .connect();
  connection.beginTransaction();
  connection.write("55, 'London'".getBytes());
  connection.commitTransaction();
  connection.beginTransaction();
  connection.write("56, 'Paris'".getBytes());
  connection.commitTransaction();
  connection.close();

  // Insert two rows into partition bkt=1.
  executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)"
      + " values(57, 'Budapest')", driver);
  executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)"
      + " values(58, 'Milano')", driver);
  execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");

  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  Table table = msClient.getTable(dbName, tblName);

  // Compute stats before compaction.
  CompactionInfo ci = new CompactionInfo(dbName, tblName, "bkt=0", CompactionType.MAJOR);
  Worker.StatsUpdater.gatherStats(ci, conf, System.getProperty("user.name"),
      CompactorUtil.getCompactorJobQueueName(conf, ci, table));
  ci = new CompactionInfo(dbName, tblName, "bkt=1", CompactionType.MAJOR);
  Worker.StatsUpdater.gatherStats(ci, conf, System.getProperty("user.name"),
      CompactorUtil.getCompactorJobQueueName(conf, ci, table));

  // Check that basic stats were collected.
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = Hive.get().getTable(tblName);
  List<org.apache.hadoop.hive.ql.metadata.Partition> partitions = Hive.get().getPartitions(hiveTable);
  Map<String, String> parameters = partitions.stream()
      .filter(p -> p.getName().equals("bkt=0"))
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Could not get Partition"))
      .getParameters();
  Assert.assertEquals("The number of files differs from the expected", "2", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows differs from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total table size differs from the expected", "1373", parameters.get("totalSize"));
  parameters = partitions.stream()
      .filter(p -> p.getName().equals("bkt=1"))
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Could not get Partition"))
      .getParameters();
  Assert.assertEquals("The number of files differs from the expected", "2", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows differs from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total table size differs from the expected", "1442", parameters.get("totalSize"));

  // Do a major compaction on partition bkt=0 only.
  CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
  rqst.setPartitionname("bkt=0");
  txnHandler.compact(rqst);
  runWorker(conf);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  if (1 != compacts.size()) {
    Assert.fail("Expecting 1 compaction and found " + compacts.size() + ": " + compacts);
  }
  Assert.assertEquals(TxnStore.CLEANING_RESPONSE, compacts.get(0).getState());
  // Check that basic stats were updated for partition bkt=0 but not for partition bkt=1.
  partitions = Hive.get().getPartitions(hiveTable);
  parameters = partitions.stream()
      .filter(p -> p.getName().equals("bkt=0"))
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Could not get Partition"))
      .getParameters();
  Assert.assertEquals("The number of files differs from the expected", "1", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows differs from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total table size differs from the expected", "801", parameters.get("totalSize"));
  parameters = partitions.stream()
      .filter(p -> p.getName().equals("bkt=1"))
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Could not get Partition"))
      .getParameters();
  Assert.assertEquals("The number of files differs from the expected", "2", parameters.get("numFiles"));
  Assert.assertEquals("The number of rows differs from the expected", "2", parameters.get("numRows"));
  Assert.assertEquals("The total table size differs from the expected", "1442", parameters.get("totalSize"));
}
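The stats assertions above read the basic statistics (numFiles, numRows, totalSize) that the compactor writes back into the partition's parameter map in the metastore. As a hypothetical illustration (the helper name is ours; the table and partition names are borrowed from the test), the same parameters can be fetched directly through the standard IMetaStoreClient API:

import java.util.Map;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Partition;

// Reads the basic-stats parameters for one partition straight from the HMS.
// getPartition(db, table, partName) is the standard IMetaStoreClient call;
// the parameter keys are the same ones asserted in the test above.
static void printPartitionStats(IMetaStoreClient msClient) throws Exception {
  Partition part = msClient.getPartition("default", "compaction_test", "bkt=0");
  Map<String, String> params = part.getParameters();
  System.out.println("numFiles=" + params.get("numFiles")
      + ", numRows=" + params.get("numRows")
      + ", totalSize=" + params.get("totalSize"));
}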