Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
The class DDLTask, method showCompactions.
private int showCompactions(Hive db, ShowCompactionsDesc desc) throws HiveException {
  // Call the metastore to get the status of all known compactions (completed get purged eventually)
  ShowCompactResponse rsp = db.showCompactions();
  // Write the results into the file
  final String noVal = " --- ";
  DataOutputStream os = getOutputStream(desc.getResFile());
  try {
    // Write a header
    os.writeBytes("CompactionId");
    os.write(separator);
    os.writeBytes("Database");
    os.write(separator);
    os.writeBytes("Table");
    os.write(separator);
    os.writeBytes("Partition");
    os.write(separator);
    os.writeBytes("Type");
    os.write(separator);
    os.writeBytes("State");
    os.write(separator);
    os.writeBytes("Worker");
    os.write(separator);
    os.writeBytes("Start Time");
    os.write(separator);
    os.writeBytes("Duration(ms)");
    os.write(separator);
    os.writeBytes("HadoopJobId");
    os.write(terminator);
    if (rsp.getCompacts() != null) {
      for (ShowCompactResponseElement e : rsp.getCompacts()) {
        os.writeBytes(Long.toString(e.getId()));
        os.write(separator);
        os.writeBytes(e.getDbname());
        os.write(separator);
        os.writeBytes(e.getTablename());
        os.write(separator);
        String part = e.getPartitionname();
        os.writeBytes(part == null ? noVal : part);
        os.write(separator);
        os.writeBytes(e.getType().toString());
        os.write(separator);
        os.writeBytes(e.getState());
        os.write(separator);
        String wid = e.getWorkerid();
        os.writeBytes(wid == null ? noVal : wid);
        os.write(separator);
        os.writeBytes(e.isSetStart() ? Long.toString(e.getStart()) : noVal);
        os.write(separator);
        os.writeBytes(e.isSetEndTime() ? Long.toString(e.getEndTime() - e.getStart()) : noVal);
        os.write(separator);
        os.writeBytes(e.isSetHadoopJobId() ? e.getHadoopJobId() : noVal);
        os.write(terminator);
      }
    }
  } catch (IOException e) {
    LOG.warn("show compactions: ", e);
    return 1;
  } finally {
    IOUtils.closeStream(os);
  }
  return 0;
}
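The same listing can be produced outside of DDLTask by querying the metastore client directly. The sketch below is not taken from the Hive source; it assumes a configured HiveConf named hiveConf (the names and surrounding setup are illustrative) and simply prints a few fields of each ShowCompactResponseElement, mirroring what showCompactions writes to its result file.

// Minimal sketch (not Hive source): dump known compactions via the metastore client.
// Assumes `hiveConf` is a configured HiveConf; the variable and method names are illustrative.
static void dumpCompactions(HiveConf hiveConf) throws Exception {
  IMetaStoreClient client = new HiveMetaStoreClient(hiveConf);
  try {
    ShowCompactResponse rsp = client.showCompactions();
    if (rsp.getCompacts() != null) {
      for (ShowCompactResponseElement e : rsp.getCompacts()) {
        System.out.println(e.getId() + "\t" + e.getDbname() + "." + e.getTablename()
            + "\t" + e.getType() + "\t" + e.getState());
      }
    }
  } finally {
    client.close();
  }
}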
Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
The class DDLTask, method compact.
private int compact(Hive db, AlterTableSimpleDesc desc) throws HiveException {
  Table tbl = db.getTable(desc.getTableName());
  if (!AcidUtils.isTransactionalTable(tbl)) {
    throw new HiveException(ErrorMsg.NONACID_COMPACTION_NOT_SUPPORTED, tbl.getDbName(), tbl.getTableName());
  }
  String partName = null;
  if (desc.getPartSpec() == null) {
    // Compaction can only be done on the whole table if the table is non-partitioned.
    if (tbl.isPartitioned()) {
      throw new HiveException(ErrorMsg.NO_COMPACTION_PARTITION);
    }
  } else {
    Map<String, String> partSpec = desc.getPartSpec();
    List<Partition> partitions = db.getPartitions(tbl, partSpec);
    if (partitions.size() > 1) {
      throw new HiveException(ErrorMsg.TOO_MANY_COMPACTION_PARTITIONS);
    } else if (partitions.size() == 0) {
      throw new HiveException(ErrorMsg.INVALID_PARTITION_SPEC);
    }
    partName = partitions.get(0).getName();
  }
  CompactionResponse resp = db.compact2(tbl.getDbName(), tbl.getTableName(), partName, desc.getCompactionType(), desc.getProps());
  if (resp.isAccepted()) {
    console.printInfo("Compaction enqueued with id " + resp.getId());
  } else {
    console.printInfo("Compaction already enqueued with id " + resp.getId() + "; State is " + resp.getState());
  }
  if (desc.isBlocking() && resp.isAccepted()) {
    StringBuilder progressDots = new StringBuilder();
    long waitTimeMs = 1000;
    wait: while (true) {
      // double wait time until 5min
      waitTimeMs = waitTimeMs * 2;
      waitTimeMs = waitTimeMs < 5 * 60 * 1000 ? waitTimeMs : 5 * 60 * 1000;
      try {
        Thread.sleep(waitTimeMs);
      } catch (InterruptedException ex) {
        console.printInfo("Interrupted while waiting for compaction with id=" + resp.getId());
        break;
      }
      // this could be expensive when there are a lot of compactions...
      // todo: update to search by ID once HIVE-13353 is done
      ShowCompactResponse allCompactions = db.showCompactions();
      for (ShowCompactResponseElement compaction : allCompactions.getCompacts()) {
        if (resp.getId() != compaction.getId()) {
          continue;
        }
        switch (compaction.getState()) {
          case TxnStore.WORKING_RESPONSE:
          case TxnStore.INITIATED_RESPONSE:
            // still working
            console.printInfo(progressDots.toString());
            progressDots.append(".");
            continue wait;
          default:
            // done
            console.printInfo("Compaction with id " + resp.getId() + " finished with status: " + compaction.getState());
            break wait;
        }
      }
    }
  }
  return 0;
}
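The blocking loop above scans every known compaction on each poll because, as the TODO notes, the API used here cannot yet filter by compaction id (HIVE-13353). A hypothetical helper, not part of DDLTask, that isolates that scan could look like this; it only uses the getters shown in the snippets above and returns null when the id is not found.

// Hypothetical helper (not in DDLTask): find a single compaction by id in a full listing.
private static ShowCompactResponseElement findCompactionById(ShowCompactResponse response, long id) {
  if (response.getCompacts() == null) {
    return null;
  }
  for (ShowCompactResponseElement element : response.getCompacts()) {
    if (element.getId() == id) {
      return element;
    }
  }
  return null;
}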
Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
The class TestCompactor, method testCompactionOnDataLoadedInPath.
/**
 * Tests compaction of tables that were populated by LOAD DATA INPATH statements.
 *
 * In this scenario the original ORC files are structured in the following way:
 * comp3
 * |--delta_0000001_0000001_0000
 *    |--000000_0
 * |--delta_0000002_0000002_0000
 *    |--000000_0
 *    |--000001_0
 *
 * ...where the comp3 table is not bucketed.
 *
 * @throws Exception
 */
@Test
public void testCompactionOnDataLoadedInPath() throws Exception {
  // Setup of LOAD INPATH scenario.
  executeStatementOnDriver("drop table if exists comp0", driver);
  executeStatementOnDriver("drop table if exists comp1", driver);
  executeStatementOnDriver("drop table if exists comp3", driver);
  executeStatementOnDriver("create external table comp0 (a string)", driver);
  executeStatementOnDriver("insert into comp0 values ('1111111111111')", driver);
  executeStatementOnDriver("insert into comp0 values ('2222222222222')", driver);
  executeStatementOnDriver("insert into comp0 values ('3333333333333')", driver);
  executeStatementOnDriver("create external table comp1 stored as orc as select * from comp0", driver);
  executeStatementOnDriver("create table comp3 (a string) stored as orc " + "TBLPROPERTIES ('transactional'='true')", driver);
  IMetaStoreClient hmsClient = new HiveMetaStoreClient(conf);
  Table table = hmsClient.getTable("default", "comp1");
  FileSystem fs = FileSystem.get(conf);
  Path path000 = fs.listStatus(new Path(table.getSd().getLocation()))[0].getPath();
  Path path001 = new Path(path000.toString().replace("000000", "000001"));
  Path path002 = new Path(path000.toString().replace("000000", "000002"));
  fs.copyFromLocalFile(path000, path001);
  fs.copyFromLocalFile(path000, path002);
  executeStatementOnDriver("load data inpath '" + path002.toString() + "' into table comp3", driver);
  executeStatementOnDriver("load data inpath '" + path002.getParent().toString() + "' into table comp3", driver);
  // Run compaction.
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  CompactionRequest rqst = new CompactionRequest("default", "comp3", CompactionType.MAJOR);
  txnHandler.compact(rqst);
  runWorker(conf);
  ShowCompactRequest scRqst = new ShowCompactRequest();
  List<ShowCompactResponseElement> compacts = txnHandler.showCompact(scRqst).getCompacts();
  assertEquals(1, compacts.size());
  assertEquals(TxnStore.CLEANING_RESPONSE, compacts.get(0).getState());
  runCleaner(conf);
  compacts = txnHandler.showCompact(scRqst).getCompacts();
  assertEquals(1, compacts.size());
  assertEquals(TxnStore.SUCCEEDED_RESPONSE, compacts.get(0).getState());
  // Check compacted content and file structure.
  table = hmsClient.getTable("default", "comp3");
  List<String> rs = execSelectAndDumpData("select * from comp3", driver, "select");
  assertEquals(9, rs.size());
  assertEquals(3, rs.stream().filter(p -> "1111111111111".equals(p)).count());
  assertEquals(3, rs.stream().filter(p -> "2222222222222".equals(p)).count());
  assertEquals(3, rs.stream().filter(p -> "3333333333333".equals(p)).count());
  FileStatus[] files = fs.listStatus(new Path(table.getSd().getLocation()));
  // base dir
  assertEquals(1, files.length);
  assertEquals("base_0000002_v0000012", files[0].getPath().getName());
  files = fs.listStatus(files[0].getPath(), AcidUtils.bucketFileFilter);
  // files
  assertEquals(2, files.length);
  Arrays.stream(files).filter(p -> "bucket_00000".equals(p.getPath().getName())).count();
  Arrays.stream(files).filter(p -> "bucket_00001".equals(p.getPath().getName())).count();
  // Another insert into the newly compacted table.
  executeStatementOnDriver("insert into comp3 values ('4444444444444')", driver);
  // Compact with extra row too.
  txnHandler.compact(rqst);
  runWorker(conf);
  compacts = txnHandler.showCompact(scRqst).getCompacts();
  assertEquals(2, compacts.size());
  assertEquals(TxnStore.CLEANING_RESPONSE, compacts.get(0).getState());
  runCleaner(conf);
  compacts = txnHandler.showCompact(scRqst).getCompacts();
  assertEquals(2, compacts.size());
  assertEquals(TxnStore.SUCCEEDED_RESPONSE, compacts.get(0).getState());
  // Check compacted content and file structure.
  rs = execSelectAndDumpData("select * from comp3", driver, "select");
  assertEquals(10, rs.size());
  assertEquals(3, rs.stream().filter(p -> "1111111111111".equals(p)).count());
  assertEquals(3, rs.stream().filter(p -> "2222222222222".equals(p)).count());
  assertEquals(3, rs.stream().filter(p -> "3333333333333".equals(p)).count());
  assertEquals(1, rs.stream().filter(p -> "4444444444444".equals(p)).count());
  files = fs.listStatus(new Path(table.getSd().getLocation()));
  // base dir
  assertEquals(1, files.length);
  assertEquals("base_0000004_v0000016", files[0].getPath().getName());
  files = fs.listStatus(files[0].getPath(), AcidUtils.bucketFileFilter);
  // files
  assertEquals(2, files.length);
  Arrays.stream(files).filter(p -> "bucket_00000".equals(p.getPath().getName())).count();
  Arrays.stream(files).filter(p -> "bucket_00001".equals(p.getPath().getName())).count();
}
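Note that the two Arrays.stream(...).count() calls at the end of each verification block compute counts that are never asserted. A tightened variant of that check, shown here as a sketch rather than the test's actual code, would assert that both expected bucket files are present in the compacted base directory (it assumes the same `files` array plus the usual java.util.stream and JUnit imports):

// Sketch only (not the test's code): assert both bucket files exist in the compacted base.
Set<String> bucketNames = Arrays.stream(files)
    .map(f -> f.getPath().getName())
    .collect(Collectors.toSet());
assertTrue(bucketNames.contains("bucket_00000"));
assertTrue(bucketNames.contains("bucket_00001"));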
Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
The class TestCompactor, method schemaEvolutionAddColDynamicPartitioningInsert.
/**
* Simple schema evolution add columns with partitioning.
*
* @throws Exception
*/
@Test
public void schemaEvolutionAddColDynamicPartitioningInsert() throws Exception {
  String tblName = "dpct";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)" + // currently ACID requires table to be bucketed
      " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  // First INSERT round.
  executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + "'today'), (2, 'wilma', 'yesterday')", driver);
  // ALTER TABLE ... ADD COLUMNS
  executeStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(c int)", driver);
  // Validate there is an added NULL for column c.
  executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
  ArrayList<String> valuesReadFromHiveDriver = new ArrayList<String>();
  driver.getResults(valuesReadFromHiveDriver);
  Assert.assertEquals(2, valuesReadFromHiveDriver.size());
  Assert.assertEquals("1\tfred\tNULL\ttoday", valuesReadFromHiveDriver.get(0));
  Assert.assertEquals("2\twilma\tNULL\tyesterday", valuesReadFromHiveDriver.get(1));
  // Second INSERT round with new inserts into previously existing partition 'yesterday'.
  executeStatementOnDriver("insert into " + tblName + " partition (ds) values " + "(3, 'mark', 1900, 'soon'), (4, 'douglas', 1901, 'last_century'), " + "(5, 'doc', 1902, 'yesterday')", driver);
  // Validate the new insertions for column c.
  executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
  valuesReadFromHiveDriver = new ArrayList<String>();
  driver.getResults(valuesReadFromHiveDriver);
  Assert.assertEquals(5, valuesReadFromHiveDriver.size());
  Assert.assertEquals("1\tfred\tNULL\ttoday", valuesReadFromHiveDriver.get(0));
  Assert.assertEquals("2\twilma\tNULL\tyesterday", valuesReadFromHiveDriver.get(1));
  Assert.assertEquals("3\tmark\t1900\tsoon", valuesReadFromHiveDriver.get(2));
  Assert.assertEquals("4\tdouglas\t1901\tlast_century", valuesReadFromHiveDriver.get(3));
  Assert.assertEquals("5\tdoc\t1902\tyesterday", valuesReadFromHiveDriver.get(4));
  conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0);
  runInitiator(conf);
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  Assert.assertEquals(4, compacts.size());
  SortedSet<String> partNames = new TreeSet<String>();
  verifyCompactions(compacts, partNames, tblName);
  List<String> names = new ArrayList<String>(partNames);
  Assert.assertEquals("ds=last_century", names.get(0));
  Assert.assertEquals("ds=soon", names.get(1));
  Assert.assertEquals("ds=today", names.get(2));
  Assert.assertEquals("ds=yesterday", names.get(3));
  // Validate after compaction.
  executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
  valuesReadFromHiveDriver = new ArrayList<String>();
  driver.getResults(valuesReadFromHiveDriver);
  Assert.assertEquals(5, valuesReadFromHiveDriver.size());
  Assert.assertEquals("1\tfred\tNULL\ttoday", valuesReadFromHiveDriver.get(0));
  Assert.assertEquals("2\twilma\tNULL\tyesterday", valuesReadFromHiveDriver.get(1));
  Assert.assertEquals("3\tmark\t1900\tsoon", valuesReadFromHiveDriver.get(2));
  Assert.assertEquals("4\tdouglas\t1901\tlast_century", valuesReadFromHiveDriver.get(3));
  Assert.assertEquals("5\tdoc\t1902\tyesterday", valuesReadFromHiveDriver.get(4));
}
Use of org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement in project hive by apache.
The class TestCompactor, method verifyCompactions.
private void verifyCompactions(List<ShowCompactResponseElement> compacts, SortedSet<String> partNames, String tblName) {
  for (ShowCompactResponseElement compact : compacts) {
    Assert.assertEquals("default", compact.getDbname());
    Assert.assertEquals(tblName, compact.getTablename());
    Assert.assertEquals("initiated", compact.getState());
    partNames.add(compact.getPartitionname());
  }
}
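verifyCompactions fixes the expected state to "initiated" because the calling test only runs the initiator. A hypothetical generalization, not present in TestCompactor, would take the expected state as a parameter so the same check works after the worker or cleaner has run:

// Hypothetical variant of verifyCompactions (not in TestCompactor): assert every compaction
// for the given table reached the expected state.
private static void assertCompactionsInState(List<ShowCompactResponseElement> compacts, String tblName, String expectedState) {
  for (ShowCompactResponseElement compact : compacts) {
    if (tblName.equals(compact.getTablename())) {
      Assert.assertEquals(expectedState, compact.getState());
    }
  }
}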