Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.
In the class TestTxnCommands2, the method testInitiatorWithMultipleFailedCompactionsForVariousTblProperties:
void testInitiatorWithMultipleFailedCompactionsForVariousTblProperties(String tblProperties) throws Exception {
  String tblName = "hive12353";
  runStatementOnDriver("drop table if exists " + tblName);
  runStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
      " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC TBLPROPERTIES ( " + tblProperties + " )");
  hiveConf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 4);
  for (int i = 0; i < 5; i++) {
    //generate enough delta files so that Initiator can trigger auto compaction
    runStatementOnDriver("insert into " + tblName + " values(" + (i + 1) + ", 'foo'),(" + (i + 2) + ", 'bar'),(" + (i + 3) + ", 'baz')");
  }
  hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEFAILCOMPACTION, true);
  int numFailedCompactions = hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_INITIATOR_FAILED_THRESHOLD);
  TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
  AtomicBoolean stop = new AtomicBoolean(true);
  //create failed compactions
  for (int i = 0; i < numFailedCompactions; i++) {
    //each of these should fail
    txnHandler.compact(new CompactionRequest("default", tblName, CompactionType.MINOR));
    runWorker(hiveConf);
  }
  //this should not schedule a new compaction due to prior failures, but will create an Attempted entry
  Initiator init = new Initiator();
  init.setThreadId((int) init.getId());
  init.setHiveConf(hiveConf);
  init.init(stop, new AtomicBoolean());
  init.run();
  int numAttemptedCompactions = 1;
  checkCompactionState(new CompactionsByState(numAttemptedCompactions, numFailedCompactions, 0, 0, 0, 0,
      numFailedCompactions + numAttemptedCompactions), countCompacts(txnHandler));
  hiveConf.setTimeVar(HiveConf.ConfVars.COMPACTOR_HISTORY_REAPER_INTERVAL, 10, TimeUnit.MILLISECONDS);
  AcidCompactionHistoryService compactionHistoryService = new AcidCompactionHistoryService();
  //should not remove anything from history
  runHouseKeeperService(compactionHistoryService, hiveConf);
  checkCompactionState(new CompactionsByState(numAttemptedCompactions, numFailedCompactions, 0, 0, 0, 0,
      numFailedCompactions + numAttemptedCompactions), countCompacts(txnHandler));
  txnHandler.compact(new CompactionRequest("default", tblName, CompactionType.MAJOR)); //will fail
  runWorker(hiveConf);
  txnHandler.compact(new CompactionRequest("default", tblName, CompactionType.MINOR)); //will fail
  runWorker(hiveConf);
  init.run();
  numAttemptedCompactions++;
  init.run();
  numAttemptedCompactions++;
  checkCompactionState(new CompactionsByState(numAttemptedCompactions, numFailedCompactions + 2, 0, 0, 0, 0,
      numFailedCompactions + 2 + numAttemptedCompactions), countCompacts(txnHandler));
  //should remove history so that only COMPACTOR_HISTORY_RETENTION_FAILED failed compacts are left
  //(and no others, since we only have failed ones here)
  runHouseKeeperService(compactionHistoryService, hiveConf);
  checkCompactionState(new CompactionsByState(hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED),
      hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED), 0, 0, 0, 0,
      hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED)
          + hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED)), countCompacts(txnHandler));
  hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEFAILCOMPACTION, false);
  txnHandler.compact(new CompactionRequest("default", tblName, CompactionType.MINOR));
  //at this point "show compactions" should have (COMPACTOR_HISTORY_RETENTION_FAILED) failed + 1 initiated (explicitly by user)
  checkCompactionState(new CompactionsByState(hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED),
      hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED), 1, 0, 0, 0,
      hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED)
          + hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED) + 1), countCompacts(txnHandler));
  //will succeed and transition to Initiated->Working->Ready for Cleaning
  runWorker(hiveConf);
  checkCompactionState(new CompactionsByState(hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED),
      hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED), 0, 1, 0, 0,
      hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED)
          + hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED) + 1), countCompacts(txnHandler));
  // transition to Success state
  runCleaner(hiveConf);
  //should not purge anything as all items are within retention sizes
  runHouseKeeperService(compactionHistoryService, hiveConf);
  checkCompactionState(new CompactionsByState(hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED),
      hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED), 0, 0, 1, 0,
      hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED)
          + hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED) + 1), countCompacts(txnHandler));
}
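A reading aid for the long checkCompactionState assertions above: judging by how the counts change through the test, the positional arguments to CompactionsByState count compactions that are attempted, failed, initiated, ready for cleaning, and succeeded (the third, fourth, and fifth positions become 1 after the explicit compaction request, the successful worker run, and the cleaner run, respectively), with the final argument being the expected total. This mapping is inferred from the assertions themselves, not from the CompactionsByState class.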
Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.
In the class Initiator, the method requestCompaction:
private void requestCompaction(CompactionInfo ci, String runAs, CompactionType type) throws MetaException {
  CompactionRequest rqst = new CompactionRequest(ci.dbname, ci.tableName, type);
  if (ci.partName != null) {
    rqst.setPartitionname(ci.partName);
  }
  rqst.setRunas(runAs);
  LOG.info("Requesting compaction: " + rqst);
  CompactionResponse resp = txnHandler.compact(rqst);
  if (resp.isAccepted()) {
    ci.id = resp.getId();
  }
}
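For comparison, the same request path can be exercised directly against the transaction store. The sketch below is illustrative only: it uses nothing beyond the API surface already shown in these snippets (the CompactionRequest constructor, setPartitionname, setRunas, and TxnStore.compact), and the database, table, partition, and user names are placeholder values.

// Hypothetical helper mirroring Initiator.requestCompaction above: build a request,
// submit it to the transaction store, and return the queue id if it was accepted.
private long submitMinorCompaction(TxnStore txnHandler) throws MetaException {
  CompactionRequest rqst = new CompactionRequest("default", "hive12353", CompactionType.MINOR);
  // Partition name and run-as user are optional, as in requestCompaction above
  rqst.setPartitionname("ds=2017-01-01");
  rqst.setRunas("hive");
  CompactionResponse resp = txnHandler.compact(rqst);
  // getId() is only meaningful when the request was accepted into the queue
  return resp.isAccepted() ? resp.getId() : -1L;
}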
Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.
In the class TestCompactor, the method minorCompactWhileStreaming:
@Test
public void minorCompactWhileStreaming() throws Exception {
  String dbName = "default";
  String tblName = "cws";
  List<String> colNames = Arrays.asList("a", "b");
  String columnNamesProperty = "a,b";
  String columnTypesProperty = "int:string";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
      " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
  DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
  StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  try {
    // Write a couple of batches
    for (int i = 0; i < 2; i++) {
      writeBatch(connection, writer, false);
    }
    // Start a third batch, but don't close it.
    writeBatch(connection, writer, true);
    // Now, compact
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setHiveConf(conf);
    AtomicBoolean stop = new AtomicBoolean(true);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
    // Find the location of the table
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tblName);
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
    String[] names = new String[stat.length];
    Path resultFile = null;
    for (int i = 0; i < names.length; i++) {
      names[i] = stat[i].getPath().getName();
      if (names[i].equals("delta_0000001_0000004")) {
        resultFile = stat[i].getPath();
      }
    }
    Arrays.sort(names);
    String[] expected = new String[] { "delta_0000001_0000002", "delta_0000001_0000004", "delta_0000003_0000004", "delta_0000005_0000006" };
    if (!Arrays.deepEquals(expected, names)) {
      Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names));
    }
    checkExpectedTxnsPresent(null, new Path[] { resultFile }, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
  } finally {
    connection.close();
  }
}
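The assertions above lean on the compactor's delta directory naming convention, delta_<min id>_<max id> with zero-padded ids: delta_0000001_0000004 is the minor-compacted output covering the two closed batches, while delta_0000005_0000006 from the still-open batch is left untouched. As a small, hypothetical illustration of that convention only (the helper name is invented here):

// Illustrative only: extract the id range from a delta directory name such as
// "delta_0000001_0000004"; the zero-padding keeps lexicographic and numeric order aligned.
private static long[] parseDeltaRange(String dirName) {
  String[] parts = dirName.substring("delta_".length()).split("_");
  return new long[] { Long.parseLong(parts[0]), Long.parseLong(parts[1]) };
}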
Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.
In the class TestCompactor, the method majorCompactWhileStreaming:
@Test
public void majorCompactWhileStreaming() throws Exception {
  String dbName = "default";
  String tblName = "cws";
  List<String> colNames = Arrays.asList("a", "b");
  String columnNamesProperty = "a,b";
  String columnTypesProperty = "int:string";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
      " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true') ", driver);
  HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
  DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
  StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  try {
    // Write a couple of batches
    for (int i = 0; i < 2; i++) {
      writeBatch(connection, writer, false);
    }
    // Start a third batch, but don't close it.
    writeBatch(connection, writer, true);
    // Now, compact
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MAJOR));
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setHiveConf(conf);
    AtomicBoolean stop = new AtomicBoolean(true);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
    // Find the location of the table
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tblName);
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.baseFileFilter);
    if (1 != stat.length) {
      Assert.fail("Expecting 1 file \"base_0000004\" and found " + stat.length + " files " + Arrays.toString(stat));
    }
    String name = stat[0].getPath().getName();
    Assert.assertEquals("base_0000004", name);
    checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
  } finally {
    connection.close();
  }
}
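Unlike the minor case above, a major compaction rewrites everything into a single base directory, which is why this test lists with AcidUtils.baseFileFilter and expects exactly one base_0000004 directory. A minimal, hypothetical classifier for the directory names that appear in these tests (the helper name is invented here):

// Illustrative only: classify compactor output directories by their name prefix.
// Major compaction produces base_<id>; minor compaction consolidates deltas into
// delta_<min>_<max> and, for split-update tables, delete_delta_<min>_<max>.
private static String classifyAcidDir(String dirName) {
  if (dirName.startsWith("base_")) {
    return "base (major compaction output)";
  } else if (dirName.startsWith("delete_delta_")) {
    return "delete delta";
  } else if (dirName.startsWith("delta_")) {
    return "delta";
  }
  return "other";
}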
Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.
In the class TestCompactor, the method minorCompactWhileStreamingWithSplitUpdate:
@Test
public void minorCompactWhileStreamingWithSplitUpdate() throws Exception {
  String dbName = "default";
  String tblName = "cws";
  List<String> colNames = Arrays.asList("a", "b");
  String columnNamesProperty = "a,b";
  String columnTypesProperty = "int:string";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
      " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true'," + "'transactional_properties'='default')", driver);
  HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
  DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
  StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  try {
    // Write a couple of batches
    for (int i = 0; i < 2; i++) {
      writeBatch(connection, writer, false);
    }
    // Start a third batch, but don't close it.
    writeBatch(connection, writer, true);
    // Now, compact
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setHiveConf(conf);
    AtomicBoolean stop = new AtomicBoolean(true);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
    // Find the location of the table
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tblName);
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
    String[] names = new String[stat.length];
    Path resultFile = null;
    for (int i = 0; i < names.length; i++) {
      names[i] = stat[i].getPath().getName();
      if (names[i].equals("delta_0000001_0000004")) {
        resultFile = stat[i].getPath();
      }
    }
    Arrays.sort(names);
    String[] expected = new String[] { "delta_0000001_0000002", "delta_0000001_0000004", "delta_0000003_0000004", "delta_0000005_0000006" };
    if (!Arrays.deepEquals(expected, names)) {
      Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names));
    }
    checkExpectedTxnsPresent(null, new Path[] { resultFile }, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
    // Verify that we also have the correct set of delete_deltas
    FileStatus[] deleteDeltaStat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deleteEventDeltaDirFilter);
    String[] deleteDeltas = new String[deleteDeltaStat.length];
    Path minorCompactedDeleteDelta = null;
    for (int i = 0; i < deleteDeltas.length; i++) {
      deleteDeltas[i] = deleteDeltaStat[i].getPath().getName();
      if (deleteDeltas[i].equals("delete_delta_0000001_0000004")) {
        minorCompactedDeleteDelta = deleteDeltaStat[i].getPath();
      }
    }
    Arrays.sort(deleteDeltas);
    String[] expectedDeleteDeltas = new String[] { "delete_delta_0000001_0000004" };
    if (!Arrays.deepEquals(expectedDeleteDeltas, deleteDeltas)) {
      Assert.fail("Expected: " + Arrays.toString(expectedDeleteDeltas) + ", found: " + Arrays.toString(deleteDeltas));
    }
    // There should be no rows in the delete_delta because there have been no delete events.
    checkExpectedTxnsPresent(null, new Path[] { minorCompactedDeleteDelta }, columnNamesProperty, columnTypesProperty, 0, 0L, 0L);
  } finally {
    connection.close();
  }
}
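A short note on the split-update variant: with 'transactional_properties'='default' the table uses the split-update layout, in which delete events are written to separate delete_delta directories. Minor compaction consolidates those directories as well, which is why this test expects a single delete_delta_0000001_0000004 alongside the compacted insert deltas, and why the final check expects it to contain no rows, since only inserts were streamed.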