Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.
Class TestCompactor, method majorCompactWhileStreamingForSplitUpdate.
@Test
public void majorCompactWhileStreamingForSplitUpdate() throws Exception {
  String dbName = "default";
  String tblName = "cws";
  List<String> colNames = Arrays.asList("a", "b");
  String columnNamesProperty = "a,b";
  String columnTypesProperty = "int:string";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires the table to be bucketed
      " CLUSTERED BY(a) INTO 2 BUCKETS" +
      " STORED AS ORC TBLPROPERTIES ('transactional'='true', " +
      "'transactional_properties'='default') ", // this turns on split-update (U = D + I)
      driver);
  HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
  DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
  StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  try {
    // Write a couple of batches.
    for (int i = 0; i < 2; i++) {
      writeBatch(connection, writer, false);
    }
    // Start a third batch, but don't close it.
    writeBatch(connection, writer, true);
    // Now, compact.
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MAJOR));
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setHiveConf(conf);
    AtomicBoolean stop = new AtomicBoolean(true);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
    // Find the location of the table.
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tblName);
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.baseFileFilter);
    if (1 != stat.length) {
      Assert.fail("Expecting 1 file \"base_0000004\" and found " + stat.length + " files " + Arrays.toString(stat));
    }
    String name = stat[0].getPath().getName();
    // JUnit's assertEquals takes the expected value first; the original had the arguments reversed.
    Assert.assertEquals("base_0000004", name);
    checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
  } finally {
    connection.close();
  }
}
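The writeBatch helper called above is not shown in this excerpt. A minimal sketch of what such a helper could look like with the legacy hive-hcatalog-streaming API, assuming two transactions per batch and comma-delimited rows matching the DelimitedInputWriter above (the row values are illustrative):

  // Hypothetical reconstruction of the elided writeBatch(...) helper: write one
  // two-transaction batch; if closeEarly is true, commit only the first transaction
  // and leave the batch open so an uncommitted transaction exists during compaction.
  private void writeBatch(StreamingConnection connection, DelimitedInputWriter writer,
      boolean closeEarly) throws Exception {
    TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
    txnBatch.beginNextTransaction();
    txnBatch.write("50,Kiev".getBytes());
    txnBatch.write("51,St. Petersburg".getBytes());
    txnBatch.commit();
    if (!closeEarly) {
      txnBatch.beginNextTransaction();
      txnBatch.write("52,Tel Aviv".getBytes());
      txnBatch.commit();
      txnBatch.close();
    }
  }

Under that assumption, the two closed batches account for four committed transactions, which is why the major compaction produces a base directory named base_0000004.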
Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.
Class TestCompactor, method testMinorCompactionForSplitUpdateWithOnlyInserts.
@Test
public void testMinorCompactionForSplitUpdateWithOnlyInserts() throws Exception {
  String agentInfo = "UT_" + Thread.currentThread().getName();
  String dbName = "default";
  String tblName = "cws";
  List<String> colNames = Arrays.asList("a", "b");
  String columnNamesProperty = "a,b";
  String columnTypesProperty = "int:string";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires the table to be bucketed
      " CLUSTERED BY(a) INTO 1 BUCKETS" +
      " STORED AS ORC TBLPROPERTIES ('transactional'='true'," +
      "'transactional_properties'='default')", driver);
  // Insert some data -> this will generate only insert deltas and no delete deltas: delta_1_1
  executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(1, 'foo')", driver);
  // Insert some more data -> this will again generate only insert deltas and no delete deltas: delta_2_2
  executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(2, 'bar')", driver);
  // Now, compact.
  // One important thing to note in this test is that minor compaction always produces
  // a delta_x_y and a counterpart delete_delta_x_y, even when there are no delete events.
  // This choice was made to simplify processing in AcidUtils.getAcidState().
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
  Worker t = new Worker();
  t.setThreadId((int) t.getId());
  t.setHiveConf(conf);
  AtomicBoolean stop = new AtomicBoolean(true);
  AtomicBoolean looped = new AtomicBoolean();
  t.init(stop, looped);
  t.run();
  // Find the location of the table.
  IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
  Table table = msClient.getTable(dbName, tblName);
  FileSystem fs = FileSystem.get(conf);
  // Verify that we got the correct set of deltas.
  FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
  String[] deltas = new String[stat.length];
  Path minorCompactedDelta = null;
  for (int i = 0; i < deltas.length; i++) {
    deltas[i] = stat[i].getPath().getName();
    if (deltas[i].equals("delta_0000001_0000002")) {
      minorCompactedDelta = stat[i].getPath();
    }
  }
  Arrays.sort(deltas);
  String[] expectedDeltas = new String[] { "delta_0000001_0000001_0000", "delta_0000001_0000002", "delta_0000002_0000002_0000" };
  if (!Arrays.deepEquals(expectedDeltas, deltas)) {
    Assert.fail("Expected: " + Arrays.toString(expectedDeltas) + ", found: " + Arrays.toString(deltas));
  }
  checkExpectedTxnsPresent(null, new Path[] { minorCompactedDelta }, columnNamesProperty, columnTypesProperty, 0, 1L, 2L);
  // Verify that we got the correct set of delete_deltas.
  FileStatus[] deleteDeltaStat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deleteEventDeltaDirFilter);
  String[] deleteDeltas = new String[deleteDeltaStat.length];
  Path minorCompactedDeleteDelta = null;
  for (int i = 0; i < deleteDeltas.length; i++) {
    deleteDeltas[i] = deleteDeltaStat[i].getPath().getName();
    if (deleteDeltas[i].equals("delete_delta_0000001_0000002")) {
      minorCompactedDeleteDelta = deleteDeltaStat[i].getPath();
    }
  }
  Arrays.sort(deleteDeltas);
  String[] expectedDeleteDeltas = new String[] { "delete_delta_0000001_0000002" };
  if (!Arrays.deepEquals(expectedDeleteDeltas, deleteDeltas)) {
    Assert.fail("Expected: " + Arrays.toString(expectedDeleteDeltas) + ", found: " + Arrays.toString(deleteDeltas));
  }
  // There should be no rows in the delete_delta because there have been no delete events.
  checkExpectedTxnsPresent(null, new Path[] { minorCompactedDeleteDelta }, columnNamesProperty, columnTypesProperty, 0, 0L, 0L);
}
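The directory names asserted above encode the transaction range each delta covers: delta_<min>_<max> for insert events, delete_delta_<min>_<max> for delete events, with an optional statement-id suffix (the _0000 in delta_0000001_0000001_0000) on deltas written by a single statement. Hive's own parsing of these names lives in AcidUtils; the standalone sketch below is for exposition only and is not Hive's code:

  // Illustrative-only parsing of ACID delta directory names such as
  // "delta_0000001_0000002" or "delete_delta_0000001_0000002_0000".
  static long[] parseDeltaRange(String dirName) {
    String core = dirName.startsWith("delete_delta_")
        ? dirName.substring("delete_delta_".length())
        : dirName.substring("delta_".length());
    String[] parts = core.split("_");
    long minTxn = Long.parseLong(parts[0]);
    long maxTxn = Long.parseLong(parts[1]); // parts[2], if present, is the statement id
    return new long[] { minTxn, maxTxn };
  }

Read this way, the expected listing shows the two original single-transaction deltas plus the merged delta_0000001_0000002 produced by the minor compaction.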
Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.
Class TestCompactionTxnHandler, method testFindNextToCompact.
@Test
public void testFindNextToCompact() throws Exception {
  CompactionRequest rqst = new CompactionRequest("foo", "bar", CompactionType.MINOR);
  rqst.setPartitionname("ds=today");
  txnHandler.compact(rqst);
  long now = System.currentTimeMillis();
  CompactionInfo ci = txnHandler.findNextToCompact("fred");
  assertNotNull(ci);
  assertEquals("foo", ci.dbname);
  assertEquals("bar", ci.tableName);
  assertEquals("ds=today", ci.partName);
  assertEquals(CompactionType.MINOR, ci.type);
  assertNull(ci.runAs);
  // A second call returns null: the only queued request is already claimed by worker "fred".
  assertNull(txnHandler.findNextToCompact("fred"));
  txnHandler.setRunAs(ci.id, "bob");
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  assertEquals(1, compacts.size());
  ShowCompactResponseElement c = compacts.get(0);
  assertEquals("foo", c.getDbname());
  assertEquals("bar", c.getTablename());
  assertEquals("ds=today", c.getPartitionname());
  assertEquals(CompactionType.MINOR, c.getType());
  assertEquals("working", c.getState());
  assertTrue(c.getStart() - 5000 < now && c.getStart() + 5000 > now);
  assertEquals("fred", c.getWorkerid());
  assertEquals("bob", c.getRunAs());
}
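findNextToCompact is the claim step of the compactor's work loop: it atomically hands one "initiated" request to the named worker and flips it to "working". A simplified sketch of that loop under the TxnStore API exercised above; the real org.apache.hadoop.hive.ql.txn.compactor.Worker additionally resolves the run-as user, launches the compaction job, and heartbeats, and runCompactionJob below is a hypothetical stand-in:

  // Simplified claim-and-run loop over the compaction queue.
  void claimAndRun(TxnStore txnHandler, String workerId) throws Exception {
    CompactionInfo ci = txnHandler.findNextToCompact(workerId); // atomically claims one request
    if (ci == null) {
      return; // nothing queued, or everything is already claimed
    }
    try {
      runCompactionJob(ci); // hypothetical stand-in for the actual compaction work
      txnHandler.markCompacted(ci); // hand the entry off to the Cleaner
    } catch (Exception e) {
      txnHandler.markFailed(ci);
      throw e;
    }
  }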
Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.
Class TestCompactionTxnHandler, method testRevokeFromLocalWorkers.
@Test
public void testRevokeFromLocalWorkers() throws Exception {
  CompactionRequest rqst = new CompactionRequest("foo", "bar", CompactionType.MINOR);
  txnHandler.compact(rqst);
  rqst = new CompactionRequest("foo", "baz", CompactionType.MINOR);
  txnHandler.compact(rqst);
  rqst = new CompactionRequest("foo", "bazzoo", CompactionType.MINOR);
  txnHandler.compact(rqst);
  assertNotNull(txnHandler.findNextToCompact("fred-193892"));
  assertNotNull(txnHandler.findNextToCompact("bob-193892"));
  assertNotNull(txnHandler.findNextToCompact("fred-193893"));
  txnHandler.revokeFromLocalWorkers("fred");
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  assertEquals(3, compacts.size());
  boolean sawWorkingBob = false;
  int initiatedCount = 0;
  for (ShowCompactResponseElement c : compacts) {
    if (c.getState().equals("working")) {
      assertEquals("bob-193892", c.getWorkerid());
      sawWorkingBob = true;
    } else if (c.getState().equals("initiated")) {
      initiatedCount++;
    } else {
      fail("Unexpected state");
    }
  }
  assertTrue(sawWorkingBob);
  assertEquals(2, initiatedCount);
}
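The worker ids in this test follow the hostname-threadId shape the compactor uses, and revokeFromLocalWorkers("fred") matches on the hostname prefix: both of fred's claims return to "initiated" while bob's stays "working". A small sketch of how such an id might be constructed (the helper name is illustrative, not Hive's exact code):

  // Illustrative construction of a compactor worker id in the
  // "<hostname>-<threadId>" form that revokeFromLocalWorkers keys on.
  static String workerId(long threadId) throws java.net.UnknownHostException {
    String hostname = java.net.InetAddress.getLocalHost().getHostName();
    return hostname + "-" + threadId;
  }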
Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.
Class TestCompactionTxnHandler, method testRevokeTimedOutWorkers.
@Test
public void testRevokeTimedOutWorkers() throws Exception {
  CompactionRequest rqst = new CompactionRequest("foo", "bar", CompactionType.MINOR);
  txnHandler.compact(rqst);
  rqst = new CompactionRequest("foo", "baz", CompactionType.MINOR);
  txnHandler.compact(rqst);
  assertNotNull(txnHandler.findNextToCompact("fred-193892"));
  Thread.sleep(200);
  assertNotNull(txnHandler.findNextToCompact("fred-193892"));
  // Revoke any compaction claimed more than 100 ms ago; only the first claim is old
  // enough, so one entry goes back to "initiated" and the other stays "working".
  txnHandler.revokeTimedoutWorkers(100);
  ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
  List<ShowCompactResponseElement> compacts = rsp.getCompacts();
  assertEquals(2, compacts.size());
  boolean sawWorking = false, sawInitiated = false;
  for (ShowCompactResponseElement c : compacts) {
    if (c.getState().equals("working")) {
      sawWorking = true;
    } else if (c.getState().equals("initiated")) {
      sawInitiated = true;
    } else {
      fail("Unexpected state");
    }
  }
  assertTrue(sawWorking);
  assertTrue(sawInitiated);
}
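In a real deployment this revocation is not invoked by hand; a housekeeping thread runs it periodically so that requests claimed by crashed or stuck workers are eventually re-queued. A minimal sketch of such a loop, assuming a configured timeout and an in-scope txnHandler; the names and the one-day value below are illustrative, not Hive's actual housekeeping service:

  // Illustrative housekeeping loop: periodically return compactions claimed by
  // dead or stuck workers to the "initiated" state so another worker can take them.
  ScheduledExecutorService housekeeper = Executors.newSingleThreadScheduledExecutor();
  long workerTimeoutMs = 86400000L; // hypothetical configured timeout (1 day)
  housekeeper.scheduleAtFixedRate(() -> {
    try {
      txnHandler.revokeTimedoutWorkers(workerTimeoutMs);
    } catch (Exception e) {
      // log and keep the housekeeper alive
      e.printStackTrace();
    }
  }, workerTimeoutMs, workerTimeoutMs, TimeUnit.MILLISECONDS);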