Use of org.apache.hive.streaming.HiveStreamingConnection in project hive by apache.
The class TestCompactor, method testHeartbeatShutdownOnFailedCompaction.
@Test
public void testHeartbeatShutdownOnFailedCompaction() throws Exception {
    String dbName = "default";
    String tblName = "compaction_test";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
        " PARTITIONED BY(bkt INT)" + // currently ACID requires table to be bucketed
        " CLUSTERED BY(a) INTO 4 BUCKETS" +
        " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
        .withFieldDelimiter(',')
        .build();
    HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
        .withDatabase(dbName)
        .withTable(tblName)
        .withStaticPartitionValues(Arrays.asList("0"))
        .withAgentInfo("UT_" + Thread.currentThread().getName())
        .withHiveConf(conf)
        .withRecordWriter(writer)
        .connect();
    connection.beginTransaction();
    connection.write("55, 'London'".getBytes());
    connection.commitTransaction();
    connection.beginTransaction();
    connection.write("56, 'Paris'".getBytes());
    connection.commitTransaction();
    connection.close();
    executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" +
        " values(57, 'Budapest')", driver);
    executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" +
        " values(58, 'Milano')", driver);
    execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    // Commit will throw an exception
    IMetaStoreClient mockedClient = Mockito.spy(new HiveMetaStoreClient(conf));
    doThrow(new RuntimeException("Simulating RuntimeException from CompactionTxn.commit"))
        .when(mockedClient).commitTxn(Mockito.anyLong());
    doAnswer(invocation -> {
        Object o = invocation.callRealMethod();
        // Check that the heartbeater is running while the compaction txn is open
        Assert.assertTrue(Thread.getAllStackTraces().keySet().stream()
            .anyMatch(k -> k.getName().contains("CompactionTxn Heartbeater")));
        return o;
    }).when(mockedClient).openTxn(any(), any());
    // Do a major compaction
    CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
    rqst.setPartitionname("bkt=0");
    txnHandler.compact(rqst);
    Worker worker = Mockito.spy(new Worker());
    worker.setThreadId((int) worker.getId());
    worker.setConf(conf);
    worker.init(new AtomicBoolean(true));
    FieldSetter.setField(worker, RemoteCompactorThread.class.getDeclaredField("msc"), mockedClient);
    worker.run();
    // Check that the transaction was opened
    verify(mockedClient, times(1)).openTxn(any(), any());
    // Check that the heartbeater is properly terminated after the failed commit
    Assert.assertTrue(Thread.getAllStackTraces().keySet().stream()
        .noneMatch(k -> k.getName().contains("CompactionTxn Heartbeater")));
}
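The next three tests call prepareTableAndConnection, a private TestCompactor helper that is not shown on this page. A minimal sketch, assuming it mirrors the table DDL and builder calls used in testSkippedCompactionCleanerKeepsAborted further down (the exact body in the Hive source may differ):

private HiveStreamingConnection prepareTableAndConnection(String dbName, String tblName, int batchSize) throws Exception {
    String agentInfo = "UT_" + Thread.currentThread().getName();
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    // Transactional ORC table with the (b STRING, a INT partition) layout the tests assume
    executeStatementOnDriver("CREATE TABLE " + tblName + "(b STRING) " +
        " PARTITIONED BY (a INT) STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
        .withFieldDelimiter(',')
        .build();
    // Streaming connection; batchSize controls how many transactions share one delta directory
    return HiveStreamingConnection.newBuilder()
        .withDatabase(dbName)
        .withTable(tblName)
        .withAgentInfo(agentInfo)
        .withHiveConf(conf)
        .withRecordWriter(writer)
        .withStreamingOptimizations(true)
        .withTransactionBatchSize(batchSize)
        .connect();
}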
Use of org.apache.hive.streaming.HiveStreamingConnection in project hive by apache.
The class TestCompactor, method testCleanDynPartAbortNoDataLoss.
@Test
public void testCleanDynPartAbortNoDataLoss() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    HiveStreamingConnection connection = prepareTableAndConnection(dbName, tblName, 1);
    executeStatementOnDriver("insert into " + tblName + " partition (a) values (1, '1')", driver);
    executeStatementOnDriver("update " + tblName + " set b='2' where a=1", driver);
    executeStatementOnDriver("insert into " + tblName + " partition (a) values (2, '2')", driver);
    executeStatementOnDriver("update " + tblName + " set b='3' where a=2", driver);
    connection.beginTransaction();
    connection.write("1,1".getBytes());
    connection.write("2,2".getBytes());
    connection.abortTransaction();
    executeStatementOnDriver("insert into " + tblName + " partition (a) values (3, '3')", driver);
    executeStatementOnDriver("update " + tblName + " set b='4' where a=3", driver);
    conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0);
    runInitiator(conf);
    int count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from COMPACTION_QUEUE");
    Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from COMPACTION_QUEUE"), 4, count);
    runWorker(conf);
    runWorker(conf);
    runWorker(conf);
    runWorker(conf);
    // Cleaning should happen in threads concurrently for the minor compaction and the clean abort one.
    runCleaner(conf);
    count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS");
    Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from TXN_COMPONENTS"), 0, count);
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Partition p1 = msClient.getPartition(dbName, tblName, "a=1");
    Partition p2 = msClient.getPartition(dbName, tblName, "a=2");
    Partition p3 = msClient.getPartition(dbName, tblName, "a=3");
    msClient.close();
    FileSystem fs = FileSystem.get(conf);
    verifyDeltaCount(p1.getSd(), fs, 0);
    verifyHasBase(p1.getSd(), fs, "base_0000002_v0000010");
    verifyDeltaCount(p2.getSd(), fs, 0);
    verifyHasBase(p2.getSd(), fs, "base_0000004_v0000012");
    verifyDeltaCount(p3.getSd(), fs, 0);
    verifyHasBase(p3.getSd(), fs, "base_0000007_v0000014");
}
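verifyDeltaCount and verifyHasBase are TestCompactor helpers not shown on this page. A plausible sketch, assuming they list the partition location with the AcidUtils path filters (the real helpers may differ in detail):

private void verifyDeltaCount(StorageDescriptor sd, FileSystem fs, int expected) throws Exception {
    // Count the delta_* directories left in the partition location
    FileStatus[] deltas = fs.listStatus(new Path(sd.getLocation()), AcidUtils.deltaFileFilter);
    Assert.assertEquals(expected, deltas.length);
}

private void verifyHasBase(StorageDescriptor sd, FileSystem fs, String baseName) throws Exception {
    // Assert that major compaction produced the expected base_<writeId>_v<visibilityTxnId> directory
    FileStatus[] bases = fs.listStatus(new Path(sd.getLocation()), AcidUtils.baseFileFilter);
    Assert.assertTrue(Arrays.stream(bases).anyMatch(f -> f.getPath().getName().equals(baseName)));
}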
Use of org.apache.hive.streaming.HiveStreamingConnection in project hive by apache.
The class TestCompactor, method testCleanAbortCompactAfter2ndCommitAbort.
@Test
public void testCleanAbortCompactAfter2ndCommitAbort() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    HiveStreamingConnection connection = prepareTableAndConnection(dbName, tblName, 2);
    connection.beginTransaction();
    connection.write("1,1".getBytes());
    connection.write("2,2".getBytes());
    connection.commitTransaction();
    connection.beginTransaction();
    connection.write("3,2".getBytes());
    connection.write("3,3".getBytes());
    connection.abortTransaction();
    assertAndCompactCleanAbort(dbName, tblName, true, true);
    connection.close();
}
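Not part of the original test, but a handy way to inspect what assertAndCompactCleanAbort (another helper not shown here) verified: the metastore's SHOW COMPACTIONS API exposes the state of each compaction queue entry. An illustrative add-on, reusing the conf available in these tests:

// Illustrative only: dump the compaction queue state after the clean-abort run
ShowCompactResponse rsp = TxnUtils.getTxnStore(conf).showCompact(new ShowCompactRequest());
for (ShowCompactResponseElement e : rsp.getCompacts()) {
    System.out.println(e.getDbname() + "." + e.getTablename() + "/" + e.getPartitionname() + " -> " + e.getState());
}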
Use of org.apache.hive.streaming.HiveStreamingConnection in project hive by apache.
The class TestCompactor, method testCleanAbortAndMinorCompact.
@Test
public void testCleanAbortAndMinorCompact() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    HiveStreamingConnection connection = prepareTableAndConnection(dbName, tblName, 1);
    connection.beginTransaction();
    connection.write("1,1".getBytes());
    connection.write("2,2".getBytes());
    connection.abortTransaction();
    executeStatementOnDriver("insert into " + tblName + " partition (a) values (1, '1')", driver);
    executeStatementOnDriver("delete from " + tblName + " where b=1", driver);
    conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0);
    runInitiator(conf);
    int count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from COMPACTION_QUEUE");
    Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from COMPACTION_QUEUE"), 2, count);
    runWorker(conf);
    runWorker(conf);
    // Cleaning should happen in threads concurrently for the minor compaction and the clean abort one.
    runCleaner(conf);
    count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from COMPACTION_QUEUE");
    Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from COMPACTION_QUEUE"), 0, count);
    count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS");
    Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from TXN_COMPONENTS"), 0, count);
}
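runInitiator, runWorker, and runCleaner are test-harness helpers inherited from the compactor test base classes and are not shown on this page. Judging from the explicit Worker setup in testHeartbeatShutdownOnFailedCompaction above, a minimal sketch of the worker variant could be:

static void runWorker(HiveConf hiveConf) throws Exception {
    Worker worker = new Worker();
    worker.setThreadId((int) worker.getId());
    worker.setConf(hiveConf);
    worker.init(new AtomicBoolean(true)); // stop flag already set: process the queue once, then exit
    worker.run();
}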
Use of org.apache.hive.streaming.HiveStreamingConnection in project hive by apache.
The class TestCompactor, method testSkippedCompactionCleanerKeepsAborted.
/**
 * There is a special case handled in the compaction Worker ({@link Worker#isEnoughToCompact})
 * that skips compaction if there is only one valid delta. Such a compaction is still
 * cleaned up if there are aborted directories. However, if no compaction was done,
 * deltas containing mixed aborted/committed writes from streaming cannot be cleaned,
 * and the metadata belonging to those aborted transactions cannot be removed.
 * @throws Exception ex
 */
@Test
public void testSkippedCompactionCleanerKeepsAborted() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    String agentInfo = "UT_" + Thread.currentThread().getName();
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(b STRING) " +
        " PARTITIONED BY (a INT) STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
    executeStatementOnDriver("alter table " + tblName + " add partition(a=1)", driver);
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
        .withFieldDelimiter(',')
        .build();
    // Create initial aborted txn
    HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
        .withDatabase(dbName)
        .withTable(tblName)
        .withStaticPartitionValues(Collections.singletonList("1"))
        .withAgentInfo(agentInfo)
        .withHiveConf(conf)
        .withRecordWriter(writer)
        .withStreamingOptimizations(true)
        .withTransactionBatchSize(1)
        .connect();
    connection.beginTransaction();
    connection.write("3,1".getBytes());
    connection.write("4,1".getBytes());
    connection.abortTransaction();
    connection.close();
    // Create a sequence of commit, abort, commit in the same delta folder
    connection = HiveStreamingConnection.newBuilder()
        .withDatabase(dbName)
        .withTable(tblName)
        .withStaticPartitionValues(Collections.singletonList("1"))
        .withAgentInfo(agentInfo)
        .withHiveConf(conf)
        .withRecordWriter(writer)
        .withStreamingOptimizations(true)
        .withTransactionBatchSize(3)
        .connect();
    connection.beginTransaction();
    connection.write("1,1".getBytes());
    connection.write("2,1".getBytes());
    connection.commitTransaction();
    connection.beginTransaction();
    connection.write("3,1".getBytes());
    connection.write("4,1".getBytes());
    connection.abortTransaction();
    connection.beginTransaction();
    connection.write("5,1".getBytes());
    connection.write("6,1".getBytes());
    connection.commitTransaction();
    connection.close();
    // Check that aborted rows are not read back
    driver.run("select * from cws");
    List<String> res = new ArrayList<>();
    driver.getFetchTask().fetch(res);
    Assert.assertEquals(4, res.size());
    int count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS");
    Assert.assertEquals("There should be 2 records for the two aborted transactions", 2, count);
    // Start a compaction that will be skipped, because only one valid delta is there
    driver.run("alter table cws partition(a='1') compact 'minor'");
    runWorker(conf);
    // Cleaner should not delete info about the second aborted txn
    runCleaner(conf);
    txnHandler.cleanEmptyAbortedAndCommittedTxns();
    count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS");
    Assert.assertEquals("There should be 1 record for the second aborted transaction", 1, count);
    driver.run("select * from cws");
    res.clear();
    driver.getFetchTask().fetch(res);
    Assert.assertEquals(4, res.size());
}
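As a follow-up check (not in the original test), the surviving metadata row can be dumped directly to confirm it belongs to the aborted transaction sharing a delta with committed writes; queryToString is already used this way in the tests above, and the column names come from the metastore transaction schema:

// Illustrative only: show which transaction's entry the cleaner had to keep
System.out.println(TestTxnDbUtil.queryToString(conf,
    "select TC_TXNID, TC_DATABASE, TC_TABLE, TC_PARTITION from TXN_COMPONENTS"));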