Examples with HiveStreamingConnection - org.apache.hive.streaming.HiveStreamingConnection

Example 6 with HiveStreamingConnection

Use of org.apache.hive.streaming.HiveStreamingConnection in the Apache Hive project.

In class TestCompactor, method testHeartbeatShutdownOnFailedCompaction.

/**
 * Verifies that no "CompactionTxn Heartbeater" thread is left running after a compaction
 * whose transaction commit fails.
 *
 * <p>Two rows are streamed into partition {@code bkt=0}, a major compaction is requested for
 * that partition, and a {@link Worker} is run against a spied metastore client whose
 * {@code commitTxn} throws a {@link RuntimeException}.
 *
 * @throws Exception if table setup, streaming, or the worker run fails unexpectedly
 */
@Test
public void testHeartbeatShutdownOnFailedCompaction() throws Exception {
    String dbName = "default";
    String tblName = "compaction_test";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(bkt INT)" + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 4 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
        .withDatabase(dbName)
        .withTable(tblName)
        .withStaticPartitionValues(Arrays.asList("0"))
        .withAgentInfo("UT_" + Thread.currentThread().getName())
        .withHiveConf(conf)
        .withRecordWriter(writer)
        .connect();
    try {
        connection.beginTransaction();
        connection.write("55, 'London'".getBytes());
        connection.commitTransaction();
        connection.beginTransaction();
        connection.write("56, 'Paris'".getBytes());
        connection.commitTransaction();
    } finally {
        // Release the streaming connection even when a write or commit fails, so a broken
        // setup does not leak it into the rest of the test run.
        connection.close();
    }
    executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" + " values(57, 'Budapest')", driver);
    executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" + " values(58, 'Milano')", driver);
    execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    // Commit will throw an exception
    IMetaStoreClient mockedClient = Mockito.spy(new HiveMetaStoreClient(conf));
    doThrow(new RuntimeException("Simulating RuntimeException from CompactionTxn.commit")).when(mockedClient).commitTxn(Mockito.anyLong());
    doAnswer(invocation -> {
        Object o = invocation.callRealMethod();
        // Check if the heartbeating is running
        Assert.assertTrue(Thread.getAllStackTraces().keySet().stream().anyMatch(k -> k.getName().contains("CompactionTxn Heartbeater")));
        return o;
    }).when(mockedClient).openTxn(any(), any());
    // Do a major compaction
    CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
    rqst.setPartitionname("bkt=0");
    txnHandler.compact(rqst);
    Worker worker = Mockito.spy(new Worker());
    worker.setThreadId((int) worker.getId());
    worker.setConf(conf);
    worker.init(new AtomicBoolean(true));
    // Swap the worker's metastore client for the spy so the stubbed commitTxn/openTxn are used.
    FieldSetter.setField(worker, RemoteCompactorThread.class.getDeclaredField("msc"), mockedClient);
    worker.run();
    // Check if the transaction was opened
    verify(mockedClient, times(1)).openTxn(any(), any());
    // Check if the heartbeating is properly terminated
    Assert.assertTrue(Thread.getAllStackTraces().keySet().stream().noneMatch(k -> k.getName().contains("CompactionTxn Heartbeater")));
}

Also used : HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) DriverFactory(org.apache.hadoop.hive.ql.DriverFactory) OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Arrays(java.util.Arrays) SortedSet(java.util.SortedSet) TestTxnDbUtil(org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil) StreamingConnection(org.apache.hive.streaming.StreamingConnection) FileSystem(org.apache.hadoop.fs.FileSystem) HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) LoggerFactory(org.slf4j.LoggerFactory) Random(java.util.Random) FileStatus(org.apache.hadoop.fs.FileStatus) CompactionType(org.apache.hadoop.hive.metastore.api.CompactionType) TestTxnCommands2.runWorker(org.apache.hadoop.hive.ql.TestTxnCommands2.runWorker) Mockito.doThrow(org.mockito.Mockito.doThrow) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) OrcConf(org.apache.orc.OrcConf) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Map(java.util.Map) After(org.junit.After) Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.hive.ql.io.orc.Reader) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) FileUtil(org.apache.hadoop.fs.FileUtil) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) IDriver(org.apache.hadoop.hive.ql.IDriver) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) SessionState(org.apache.hadoop.hive.ql.session.SessionState) Retry(org.apache.hive.common.util.Retry) TxnUtils(org.apache.hadoop.hive.metastore.txn.TxnUtils) List(java.util.List) 
HCatUtil(org.apache.hive.hcatalog.common.HCatUtil) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) StrictDelimitedInputWriter(org.apache.hive.streaming.StrictDelimitedInputWriter) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) VISIBILITY_PATTERN(org.apache.hadoop.hive.common.AcidConstants.VISIBILITY_PATTERN) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) TestTxnCommands2.runCleaner(org.apache.hadoop.hive.ql.TestTxnCommands2.runCleaner) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) PathFilter(org.apache.hadoop.fs.PathFilter) TestTxnCommands2.runInitiator(org.apache.hadoop.hive.ql.TestTxnCommands2.runInitiator) HashMap(java.util.HashMap) Partition(org.apache.hadoop.hive.metastore.api.Partition) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Lists(com.google.common.collect.Lists) Constants(org.apache.hadoop.hive.conf.Constants) Before(org.junit.Before) Hive(org.apache.hadoop.hive.ql.metadata.Hive) StreamingException(org.apache.hive.streaming.StreamingException) Logger(org.slf4j.Logger) FileWriter(java.io.FileWriter) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Mockito.times(org.mockito.Mockito.times) IOException(java.io.IOException) Test(org.junit.Test) CliSessionState(org.apache.hadoop.hive.cli.CliSessionState) File(java.io.File) Table(org.apache.hadoop.hive.metastore.api.Table) Mockito.verify(org.mockito.Mockito.verify) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Rule(org.junit.Rule) FieldSetter(org.mockito.internal.util.reflection.FieldSetter) Assert(org.junit.Assert) Collections(java.util.Collections) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) Assert.assertEquals(org.junit.Assert.assertEquals) TemporaryFolder(org.junit.rules.TemporaryFolder) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) 
StrictDelimitedInputWriter(org.apache.hive.streaming.StrictDelimitedInputWriter) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TestTxnCommands2.runWorker(org.apache.hadoop.hive.ql.TestTxnCommands2.runWorker) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)

Example 7 with HiveStreamingConnection

Use of org.apache.hive.streaming.HiveStreamingConnection in the Apache Hive project.

In class TestCompactor, method testCleanDynPartAbortNoDataLoss.

/**
 * Mixes SQL inserts/updates on partitions {@code a=1}, {@code a=2} and {@code a=3} with one
 * aborted streaming transaction, then runs initiator, workers and cleaner and checks that
 * {@code TXN_COMPONENTS} is empty and every partition is left with its expected base
 * directory and no deltas.
 *
 * @throws Exception if any statement, compaction step, or metastore lookup fails
 */
@Test
public void testCleanDynPartAbortNoDataLoss() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    HiveStreamingConnection connection = prepareTableAndConnection(dbName, tblName, 1);
    try {
        executeStatementOnDriver("insert into " + tblName + " partition (a) values (1, '1')", driver);
        executeStatementOnDriver("update " + tblName + " set b='2' where a=1", driver);
        executeStatementOnDriver("insert into " + tblName + " partition (a) values (2, '2')", driver);
        executeStatementOnDriver("update " + tblName + " set b='3' where a=2", driver);
        connection.beginTransaction();
        connection.write("1,1".getBytes());
        connection.write("2,2".getBytes());
        connection.abortTransaction();
        executeStatementOnDriver("insert into " + tblName + " partition (a) values (3, '3')", driver);
        executeStatementOnDriver("update " + tblName + " set b='4' where a=3", driver);
        conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0);
        runInitiator(conf);
        int count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from COMPACTION_QUEUE");
        Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from COMPACTION_QUEUE"), 4, count);
        // Run the worker four times, matching the four queue entries asserted above.
        for (int i = 0; i < 4; i++) {
            runWorker(conf);
        }
        // Cleaning should happen in threads concurrently for the minor compaction and the clean abort one.
        runCleaner(conf);
        count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS");
        Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from TXN_COMPONENTS"), 0, count);
        Partition p1;
        Partition p2;
        Partition p3;
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        try {
            p1 = msClient.getPartition(dbName, tblName, "a=1");
            p2 = msClient.getPartition(dbName, tblName, "a=2");
            p3 = msClient.getPartition(dbName, tblName, "a=3");
        } finally {
            // Close the metastore client even if a partition lookup throws.
            msClient.close();
        }
        FileSystem fs = FileSystem.get(conf);
        verifyDeltaCount(p1.getSd(), fs, 0);
        verifyHasBase(p1.getSd(), fs, "base_0000002_v0000010");
        verifyDeltaCount(p2.getSd(), fs, 0);
        verifyHasBase(p2.getSd(), fs, "base_0000004_v0000012");
        verifyDeltaCount(p3.getSd(), fs, 0);
        verifyHasBase(p3.getSd(), fs, "base_0000007_v0000014");
    } finally {
        // The connection was never closed before. It is closed only after all checks so the
        // sequence of operations leading up to the assertions is unchanged.
        connection.close();
    }
}

Also used : HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) Partition(org.apache.hadoop.hive.metastore.api.Partition) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) FileSystem(org.apache.hadoop.fs.FileSystem) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)

Example 8 with HiveStreamingConnection

Use of org.apache.hive.streaming.HiveStreamingConnection in the Apache Hive project.

In class TestCompactor, method testCleanAbortCompactAfter2ndCommitAbort.

/**
 * Streams one committed transaction followed by one aborted transaction on the same
 * connection, then delegates verification to {@code assertAndCompactCleanAbort}.
 *
 * @throws Exception if streaming or the delegated compaction/cleanup checks fail
 */
@Test
public void testCleanAbortCompactAfter2ndCommitAbort() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    HiveStreamingConnection connection = prepareTableAndConnection(dbName, tblName, 2);
    try {
        // First transaction: committed.
        connection.beginTransaction();
        connection.write("1,1".getBytes());
        connection.write("2,2".getBytes());
        connection.commitTransaction();
        // Second transaction: aborted.
        connection.beginTransaction();
        connection.write("3,2".getBytes());
        connection.write("3,3".getBytes());
        connection.abortTransaction();
        // As in the original ordering, the checks run while the connection is still open.
        assertAndCompactCleanAbort(dbName, tblName, true, true);
    } finally {
        // Close the connection even when a write or one of the checks throws.
        connection.close();
    }
}

Also used : HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) Test(org.junit.Test)

Example 9 with HiveStreamingConnection

Use of org.apache.hive.streaming.HiveStreamingConnection in the Apache Hive project.

In class TestCompactor, method testCleanAbortAndMinorCompact.

/**
 * Aborts a streaming transaction, then performs a SQL insert and delete on the same table,
 * and checks that after initiator, two worker runs and the cleaner both
 * {@code COMPACTION_QUEUE} and {@code TXN_COMPONENTS} are empty.
 *
 * @throws Exception if streaming, a statement, or a compaction step fails
 */
@Test
public void testCleanAbortAndMinorCompact() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    HiveStreamingConnection connection = prepareTableAndConnection(dbName, tblName, 1);
    try {
        connection.beginTransaction();
        connection.write("1,1".getBytes());
        connection.write("2,2".getBytes());
        connection.abortTransaction();
        executeStatementOnDriver("insert into " + tblName + " partition (a) values (1, '1')", driver);
        executeStatementOnDriver("delete from " + tblName + " where b=1", driver);
        conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0);
        runInitiator(conf);
        int count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from COMPACTION_QUEUE");
        Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from COMPACTION_QUEUE"), 2, count);
        // Two worker runs, matching the two queue entries asserted above.
        runWorker(conf);
        runWorker(conf);
        // Cleaning should happen in threads concurrently for the minor compaction and the clean abort one.
        runCleaner(conf);
        count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from COMPACTION_QUEUE");
        Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from COMPACTION_QUEUE"), 0, count);
        count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS");
        Assert.assertEquals(TestTxnDbUtil.queryToString(conf, "select * from TXN_COMPONENTS"), 0, count);
    } finally {
        // The connection was never closed before. It is closed only after all checks so the
        // sequence of operations leading up to the assertions is unchanged.
        connection.close();
    }
}

Also used : HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) Test(org.junit.Test)

Example 10 with HiveStreamingConnection

Use of org.apache.hive.streaming.HiveStreamingConnection in the Apache Hive project.

In class TestCompactor, method testSkippedCompactionCleanerKeepsAborted.

/**
 * There is a special case handled in the compaction Worker that skips compaction
 * if there is only one valid delta. Such a compaction is still cleaned up if there are aborted directories.
 * However, if no compaction was done, deltas containing mixed aborted / committed writes from streaming
 * cannot be cleaned, and the metadata belonging to those aborted transactions cannot be removed.
 *
 * @throws Exception ex
 * @see Worker#isEnoughToCompact
 */
@Test
public void testSkippedCompactionCleanerKeepsAborted() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    String agentInfo = "UT_" + Thread.currentThread().getName();
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(b STRING) " + " PARTITIONED BY (a INT) STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    executeStatementOnDriver("alter table " + tblName + " add partition(a=1)", driver);
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    // Create initial aborted txn
    HiveStreamingConnection abortedOnly = HiveStreamingConnection.newBuilder()
        .withDatabase(dbName)
        .withTable(tblName)
        .withStaticPartitionValues(Collections.singletonList("1"))
        .withAgentInfo(agentInfo)
        .withHiveConf(conf)
        .withRecordWriter(writer)
        .withStreamingOptimizations(true)
        .withTransactionBatchSize(1)
        .connect();
    try {
        abortedOnly.beginTransaction();
        abortedOnly.write("3,1".getBytes());
        abortedOnly.write("4,1".getBytes());
        abortedOnly.abortTransaction();
    } finally {
        // Close the connection even when a write or abort throws.
        abortedOnly.close();
    }
    // Create a sequence of commit, abort, commit to the same delta folder
    HiveStreamingConnection mixed = HiveStreamingConnection.newBuilder()
        .withDatabase(dbName)
        .withTable(tblName)
        .withStaticPartitionValues(Collections.singletonList("1"))
        .withAgentInfo(agentInfo)
        .withHiveConf(conf)
        .withRecordWriter(writer)
        .withStreamingOptimizations(true)
        .withTransactionBatchSize(3)
        .connect();
    try {
        mixed.beginTransaction();
        mixed.write("1,1".getBytes());
        mixed.write("2,1".getBytes());
        mixed.commitTransaction();
        mixed.beginTransaction();
        mixed.write("3,1".getBytes());
        mixed.write("4,1".getBytes());
        mixed.abortTransaction();
        mixed.beginTransaction();
        mixed.write("5,1".getBytes());
        mixed.write("6,1".getBytes());
        mixed.commitTransaction();
    } finally {
        // Close the connection even when a write, commit or abort throws.
        mixed.close();
    }
    // Check that aborted are not read back: only the four committed rows are expected
    driver.run("select * from " + tblName);
    // NOTE(review): raw List kept on purpose; the element type expected by fetch is not visible here.
    List res = new ArrayList();
    driver.getFetchTask().fetch(res);
    Assert.assertEquals(4, res.size());
    int count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS");
    Assert.assertEquals("There should be 2 record for two aborted transaction", 2, count);
    // Start a compaction, that will be skipped, because only one valid delta is there
    driver.run("alter table " + tblName + " partition(a='1') compact 'minor'");
    runWorker(conf);
    // Cleaner should not delete info about aborted txn 2
    runCleaner(conf);
    txnHandler.cleanEmptyAbortedAndCommittedTxns();
    count = TestTxnDbUtil.countQueryAgent(conf, "select count(*) from TXN_COMPONENTS");
    Assert.assertEquals("There should be 1 record for the second aborted transaction", 1, count);
    // The committed rows must still be the only ones visible after worker and cleaner ran
    driver.run("select * from " + tblName);
    res.clear();
    driver.getFetchTask().fetch(res);
    Assert.assertEquals(4, res.size());
}

Also used : HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) StrictDelimitedInputWriter(org.apache.hive.streaming.StrictDelimitedInputWriter) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Test(org.junit.Test)

Aggregations

HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection)11 Test (org.junit.Test)11 FileSystem (org.apache.hadoop.fs.FileSystem)4 HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient)4 IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient)4 ArrayList (java.util.ArrayList)3 List (java.util.List)3 FileStatus (org.apache.hadoop.fs.FileStatus)3 Path (org.apache.hadoop.fs.Path)3 RemoteIterator (org.apache.hadoop.fs.RemoteIterator)3 Partition (org.apache.hadoop.hive.metastore.api.Partition)3 Table (org.apache.hadoop.hive.metastore.api.Table)3 TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore)3 StrictDelimitedInputWriter (org.apache.hive.streaming.StrictDelimitedInputWriter)3 Lists (com.google.common.collect.Lists)2 File (java.io.File)2 FileWriter (java.io.FileWriter)2 IOException (java.io.IOException)2 Arrays (java.util.Arrays)2 Collections (java.util.Collections)2