Use of org.apache.hive.streaming.StrictDelimitedInputWriter in project hive by apache.
The class TestCompactor, method prepareTableAndConnection.
private HiveStreamingConnection prepareTableAndConnection(String dbName, String tblName, int batchSize) throws Exception {
  String agentInfo = "UT_" + Thread.currentThread().getName();
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(b STRING) " + // currently ACID requires table to be bucketed
      " PARTITIONED BY (a INT)" +
      " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
      .withFieldDelimiter(',').build();
  // Create three folders with two different transactions
  return HiveStreamingConnection.newBuilder()
      .withDatabase(dbName).withTable(tblName).withAgentInfo(agentInfo)
      .withHiveConf(conf).withRecordWriter(writer)
      .withStreamingOptimizations(true).withTransactionBatchSize(batchSize)
      .connect();
}
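For orientation, a minimal hypothetical caller for this helper (not part of TestCompactor): the database name, table name and record values are illustrative assumptions, and the begin/write/commit/close lifecycle mirrors the tests further down.

HiveStreamingConnection connection = prepareTableAndConnection("default", "cws", 1);
try {
  connection.beginTransaction();
  // Records are delimited as "<b>,<a>"; with no static partition values set on the
  // connection, the trailing field is taken as the dynamic partition value (here a=1).
  connection.write("val1,1".getBytes());
  connection.write("val2,1".getBytes());
  connection.commitTransaction();
} finally {
  connection.close();
}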
Use of org.apache.hive.streaming.StrictDelimitedInputWriter in project hive by apache.
The class TestReplicationOfHiveStreaming, method testHiveStreamingStaticPartitionWithTxnBatchSizeAsOne.
@Test
public void testHiveStreamingStaticPartitionWithTxnBatchSizeAsOne() throws Throwable {
  primary.dump(primaryDbName);
  replica.loadWithoutExplain(replicatedDbName, primaryDbName);
  // Create an ACID table.
  String tblName = "alerts";
  primary.run("use " + primaryDbName)
      .run("create table " + tblName + "( id int , msg string ) " +
          "partitioned by (continent string, country string) " +
          "clustered by (id) into 5 buckets " +
          "stored as orc tblproperties(\"transactional\"=\"true\")");
  // Static partition values
  ArrayList<String> partitionVals = new ArrayList<String>(2);
  partitionVals.add("Asia");
  partitionVals.add("India");
  // Create delimited record writer whose schema exactly matches table schema
  StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
      .withFieldDelimiter(',').build();
  // Create and open streaming connection (default.src table has to exist already)
  // By default, txn batch size is 1.
  StreamingConnection connection = HiveStreamingConnection.newBuilder()
      .withDatabase(primaryDbName).withTable(tblName)
      .withStaticPartitionValues(partitionVals)
      .withAgentInfo("example-agent-1")
      .withRecordWriter(writer)
      .withHiveConf(primary.getConf())
      .connect();
  // Begin a transaction, write records and commit 1st transaction
  connection.beginTransaction();
  connection.write("1,val1".getBytes());
  connection.write("2,val2".getBytes());
  connection.commitTransaction();
  // Replicate the committed data which should be visible.
  primary.dump(primaryDbName);
  replica.loadWithoutExplain(replicatedDbName, primaryDbName)
      .run("use " + replicatedDbName)
      .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg")
      .verifyResults(new String[] { "val1", "val2" });
  // Begin another transaction, write more records and commit 2nd transaction after REPL LOAD.
  connection.beginTransaction();
  connection.write("3,val3".getBytes());
  connection.write("4,val4".getBytes());
  // Replicate events before committing txn. The uncommitted data shouldn't be seen.
  primary.dump(primaryDbName);
  replica.loadWithoutExplain(replicatedDbName, primaryDbName)
      .run("use " + replicatedDbName)
      .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg")
      .verifyResults(new String[] { "val1", "val2" });
  connection.commitTransaction();
  // After commit, the data should be replicated and visible.
  primary.dump(primaryDbName);
  replica.loadWithoutExplain(replicatedDbName, primaryDbName)
      .run("use " + replicatedDbName)
      .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg")
      .verifyResults(new String[] { "val1", "val2", "val3", "val4" });
  // Begin another transaction, write more records and abort 3rd transaction
  connection.beginTransaction();
  connection.write("5,val5".getBytes());
  connection.write("6,val6".getBytes());
  connection.abortTransaction();
  // Aborted data shouldn't be visible.
  primary.dump(primaryDbName);
  replica.loadWithoutExplain(replicatedDbName, primaryDbName)
      .run("use " + replicatedDbName)
      .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg")
      .verifyResults(new String[] { "val1", "val2", "val3", "val4" });
  // Close the streaming connection
  connection.close();
}
Use of org.apache.hive.streaming.StrictDelimitedInputWriter in project hive by apache.
The class TestCompactor, method testHeartbeatShutdownOnFailedCompaction.
@Test
public void testHeartbeatShutdownOnFailedCompaction() throws Exception {
  String dbName = "default";
  String tblName = "compaction_test";
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
      " PARTITIONED BY(bkt INT)" + // currently ACID requires table to be bucketed
      " CLUSTERED BY(a) INTO 4 BUCKETS" +
      " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
      .withFieldDelimiter(',').build();
  HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
      .withDatabase(dbName).withTable(tblName)
      .withStaticPartitionValues(Arrays.asList("0"))
      .withAgentInfo("UT_" + Thread.currentThread().getName())
      .withHiveConf(conf).withRecordWriter(writer)
      .connect();
  connection.beginTransaction();
  connection.write("55, 'London'".getBytes());
  connection.commitTransaction();
  connection.beginTransaction();
  connection.write("56, 'Paris'".getBytes());
  connection.commitTransaction();
  connection.close();
  executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" +
      " values(57, 'Budapest')", driver);
  executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" +
      " values(58, 'Milano')", driver);
  execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");
  TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  // Commit will throw an exception
  IMetaStoreClient mockedClient = Mockito.spy(new HiveMetaStoreClient(conf));
  doThrow(new RuntimeException("Simulating RuntimeException from CompactionTxn.commit"))
      .when(mockedClient).commitTxn(Mockito.anyLong());
  doAnswer(invocation -> {
    Object o = invocation.callRealMethod();
    // Check if the heartbeating is running
    Assert.assertTrue(Thread.getAllStackTraces().keySet().stream()
        .anyMatch(k -> k.getName().contains("CompactionTxn Heartbeater")));
    return o;
  }).when(mockedClient).openTxn(any(), any());
  // Do a major compaction
  CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
  rqst.setPartitionname("bkt=0");
  txnHandler.compact(rqst);
  Worker worker = Mockito.spy(new Worker());
  worker.setThreadId((int) worker.getId());
  worker.setConf(conf);
  worker.init(new AtomicBoolean(true));
  FieldSetter.setField(worker, RemoteCompactorThread.class.getDeclaredField("msc"), mockedClient);
  worker.run();
  // Check if the transaction was opened
  verify(mockedClient, times(1)).openTxn(any(), any());
  // Check if the heartbeating is properly terminated
  Assert.assertTrue(Thread.getAllStackTraces().keySet().stream()
      .noneMatch(k -> k.getName().contains("CompactionTxn Heartbeater")));
}
Use of org.apache.hive.streaming.StrictDelimitedInputWriter in project hive by apache.
The class TestCompactor, method autoCompactOnStreamingIngestWithDynamicPartition.
@Test
public void autoCompactOnStreamingIngestWithDynamicPartition() throws Exception {
  String dbName = "default";
  String tblName = "cws";
  String columnNamesProperty = "a,b";
  String columnTypesProperty = "string:int";
  String agentInfo = "UT_" + Thread.currentThread().getName();
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(a STRING) " + // currently ACID requires table to be bucketed
      " PARTITIONED BY (b INT)" +
      " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  StrictDelimitedInputWriter writer1 = StrictDelimitedInputWriter.newBuilder()
      .withFieldDelimiter(',').build();
  StrictDelimitedInputWriter writer2 = StrictDelimitedInputWriter.newBuilder()
      .withFieldDelimiter(',').build();
  StreamingConnection connection1 = HiveStreamingConnection.newBuilder()
      .withDatabase(dbName).withTable(tblName).withAgentInfo(agentInfo)
      .withHiveConf(conf).withRecordWriter(writer1)
      .withStreamingOptimizations(true).withTransactionBatchSize(1)
      .connect();
  StreamingConnection connection2 = HiveStreamingConnection.newBuilder()
      .withDatabase(dbName).withTable(tblName).withAgentInfo(agentInfo)
      .withHiveConf(conf).withRecordWriter(writer2)
      .withStreamingOptimizations(true).withTransactionBatchSize(1)
      .connect();
  try {
    connection1.beginTransaction();
    connection1.write("1,1".getBytes());
    connection1.commitTransaction();
    connection1.beginTransaction();
    connection1.write("1,1".getBytes());
    connection1.commitTransaction();
    connection1.close();
    conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
    runInitiator(conf);
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts1 = rsp.getCompacts();
    Assert.assertEquals(1, compacts1.size());
    SortedSet<String> partNames1 = new TreeSet<String>();
    verifyCompactions(compacts1, partNames1, tblName);
    List<String> names1 = new ArrayList<String>(partNames1);
    Assert.assertEquals("b=1", names1.get(0));
    runWorker(conf);
    runCleaner(conf);
    connection2.beginTransaction();
    connection2.write("1,1".getBytes());
    connection2.commitTransaction();
    connection2.beginTransaction();
    connection2.write("1,1".getBytes());
    connection2.commitTransaction();
    connection2.close();
    runInitiator(conf);
    List<ShowCompactResponseElement> compacts2 = rsp.getCompacts();
    Assert.assertEquals(1, compacts2.size());
    SortedSet<String> partNames2 = new TreeSet<String>();
    verifyCompactions(compacts2, partNames2, tblName);
    List<String> names2 = new ArrayList<String>(partNames2);
    Assert.assertEquals("b=1", names2.get(0));
    runWorker(conf);
    runCleaner(conf);
    // Find the location of the table
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tblName);
    String tablePath = table.getSd().getLocation();
    String partName = "b=1";
    Path partPath = new Path(tablePath, partName);
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] stat = fs.listStatus(partPath, AcidUtils.baseFileFilter);
    if (1 != stat.length) {
      Assert.fail("Expecting 1 file \"base_0000004\" and found " + stat.length + " files " + Arrays.toString(stat));
    }
    String name = stat[0].getPath().getName();
    Assert.assertEquals("base_0000005_v0000009", name);
    CompactorTestUtil.checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, null, 1);
  } finally {
    if (connection1 != null) {
      connection1.close();
    }
    if (connection2 != null) {
      connection2.close();
    }
  }
}
Use of org.apache.hive.streaming.StrictDelimitedInputWriter in project hive by apache.
The class TestCompactor, method prepareTableTwoPartitionsAndConnection.
private HiveStreamingConnection prepareTableTwoPartitionsAndConnection(String dbName, String tblName, int batchSize) throws Exception {
  String agentInfo = "UT_" + Thread.currentThread().getName();
  executeStatementOnDriver("drop table if exists " + tblName, driver);
  executeStatementOnDriver("CREATE TABLE " + tblName + "(c STRING) " + // currently ACID requires table to be bucketed
      " PARTITIONED BY (a INT, b INT)" +
      " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
      .withFieldDelimiter(',').build();
  // Create three folders with two different transactions
  return HiveStreamingConnection.newBuilder()
      .withDatabase(dbName).withTable(tblName).withAgentInfo(agentInfo)
      .withHiveConf(conf).withRecordWriter(writer)
      .withStreamingOptimizations(true).withTransactionBatchSize(batchSize)
      .connect();
}