Use of org.apache.hive.streaming.StrictDelimitedInputWriter in project hive by apache, taken from the class TestReplicationOfHiveStreaming, method testHiveStreamingDynamicPartitionWithTxnBatchSizeAsOne.
import org.apache.hive.streaming.HiveStreamingConnection;
import org.apache.hive.streaming.StreamingConnection;
import org.apache.hive.streaming.StrictDelimitedInputWriter;
import org.junit.Test;

@Test
public void testHiveStreamingDynamicPartitionWithTxnBatchSizeAsOne() throws Throwable {
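// Bootstrap replication: dump the primary database once and load it on the replica.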
primary.dump(primaryDbName);
replica.loadWithoutExplain(replicatedDbName, primaryDbName);
// Create an ACID table.
String tblName = "alerts";
primary.run("use " + primaryDbName).run("create table " + tblName + "( id int , msg string ) " + "partitioned by (continent string, country string) " + "clustered by (id) into 5 buckets " + "stored as orc tblproperties(\"transactional\"=\"true\")");
// Dynamic partitioning
// Create delimited record writer whose schema exactly matches table schema
StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
    .withFieldDelimiter(',')
    .build();
// Create and open streaming connection (the target table has to exist already)
// By default, txn batch size is 1.
StreamingConnection connection = HiveStreamingConnection.newBuilder()
    .withDatabase(primaryDbName)
    .withTable(tblName)
    .withAgentInfo("example-agent-1")
    .withRecordWriter(writer)
    .withHiveConf(primary.getConf())
    .connect();
// Begin a transaction, write records and commit 1st transaction
connection.beginTransaction();
// Dynamic partition mode where last 2 columns are partition values
connection.write("11,val11,Asia,China".getBytes());
connection.write("12,val12,Asia,India".getBytes());
connection.commitTransaction();
// Replicate the committed data which should be visible.
primary.dump(primaryDbName);
replica.loadWithoutExplain(replicatedDbName, primaryDbName)
    .run("use " + replicatedDbName)
    .run("select msg from " + tblName + " where continent='Asia' and country='China' order by msg")
    .verifyResults(new String[] { "val11" })
    .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg")
    .verifyResults(new String[] { "val12" });
// Begin another transaction, write more records and commit 2nd transaction after REPL LOAD.
connection.beginTransaction();
connection.write("13,val13,Europe,Germany".getBytes());
connection.write("14,val14,Asia,India".getBytes());
// Replicate events before committing txn. The uncommitted data shouldn't be seen.
primary.dump(primaryDbName);
replica.loadWithoutExplain(replicatedDbName, primaryDbName)
    .run("use " + replicatedDbName)
    .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg")
    .verifyResults(new String[] { "val12" });
connection.commitTransaction();
// After committing the txn, the data should be visible.
primary.dump(primaryDbName);
replica.loadWithoutExplain(replicatedDbName, primaryDbName)
    .run("use " + replicatedDbName)
    .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg")
    .verifyResults(new String[] { "val12", "val14" })
    .run("select msg from " + tblName + " where continent='Europe' and country='Germany' order by msg")
    .verifyResults(new String[] { "val13" });
// Begin a transaction, write records and abort 3rd transaction
connection.beginTransaction();
connection.write("15,val15,Asia,China".getBytes());
connection.write("16,val16,Asia,India".getBytes());
connection.abortTransaction();
// Aborted data should not be visible.
primary.dump(primaryDbName);
replica.loadWithoutExplain(replicatedDbName, primaryDbName)
    .run("use " + replicatedDbName)
    .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg")
    .verifyResults(new String[] { "val12", "val14" })
    .run("select msg from " + tblName + " where continent='Asia' and country='China' order by msg")
    .verifyResults(new String[] { "val11" });
// Close the streaming connection
connection.close();
}
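
Stripped of the replication test harness, the same writer and connection builders form the core Hive streaming ingest pattern. Below is a minimal sketch assuming a pre-created transactional table; the default.alerts table name, its schema, and the HiveConf setup are illustrative assumptions, not part of the test above.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.streaming.HiveStreamingConnection;
import org.apache.hive.streaming.StreamingConnection;
import org.apache.hive.streaming.StrictDelimitedInputWriter;

public class StreamingIngestSketch {
    public static void main(String[] args) throws Exception {
        // Records are comma-delimited; field order must match the table schema,
        // with the dynamic partition columns (continent, country) last.
        StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
            .withFieldDelimiter(',')
            .build();
        HiveConf conf = new HiveConf(); // assumes hive-site.xml is on the classpath
        StreamingConnection connection = HiveStreamingConnection.newBuilder()
            .withDatabase("default")   // illustrative database name
            .withTable("alerts")       // table must already exist and be transactional
            .withAgentInfo("example-agent-1")
            .withRecordWriter(writer)
            .withHiveConf(conf)
            .connect();
        try {
            connection.beginTransaction();
            connection.write("1,val1,Asia,India".getBytes());
            connection.commitTransaction();
        } finally {
            connection.close();
        }
    }
}

Closing the connection in a finally block mirrors the explicit close in the test and releases the transaction batch even if a write fails.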