Search in sources :

Example 11 with StrictDelimitedInputWriter

Uses of org.apache.hive.streaming.StrictDelimitedInputWriter in the project hive by apache.

The class TestReplicationOfHiveStreaming defines the method testHiveStreamingDynamicPartitionWithTxnBatchSizeAsOne.

/**
 * Verifies replication of Hive streaming writes into a dynamically partitioned ACID
 * table when the transaction batch size is 1 (the builder default):
 * committed data is visible on the replica after REPL DUMP/LOAD, uncommitted data is
 * not, and aborted data never becomes visible.
 *
 * @throws Throwable on any test failure
 */
@Test
public void testHiveStreamingDynamicPartitionWithTxnBatchSizeAsOne() throws Throwable {
    // Bootstrap replication of the (still empty) database so the incremental
    // dump/load cycles below have a baseline on the replica.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName);
    // Create an ACID table.
    String tblName = "alerts";
    primary.run("use " + primaryDbName).run("create table " + tblName + "( id int , msg string ) " + "partitioned by (continent string, country string) " + "clustered by (id) into 5 buckets " + "stored as orc tblproperties(\"transactional\"=\"true\")");
    // Dynamic partitioning: the last 2 CSV columns are the partition values.
    // Create delimited record writer whose schema exactly matches the table schema.
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    // Create and open a streaming connection (the target table has to exist already).
    // By default, txn batch size is 1.
    StreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(primaryDbName).withTable(tblName).withAgentInfo("example-agent-1").withRecordWriter(writer).withHiveConf(primary.getConf()).connect();
    // Fix the record encoding explicitly; a bare getBytes() would depend on the
    // JVM's platform default charset and make the test environment-sensitive.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    try {
        // Begin a transaction, write records and commit 1st transaction.
        connection.beginTransaction();
        connection.write("11,val11,Asia,China".getBytes(utf8));
        connection.write("12,val12,Asia,India".getBytes(utf8));
        connection.commitTransaction();
        // Replicate the committed data, which should be visible on the replica.
        primary.dump(primaryDbName);
        replica.loadWithoutExplain(replicatedDbName, primaryDbName).run("use " + replicatedDbName).run("select msg from " + tblName + " where continent='Asia' and country='China' order by msg").verifyResults((new String[] { "val11" })).run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg").verifyResults((new String[] { "val12" }));
        // Begin another transaction, write more records and commit 2nd transaction after REPL LOAD.
        connection.beginTransaction();
        connection.write("13,val13,Europe,Germany".getBytes(utf8));
        connection.write("14,val14,Asia,India".getBytes(utf8));
        // Replicate events before committing the txn. The uncommitted data shouldn't be seen.
        primary.dump(primaryDbName);
        replica.loadWithoutExplain(replicatedDbName, primaryDbName).run("use " + replicatedDbName).run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg").verifyResults((new String[] { "val12" }));
        connection.commitTransaction();
        // After committing the txn, the data should be visible.
        primary.dump(primaryDbName);
        replica.loadWithoutExplain(replicatedDbName, primaryDbName).run("use " + replicatedDbName).run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg").verifyResults((new String[] { "val12", "val14" })).run("select msg from " + tblName + " where continent='Europe' and country='Germany' order by msg").verifyResults((new String[] { "val13" }));
        // Begin a transaction, write records and abort 3rd transaction.
        connection.beginTransaction();
        connection.write("15,val15,Asia,China".getBytes(utf8));
        connection.write("16,val16,Asia,India".getBytes(utf8));
        connection.abortTransaction();
        // Aborted data should not be visible on the replica.
        primary.dump(primaryDbName);
        replica.loadWithoutExplain(replicatedDbName, primaryDbName).run("use " + replicatedDbName).run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg").verifyResults((new String[] { "val12", "val14" })).run("select msg from " + tblName + " where continent='Asia' and country='China' order by msg").verifyResults((new String[] { "val11" }));
    } finally {
        // Close the streaming connection even when an assertion above fails,
        // so a failed run does not leak the connection / open txn batch.
        connection.close();
    }
}
Also used : StrictDelimitedInputWriter(org.apache.hive.streaming.StrictDelimitedInputWriter) StreamingConnection(org.apache.hive.streaming.StreamingConnection) HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) Test(org.junit.Test)

Aggregations

StrictDelimitedInputWriter (org.apache.hive.streaming.StrictDelimitedInputWriter)11 HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection)9 StreamingConnection (org.apache.hive.streaming.StreamingConnection)8 Test (org.junit.Test)8 ArrayList (java.util.ArrayList)5 FileSystem (org.apache.hadoop.fs.FileSystem)4 Path (org.apache.hadoop.fs.Path)4 HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient)4 IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient)4 Table (org.apache.hadoop.hive.metastore.api.Table)4 TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore)4 List (java.util.List)3 TreeSet (java.util.TreeSet)3 FileStatus (org.apache.hadoop.fs.FileStatus)3 ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest)3 ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse)3 ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement)3 Lists (com.google.common.collect.Lists)2 File (java.io.File)2 FileWriter (java.io.FileWriter)2