Example 11 with StreamingConnection

Use of org.apache.hive.streaming.StreamingConnection in the Apache Hive project.

From class TestReplicationOfHiveStreaming, method testHiveStreamingUnpartitionedWithTxnBatchSizeAsOne.

@Test
public void testHiveStreamingUnpartitionedWithTxnBatchSizeAsOne() throws Throwable {
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName);
    // Create an ACID table.
    String tblName = "alerts";
    primary.run("use " + primaryDbName).run("create table " + tblName + "( id int , msg string ) " + "clustered by (id) into 5 buckets " + "stored as orc tblproperties(\"transactional\"=\"true\")");
    // Create delimited record writer whose schema exactly matches table schema
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    // Create and open a streaming connection (the target table must already exist).
    // By default, txn batch size is 1.
    StreamingConnection connection = HiveStreamingConnection.newBuilder()
            .withDatabase(primaryDbName)
            .withTable(tblName)
            .withAgentInfo("example-agent-1")
            .withRecordWriter(writer)
            .withHiveConf(primary.getConf())
            .connect();
    // Begin a transaction, write records and commit 1st transaction
    connection.beginTransaction();
    connection.write("1,val1".getBytes());
    connection.write("2,val2".getBytes());
    connection.commitTransaction();
    // Replicate the committed data which should be visible.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("select msg from " + tblName + " order by msg")
            .verifyResults(new String[] { "val1", "val2" });
    // Begin another transaction, write more records and commit 2nd transaction after REPL LOAD.
    connection.beginTransaction();
    connection.write("3,val3".getBytes());
    connection.write("4,val4".getBytes());
    // Replicate events before committing txn. The uncommitted data shouldn't be seen.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("select msg from " + tblName + " order by msg")
            .verifyResults(new String[] { "val1", "val2" });
    connection.commitTransaction();
    // After commit, the data should be replicated and visible.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("select msg from " + tblName + " order by msg")
            .verifyResults(new String[] { "val1", "val2", "val3", "val4" });
    // Begin another transaction, write more records and abort 3rd transaction
    connection.beginTransaction();
    connection.write("5,val5".getBytes());
    connection.write("6,val6".getBytes());
    connection.abortTransaction();
    // Aborted data shouldn't be visible.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("select msg from " + tblName + " order by msg")
            .verifyResults(new String[] { "val1", "val2", "val3", "val4" });
    // Close the streaming connection
    connection.close();
}
Also used: StrictDelimitedInputWriter (org.apache.hive.streaming.StrictDelimitedInputWriter), StreamingConnection (org.apache.hive.streaming.StreamingConnection), HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection), Test (org.junit.Test)
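
Both tests above rely on the default transaction batch size of 1, so every beginTransaction() opens a fresh transaction in the metastore. The builder can also pre-allocate a batch of transactions so that successive beginTransaction() calls draw from the same batch. Below is a minimal standalone sketch of that option, assuming a reachable metastore and an existing transactional ORC table named alerts; the database name (testdb) and agent name are illustrative, while withTransactionBatchSize is part of the HiveStreamingConnection builder API:

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.streaming.HiveStreamingConnection;
import org.apache.hive.streaming.StreamingConnection;
import org.apache.hive.streaming.StrictDelimitedInputWriter;

public class StreamingBatchSketch {
    public static void main(String[] args) throws Exception {
        // Assumes hive-site.xml (metastore URI etc.) is on the classpath.
        HiveConf conf = new HiveConf();
        StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
                .withFieldDelimiter(',')
                .build();
        // Pre-allocate 10 transactions instead of the default batch size of 1.
        StreamingConnection connection = HiveStreamingConnection.newBuilder()
                .withDatabase("testdb") // illustrative database name
                .withTable("alerts") // must already exist as a transactional ORC table
                .withAgentInfo("example-agent-2") // illustrative agent name
                .withTransactionBatchSize(10)
                .withRecordWriter(writer)
                .withHiveConf(conf)
                .connect();
        try {
            // Each begin/commit pair consumes one transaction from the batch.
            connection.beginTransaction();
            connection.write("7,val7".getBytes());
            connection.commitTransaction();
        } finally {
            connection.close();
        }
    }
}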

Example 12 with StreamingConnection

Use of org.apache.hive.streaming.StreamingConnection in the Apache Hive project.

From class TestReplicationOfHiveStreaming, method testHiveStreamingDynamicPartitionWithTxnBatchSizeAsOne.

@Test
public void testHiveStreamingDynamicPartitionWithTxnBatchSizeAsOne() throws Throwable {
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName);
    // Create an ACID table.
    String tblName = "alerts";
    primary.run("use " + primaryDbName).run("create table " + tblName + "( id int , msg string ) " + "partitioned by (continent string, country string) " + "clustered by (id) into 5 buckets " + "stored as orc tblproperties(\"transactional\"=\"true\")");
    // Dynamic partitioning
    // Create delimited record writer whose schema exactly matches table schema
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    // Create and open a streaming connection (the target table must already exist).
    // By default, txn batch size is 1.
    StreamingConnection connection = HiveStreamingConnection.newBuilder()
            .withDatabase(primaryDbName)
            .withTable(tblName)
            .withAgentInfo("example-agent-1")
            .withRecordWriter(writer)
            .withHiveConf(primary.getConf())
            .connect();
    // Begin a transaction, write records and commit 1st transaction
    connection.beginTransaction();
    // Dynamic partition mode where last 2 columns are partition values
    connection.write("11,val11,Asia,China".getBytes());
    connection.write("12,val12,Asia,India".getBytes());
    connection.commitTransaction();
    // Replicate the committed data which should be visible.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("select msg from " + tblName + " where continent='Asia' and country='China' order by msg")
            .verifyResults(new String[] { "val11" })
            .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg")
            .verifyResults(new String[] { "val12" });
    // Begin another transaction, write more records and commit 2nd transaction after REPL LOAD.
    connection.beginTransaction();
    connection.write("13,val13,Europe,Germany".getBytes());
    connection.write("14,val14,Asia,India".getBytes());
    // Replicate events before committing txn. The uncommitted data shouldn't be seen.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg")
            .verifyResults(new String[] { "val12" });
    connection.commitTransaction();
    // After committing the txn, the data should be visible.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg")
            .verifyResults(new String[] { "val12", "val14" })
            .run("select msg from " + tblName + " where continent='Europe' and country='Germany' order by msg")
            .verifyResults(new String[] { "val13" });
    // Begin a transaction, write records and abort 3rd transaction
    connection.beginTransaction();
    connection.write("15,val15,Asia,China".getBytes());
    connection.write("16,val16,Asia,India".getBytes());
    connection.abortTransaction();
    // Aborted data should not be visible.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg")
            .verifyResults(new String[] { "val12", "val14" })
            .run("select msg from " + tblName + " where continent='Asia' and country='China' order by msg")
            .verifyResults(new String[] { "val11" });
    // Close the streaming connection
    connection.close();
}
Also used: StrictDelimitedInputWriter (org.apache.hive.streaming.StrictDelimitedInputWriter), StreamingConnection (org.apache.hive.streaming.StreamingConnection), HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection), Test (org.junit.Test)
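
The test above writes in dynamic-partition mode, where the trailing fields of each delimited record (continent and country) select the target partition per record. When every record in a connection goes to one known partition, the partition can instead be fixed at build time. A minimal sketch under the same assumptions as the previous sketch (existing partitioned alerts table, illustrative testdb database and agent name); withStaticPartitionValues is part of the HiveStreamingConnection builder API, and records then carry only the non-partition columns:

import java.util.Arrays;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.streaming.HiveStreamingConnection;
import org.apache.hive.streaming.StreamingConnection;
import org.apache.hive.streaming.StrictDelimitedInputWriter;

public class StaticPartitionSketch {
    public static void main(String[] args) throws Exception {
        // Assumes hive-site.xml (metastore URI etc.) is on the classpath.
        HiveConf conf = new HiveConf();
        StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
                .withFieldDelimiter(',')
                .build();
        // Pin the partition (continent='Asia', country='India') for the whole connection.
        StreamingConnection connection = HiveStreamingConnection.newBuilder()
                .withDatabase("testdb") // illustrative database name
                .withTable("alerts")
                .withStaticPartitionValues(Arrays.asList("Asia", "India"))
                .withAgentInfo("example-agent-3") // illustrative agent name
                .withRecordWriter(writer)
                .withHiveConf(conf)
                .connect();
        try {
            connection.beginTransaction();
            // Only id and msg; the partition columns are supplied by the builder.
            connection.write("17,val17".getBytes());
            connection.commitTransaction();
        } finally {
            connection.close();
        }
    }
}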

Aggregations

HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection) 12
StreamingConnection (org.apache.hive.streaming.StreamingConnection) 12
Test (org.junit.Test) 11
FileSystem (org.apache.hadoop.fs.FileSystem) 8
Path (org.apache.hadoop.fs.Path) 8
HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient) 8
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient) 8
Table (org.apache.hadoop.hive.metastore.api.Table) 8
StrictDelimitedInputWriter (org.apache.hive.streaming.StrictDelimitedInputWriter) 6
FileStatus (org.apache.hadoop.fs.FileStatus) 5
TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore) 3
ArrayList (java.util.ArrayList) 2
CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest) 2
TreeSet (java.util.TreeSet) 1
ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest) 1
ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse) 1
ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) 1