
Example 1 with StreamingConnection

Use of org.apache.hive.streaming.StreamingConnection in project hive by apache.

In class TestCompactor, method minorCompactWhileStreaming.

@Test
public void minorCompactWhileStreaming() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    StreamingConnection connection = null;
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            CompactorTestUtil.writeBatch(conf, dbName, tblName, false, false);
        }
        // Start a third batch, but don't close it.
        connection = CompactorTestUtil.writeBatch(conf, dbName, tblName, false, true);
        // Now, compact
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
        runWorker(conf);
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
        String[] names = new String[stat.length];
        Path resultFile = null;
        for (int i = 0; i < names.length; i++) {
            names[i] = stat[i].getPath().getName();
            if (names[i].equals("delta_0000001_0000004_v0000009")) {
                resultFile = stat[i].getPath();
            }
        }
        Arrays.sort(names);
        String[] expected = new String[] { "delta_0000001_0000002", "delta_0000001_0000004_v0000009", "delta_0000003_0000004", "delta_0000005_0000006" };
        if (!Arrays.deepEquals(expected, names)) {
            Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names) + ",stat=" + CompactorTestUtil.printFileStatus(stat));
        }
        CompactorTestUtil.checkExpectedTxnsPresent(null, new Path[] { resultFile }, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, null, 1);
    } finally {
        if (connection != null) {
            connection.close();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient), Table (org.apache.hadoop.hive.metastore.api.Table), FileStatus (org.apache.hadoop.fs.FileStatus), StreamingConnection (org.apache.hive.streaming.StreamingConnection), HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection), IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient), FileSystem (org.apache.hadoop.fs.FileSystem), TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore), CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest), Test (org.junit.Test)
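
The delta directory names asserted above follow Hive's ACID naming scheme: delta_<minWriteId>_<maxWriteId>, with ids zero-padded to seven digits, plus a _v<visibilityTxnId> suffix on directories written by the compactor. That is why delta_0000001_0000004_v0000009 denotes the compacted span while plain delta_0000005_0000006 is the still-open streaming batch. A minimal sketch of that pattern; DeltaDirNames and isCompactedDelta are illustrative names, not Hive API:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DeltaDirNames {

    // delta_<minWriteId>_<maxWriteId>, ids zero-padded to 7 digits, with an
    // optional _v<visibilityTxnId> suffix on compactor-written directories.
    private static final Pattern DELTA_DIR =
            Pattern.compile("delta_(\\d{7})_(\\d{7})(?:_v(\\d{7}))?");

    // True only for deltas produced by compaction, i.e. those carrying the
    // visibility suffix (capture group 3).
    static boolean isCompactedDelta(String dirName) {
        Matcher m = DELTA_DIR.matcher(dirName);
        return m.matches() && m.group(3) != null;
    }

    public static void main(String[] args) {
        System.out.println(isCompactedDelta("delta_0000001_0000004_v0000009")); // true
        System.out.println(isCompactedDelta("delta_0000005_0000006"));          // false
    }
}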

Example 2 with StreamingConnection

Use of org.apache.hive.streaming.StreamingConnection in project hive by apache.

In class TestCompactor, method majorCompactWhileStreaming.

@Test
public void majorCompactWhileStreaming() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true') ", driver);
    StreamingConnection connection = null;
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            CompactorTestUtil.writeBatch(conf, dbName, tblName, false, false);
        }
        // Start a third batch, but don't close it. This delta will be ignored by compaction
        // since it has an open txn in it.
        connection = CompactorTestUtil.writeBatch(conf, dbName, tblName, false, true);
        runMajorCompaction(dbName, tblName);
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.baseFileFilter);
        if (1 != stat.length) {
            Assert.fail("Expecting 1 file \"base_0000004\" and found " + stat.length + " files " + Arrays.toString(stat));
        }
        String name = stat[0].getPath().getName();
        Assert.assertEquals("base_0000004_v0000009", name);
        CompactorTestUtil.checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, null, 1);
    } finally {
        if (connection != null) {
            connection.close();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient), Table (org.apache.hadoop.hive.metastore.api.Table), FileStatus (org.apache.hadoop.fs.FileStatus), FileSystem (org.apache.hadoop.fs.FileSystem), StreamingConnection (org.apache.hive.streaming.StreamingConnection), HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection), IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient), Test (org.junit.Test)
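
Major compaction collapses the closed deltas into a single base directory named base_<maxWriteId>, again with the compactor's _v<visibilityTxnId> suffix, which is why this test expects exactly one base_0000004_v0000009. A companion check to the delta sketch above, assuming the same seven-digit padding; BaseDirNames is an illustrative name, not Hive code:

import java.util.regex.Pattern;

public class BaseDirNames {

    // base_<maxWriteId>, zero-padded to 7 digits, with an optional
    // _v<visibilityTxnId> suffix added by the compactor.
    private static final Pattern BASE_DIR =
            Pattern.compile("base_(\\d{7})(?:_v(\\d{7}))?");

    public static void main(String[] args) {
        System.out.println(BASE_DIR.matcher("base_0000004_v0000009").matches()); // true
        System.out.println(BASE_DIR.matcher("delta_0000001_0000002").matches()); // false
    }
}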

Example 3 with StreamingConnection

Use of org.apache.hive.streaming.StreamingConnection in project hive by apache.

In class TestCompactor, method majorCompactWhileStreamingForSplitUpdate.

@Test
public void majorCompactWhileStreamingForSplitUpdate() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true', " + "'transactional_properties'='default') ", // this turns on split-update U=D+I
    driver);
    // Write a couple of batches
    for (int i = 0; i < 2; i++) {
        CompactorTestUtil.writeBatch(conf, dbName, tblName, false, false);
    }
    // Start a third batch, but don't close it.
    StreamingConnection connection1 = CompactorTestUtil.writeBatch(conf, dbName, tblName, false, true);
    runMajorCompaction(dbName, tblName);
    // Find the location of the table
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tblName);
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.baseFileFilter);
    if (1 != stat.length) {
        Assert.fail("Expecting 1 file \"base_0000004\" and found " + stat.length + " files " + Arrays.toString(stat));
    }
    String name = stat[0].getPath().getName();
    Assert.assertEquals("base_0000004_v0000009", name);
    CompactorTestUtil.checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 1, 1L, 4L, null, 2);
    if (connection1 != null) {
        connection1.close();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient), Table (org.apache.hadoop.hive.metastore.api.Table), FileStatus (org.apache.hadoop.fs.FileStatus), FileSystem (org.apache.hadoop.fs.FileSystem), StreamingConnection (org.apache.hive.streaming.StreamingConnection), HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection), IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient), Test (org.junit.Test)

Example 4 with StreamingConnection

Use of org.apache.hive.streaming.StreamingConnection in project hive by apache.

In class TestCompactor, method minorCompactWhileStreamingWithSplitUpdate.

@Test
public void minorCompactWhileStreamingWithSplitUpdate() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true'," + "'transactional_properties'='default')", driver);
    // Write a couple of batches
    for (int i = 0; i < 2; i++) {
        CompactorTestUtil.writeBatch(conf, dbName, tblName, false, false);
    }
    // Start a third batch, but don't close it.
    StreamingConnection connection1 = CompactorTestUtil.writeBatch(conf, dbName, tblName, false, true);
    // Now, compact
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
    runWorker(conf);
    // Find the location of the table
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tblName);
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
    String[] names = new String[stat.length];
    Path resultFile = null;
    for (int i = 0; i < names.length; i++) {
        names[i] = stat[i].getPath().getName();
        if (names[i].equals("delta_0000001_0000004_v0000009")) {
            resultFile = stat[i].getPath();
        }
    }
    Arrays.sort(names);
    String[] expected = new String[] { "delta_0000001_0000002", "delta_0000001_0000004_v0000009", "delta_0000003_0000004", "delta_0000005_0000006" };
    if (!Arrays.deepEquals(expected, names)) {
        Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names));
    }
    CompactorTestUtil.checkExpectedTxnsPresent(null, new Path[] { resultFile }, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, null, 1);
    // Assert that we have no delete deltas if there are no input delete events.
    FileStatus[] deleteDeltaStat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deleteEventDeltaDirFilter);
    assertEquals(0, deleteDeltaStat.length);
    if (connection1 != null) {
        connection1.close();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient), Table (org.apache.hadoop.hive.metastore.api.Table), FileStatus (org.apache.hadoop.fs.FileStatus), StreamingConnection (org.apache.hive.streaming.StreamingConnection), HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection), IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient), FileSystem (org.apache.hadoop.fs.FileSystem), TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore), CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest), Test (org.junit.Test)
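
This example additionally asserts that a minor compaction emits no delete_delta directories when the input contains no delete events. Insert deltas and delete deltas sit side by side in the table location and differ only in their name prefix, which is what AcidUtils.deltaFileFilter and AcidUtils.deleteEventDeltaDirFilter select on. A rough sketch of that split by prefix; DeltaKinds and splitDeltas are illustrative, not Hive API:

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class DeltaKinds {

    // Partition directory names into delete deltas (delete_delta_ prefix)
    // and plain insert deltas (delta_ prefix); other names are dropped.
    static Map<Boolean, List<String>> splitDeltas(List<String> dirNames) {
        return dirNames.stream()
                .filter(n -> n.startsWith("delta_") || n.startsWith("delete_delta_"))
                .collect(Collectors.partitioningBy(n -> n.startsWith("delete_delta_")));
    }

    public static void main(String[] args) {
        Map<Boolean, List<String>> split = splitDeltas(Arrays.asList(
                "delta_0000001_0000002", "delete_delta_0000003_0000004", "base_0000004"));
        System.out.println("delete deltas: " + split.get(true));  // [delete_delta_0000003_0000004]
        System.out.println("insert deltas: " + split.get(false)); // [delta_0000001_0000002]
    }
}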

Example 5 with StreamingConnection

Use of org.apache.hive.streaming.StreamingConnection in project hive by apache.

In class TestReplicationOfHiveStreaming, method testHiveStreamingStaticPartitionWithTxnBatchSizeAsOne.

@Test
public void testHiveStreamingStaticPartitionWithTxnBatchSizeAsOne() throws Throwable {
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName);
    // Create an ACID table.
    String tblName = "alerts";
    primary.run("use " + primaryDbName).run("create table " + tblName + "( id int , msg string ) " + "partitioned by (continent string, country string) " + "clustered by (id) into 5 buckets " + "stored as orc tblproperties(\"transactional\"=\"true\")");
    // Static partition values
    ArrayList<String> partitionVals = new ArrayList<String>(2);
    partitionVals.add("Asia");
    partitionVals.add("India");
    // Create delimited record writer whose schema exactly matches table schema
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    // Create and open streaming connection (default.src table has to exist already)
    // By default, txn batch size is 1.
    StreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(primaryDbName).withTable(tblName).withStaticPartitionValues(partitionVals).withAgentInfo("example-agent-1").withRecordWriter(writer).withHiveConf(primary.getConf()).connect();
    // Begin a transaction, write records and commit 1st transaction
    connection.beginTransaction();
    connection.write("1,val1".getBytes());
    connection.write("2,val2".getBytes());
    connection.commitTransaction();
    // Replicate the committed data which should be visible.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName).run("use " + replicatedDbName).run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg").verifyResults((new String[] { "val1", "val2" }));
    // Begin another transaction, write more records and commit 2nd transaction after REPL LOAD.
    connection.beginTransaction();
    connection.write("3,val3".getBytes());
    connection.write("4,val4".getBytes());
    // Replicate events before committing txn. The uncommitted data shouldn't be seen.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName).run("use " + replicatedDbName).run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg").verifyResults((new String[] { "val1", "val2" }));
    connection.commitTransaction();
    // After commit, the data should be replicated and visible.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName).run("use " + replicatedDbName).run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg").verifyResults((new String[] { "val1", "val2", "val3", "val4" }));
    // Begin another transaction, write more records and abort 3rd transaction
    connection.beginTransaction();
    connection.write("5,val5".getBytes());
    connection.write("6,val6".getBytes());
    connection.abortTransaction();
    // Aborted data shouldn't be visible.
    primary.dump(primaryDbName);
    replica.loadWithoutExplain(replicatedDbName, primaryDbName).run("use " + replicatedDbName).run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg").verifyResults((new String[] { "val1", "val2", "val3", "val4" }));
    // Close the streaming connection
    connection.close();
}
Also used: ArrayList (java.util.ArrayList), StrictDelimitedInputWriter (org.apache.hive.streaming.StrictDelimitedInputWriter), StreamingConnection (org.apache.hive.streaming.StreamingConnection), HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection), Test (org.junit.Test)
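
Stripped of the replication checks, this example reduces to the core streaming lifecycle: build a record writer, open a connection via HiveStreamingConnection.newBuilder(), then beginTransaction/write/commitTransaction (or abortTransaction), and close. A minimal standalone sketch, assuming a HiveConf pointing at a running metastore and an existing transactional table; the database, table, and agent names are placeholders:

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.streaming.HiveStreamingConnection;
import org.apache.hive.streaming.StreamingConnection;
import org.apache.hive.streaming.StrictDelimitedInputWriter;

public class StreamingLifecycleSketch {

    public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        // Record writer whose delimiter matches the incoming rows.
        StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
                .withFieldDelimiter(',')
                .build();
        // The target table must already exist and be transactional.
        StreamingConnection connection = HiveStreamingConnection.newBuilder()
                .withDatabase("default")          // placeholder database
                .withTable("alerts")              // placeholder table
                .withAgentInfo("example-agent")
                .withRecordWriter(writer)
                .withHiveConf(conf)
                .connect();
        try {
            connection.beginTransaction();
            connection.write("1,val1".getBytes());
            connection.write("2,val2".getBytes());
            connection.commitTransaction();       // rows become visible on commit
        } finally {
            connection.close();                   // always release the connection
        }
    }
}

The test above relies on the default transaction batch size of 1 (noted in its comments), so each commit closes its batch and the committed rows become independently visible to replication.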

Aggregations

HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection): 12
StreamingConnection (org.apache.hive.streaming.StreamingConnection): 12
Test (org.junit.Test): 11
FileSystem (org.apache.hadoop.fs.FileSystem): 8
Path (org.apache.hadoop.fs.Path): 8
HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient): 8
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient): 8
Table (org.apache.hadoop.hive.metastore.api.Table): 8
StrictDelimitedInputWriter (org.apache.hive.streaming.StrictDelimitedInputWriter): 6
FileStatus (org.apache.hadoop.fs.FileStatus): 5
TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore): 3
ArrayList (java.util.ArrayList): 2
CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest): 2
TreeSet (java.util.TreeSet): 1
ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest): 1
ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse): 1
ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement): 1