Example 6 with StreamingConnection

Use of org.apache.hive.streaming.StreamingConnection in project hive by apache.

From class TestCompactor, method autoCompactOnStreamingIngestWithDynamicPartition.

@Test
public void autoCompactOnStreamingIngestWithDynamicPartition() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "string:int";
    String agentInfo = "UT_" + Thread.currentThread().getName();
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a STRING) " + // currently ACID requires table to be bucketed
    " PARTITIONED BY (b INT)" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    StrictDelimitedInputWriter writer1 = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    StrictDelimitedInputWriter writer2 = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    StreamingConnection connection1 = HiveStreamingConnection.newBuilder()
        .withDatabase(dbName).withTable(tblName).withAgentInfo(agentInfo).withHiveConf(conf)
        .withRecordWriter(writer1).withStreamingOptimizations(true)
        .withTransactionBatchSize(1).connect();
    StreamingConnection connection2 = HiveStreamingConnection.newBuilder()
        .withDatabase(dbName).withTable(tblName).withAgentInfo(agentInfo).withHiveConf(conf)
        .withRecordWriter(writer2).withStreamingOptimizations(true)
        .withTransactionBatchSize(1).connect();
    try {
        connection1.beginTransaction();
        connection1.write("1,1".getBytes());
        connection1.commitTransaction();
        connection1.beginTransaction();
        connection1.write("1,1".getBytes());
        connection1.commitTransaction();
        connection1.close();
        conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
        runInitiator(conf);
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
        List<ShowCompactResponseElement> compacts1 = rsp.getCompacts();
        Assert.assertEquals(1, compacts1.size());
        SortedSet<String> partNames1 = new TreeSet<String>();
        verifyCompactions(compacts1, partNames1, tblName);
        List<String> names1 = new ArrayList<String>(partNames1);
        Assert.assertEquals("b=1", names1.get(0));
        runWorker(conf);
        runCleaner(conf);
        connection2.beginTransaction();
        connection2.write("1,1".getBytes());
        connection2.commitTransaction();
        connection2.beginTransaction();
        connection2.write("1,1".getBytes());
        connection2.commitTransaction();
        connection2.close();
        runInitiator(conf);
        // Re-query the compaction queue; the earlier ShowCompactResponse is stale.
        rsp = txnHandler.showCompact(new ShowCompactRequest());
        List<ShowCompactResponseElement> compacts2 = rsp.getCompacts();
        Assert.assertEquals(1, compacts2.size());
        SortedSet<String> partNames2 = new TreeSet<String>();
        verifyCompactions(compacts2, partNames2, tblName);
        List<String> names2 = new ArrayList<String>(partNames2);
        Assert.assertEquals("b=1", names2.get(0));
        runWorker(conf);
        runCleaner(conf);
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        String tablePath = table.getSd().getLocation();
        String partName = "b=1";
        Path partPath = new Path(tablePath, partName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(partPath, AcidUtils.baseFileFilter);
        if (1 != stat.length) {
            Assert.fail("Expecting 1 file \"base_0000005_v0000009\" and found " + stat.length + " files " + Arrays.toString(stat));
        }
        String name = stat[0].getPath().getName();
        Assert.assertEquals("base_0000005_v0000009", name);
        CompactorTestUtil.checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, null, 1);
    } finally {
        if (connection1 != null) {
            connection1.close();
        }
        if (connection2 != null) {
            connection2.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) StrictDelimitedInputWriter(org.apache.hive.streaming.StrictDelimitedInputWriter) StreamingConnection(org.apache.hive.streaming.StreamingConnection) HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) ArrayList(java.util.ArrayList) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) TreeSet(java.util.TreeSet) FileSystem(org.apache.hadoop.fs.FileSystem) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) Test(org.junit.Test)
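The helper verifyCompactions is defined in TestCompactor but not shown in this listing. A plausible sketch, consistent with how the test above calls it (checking each queued compaction and collecting partition names), could look like the following; the real helper's exact assertions may differ.

private void verifyCompactions(List<ShowCompactResponseElement> compacts,
        SortedSet<String> partNames, String tblName) {
    for (ShowCompactResponseElement compact : compacts) {
        // Each queued compaction should target our table in the default database.
        Assert.assertEquals("default", compact.getDbname());
        Assert.assertEquals(tblName, compact.getTablename());
        // Collect partition names so the caller can assert on them, e.g. "b=1".
        partNames.add(compact.getPartitionname());
    }
}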

Example 7 with StreamingConnection

Use of org.apache.hive.streaming.StreamingConnection in project hive by apache.

From class TestCrudCompactorOnTez, method testMinorCompactionWhileStreaming.

@Test
public void testMinorCompactionWhileStreaming() throws Exception {
    String dbName = "default";
    String tableName = "testMinorCompaction";
    executeStatementOnDriver("drop table if exists " + tableName, driver);
    executeStatementOnDriver("CREATE TABLE " + tableName + "(a INT, b STRING) " + " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    StreamingConnection connection = null;
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            CompactorTestUtil.writeBatch(conf, dbName, tableName, false, false);
        }
        // Start a third batch, but don't close it.
        connection = CompactorTestUtil.writeBatch(conf, dbName, tableName, false, true);
        // Now, compact
        CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MINOR, true);
        // Find the location of the table
        IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
        Table table = metaStoreClient.getTable(dbName, tableName);
        FileSystem fs = FileSystem.get(conf);
        Assert.assertEquals("Delta names does not match", Arrays.asList("delta_0000001_0000002", "delta_0000001_0000005_v0000009", "delta_0000003_0000004", "delta_0000005_0000006"), CompactorTestUtil.getBaseOrDeltaNames(fs, null, table, null));
        CompactorTestUtil.checkExpectedTxnsPresent(null, new Path[] { new Path(table.getSd().getLocation(), "delta_0000001_0000005_v0000009") }, "a,b", "int:string", 0, 1L, 4L, null, 1);
    } finally {
        if (connection != null) {
            connection.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileSystem(org.apache.hadoop.fs.FileSystem) StreamingConnection(org.apache.hive.streaming.StreamingConnection) HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)
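The directory names asserted above follow the ACID naming scheme delta_<minWriteId>_<maxWriteId>; directories written by the compactor carry an extra _v<visibilityTxnId> suffix, while deltas created directly by streaming do not. A small illustrative decomposition (not part of the test; uses java.util.regex):

// Illustration only: pull apart a compactor-produced delta directory name.
Pattern deltaPattern = Pattern.compile("delta_(\\d+)_(\\d+)(?:_v(\\d+))?");
Matcher m = deltaPattern.matcher("delta_0000001_0000005_v0000009");
if (m.matches()) {
    long minWriteId = Long.parseLong(m.group(1));  // 1: first write id covered
    long maxWriteId = Long.parseLong(m.group(2));  // 5: last write id covered
    String visibilityTxn = m.group(3);             // "0000009"; null for streaming deltas
}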

Example 8 with StreamingConnection

Use of org.apache.hive.streaming.StreamingConnection in project hive by apache.

From class TestCrudCompactorOnTez, method testMinorCompactionWhileStreamingWithAbortInMiddle.

@Test
public void testMinorCompactionWhileStreamingWithAbortInMiddle() throws Exception {
    String dbName = "default";
    String tableName = "testMinorCompaction";
    executeStatementOnDriver("drop table if exists " + tableName, driver);
    executeStatementOnDriver("CREATE TABLE " + tableName + "(a INT, b STRING) " + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    StreamingConnection connection = HiveStreamingConnection.newBuilder()
        .withDatabase(dbName).withTable(tableName)
        .withAgentInfo("UT_" + Thread.currentThread().getName())
        .withHiveConf(conf).withRecordWriter(writer).connect();
    connection.beginTransaction();
    connection.write("50,Kiev".getBytes());
    connection.write("51,St. Petersburg".getBytes());
    connection.write("52,Boston".getBytes());
    connection.commitTransaction();
    connection.beginTransaction();
    connection.write("60,Budapest".getBytes());
    connection.abortTransaction();
    connection.beginTransaction();
    connection.write("71,Szeged".getBytes());
    connection.write("72,Debrecen".getBytes());
    connection.commitTransaction();
    connection.close();
    CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MINOR, true);
    CompactorTestUtil.runCleaner(conf);
    // Find the location of the table
    IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
    Table table = metaStoreClient.getTable(dbName, tableName);
    FileSystem fs = FileSystem.get(conf);
    Assert.assertEquals("Delta names does not match", Collections.singletonList("delta_0000001_0000003_v0000006"), CompactorTestUtil.getBaseOrDeltaNames(fs, null, table, null));
    CompactorTestUtil.checkExpectedTxnsPresent(null, new Path[] { new Path(table.getSd().getLocation(), "delta_0000001_0000003_v0000006") }, "a,b", "int:string", 0, 1L, 3L, Lists.newArrayList(2), 1);
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileSystem(org.apache.hadoop.fs.FileSystem) StrictDelimitedInputWriter(org.apache.hive.streaming.StrictDelimitedInputWriter) StreamingConnection(org.apache.hive.streaming.StreamingConnection) HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)
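After compaction, the single delta delta_0000001_0000003_v0000006 spans write ids 1 through 3 even though the transaction behind write id 2 was aborted; that id is handed to checkExpectedTxnsPresent as Lists.newArrayList(2). The snippet below is an illustration, not part of the test, assuming the ValidReaderWriteIdList constructor from org.apache.hadoop.hive.common: readers exclude the aborted write id via the valid write-id list.

// Illustration only: write id 2 is an exception flagged as aborted.
BitSet abortedBits = new BitSet();
abortedBits.set(0); // exception[0] (write id 2) is aborted rather than open
ValidWriteIdList validWriteIds = new ValidReaderWriteIdList(
        "default.testMinorCompaction", new long[] { 2L }, abortedBits, 3L);
Assert.assertFalse(validWriteIds.isWriteIdValid(2L)); // aborted rows are skipped
Assert.assertTrue(validWriteIds.isWriteIdValid(3L));  // committed rows stay visible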

Example 9 with StreamingConnection

Use of org.apache.hive.streaming.StreamingConnection in project hive by apache.

From class TestCrudCompactorOnTez, method testMinorCompactionWhileStreamingWithSplitUpdate.

@Test
public void testMinorCompactionWhileStreamingWithSplitUpdate() throws Exception {
    String dbName = "default";
    String tableName = "testMinorCompaction";
    executeStatementOnDriver("drop table if exists " + tableName, driver);
    executeStatementOnDriver("CREATE TABLE " + tableName + "(a INT, b STRING) " + " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true'," + "'transactional_properties'='default')", driver);
    StreamingConnection connection = null;
    // Write a couple of batches
    try {
        for (int i = 0; i < 2; i++) {
            CompactorTestUtil.writeBatch(conf, dbName, tableName, false, false);
        }
        // Start a third batch, but don't close it.
        connection = CompactorTestUtil.writeBatch(conf, dbName, tableName, false, true);
        // Now, compact
        CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MINOR, true);
        // Find the location of the table
        IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
        Table table = metaStoreClient.getTable(dbName, tableName);
        FileSystem fs = FileSystem.get(conf);
        Assert.assertEquals("Delta names does not match", Arrays.asList("delta_0000001_0000002", "delta_0000001_0000005_v0000009", "delta_0000003_0000004", "delta_0000005_0000006"), CompactorTestUtil.getBaseOrDeltaNames(fs, null, table, null));
        CompactorTestUtil.checkExpectedTxnsPresent(null, new Path[] { new Path(table.getSd().getLocation(), "delta_0000001_0000005_v0000009") }, "a,b", "int:string", 0, 1L, 4L, null, 1);
        // Assert that we have no delete deltas if there are no input delete events.
        Assert.assertEquals(0, CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deleteEventDeltaDirFilter, table, null).size());
    } finally {
        if (connection != null) {
            connection.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileSystem(org.apache.hadoop.fs.FileSystem) StreamingConnection(org.apache.hive.streaming.StreamingConnection) HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Test(org.junit.Test)
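AcidUtils.deleteEventDeltaDirFilter, used in the final assertion, matches delete deltas: directories named delete_delta_<min>_<max> that only update and delete events produce. A hand-rolled equivalent (illustration only, using org.apache.hadoop.fs.PathFilter) makes the check explicit:

// Illustration only: streaming performs pure inserts, so no delete deltas appear.
PathFilter deleteDeltaFilter = path -> path.getName().startsWith("delete_delta_");
FileStatus[] deleteDeltas =
        fs.listStatus(new Path(table.getSd().getLocation()), deleteDeltaFilter);
Assert.assertEquals(0, deleteDeltas.length);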

Example 10 with StreamingConnection

Use of org.apache.hive.streaming.StreamingConnection in project hive by apache.

From class CompactorTestUtil, method writeBatch.

/**
 * Open a Hive streaming connection and write content in up to two transactions.
 * @param conf hive configuration
 * @param dbName name of the database
 * @param tblName name of the table
 * @param abort if true, abort each transaction instead of committing it
 * @param keepOpen if true, return after the first transaction and keep the connection open
 * @return the open streaming connection when keepOpen is true, otherwise null
 * @throws StreamingException if the streaming connection cannot be established
 */
static StreamingConnection writeBatch(HiveConf conf, String dbName, String tblName, boolean abort, boolean keepOpen) throws StreamingException {
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    StreamingConnection connection = HiveStreamingConnection.newBuilder()
        .withDatabase(dbName).withTable(tblName)
        .withAgentInfo("UT_" + Thread.currentThread().getName())
        .withHiveConf(conf).withRecordWriter(writer)
        .withTransactionBatchSize(2).connect();
    connection.beginTransaction();
    if (abort) {
        connection.abortTransaction();
    } else {
        connection.write("50,Kiev".getBytes());
        connection.write("51,St. Petersburg".getBytes());
        connection.write("44,Boston".getBytes());
        connection.commitTransaction();
    }
    if (!keepOpen) {
        connection.beginTransaction();
        if (abort) {
            connection.abortTransaction();
        } else {
            connection.write("52,Tel Aviv".getBytes());
            connection.write("53,Atlantis".getBytes());
            connection.write("53,Boston".getBytes());
            connection.commitTransaction();
        }
        connection.close();
        return null;
    }
    return connection;
}
Also used : StrictDelimitedInputWriter(org.apache.hive.streaming.StrictDelimitedInputWriter) StreamingConnection(org.apache.hive.streaming.StreamingConnection) HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection)
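A typical call pattern, taken from the tests above: two fully committed and closed batches, then a third connection kept open so its transaction batch is still pending while the compactor runs.

// Two closed batches, each committing two transactions of three rows.
for (int i = 0; i < 2; i++) {
    CompactorTestUtil.writeBatch(conf, "default", "testMinorCompaction", false, false);
}
// keepOpen = true: only the first transaction commits and the connection stays open.
StreamingConnection pending =
        CompactorTestUtil.writeBatch(conf, "default", "testMinorCompaction", false, true);
// ... run the compaction under test while `pending` is open, then:
pending.close();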

Aggregations

HiveStreamingConnection (org.apache.hive.streaming.HiveStreamingConnection): 12 usages
StreamingConnection (org.apache.hive.streaming.StreamingConnection): 12 usages
Test (org.junit.Test): 11 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 8 usages
Path (org.apache.hadoop.fs.Path): 8 usages
HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient): 8 usages
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient): 8 usages
Table (org.apache.hadoop.hive.metastore.api.Table): 8 usages
StrictDelimitedInputWriter (org.apache.hive.streaming.StrictDelimitedInputWriter): 6 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 5 usages
TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore): 3 usages
ArrayList (java.util.ArrayList): 2 usages
CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest): 2 usages
TreeSet (java.util.TreeSet): 1 usage
ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest): 1 usage
ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse): 1 usage
ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement): 1 usage