Examples with DelimitedInputWriter - org.apache.hive.hcatalog.streaming.DelimitedInputWriter

Example 6 with DelimitedInputWriter

use of org.apache.hive.hcatalog.streaming.DelimitedInputWriter in project hive by apache.

the class TestCompactor method minorCompactAfterAbort.

@Test
public void minorCompactAfterAbort() throws Exception {
    String agentInfo = "UT_" + Thread.currentThread().getName();
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            writeBatch(connection, writer, false);
        }
        // Start a third batch, abort everything, don't properly close it
        TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
        txnBatch.beginNextTransaction();
        txnBatch.abort();
        txnBatch.beginNextTransaction();
        txnBatch.abort();
        // Now, compact
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
        Worker t = new Worker();
        t.setThreadId((int) t.getId());
        t.setConf(conf);
        AtomicBoolean stop = new AtomicBoolean(true);
        AtomicBoolean looped = new AtomicBoolean();
        t.init(stop, looped);
        t.run();
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
        String[] names = new String[stat.length];
        Path resultDelta = null;
        for (int i = 0; i < names.length; i++) {
            names[i] = stat[i].getPath().getName();
            if (names[i].equals("delta_0000001_0000004")) {
                resultDelta = stat[i].getPath();
            }
        }
        Arrays.sort(names);
        String[] expected = new String[] { "delta_0000001_0000002", "delta_0000001_0000004", "delta_0000003_0000004" };
        if (!Arrays.deepEquals(expected, names)) {
            Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names));
        }
        checkExpectedTxnsPresent(null, new Path[] { resultDelta }, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, 1);
    } finally {
        connection.close();
    }
}

Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) TransactionBatch(org.apache.hive.hcatalog.streaming.TransactionBatch) StreamingConnection(org.apache.hive.hcatalog.streaming.StreamingConnection) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FileSystem(org.apache.hadoop.fs.FileSystem) DelimitedInputWriter(org.apache.hive.hcatalog.streaming.DelimitedInputWriter) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)

Example 7 with DelimitedInputWriter

use of org.apache.hive.hcatalog.streaming.DelimitedInputWriter in project hive by apache.

the class TestCompactor method majorCompactAfterAbort.

@Test
public void majorCompactAfterAbort() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            writeBatch(connection, writer, false);
        }
        // Start a third batch, but don't close it.
        TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
        txnBatch.beginNextTransaction();
        txnBatch.abort();
        txnBatch.beginNextTransaction();
        txnBatch.abort();
        // Now, compact
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MAJOR));
        Worker t = new Worker();
        t.setThreadId((int) t.getId());
        t.setConf(conf);
        AtomicBoolean stop = new AtomicBoolean(true);
        AtomicBoolean looped = new AtomicBoolean();
        t.init(stop, looped);
        t.run();
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.baseFileFilter);
        if (1 != stat.length) {
            Assert.fail("majorCompactAfterAbort FileStatus[] stat " + Arrays.toString(stat));
        }
        if (1 != stat.length) {
            Assert.fail("Expecting 1 file \"base_0000004\" and found " + stat.length + " files " + Arrays.toString(stat));
        }
        String name = stat[0].getPath().getName();
        if (!name.equals("base_0000004")) {
            Assert.fail("majorCompactAfterAbort name " + name + " not equals to base_0000004");
        }
        checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, 1);
    } finally {
        connection.close();
    }
}

Aggregations

AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)7 CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest)7 TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore)7 DelimitedInputWriter (org.apache.hive.hcatalog.streaming.DelimitedInputWriter)7 HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint)7 StreamingConnection (org.apache.hive.hcatalog.streaming.StreamingConnection)7 Test (org.junit.Test)7 FileStatus (org.apache.hadoop.fs.FileStatus)6 FileSystem (org.apache.hadoop.fs.FileSystem)6 Path (org.apache.hadoop.fs.Path)6 HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient)6 IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient)6 Table (org.apache.hadoop.hive.metastore.api.Table)6 TransactionBatch (org.apache.hive.hcatalog.streaming.TransactionBatch)3 ArrayList (java.util.ArrayList)1 List (java.util.List)1 ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList)1 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)1 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)1 ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest)1