Examples with TxnStore - org.apache.hadoop.hive.metastore.txn.TxnStore

Example 11 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCompactor method schemaEvolutionAddColDynamicPartitioningUpdate.

@Test
public void schemaEvolutionAddColDynamicPartitioningUpdate() throws Exception {
    String tblName = "udpct";
    List<String> colNames = Arrays.asList("a", "b");
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)" + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
    executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + "'today'), (2, 'wilma', 'yesterday')", driver);
    executeStatementOnDriver("update " + tblName + " set b = 'barney'", driver);
    // Validate the update.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    ArrayList<String> valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(2, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\tyesterday", valuesReadFromHiveDriver.get(1));
    // ALTER TABLE ... ADD COLUMNS
    executeStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(c int)", driver);
    // Validate there is an added NULL for column c.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(2, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\tNULL\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\tNULL\tyesterday", valuesReadFromHiveDriver.get(1));
    // Second INSERT round with new inserts into previously existing partition 'yesterday'.
    executeStatementOnDriver("insert into " + tblName + " partition (ds) values " + "(3, 'mark', 1900, 'soon'), (4, 'douglas', 1901, 'last_century'), " + "(5, 'doc', 1902, 'yesterday')", driver);
    // Validate there the new insertions for column c.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(5, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\tNULL\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\tNULL\tyesterday", valuesReadFromHiveDriver.get(1));
    Assert.assertEquals("3\tmark\t1900\tsoon", valuesReadFromHiveDriver.get(2));
    Assert.assertEquals("4\tdouglas\t1901\tlast_century", valuesReadFromHiveDriver.get(3));
    Assert.assertEquals("5\tdoc\t1902\tyesterday", valuesReadFromHiveDriver.get(4));
    executeStatementOnDriver("update " + tblName + " set c = 2000", driver);
    // Validate the update of new column c, even in old rows.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(5, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\t2000\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\t2000\tyesterday", valuesReadFromHiveDriver.get(1));
    Assert.assertEquals("3\tmark\t2000\tsoon", valuesReadFromHiveDriver.get(2));
    Assert.assertEquals("4\tdouglas\t2000\tlast_century", valuesReadFromHiveDriver.get(3));
    Assert.assertEquals("5\tdoc\t2000\tyesterday", valuesReadFromHiveDriver.get(4));
    Initiator initiator = new Initiator();
    initiator.setThreadId((int) initiator.getId());
    // Set to 1 so insert doesn't set it off but update does
    conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
    initiator.setHiveConf(conf);
    AtomicBoolean stop = new AtomicBoolean();
    stop.set(true);
    initiator.init(stop, new AtomicBoolean());
    initiator.run();
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
    Assert.assertEquals(4, compacts.size());
    SortedSet<String> partNames = new TreeSet<String>();
    for (int i = 0; i < compacts.size(); i++) {
        Assert.assertEquals("default", compacts.get(i).getDbname());
        Assert.assertEquals(tblName, compacts.get(i).getTablename());
        Assert.assertEquals("initiated", compacts.get(i).getState());
        partNames.add(compacts.get(i).getPartitionname());
    }
    List<String> names = new ArrayList<String>(partNames);
    Assert.assertEquals("ds=last_century", names.get(0));
    Assert.assertEquals("ds=soon", names.get(1));
    Assert.assertEquals("ds=today", names.get(2));
    Assert.assertEquals("ds=yesterday", names.get(3));
    // Validate after compaction.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(5, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\t2000\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\t2000\tyesterday", valuesReadFromHiveDriver.get(1));
    Assert.assertEquals("3\tmark\t2000\tsoon", valuesReadFromHiveDriver.get(2));
    Assert.assertEquals("4\tdouglas\t2000\tlast_century", valuesReadFromHiveDriver.get(3));
    Assert.assertEquals("5\tdoc\t2000\tyesterday", valuesReadFromHiveDriver.get(4));
}

Also used : ArrayList(java.util.ArrayList) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) TreeSet(java.util.TreeSet) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) Test(org.junit.Test)

Example 12 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCompactor method minorCompactWhileStreamingWithSplitUpdate.

@Test
public void minorCompactWhileStreamingWithSplitUpdate() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true'," + "'transactional_properties'='default')", driver);
    HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            writeBatch(connection, writer, false);
        }
        // Start a third batch, but don't close it.
        writeBatch(connection, writer, true);
        // Now, compact
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
        Worker t = new Worker();
        t.setThreadId((int) t.getId());
        t.setHiveConf(conf);
        AtomicBoolean stop = new AtomicBoolean(true);
        AtomicBoolean looped = new AtomicBoolean();
        t.init(stop, looped);
        t.run();
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
        String[] names = new String[stat.length];
        Path resultFile = null;
        for (int i = 0; i < names.length; i++) {
            names[i] = stat[i].getPath().getName();
            if (names[i].equals("delta_0000001_0000004")) {
                resultFile = stat[i].getPath();
            }
        }
        Arrays.sort(names);
        String[] expected = new String[] { "delta_0000001_0000002", "delta_0000001_0000004", "delta_0000003_0000004", "delta_0000005_0000006" };
        if (!Arrays.deepEquals(expected, names)) {
            Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names));
        }
        checkExpectedTxnsPresent(null, new Path[] { resultFile }, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
        // Verify that we have got correct set of delete_deltas also
        FileStatus[] deleteDeltaStat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deleteEventDeltaDirFilter);
        String[] deleteDeltas = new String[deleteDeltaStat.length];
        Path minorCompactedDeleteDelta = null;
        for (int i = 0; i < deleteDeltas.length; i++) {
            deleteDeltas[i] = deleteDeltaStat[i].getPath().getName();
            if (deleteDeltas[i].equals("delete_delta_0000001_0000004")) {
                minorCompactedDeleteDelta = deleteDeltaStat[i].getPath();
            }
        }
        Arrays.sort(deleteDeltas);
        String[] expectedDeleteDeltas = new String[] { "delete_delta_0000001_0000004" };
        if (!Arrays.deepEquals(expectedDeleteDeltas, deleteDeltas)) {
            Assert.fail("Expected: " + Arrays.toString(expectedDeleteDeltas) + ", found: " + Arrays.toString(deleteDeltas));
        }
        // There should be no rows in the delete_delta because there have been no delete events.
        checkExpectedTxnsPresent(null, new Path[] { minorCompactedDeleteDelta }, columnNamesProperty, columnTypesProperty, 0, 0L, 0L);
    } finally {
        connection.close();
    }
}

Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) StreamingConnection(org.apache.hive.hcatalog.streaming.StreamingConnection) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FileSystem(org.apache.hadoop.fs.FileSystem) DelimitedInputWriter(org.apache.hive.hcatalog.streaming.DelimitedInputWriter) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)

Example 13 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCompactor method majorCompactWhileStreamingForSplitUpdate.

@Test
public void majorCompactWhileStreamingForSplitUpdate() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true', " + "'transactional_properties'='default') ", // this turns on split-update U=D+I
    driver);
    HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            writeBatch(connection, writer, false);
        }
        // Start a third batch, but don't close it.
        writeBatch(connection, writer, true);
        // Now, compact
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MAJOR));
        Worker t = new Worker();
        t.setThreadId((int) t.getId());
        t.setHiveConf(conf);
        AtomicBoolean stop = new AtomicBoolean(true);
        AtomicBoolean looped = new AtomicBoolean();
        t.init(stop, looped);
        t.run();
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.baseFileFilter);
        if (1 != stat.length) {
            Assert.fail("Expecting 1 file \"base_0000004\" and found " + stat.length + " files " + Arrays.toString(stat));
        }
        String name = stat[0].getPath().getName();
        Assert.assertEquals(name, "base_0000004");
        checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
    } finally {
        connection.close();
    }
}

Example 14 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestCompactor method testMinorCompactionForSplitUpdateWithOnlyInserts.

@Test
public void testMinorCompactionForSplitUpdateWithOnlyInserts() throws Exception {
    String agentInfo = "UT_" + Thread.currentThread().getName();
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true'," + "'transactional_properties'='default')", driver);
    // Insert some data -> this will generate only insert deltas and no delete deltas: delta_1_1
    executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(1, 'foo')", driver);
    // Insert some data -> this will again generate only insert deltas and no delete deltas: delta_2_2
    executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(2, 'bar')", driver);
    // Now, compact
    // One important thing to note in this test is that minor compaction always produces
    // delta_x_y and a counterpart delete_delta_x_y, even when there are no delete_delta events.
    // Such a choice has been made to simplify processing of AcidUtils.getAcidState().
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setHiveConf(conf);
    AtomicBoolean stop = new AtomicBoolean(true);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
    // Find the location of the table
    IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
    Table table = msClient.getTable(dbName, tblName);
    FileSystem fs = FileSystem.get(conf);
    // Verify that we have got correct set of deltas.
    FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
    String[] deltas = new String[stat.length];
    Path minorCompactedDelta = null;
    for (int i = 0; i < deltas.length; i++) {
        deltas[i] = stat[i].getPath().getName();
        if (deltas[i].equals("delta_0000001_0000002")) {
            minorCompactedDelta = stat[i].getPath();
        }
    }
    Arrays.sort(deltas);
    String[] expectedDeltas = new String[] { "delta_0000001_0000001_0000", "delta_0000001_0000002", "delta_0000002_0000002_0000" };
    if (!Arrays.deepEquals(expectedDeltas, deltas)) {
        Assert.fail("Expected: " + Arrays.toString(expectedDeltas) + ", found: " + Arrays.toString(deltas));
    }
    checkExpectedTxnsPresent(null, new Path[] { minorCompactedDelta }, columnNamesProperty, columnTypesProperty, 0, 1L, 2L);
    // Verify that we have got correct set of delete_deltas.
    FileStatus[] deleteDeltaStat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deleteEventDeltaDirFilter);
    String[] deleteDeltas = new String[deleteDeltaStat.length];
    Path minorCompactedDeleteDelta = null;
    for (int i = 0; i < deleteDeltas.length; i++) {
        deleteDeltas[i] = deleteDeltaStat[i].getPath().getName();
        if (deleteDeltas[i].equals("delete_delta_0000001_0000002")) {
            minorCompactedDeleteDelta = deleteDeltaStat[i].getPath();
        }
    }
    Arrays.sort(deleteDeltas);
    String[] expectedDeleteDeltas = new String[] { "delete_delta_0000001_0000002" };
    if (!Arrays.deepEquals(expectedDeleteDeltas, deleteDeltas)) {
        Assert.fail("Expected: " + Arrays.toString(expectedDeleteDeltas) + ", found: " + Arrays.toString(deleteDeltas));
    }
    // There should be no rows in the delete_delta because there have been no delete events.
    checkExpectedTxnsPresent(null, new Path[] { minorCompactedDeleteDelta }, columnNamesProperty, columnTypesProperty, 0, 0L, 0L);
}

Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FileSystem(org.apache.hadoop.fs.FileSystem) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)

Example 15 with TxnStore

use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

the class TestAcidOnTez method testJoin.

// Ideally test like this should be a qfile test. However, the explain output from qfile is always
// slightly different depending on where the test is run, specifically due to file size estimation
private void testJoin(String engine, String joinType) throws Exception {
    // make a clone of existing hive conf
    HiveConf confForTez = new HiveConf(hiveConf);
    // make a clone of existing hive conf
    HiveConf confForMR = new HiveConf(hiveConf);
    if (engine.equals("tez")) {
        // one-time setup to make query able to run with Tez
        setupTez(confForTez);
    }
    if (joinType.equals("MapJoin")) {
        setupMapJoin(confForTez);
        setupMapJoin(confForMR);
    }
    runQueries(engine, joinType, confForTez, confForMR);
    // Perform compaction. Join result after compaction should still be the same
    runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");
    TestTxnCommands2.runWorker(hiveConf);
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    TestTxnCommands2.runCleaner(hiveConf);
    runQueries(engine, joinType, confForTez, confForMR);
}

Also used : ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore)

Aggregations

TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore)24 Test (org.junit.Test)21 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)18 HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint)14 CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest)12 ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest)10 ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse)10 FileStatus (org.apache.hadoop.fs.FileStatus)8 FileSystem (org.apache.hadoop.fs.FileSystem)8 Path (org.apache.hadoop.fs.Path)8 HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient)8 IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient)8 Table (org.apache.hadoop.hive.metastore.api.Table)8 ArrayList (java.util.ArrayList)7 DelimitedInputWriter (org.apache.hive.hcatalog.streaming.DelimitedInputWriter)7 StreamingConnection (org.apache.hive.hcatalog.streaming.StreamingConnection)7 ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement)6 TreeSet (java.util.TreeSet)5 TransactionBatch (org.apache.hive.hcatalog.streaming.TransactionBatch)3 GetOpenTxnsInfoResponse (org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse)2