
Example 26 with TxnStore

Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

From the class TestTxnCommands2, method testCompactWithDelete.

@Test
public void testCompactWithDelete() throws Exception {
    int[][] tableData = { { 1, 2 }, { 3, 4 } };
    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData));
    runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");
    // run the compactor Worker in-line; stop=true makes it exit after one pass
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setConf(hiveConf);
    AtomicBoolean stop = new AtomicBoolean(true);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    // first pass executes the queued MAJOR compaction
    t.run();
    runStatementOnDriver("delete from " + Table.ACIDTBL + " where b = 4");
    runStatementOnDriver("update " + Table.ACIDTBL + " set b = -2 where b = 2");
    runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MINOR'");
    // second pass executes the queued MINOR compaction
    t.run();
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 2, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    Assert.assertEquals("Unexpected 1 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(1).getState());
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean), ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse), Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker), ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest), TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore), Test(org.junit.Test)
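The in-line Worker setup above is the same pattern later examples wrap in TestTxnCommands2.runWorker(hiveConf). A minimal sketch of such a helper, assuming the same compactor API shown in this example:

import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.txn.compactor.Worker;

// Sketch: run the compactor Worker for a single pass and return.
public static void runWorker(HiveConf hiveConf) throws Exception {
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setConf(hiveConf);
    // stop=true tells the worker loop to exit after one iteration
    AtomicBoolean stop = new AtomicBoolean(true);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run(); // picks up at most one queued compaction, executes it, then exits
}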

Example 27 with TxnStore

Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

From the class TestTxnCommands2, method testEmptyInTblproperties.

/**
 * https://issues.apache.org/jira/browse/HIVE-17391
 */
@Test
public void testEmptyInTblproperties() throws Exception {
    runStatementOnDriver("create table t1 " + "(a int, b int) stored as orc TBLPROPERTIES ('serialization.null.format'='', 'transactional'='true')");
    runStatementOnDriver("insert into t1 " + "(a,b) values(1,7),(3,7)");
    runStatementOnDriver("update t1" + " set b = -2 where b = 2");
    runStatementOnDriver("alter table t1 " + " compact 'MAJOR'");
    runWorker(hiveConf);
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
}
Also used : ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse), ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest), TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore), Test(org.junit.Test)
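The same trio of show-compact assertions recurs across these examples and could be factored into one check. A hedged sketch; the helper name and the table-matching loop are illustrative, not part of the Hive test suite:

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
import org.junit.Assert;

// Illustrative helper: assert that a compaction entry for the given table
// reached the expected state (e.g. TxnStore.CLEANING_RESPONSE).
static void assertCompactionState(HiveConf conf, String table, String expectedState) throws Exception {
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    for (ShowCompactResponseElement e : resp.getCompacts()) {
        if (table.equalsIgnoreCase(e.getTablename())) {
            Assert.assertEquals("Unexpected state for " + table, expectedState, e.getState());
            return;
        }
    }
    Assert.fail("No compaction history entry for " + table);
}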

Example 28 with TxnStore

Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

From the class TestTxnCommands2, method writeBetweenWorkerAndCleanerForVariousTblProperties.

protected void writeBetweenWorkerAndCleanerForVariousTblProperties(String tblProperties) throws Exception {
    String tblName = "hive12352";
    runStatementOnDriver("drop table if exists " + tblName);
    runStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ( " + tblProperties + " )");
    // create some data
    runStatementOnDriver("insert into " + tblName + " values(1, 'foo'),(2, 'bar'),(3, 'baz')");
    runStatementOnDriver("update " + tblName + " set b = 'blah' where a = 3");
    // run Worker to execute compaction
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    txnHandler.compact(new CompactionRequest("default", tblName, CompactionType.MINOR));
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setConf(hiveConf);
    AtomicBoolean stop = new AtomicBoolean(true);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
    // delete something, but make sure txn is rolled back
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, true);
    runStatementOnDriver("delete from " + tblName + " where a = 1");
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, false);
    List<String> expected = new ArrayList<>();
    expected.add("1\tfoo");
    expected.add("2\tbar");
    expected.add("3\tblah");
    Assert.assertEquals("", expected, runStatementOnDriver("select a,b from " + tblName + " order by a"));
    // run Cleaner
    Cleaner c = new Cleaner();
    c.setThreadId((int) c.getId());
    c.setConf(hiveConf);
    c.init(stop, new AtomicBoolean());
    c.run();
    // this seems odd, but we want to make sure that CompactionTxnHandler.cleanEmptyAbortedTxns() runs
    Initiator i = new Initiator();
    i.setThreadId((int) i.getId());
    i.setConf(hiveConf);
    i.init(stop, new AtomicBoolean());
    i.run();
    // check that aborted operation didn't become committed
    Assert.assertEquals("", expected, runStatementOnDriver("select a,b from " + tblName + " order by a"));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean), Initiator(org.apache.hadoop.hive.ql.txn.compactor.Initiator), ArrayList(java.util.ArrayList), Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker), TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore), CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest), Cleaner(org.apache.hadoop.hive.ql.txn.compactor.Cleaner)
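The method is protected and parameterized over tblProperties, so concrete tests can drive it with different ACID property combinations. A hypothetical invocation from the same test class; the test name and the property string are assumptions, not quoted from the Hive sources:

// Hypothetical caller; the property string is an assumption.
@Test
public void testWriteBetweenWorkerAndCleaner() throws Exception {
    writeBetweenWorkerAndCleanerForVariousTblProperties("'transactional'='true'");
}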

Example 29 with TxnStore

Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

From the class TestTxnNoBuckets, method testEmptyCompactionResult.

/**
 * see HIVE-18429
 */
@Test
public void testEmptyCompactionResult() throws Exception {
    hiveConf.set(MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID.getVarname(), "true");
    runStatementOnDriver("drop table if exists T");
    runStatementOnDriver("create table T (a int, b int) stored as orc");
    int[][] data = { { 1, 2 }, { 3, 4 } };
    runStatementOnDriver("insert into T" + makeValuesClause(data));
    runStatementOnDriver("insert into T" + makeValuesClause(data));
    // delete the bucket files so now we have empty delta dirs
    List<String> rs = runStatementOnDriver("select distinct INPUT__FILE__NAME from T");
    FileSystem fs = FileSystem.get(hiveConf);
    for (String path : rs) {
        fs.delete(new Path(path), true);
    }
    runStatementOnDriver("alter table T compact 'major'");
    TestTxnCommands2.runWorker(hiveConf);
    // check status of compaction job
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
    // now run another compaction to make sure empty dirs don't cause issues
    runStatementOnDriver("insert into T" + makeValuesClause(data));
    runStatementOnDriver("alter table T compact 'major'");
    TestTxnCommands2.runWorker(hiveConf);
    // check status of compaction job
    resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 2, resp.getCompactsSize());
    for (int i = 0; i < 2; i++) {
        Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(i).getState());
        Assert.assertTrue(resp.getCompacts().get(i).getHadoopJobId().startsWith("job_local"));
    }
    rs = runStatementOnDriver("select a, b from T order by a, b");
    Assert.assertEquals(stringifyValues(data), rs);
}
Also used : Path(org.apache.hadoop.fs.Path), ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse), FileSystem(org.apache.hadoop.fs.FileSystem), ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest), TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore), Test(org.junit.Test)
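After the major compaction above, the table's directory contains a base_N directory (and, until the Cleaner runs, possibly leftover empty delta dirs). A minimal sketch for inspecting that layout with the same FileSystem API; the warehouse path for table T is an assumed default, for illustration only:

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// List the base/delta directories under table T; the path below is an
// assumed default warehouse location.
FileSystem fs = FileSystem.get(hiveConf);
Path tableDir = new Path("/user/hive/warehouse/t");
for (FileStatus stat : fs.listStatus(tableDir)) {
    System.out.println(stat.getPath().getName()); // e.g. delta_0000001_0000001, base_0000002
}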

Example 30 with TxnStore

Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.

From the class TestTxnNoBuckets, method testNonAcidToAcidVectorzied.

/**
 * Tests to check that we are able to use vectorized acid reader,
 * VectorizedOrcAcidRowBatchReader, when reading "original" files,
 * i.e. those that were written before the table was converted to acid.
 * See also acid_vectorization_original*.q
 */
@Test
public void testNonAcidToAcidVectorzied() throws Exception {
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
    hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "none");
    // this enables vectorization of ROW__ID
    // HIVE-12631
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED, true);
    runStatementOnDriver("drop table if exists T");
    runStatementOnDriver("create table T(a int, b int) stored as orc tblproperties('transactional'='false')");
    int[][] values = { { 1, 2 }, { 2, 4 }, { 5, 6 }, { 6, 8 }, { 9, 10 } };
    runStatementOnDriver("insert into T(a, b) " + makeValuesClause(values));
    // , 'transactional_properties'='default'
    runStatementOnDriver("alter table T SET TBLPROPERTIES ('transactional'='true')");
    // Execution mode: vectorized
    // this uses VectorizedOrcAcidRowBatchReader
    String query = "select a from T where b > 6 order by a";
    List<String> rs = runStatementOnDriver(query);
    String[][] expected = { { "6", "" }, { "9", "" } };
    checkExpected(rs, expected, "After conversion");
    Assert.assertEquals(Integer.toString(6), rs.get(0));
    Assert.assertEquals(Integer.toString(9), rs.get(1));
    assertVectorized(shouldVectorize(), query);
    // PPD is working here, but the storage layer filters only at row-group level, not row level
    // this uses VectorizedOrcAcidRowBatchReader
    query = "select ROW__ID, a from T where b > 6 order by a";
    rs = runStatementOnDriver(query);
    String[][] expected1 = { { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "6" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9" } };
    checkExpected(rs, expected1, "After conversion with VC1");
    assertVectorized(shouldVectorize(), query);
    // this uses VectorizedOrcAcidRowBatchReader
    query = "select ROW__ID, a from T where b > 0 order by a";
    rs = runStatementOnDriver(query);
    String[][] expected2 = {
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}", "1" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}", "2" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}", "5" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "6" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9" }
    };
    checkExpected(rs, expected2, "After conversion with VC2");
    assertVectorized(shouldVectorize(), query);
    // doesn't vectorize (uses neither of the vectorized ACID readers)
    query = "select ROW__ID, a, INPUT__FILE__NAME from T where b > 6 order by a";
    rs = runStatementOnDriver(query);
    Assert.assertEquals("", 2, rs.size());
    String[][] expected3 = { { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6", "warehouse/t/000000_0" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9", "warehouse/t/000000_0" } };
    checkExpected(rs, expected3, "After non-vectorized read");
    Assert.assertEquals(0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
    // not vectorized because INPUT__FILE__NAME is selected
    assertVectorized(false, query);
    runStatementOnDriver("update T set b = 17 where a = 1");
    // this should use VectorizedOrcAcidRowBatchReader
    query = "select ROW__ID, b from T where b > 0 order by a";
    rs = runStatementOnDriver(query);
    String[][] expected4 = {
        { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}", "17" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}", "4" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}", "6" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "8" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "10" }
    };
    checkExpected(rs, expected4, "After conversion with VC4");
    assertVectorized(shouldVectorize(), query);
    runStatementOnDriver("alter table T compact 'major'");
    TestTxnCommands2.runWorker(hiveConf);
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
    // this should not vectorize at all
    query = "select ROW__ID, a, b, INPUT__FILE__NAME from T where b > 0 order by a, b";
    rs = runStatementOnDriver(query);
    String[][] expected5 = {
        // the ROW__IDs are the same after compaction
        { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "warehouse/t/base_0000001/bucket_00000" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4", "warehouse/t/base_0000001/bucket_00000" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6", "warehouse/t/base_0000001/bucket_00000" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8", "warehouse/t/base_0000001/bucket_00000" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10", "warehouse/t/base_0000001/bucket_00000" }
    };
    checkExpected(rs, expected5, "After major compaction");
    // not vectorized because INPUT__FILE__NAME is selected
    assertVectorized(false, query);
}
Also used : ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse), ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest), TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore), Test(org.junit.Test)
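Every ROW__ID above carries bucketid 536870912, which is 0x20000000: BucketCodec V1 stores the codec version in the top three bits, so the value decodes to writer (bucket) id 0, matching the assertion in the test. A small sketch using the same API the test calls:

import org.apache.hadoop.hive.ql.io.BucketCodec;

// 536870912 == 0x20000000: top 3 bits = 001 (codec V1); the remaining bits
// encode the writer id and statement id, both 0 here.
int bucketProperty = 536870912;
BucketCodec codec = BucketCodec.determineVersion(bucketProperty);
System.out.println(codec.decodeWriterId(bucketProperty));    // 0
System.out.println(codec.decodeStatementId(bucketProperty)); // 0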

Aggregations

TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore): 30 usages
Test (org.junit.Test): 27 usages
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 18 usages
ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest): 15 usages
ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse): 15 usages
HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint): 14 usages
CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest): 12 usages
Path (org.apache.hadoop.fs.Path): 11 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 10 usages
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient): 9 usages
ArrayList (java.util.ArrayList): 8 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 8 usages
HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient): 8 usages
Table (org.apache.hadoop.hive.metastore.api.Table): 8 usages
DelimitedInputWriter (org.apache.hive.hcatalog.streaming.DelimitedInputWriter): 7 usages
StreamingConnection (org.apache.hive.hcatalog.streaming.StreamingConnection): 7 usages
ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement): 6 usages
TreeSet (java.util.TreeSet): 5 usages
TransactionBatch (org.apache.hive.hcatalog.streaming.TransactionBatch): 3 usages
List (java.util.List): 2 usages