Search in sources :

Example 1 with Worker

use of org.apache.hadoop.hive.ql.txn.compactor.Worker in project hive by apache.

the class TestTxnCommands2 method testAlterTable.

/**
 * Smoke test for adding a column to an ACID table that has already been through a
 * major compaction request and a Worker run.
 *
 * The method contains no assertions: it succeeds as long as none of the statements
 * or the Worker invocation throws.
 */
@Test
public void testAlterTable() throws Exception {
    // Seed one row, then queue a major compaction for the table.
    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(new int[][] { { 1, 2 } }));
    runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");

    // The stop flag is already true when handed to the Worker
    // (presumably so that run() makes a single pass -- confirm against Worker).
    AtomicBoolean stopFlag = new AtomicBoolean(true);
    AtomicBoolean loopedFlag = new AtomicBoolean();
    Worker worker = new Worker();
    worker.setThreadId((int) worker.getId());
    worker.setHiveConf(hiveConf);
    worker.init(stopFlag, loopedFlag);
    // run() is invoked directly, so the work happens on the calling thread.
    worker.run();

    // Write and read with the original two-column schema.
    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(new int[][] { { 5, 6 } }));
    List<String> twoColumnRows = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " where b > 0 order by a,b");

    // Widen the schema, then write and read with the extra column.
    runStatementOnDriver("alter table " + Table.ACIDTBL + " add columns(c int)");
    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b,c) " + makeValuesClause(new int[][] { { 7, 8, 9 } }));
    List<String> threeColumnRows = runStatementOnDriver("select a,b,c from " + Table.ACIDTBL + " where a > 0 order by a,b,c");
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker) Test(org.junit.Test)

Example 2 with Worker

use of org.apache.hadoop.hive.ql.txn.compactor.Worker in project hive by apache.

the class TestTxnCommands2 method runWorker.

/**
 * Creates a compactor {@link Worker}, configures it with the given configuration and
 * invokes its {@code run()} method directly on the calling thread.
 *
 * The stop flag passed to {@code init} is already {@code true}
 * (presumably so that the Worker makes a single pass -- confirm against Worker).
 *
 * @param hiveConf configuration handed to the Worker
 * @throws MetaException declared by the Worker setup calls
 */
public static void runWorker(HiveConf hiveConf) throws MetaException {
    AtomicBoolean stopFlag = new AtomicBoolean(true);
    AtomicBoolean loopedFlag = new AtomicBoolean();

    Worker worker = new Worker();
    worker.setThreadId((int) worker.getId());
    worker.setHiveConf(hiveConf);
    worker.init(stopFlag, loopedFlag);
    worker.run();
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker)

Example 3 with Worker

use of org.apache.hadoop.hive.ql.txn.compactor.Worker in project hive by apache.

the class TestTxnCommands2 method testOrcPPD.

/**
 * Checks that ACID reads return correct results with ORC predicate push-down (PPD)
 * switched on or off.
 *
 * This is run 2 times: 1 with PPD on, 1 with off.
 * Also, the queries are such that if we were to push predicate down to an update/delete delta,
 * the test would produce wrong results.
 *
 * The previous value of {@code HIVEOPTINDEXFILTER} is restored in a {@code finally} block, so a
 * failing assertion or statement does not leave the shared {@code hiveConf} modified.
 *
 * @param enablePPD value written to {@code HiveConf.ConfVars.HIVEOPTINDEXFILTER} for the duration
 *                  of the test; when true the explain plans are also checked for "filterExpr:"
 * @throws Exception if any statement or the compaction Worker fails
 */
private void testOrcPPD(boolean enablePPD) throws Exception {
    boolean originalPpd = hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER);
    //enables ORC PPD
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER, enablePPD);
    try {
        //create delta_0001_0001_0000 (should push predicate here)
        runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(new int[][] { { 1, 2 }, { 3, 4 } }));
        List<String> explain;
        String query = "update " + Table.ACIDTBL + " set b = 5 where a = 3";
        if (enablePPD) {
            explain = runStatementOnDriver("explain " + query);
            /*
             here is a portion of the above "explain".  The "filterExpr:" in the TableScan is the pushed predicate
             w/o PPD, the line is simply not there, otherwise the plan is the same
              Map Operator Tree:,
                TableScan,
                 alias: acidtbl,
                 filterExpr: (a = 3) (type: boolean),
                   Filter Operator,
                    predicate: (a = 3) (type: boolean),
                    Select Operator,
                    ...
             */
            assertExplainHasString("filterExpr: (a = 3)", explain, "PPD wasn't pushed");
        }
        //create delta_0002_0002_0000 (can't push predicate)
        runStatementOnDriver(query);
        query = "select a,b from " + Table.ACIDTBL + " where b = 4 order by a,b";
        if (enablePPD) {
            /* at this point we have 2 delta files, 1 for insert 1 for update
             * we should push predicate into 1st one but not 2nd.  If the following 'select' were to
             * push into the 'update' delta, we'd filter out {3,5} before doing merge and thus
             * produce {3,4} as the value for 2nd row.  The right result is 0-rows. */
            explain = runStatementOnDriver("explain " + query);
            assertExplainHasString("filterExpr: (b = 4)", explain, "PPD wasn't pushed");
        }
        List<String> rs0 = runStatementOnDriver(query);
        Assert.assertEquals("Read failed", 0, rs0.size());
        runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");
        // Run the compactor Worker directly on this thread; the stop flag is already true when passed in.
        Worker t = new Worker();
        t.setThreadId((int) t.getId());
        t.setHiveConf(hiveConf);
        AtomicBoolean stop = new AtomicBoolean(true);
        AtomicBoolean looped = new AtomicBoolean();
        t.init(stop, looped);
        t.run();
        //now we have base_0001 file
        int[][] tableData2 = { { 1, 7 }, { 5, 6 }, { 7, 8 }, { 9, 10 } };
        runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData2));
        //now we have delta_0003_0003_0000 with inserts only (ok to push predicate)
        if (enablePPD) {
            explain = runStatementOnDriver("explain delete from " + Table.ACIDTBL + " where a=7 and b=8");
            assertExplainHasString("filterExpr: ((a = 7) and (b = 8))", explain, "PPD wasn't pushed");
        }
        runStatementOnDriver("delete from " + Table.ACIDTBL + " where a=7 and b=8");
        //now we have delta_0004_0004_0000 with delete events
        /* (can't push predicate to 'delete' delta)
         * if we were to push to 'delete' delta, we'd filter out all rows since the 'row' is always NULL for
         * delete events and we'd produce data as if the delete never happened */
        query = "select a,b from " + Table.ACIDTBL + " where a > 1 order by a,b";
        if (enablePPD) {
            explain = runStatementOnDriver("explain " + query);
            assertExplainHasString("filterExpr: (a > 1)", explain, "PPD wasn't pushed");
        }
        List<String> rs1 = runStatementOnDriver(query);
        int[][] resultData = new int[][] { { 3, 5 }, { 5, 6 }, { 9, 10 } };
        Assert.assertEquals("Update failed", stringifyValues(resultData), rs1);
    } finally {
        // Always put the setting back, even when an assertion above fails.
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER, originalPpd);
    }
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker)

Example 4 with Worker

use of org.apache.hadoop.hive.ql.txn.compactor.Worker in project hive by apache.

the class TestTxnCommands2 method writeBetweenWorkerAndCleanerForVariousTblProperties.

/**
 * Runs a minor compaction on a freshly created table, performs a delete whose transaction is
 * rolled back, then runs the Cleaner and the Initiator and checks that the table contents are
 * unchanged, i.e. the aborted delete never became visible.
 *
 * The {@code HIVETESTMODEROLLBACKTXN} flag is reset in a {@code finally} block so that a failure of
 * the delete statement cannot leave the shared {@code hiveConf} in rollback mode.
 *
 * @param tblProperties text placed verbatim inside {@code TBLPROPERTIES ( ... )} of the CREATE TABLE
 * @throws Exception if any statement or compactor component fails
 */
protected void writeBetweenWorkerAndCleanerForVariousTblProperties(String tblProperties) throws Exception {
    String tblName = "hive12352";
    runStatementOnDriver("drop table if exists " + tblName);
    //currently ACID requires table to be bucketed
    runStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) "
        + " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ( " + tblProperties + " )");
    //create some data
    runStatementOnDriver("insert into " + tblName + " values(1, 'foo'),(2, 'bar'),(3, 'baz')");
    runStatementOnDriver("update " + tblName + " set b = 'blah' where a = 3");
    //run Worker to execute compaction
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    txnHandler.compact(new CompactionRequest("default", tblName, CompactionType.MINOR));
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setHiveConf(hiveConf);
    // The same pre-set stop flag is shared by the Worker, Cleaner and Initiator below.
    AtomicBoolean stop = new AtomicBoolean(true);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
    //delete something, but make sure txn is rolled back
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, true);
    try {
        runStatementOnDriver("delete from " + tblName + " where a = 1");
    } finally {
        // Switch rollback mode off again even if the delete statement throws.
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, false);
    }
    List<String> expected = new ArrayList<>();
    expected.add("1\tfoo");
    expected.add("2\tbar");
    expected.add("3\tblah");
    Assert.assertEquals("", expected, runStatementOnDriver("select a,b from " + tblName + " order by a"));
    //run Cleaner
    Cleaner c = new Cleaner();
    c.setThreadId((int) c.getId());
    c.setHiveConf(hiveConf);
    c.init(stop, new AtomicBoolean());
    c.run();
    //this seems odd, but we want to make sure that CompactionTxnHandler.cleanEmptyAbortedTxns() is run
    Initiator i = new Initiator();
    i.setThreadId((int) i.getId());
    i.setHiveConf(hiveConf);
    i.init(stop, new AtomicBoolean());
    i.run();
    //check that aborted operation didn't become committed
    Assert.assertEquals("", expected, runStatementOnDriver("select a,b from " + tblName + " order by a"));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Initiator(org.apache.hadoop.hive.ql.txn.compactor.Initiator) ArrayList(java.util.ArrayList) Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Cleaner(org.apache.hadoop.hive.ql.txn.compactor.Cleaner)

Example 5 with Worker

use of org.apache.hadoop.hive.ql.txn.compactor.Worker in project hive by apache.

the class TestTxnCommands2 method testCompactWithDelete.

/**
 * Requests a major compaction, runs the Worker, then deletes and updates rows, requests a
 * minor compaction and runs the same Worker instance again.
 *
 * Finally asserts that the compaction history reported by the TxnStore contains exactly two
 * entries and that both are in the {@code TxnStore.CLEANING_RESPONSE} state.
 */
@Test
public void testCompactWithDelete() throws Exception {
    int[][] initialRows = { { 1, 2 }, { 3, 4 } };
    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(initialRows));
    runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");

    // First Worker pass; the stop flag is already true when handed over.
    AtomicBoolean stopFlag = new AtomicBoolean(true);
    AtomicBoolean loopedFlag = new AtomicBoolean();
    Worker worker = new Worker();
    worker.setThreadId((int) worker.getId());
    worker.setHiveConf(hiveConf);
    worker.init(stopFlag, loopedFlag);
    worker.run();

    // Produce delete and update events, then queue a minor compaction and reuse the Worker.
    runStatementOnDriver("delete from " + Table.ACIDTBL + " where b = 4");
    runStatementOnDriver("update " + Table.ACIDTBL + " set b = -2 where b = 2");
    runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MINOR'");
    worker.run();

    ShowCompactResponse history = TxnUtils.getTxnStore(hiveConf).showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 2, history.getCompactsSize());
    // Both recorded compactions must be in the cleaning state.
    for (int idx = 0; idx < 2; idx++) {
        Assert.assertEquals("Unexpected " + idx + " compaction state", TxnStore.CLEANING_RESPONSE, history.getCompacts().get(idx).getState());
    }
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) Worker(org.apache.hadoop.hive.ql.txn.compactor.Worker) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Test(org.junit.Test)

Aggregations

AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)5 Worker (org.apache.hadoop.hive.ql.txn.compactor.Worker)5 TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore)2 Test (org.junit.Test)2 ArrayList (java.util.ArrayList)1 CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest)1 ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest)1 ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse)1 Cleaner (org.apache.hadoop.hive.ql.txn.compactor.Cleaner)1 Initiator (org.apache.hadoop.hive.ql.txn.compactor.Initiator)1