
Example 16 with CompactionRequest

Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.

From class TestTxnCommands2, method testInitiatorWithMultipleFailedCompactionsForVariousTblProperties:

void testInitiatorWithMultipleFailedCompactionsForVariousTblProperties(String tblProperties) throws Exception {
    String tblName = "hive12353";
    runStatementOnDriver("drop table if exists " + tblName);
    runStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ( " + tblProperties + " )");
    hiveConf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 4);
    for (int i = 0; i < 5; i++) {
        //generate enough delta files so that Initiator can trigger auto compaction
        runStatementOnDriver("insert into " + tblName + " values(" + (i + 1) + ", 'foo'),(" + (i + 2) + ", 'bar'),(" + (i + 3) + ", 'baz')");
    }
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEFAILCOMPACTION, true);
    int numFailedCompactions = hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_INITIATOR_FAILED_THRESHOLD);
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    AtomicBoolean stop = new AtomicBoolean(true);
    //create failed compactions
    for (int i = 0; i < numFailedCompactions; i++) {
        //each of these should fail
        txnHandler.compact(new CompactionRequest("default", tblName, CompactionType.MINOR));
        runWorker(hiveConf);
    }
    //this should not schedule a new compaction due to prior failures, but will create an Attempted entry
    Initiator init = new Initiator();
    init.setThreadId((int) init.getId());
    init.setHiveConf(hiveConf);
    init.init(stop, new AtomicBoolean());
    init.run();
    int numAttemptedCompactions = 1;
    checkCompactionState(new CompactionsByState(numAttemptedCompactions, numFailedCompactions, 0, 0, 0, 0, numFailedCompactions + numAttemptedCompactions), countCompacts(txnHandler));
    hiveConf.setTimeVar(HiveConf.ConfVars.COMPACTOR_HISTORY_REAPER_INTERVAL, 10, TimeUnit.MILLISECONDS);
    AcidCompactionHistoryService compactionHistoryService = new AcidCompactionHistoryService();
    //should not remove anything from history
    runHouseKeeperService(compactionHistoryService, hiveConf);
    checkCompactionState(new CompactionsByState(numAttemptedCompactions, numFailedCompactions, 0, 0, 0, 0, numFailedCompactions + numAttemptedCompactions), countCompacts(txnHandler));
    txnHandler.compact(new CompactionRequest("default", tblName, CompactionType.MAJOR));
    //will fail
    runWorker(hiveConf);
    txnHandler.compact(new CompactionRequest("default", tblName, CompactionType.MINOR));
    //will fail
    runWorker(hiveConf);
    init.run();
    numAttemptedCompactions++;
    init.run();
    numAttemptedCompactions++;
    checkCompactionState(new CompactionsByState(numAttemptedCompactions, numFailedCompactions + 2, 0, 0, 0, 0, numFailedCompactions + 2 + numAttemptedCompactions), countCompacts(txnHandler));
    //should remove history so that we have COMPACTOR_HISTORY_RETENTION_FAILED failed compacts left
    //(and no others, since we only have failed ones here)
    runHouseKeeperService(compactionHistoryService, hiveConf);
    checkCompactionState(new CompactionsByState(hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED), hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED), 0, 0, 0, 0, hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED) + hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED)), countCompacts(txnHandler));
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEFAILCOMPACTION, false);
    txnHandler.compact(new CompactionRequest("default", tblName, CompactionType.MINOR));
    //at this point "show compactions" should have (COMPACTOR_HISTORY_RETENTION_FAILED) failed + 1 initiated (explicitly by user)
    checkCompactionState(new CompactionsByState(hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED), hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED), 1, 0, 0, 0, hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED) + hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED) + 1), countCompacts(txnHandler));
    //will succeed and transition to Initiated->Working->Ready for Cleaning
    runWorker(hiveConf);
    checkCompactionState(new CompactionsByState(hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED), hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED), 0, 1, 0, 0, hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED) + hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED) + 1), countCompacts(txnHandler));
    // transition to Success state
    runCleaner(hiveConf);
    //should not purge anything, as all items are within the retention sizes
    runHouseKeeperService(compactionHistoryService, hiveConf);
    checkCompactionState(new CompactionsByState(hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED), hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED), 0, 0, 1, 0, hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_FAILED) + hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_HISTORY_RETENTION_ATTEMPTED) + 1), countCompacts(txnHandler));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Initiator(org.apache.hadoop.hive.ql.txn.compactor.Initiator) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) AcidCompactionHistoryService(org.apache.hadoop.hive.ql.txn.AcidCompactionHistoryService)
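
Example 16 calls runWorker and runCleaner helpers that are not shown in the snippet. Based on the Worker setup that appears in Examples 18-20, a plausible sketch of such helpers follows; the class name CompactorTestHelpers is invented for illustration, and the real helpers in TestTxnCommands2 may differ in detail.

import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.txn.compactor.Cleaner;
import org.apache.hadoop.hive.ql.txn.compactor.Worker;

public class CompactorTestHelpers {

    // Run one pass of the compactor Worker against the given configuration.
    public static void runWorker(HiveConf hiveConf) throws Exception {
        Worker worker = new Worker();
        worker.setThreadId((int) worker.getId());
        worker.setHiveConf(hiveConf);
        // stop == true makes the compactor thread exit after a single iteration
        AtomicBoolean stop = new AtomicBoolean(true);
        worker.init(stop, new AtomicBoolean());
        worker.run();
    }

    // Run one pass of the Cleaner, which moves "ready for cleaning" entries towards "succeeded".
    public static void runCleaner(HiveConf hiveConf) throws Exception {
        Cleaner cleaner = new Cleaner();
        cleaner.setThreadId((int) cleaner.getId());
        cleaner.setHiveConf(hiveConf);
        cleaner.init(new AtomicBoolean(true), new AtomicBoolean());
        cleaner.run();
    }
}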

Example 17 with CompactionRequest

Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.

From class Initiator, method requestCompaction:

private void requestCompaction(CompactionInfo ci, String runAs, CompactionType type) throws MetaException {
    CompactionRequest rqst = new CompactionRequest(ci.dbname, ci.tableName, type);
    if (ci.partName != null)
        rqst.setPartitionname(ci.partName);
    rqst.setRunas(runAs);
    LOG.info("Requesting compaction: " + rqst);
    CompactionResponse resp = txnHandler.compact(rqst);
    if (resp.isAccepted()) {
        ci.id = resp.getId();
    }
}
Also used : CompactionResponse(org.apache.hadoop.hive.metastore.api.CompactionResponse) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest)
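
As a usage note, the same request/response round trip can be driven directly against a TxnStore. A minimal sketch follows, assuming a configured HiveConf and an existing ACID table; the database, table, partition and user names are placeholders, not values from the examples above.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.CompactionRequest;
import org.apache.hadoop.hive.metastore.api.CompactionResponse;
import org.apache.hadoop.hive.metastore.api.CompactionType;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;

public class RequestCompactionSketch {
    public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);

        // "default"/"target_table" are placeholder names for an existing ACID table.
        CompactionRequest rqst = new CompactionRequest("default", "target_table", CompactionType.MINOR);
        // Optional fields, mirroring Initiator.requestCompaction:
        rqst.setPartitionname("ds=2017-01-01");  // only meaningful for partitioned tables
        rqst.setRunas("hive");                   // user the compaction job should run as

        CompactionResponse resp = txnHandler.compact(rqst);
        if (resp.isAccepted()) {
            System.out.println("Compaction enqueued with id " + resp.getId());
        } else {
            // A compaction for this table/partition is likely already pending or running.
            System.out.println("Request not accepted, state: " + resp.getState());
        }
    }
}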

Example 18 with CompactionRequest

Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.

From class TestCompactor, method minorCompactWhileStreaming:

@Test
public void minorCompactWhileStreaming() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            writeBatch(connection, writer, false);
        }
        // Start a third batch, but don't close it.
        writeBatch(connection, writer, true);
        // Now, compact
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
        Worker t = new Worker();
        t.setThreadId((int) t.getId());
        t.setHiveConf(conf);
        AtomicBoolean stop = new AtomicBoolean(true);
        AtomicBoolean looped = new AtomicBoolean();
        t.init(stop, looped);
        t.run();
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
        String[] names = new String[stat.length];
        Path resultFile = null;
        for (int i = 0; i < names.length; i++) {
            names[i] = stat[i].getPath().getName();
            if (names[i].equals("delta_0000001_0000004")) {
                resultFile = stat[i].getPath();
            }
        }
        Arrays.sort(names);
        String[] expected = new String[] { "delta_0000001_0000002", "delta_0000001_0000004", "delta_0000003_0000004", "delta_0000005_0000006" };
        if (!Arrays.deepEquals(expected, names)) {
            Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names));
        }
        checkExpectedTxnsPresent(null, new Path[] { resultFile }, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
    } finally {
        connection.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) StreamingConnection(org.apache.hive.hcatalog.streaming.StreamingConnection) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FileSystem(org.apache.hadoop.fs.FileSystem) DelimitedInputWriter(org.apache.hive.hcatalog.streaming.DelimitedInputWriter) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)
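
Examples 18-20 call a writeBatch helper that is not shown. A plausible sketch, based on the HCatalog streaming API already imported above, is given below; the row contents and parameter name are placeholders and the real helper in TestCompactor may differ, but a batch of two transactions per call explains why the expected deltas cover transaction ranges such as 1-2, 3-4 and 5-6.

import org.apache.hive.hcatalog.streaming.DelimitedInputWriter;
import org.apache.hive.hcatalog.streaming.StreamingConnection;
import org.apache.hive.hcatalog.streaming.StreamingException;
import org.apache.hive.hcatalog.streaming.TransactionBatch;

// Hypothetical sketch of the writeBatch helper used in Examples 18-20.
private void writeBatch(StreamingConnection connection, DelimitedInputWriter writer,
                        boolean leaveOpen) throws StreamingException, InterruptedException {
    // Each call fetches a batch of two transactions; rows match the "a,b" (int, string) schema.
    TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
    txnBatch.beginNextTransaction();
    txnBatch.write("50,Kiev".getBytes());
    txnBatch.write("51,St. Petersburg".getBytes());
    txnBatch.commit();
    if (!leaveOpen) {
        // Commit and close the second transaction as well, producing a closed delta_x_y directory.
        txnBatch.beginNextTransaction();
        txnBatch.write("52,Boston".getBytes());
        txnBatch.commit();
        txnBatch.close();
    }
    // When leaveOpen is true the batch stays open, simulating a writer that is still
    // streaming while the compactor Worker runs.
}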

Example 19 with CompactionRequest

Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.

From class TestCompactor, method majorCompactWhileStreaming:

@Test
public void majorCompactWhileStreaming() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true') ", driver);
    HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            writeBatch(connection, writer, false);
        }
        // Start a third batch, but don't close it.
        writeBatch(connection, writer, true);
        // Now, compact
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MAJOR));
        Worker t = new Worker();
        t.setThreadId((int) t.getId());
        t.setHiveConf(conf);
        AtomicBoolean stop = new AtomicBoolean(true);
        AtomicBoolean looped = new AtomicBoolean();
        t.init(stop, looped);
        t.run();
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.baseFileFilter);
        if (1 != stat.length) {
            Assert.fail("Expecting 1 file \"base_0000004\" and found " + stat.length + " files " + Arrays.toString(stat));
        }
        String name = stat[0].getPath().getName();
        Assert.assertEquals(name, "base_0000004");
        checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
    } finally {
        connection.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) StreamingConnection(org.apache.hive.hcatalog.streaming.StreamingConnection) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FileSystem(org.apache.hadoop.fs.FileSystem) DelimitedInputWriter(org.apache.hive.hcatalog.streaming.DelimitedInputWriter) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)

Example 20 with CompactionRequest

Use of org.apache.hadoop.hive.metastore.api.CompactionRequest in project hive by apache.

From class TestCompactor, method minorCompactWhileStreamingWithSplitUpdate:

@Test
public void minorCompactWhileStreamingWithSplitUpdate() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true'," + "'transactional_properties'='default')", driver);
    HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            writeBatch(connection, writer, false);
        }
        // Start a third batch, but don't close it.
        writeBatch(connection, writer, true);
        // Now, compact
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
        Worker t = new Worker();
        t.setThreadId((int) t.getId());
        t.setHiveConf(conf);
        AtomicBoolean stop = new AtomicBoolean(true);
        AtomicBoolean looped = new AtomicBoolean();
        t.init(stop, looped);
        t.run();
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
        String[] names = new String[stat.length];
        Path resultFile = null;
        for (int i = 0; i < names.length; i++) {
            names[i] = stat[i].getPath().getName();
            if (names[i].equals("delta_0000001_0000004")) {
                resultFile = stat[i].getPath();
            }
        }
        Arrays.sort(names);
        String[] expected = new String[] { "delta_0000001_0000002", "delta_0000001_0000004", "delta_0000003_0000004", "delta_0000005_0000006" };
        if (!Arrays.deepEquals(expected, names)) {
            Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names));
        }
        checkExpectedTxnsPresent(null, new Path[] { resultFile }, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
        // Verify that we have got correct set of delete_deltas also
        FileStatus[] deleteDeltaStat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deleteEventDeltaDirFilter);
        String[] deleteDeltas = new String[deleteDeltaStat.length];
        Path minorCompactedDeleteDelta = null;
        for (int i = 0; i < deleteDeltas.length; i++) {
            deleteDeltas[i] = deleteDeltaStat[i].getPath().getName();
            if (deleteDeltas[i].equals("delete_delta_0000001_0000004")) {
                minorCompactedDeleteDelta = deleteDeltaStat[i].getPath();
            }
        }
        Arrays.sort(deleteDeltas);
        String[] expectedDeleteDeltas = new String[] { "delete_delta_0000001_0000004" };
        if (!Arrays.deepEquals(expectedDeleteDeltas, deleteDeltas)) {
            Assert.fail("Expected: " + Arrays.toString(expectedDeleteDeltas) + ", found: " + Arrays.toString(deleteDeltas));
        }
        // There should be no rows in the delete_delta because there have been no delete events.
        checkExpectedTxnsPresent(null, new Path[] { minorCompactedDeleteDelta }, columnNamesProperty, columnTypesProperty, 0, 0L, 0L);
    } finally {
        connection.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) StreamingConnection(org.apache.hive.hcatalog.streaming.StreamingConnection) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FileSystem(org.apache.hadoop.fs.FileSystem) DelimitedInputWriter(org.apache.hive.hcatalog.streaming.DelimitedInputWriter) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)

Aggregations

CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest): 56 uses
Test (org.junit.Test): 52 uses
ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest): 41 uses
ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse): 41 uses
Table (org.apache.hadoop.hive.metastore.api.Table): 40 uses
ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement): 33 uses
Path (org.apache.hadoop.fs.Path): 29 uses
FileStatus (org.apache.hadoop.fs.FileStatus): 24 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 24 uses
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 13 uses
Partition (org.apache.hadoop.hive.metastore.api.Partition): 12 uses
CompactionInfo (org.apache.hadoop.hive.metastore.txn.CompactionInfo): 12 uses
TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore): 12 uses
ArrayList (java.util.ArrayList): 10 uses
HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint): 9 uses
HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient): 8 uses
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient): 8 uses
DelimitedInputWriter (org.apache.hive.hcatalog.streaming.DelimitedInputWriter): 7 uses
StreamingConnection (org.apache.hive.hcatalog.streaming.StreamingConnection): 7 uses
LockComponent (org.apache.hadoop.hive.metastore.api.LockComponent): 6 uses
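
Many of the usages aggregated above follow the same verify-after-compact pattern: submit a CompactionRequest, run the Worker, then read the compaction queue back with ShowCompactRequest and ShowCompactResponse. A minimal sketch of that read-back step, assuming a TxnStore obtained as in the examples (the class name and output format are illustrative only):

import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.txn.TxnStore;

public class ShowCompactionsSketch {
    // Dump the current compaction queue/history, roughly what the tests assert against.
    public static void printCompactions(TxnStore txnHandler) throws Exception {
        ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
        for (ShowCompactResponseElement e : rsp.getCompacts()) {
            // Each element describes one entry: table, optional partition, type (MAJOR/MINOR)
            // and a state such as "initiated", "working", "ready for cleaning", "failed",
            // "succeeded" or "attempted".
            System.out.println(e.getDbname() + "." + e.getTablename()
                + " [" + e.getPartitionname() + "] " + e.getType() + " -> " + e.getState());
        }
    }
}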