
Example 46 with ShowCompactResponse

use of org.apache.hadoop.hive.metastore.api.ShowCompactResponse in project hive by apache.

the class TestTxnNoBuckets method testNonAcidToAcidVectorzied.

/**
 * Tests to check that we are able to use vectorized acid reader,
 * VectorizedOrcAcidRowBatchReader, when reading "original" files,
 * i.e. those that were written before the table was converted to acid.
 * See also acid_vectorization_original*.q
 */
@Test
public void testNonAcidToAcidVectorzied() throws Exception {
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
    hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "none");
    // this enables vectorization of ROW__ID
    // HIVE-12631
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED, true);
    runStatementOnDriver("drop table if exists T");
    runStatementOnDriver("create table T(a int, b int) stored as orc tblproperties('transactional'='false')");
    int[][] values = { { 1, 2 }, { 2, 4 }, { 5, 6 }, { 6, 8 }, { 9, 10 } };
    runStatementOnDriver("insert into T(a, b) " + makeValuesClause(values));
    // , 'transactional_properties'='default'
    runStatementOnDriver("alter table T SET TBLPROPERTIES ('transactional'='true')");
    // Execution mode: vectorized
    // this uses VectorizedOrcAcidRowBatchReader
    String query = "select a from T where b > 6 order by a";
    List<String> rs = runStatementOnDriver(query);
    String[][] expected = { { "6", "" }, { "9", "" } };
    checkExpected(rs, expected, "After conversion");
    Assert.assertEquals(Integer.toString(6), rs.get(0));
    Assert.assertEquals(Integer.toString(9), rs.get(1));
    assertVectorized(shouldVectorize(), query);
    // PPD is working here, but the storage layer only filters at row-group level, not at row level
    // this uses VectorizedOrcAcidRowBatchReader
    query = "select ROW__ID, a from T where b > 6 order by a";
    rs = runStatementOnDriver(query);
    String[][] expected1 = { { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "6" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9" } };
    checkExpected(rs, expected1, "After conversion with VC1");
    assertVectorized(shouldVectorize(), query);
    // this uses VectorizedOrcAcidRowBatchReader
    query = "select ROW__ID, a from T where b > 0 order by a";
    rs = runStatementOnDriver(query);
    String[][] expected2 = {
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}", "1" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}", "2" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}", "5" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "6" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9" } };
    checkExpected(rs, expected2, "After conversion with VC2");
    assertVectorized(shouldVectorize(), query);
    // doesn't vectorize (uses neither of the Vectorized Acid readers)
    query = "select ROW__ID, a, INPUT__FILE__NAME from T where b > 6 order by a";
    rs = runStatementOnDriver(query);
    Assert.assertEquals("", 2, rs.size());
    String[][] expected3 = {
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6", "warehouse/t/000000_0" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9", "warehouse/t/000000_0" } };
    checkExpected(rs, expected3, "After non-vectorized read");
    Assert.assertEquals(0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
    // not vectorized because INPUT__FILE__NAME is requested
    assertVectorized(false, query);
    runStatementOnDriver("update T set b = 17 where a = 1");
    // this should use VectorizedOrcAcidRowReader
    query = "select ROW__ID, b from T where b > 0 order by a";
    rs = runStatementOnDriver(query);
    String[][] expected4 = {
        { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}", "17" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}", "4" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}", "6" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "8" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "10" } };
    checkExpected(rs, expected4, "After conversion with VC4");
    assertVectorized(shouldVectorize(), query);
    runStatementOnDriver("alter table T compact 'major'");
    TestTxnCommands2.runWorker(hiveConf);
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
    // this should not vectorize at all
    query = "select ROW__ID, a, b, INPUT__FILE__NAME from T where b > 0 order by a, b";
    rs = runStatementOnDriver(query);
    String[][] expected5 = {
        // the ROW__IDs are the same after compaction
        { "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "warehouse/t/base_0000001/bucket_00000" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4", "warehouse/t/base_0000001/bucket_00000" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6", "warehouse/t/base_0000001/bucket_00000" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8", "warehouse/t/base_0000001/bucket_00000" },
        { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10", "warehouse/t/base_0000001/bucket_00000" } };
    checkExpected(rs, expected5, "After major compaction");
    // not vectorized because INPUT__FILE__NAME is requested
    assertVectorized(false, query);
}
Also used : ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Test(org.junit.Test)
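
The bucketid value 536870912 that appears in every ROW__ID above is not arbitrary. As a rough illustration (assuming the BucketCodec V1 layout, where the codec version occupies the top bits and the writer id sits below it), 536870912 is simply the encoding of writer 0 under codec version 1, which is why the decodeWriterId assertion returns 0:

// Illustrative sketch only; the exact bit layout is an assumption inferred from the assertion above.
public class BucketIdSketch {
    public static void main(String[] args) {
        int bucketProperty = 536870912;
        // 536870912 == 1 << 29: only a single high-order (version) bit is set,
        // so the writer id and statement id fields both decode to 0.
        System.out.println(bucketProperty == (1 << 29)); // true
        System.out.println(Integer.toBinaryString(bucketProperty)); // prints a 1 followed by 29 zeros
    }
}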

Example 47 with ShowCompactResponse

use of org.apache.hadoop.hive.metastore.api.ShowCompactResponse in project hive by apache.

the class Initiator method run.

@Override
public void run() {
    // Make sure nothing escapes this run method and kills the metastore at large,
    // so wrap it in a big catch Throwable statement.
    try {
        recoverFailedCompactions(false);
        int abortedThreshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD);
        // Make sure we run through the loop once before checking whether to stop, as this makes
        // testing much easier.  The stop value is only for testing and not used when called from
        // HiveMetaStore.
        do {
            long startedAt = -1;
            TxnStore.MutexAPI.LockHandle handle = null;
            // Wrap the inner parts of the loop in a catch Throwable so that errors in one pass
            // don't doom the entire thread.
            try {
                handle = txnHandler.getMutexAPI().acquireLock(TxnStore.MUTEX_KEY.Initiator.name());
                startedAt = System.currentTimeMillis();
                // todo: add method to only get current i.e. skip history - more efficient
                ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest());
                Set<CompactionInfo> potentials = txnHandler.findPotentialCompactions(abortedThreshold);
                LOG.debug("Found " + potentials.size() + " potential compactions, " + "checking to see if we should compact any of them");
                for (CompactionInfo ci : potentials) {
                    LOG.info("Checking to see if we should compact " + ci.getFullPartitionName());
                    try {
                        Table t = resolveTable(ci);
                        if (t == null) {
                            // Most likely this means it's a temp table
                            LOG.info("Can't find table " + ci.getFullTableName() + ", assuming it's a temp " + "table or has been dropped and moving on.");
                            continue;
                        }
                        // check if no compaction set for this table
                        if (noAutoCompactSet(t)) {
                            LOG.info("Table " + tableName(t) + " marked " + hive_metastoreConstants.TABLE_NO_AUTO_COMPACT + "=true so we will not compact it.");
                            continue;
                        }
                        // If the table is partitioned but this entry has no partition name,
                        // then it's a dynamic partitioning case and we shouldn't check the table itself.
                        if (t.getPartitionKeys() != null && t.getPartitionKeys().size() > 0 && ci.partName == null) {
                            LOG.debug("Skipping entry for " + ci.getFullTableName() + " as it is from dynamic" + " partitioning");
                            continue;
                        }
                        // Check whether a compaction for this entity is already initiated or in progress.
                        // This is slightly racy: another compaction could be requested between
                        // the time currentCompactions is generated and now.
                        if (lookForCurrentCompactions(currentCompactions, ci)) {
                            LOG.debug("Found currently initiated or working compaction for " + ci.getFullPartitionName() + " so we will not initiate another compaction");
                            continue;
                        }
                        if (txnHandler.checkFailedCompactions(ci)) {
                            LOG.warn("Will not initiate compaction for " + ci.getFullPartitionName() + " since last " + HiveConf.ConfVars.COMPACTOR_INITIATOR_FAILED_THRESHOLD + " attempts to compact it failed.");
                            txnHandler.markFailed(ci);
                            continue;
                        }
                        // Figure out who we should run the file operations as
                        Partition p = resolvePartition(ci);
                        if (p == null && ci.partName != null) {
                            LOG.info("Can't find partition " + ci.getFullPartitionName() + ", assuming it has been dropped and moving on.");
                            continue;
                        }
                        // Compaction doesn't work under a transaction and hence pass null for validTxnList
                        // The response will have one entry per table and hence we get only one ValidWriteIdList
                        String fullTableName = TxnUtils.getFullTableName(t.getDbName(), t.getTableName());
                        GetValidWriteIdsRequest rqst = new GetValidWriteIdsRequest(Collections.singletonList(fullTableName), null);
                        ValidWriteIdList tblValidWriteIds = TxnUtils.createValidCompactWriteIdList(txnHandler.getValidWriteIds(rqst).getTblValidWriteIds().get(0));
                        StorageDescriptor sd = resolveStorageDescriptor(t, p);
                        String runAs = findUserToRunAs(sd.getLocation(), t);
                        /* Future thought: checkForCompaction will check a lot of file metadata and may be expensive.
                         * Long term we should consider having a thread pool here and running checkForCompactions
                         * in parallel. */
                        CompactionType compactionNeeded = checkForCompaction(ci, tblValidWriteIds, sd, t.getParameters(), runAs);
                        if (compactionNeeded != null)
                            requestCompaction(ci, runAs, compactionNeeded);
                    } catch (Throwable t) {
                        LOG.error("Caught exception while trying to determine if we should compact " + ci + ".  Marking failed to avoid repeated failures, " + "" + StringUtils.stringifyException(t));
                        txnHandler.markFailed(ci);
                    }
                }
                // Check for timed out remote workers.
                recoverFailedCompactions(true);
                // Clean anything from the txns table that has no components left in txn_components.
                txnHandler.cleanEmptyAbortedTxns();
            } catch (Throwable t) {
                LOG.error("Initiator loop caught unexpected exception this time through the loop: " + StringUtils.stringifyException(t));
            } finally {
                if (handle != null) {
                    handle.releaseLocks();
                }
            }
            long elapsedTime = System.currentTimeMillis() - startedAt;
            if (elapsedTime >= checkInterval || stop.get())
                continue;
            else
                Thread.sleep(checkInterval - elapsedTime);
        } while (!stop.get());
    } catch (Throwable t) {
        LOG.error("Caught an exception in the main loop of compactor initiator, exiting " + StringUtils.stringifyException(t));
    }
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) GetValidWriteIdsRequest(org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest) CompactionType(org.apache.hadoop.hive.metastore.api.CompactionType) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo)
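
The lookForCurrentCompactions call above is what keeps the Initiator from queuing duplicate work for a table or partition that is already being compacted. The real helper is not shown on this page; a hypothetical sketch of that kind of check, built only from the ShowCompactResponse getters and TxnStore state constants that appear in the other examples, might look like this:

// Hypothetical helper (not the actual Initiator implementation): returns true if the
// response already contains an initiated or in-progress compaction for the given entity.
static boolean alreadyCompacting(ShowCompactResponse current, String db, String table, String partition) {
    for (ShowCompactResponseElement e : current.getCompacts()) {
        boolean sameEntity = e.getDbname().equalsIgnoreCase(db)
                && e.getTablename().equalsIgnoreCase(table)
                && (partition == null ? e.getPartitionname() == null
                                      : partition.equals(e.getPartitionname()));
        // INITIATED_RESPONSE/WORKING_RESPONSE are assumed to be the queue states that mean "in flight".
        boolean inFlight = TxnStore.INITIATED_RESPONSE.equals(e.getState())
                || TxnStore.WORKING_RESPONSE.equals(e.getState());
        if (sameEntity && inFlight) {
            return true;
        }
    }
    return false;
}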

Example 48 with ShowCompactResponse

use of org.apache.hadoop.hive.metastore.api.ShowCompactResponse in project hive by apache.

the class TestCleaner method cleanupAfterMinorPartitionCompaction.

@Test
public void cleanupAfterMinorPartitionCompaction() throws Exception {
    Table t = newTable("default", "camipc", true);
    Partition p = newPartition(t, "today");
    addBaseFile(t, p, 20L, 20);
    addDeltaFile(t, p, 21L, 22L, 2);
    addDeltaFile(t, p, 23L, 24L, 2);
    addDeltaFile(t, p, 21L, 24L, 4);
    burnThroughTransactions("default", "camipc", 25);
    CompactionRequest rqst = new CompactionRequest("default", "camipc", CompactionType.MINOR);
    rqst.setPartitionname("ds=today");
    txnHandler.compact(rqst);
    CompactionInfo ci = txnHandler.findNextToCompact("fred");
    txnHandler.markCompacted(ci);
    txnHandler.setRunAs(ci.id, System.getProperty("user.name"));
    startCleaner();
    // Check that the compaction request has been marked as succeeded, with nothing left to clean.
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals(1, rsp.getCompactsSize());
    Assert.assertTrue(TxnStore.SUCCEEDED_RESPONSE.equals(rsp.getCompacts().get(0).getState()));
    // Check that the files are removed
    List<Path> paths = getDirectories(conf, t, p);
    Assert.assertEquals(2, paths.size());
    boolean sawBase = false, sawDelta = false;
    for (Path path : paths) {
        if (path.getName().equals("base_20"))
            sawBase = true;
        else if (path.getName().equals(makeDeltaDirNameCompacted(21, 24)))
            sawDelta = true;
        else
            Assert.fail("Unexpected file " + path.getName());
    }
    Assert.assertTrue(sawBase);
    Assert.assertTrue(sawDelta);
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)
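
To make the cleanup assertions concrete: before the cleaner runs, the partition holds the original base, the two small deltas, and the compacted delta written by the minor compaction; the cleaner's only job here is to drop the two small deltas that the compacted delta has made obsolete. A sketch of the expected layout (the plain delta names are illustrative; the test itself derives the compacted name via makeDeltaDirNameCompacted(21, 24)):

// Illustrative only; directory names other than those asserted in the test are assumptions.
List<String> beforeCleaning = Arrays.asList(
    "base_20",                            // original base, still valid
    "delta_21_22", "delta_23_24",         // deltas made obsolete by the minor compaction
    makeDeltaDirNameCompacted(21, 24));   // compacted delta produced by the minor compaction
List<String> afterCleaning = Arrays.asList(
    "base_20",
    makeDeltaDirNameCompacted(21, 24));   // only the base and the compacted delta survive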

Example 49 with ShowCompactResponse

use of org.apache.hadoop.hive.metastore.api.ShowCompactResponse in project hive by apache.

the class TestCleaner method cleanupAfterMajorPartitionCompaction.

@Test
public void cleanupAfterMajorPartitionCompaction() throws Exception {
    Table t = newTable("default", "campc", true);
    Partition p = newPartition(t, "today");
    addBaseFile(t, p, 20L, 20);
    addDeltaFile(t, p, 21L, 22L, 2);
    addDeltaFile(t, p, 23L, 24L, 2);
    addBaseFile(t, p, 25L, 25);
    burnThroughTransactions("default", "campc", 25);
    CompactionRequest rqst = new CompactionRequest("default", "campc", CompactionType.MAJOR);
    rqst.setPartitionname("ds=today");
    txnHandler.compact(rqst);
    CompactionInfo ci = txnHandler.findNextToCompact("fred");
    txnHandler.markCompacted(ci);
    txnHandler.setRunAs(ci.id, System.getProperty("user.name"));
    startCleaner();
    // Check that the compaction request has been marked as succeeded, with nothing left to clean.
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals(1, rsp.getCompactsSize());
    Assert.assertTrue(TxnStore.SUCCEEDED_RESPONSE.equals(rsp.getCompacts().get(0).getState()));
    // Check that the files are removed
    List<Path> paths = getDirectories(conf, t, p);
    Assert.assertEquals(1, paths.size());
    Assert.assertEquals("base_25", paths.get(0).getName());
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)
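
Both cleaner tests follow the same pattern: file a CompactionRequest, simulate the worker by calling markCompacted, run the cleaner, and then read the queue back through showCompact. In a real deployment the cleaner runs asynchronously, so the equivalent check usually has to poll rather than assert immediately. A hypothetical polling helper (the name, timeout, and sleep interval are illustrative) could look like:

// Hypothetical helper: polls the compaction queue until its first entry reaches the expected
// state or the timeout expires. Uses only showCompact, as in the tests above.
static boolean waitForCompactionState(TxnStore txnHandler, String expectedState, long timeoutMs) throws Exception {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (System.currentTimeMillis() < deadline) {
        ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
        if (rsp.getCompactsSize() > 0 && expectedState.equals(rsp.getCompacts().get(0).getState())) {
            return true;
        }
        Thread.sleep(500);
    }
    return false;
}

For example, waitForCompactionState(txnHandler, TxnStore.SUCCEEDED_RESPONSE, 30000) would replace the direct state assertion when the cleaner is not driven synchronously by the test.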

Example 50 with ShowCompactResponse

use of org.apache.hadoop.hive.metastore.api.ShowCompactResponse in project hive by apache.

the class TestCleaner method blockedByLockPartition.

@Test
public void blockedByLockPartition() throws Exception {
    Table t = newTable("default", "bblp", true);
    Partition p = newPartition(t, "today");
    addBaseFile(t, p, 20L, 20);
    addDeltaFile(t, p, 21L, 22L, 2);
    addDeltaFile(t, p, 23L, 24L, 2);
    addDeltaFile(t, p, 21L, 24L, 4);
    burnThroughTransactions("default", "bblp", 25);
    CompactionRequest rqst = new CompactionRequest("default", "bblp", CompactionType.MINOR);
    rqst.setPartitionname("ds=today");
    txnHandler.compact(rqst);
    CompactionInfo ci = txnHandler.findNextToCompact("fred");
    txnHandler.markCompacted(ci);
    txnHandler.setRunAs(ci.id, System.getProperty("user.name"));
    LockComponent comp = new LockComponent(LockType.SHARED_WRITE, LockLevel.PARTITION, "default");
    comp.setTablename("bblp");
    comp.setPartitionname("ds=today");
    comp.setOperationType(DataOperationType.DELETE);
    List<LockComponent> components = new ArrayList<LockComponent>(1);
    components.add(comp);
    LockRequest req = new LockRequest(components, "me", "localhost");
    OpenTxnsResponse resp = txnHandler.openTxns(new OpenTxnRequest(1, "Dracula", "Transylvania"));
    req.setTxnid(resp.getTxn_ids().get(0));
    LockResponse res = txnHandler.lock(req);
    startCleaner();
    // The open transaction's lock should block the cleaner, so the request stays in "ready for cleaning".
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
    Assert.assertEquals(1, compacts.size());
    Assert.assertEquals("ready for cleaning", compacts.get(0).getState());
    Assert.assertEquals("bblp", compacts.get(0).getTablename());
    Assert.assertEquals("ds=today", compacts.get(0).getPartitionname());
    Assert.assertEquals(CompactionType.MINOR, compacts.get(0).getType());
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) LockComponent(org.apache.hadoop.hive.metastore.api.LockComponent) ArrayList(java.util.ArrayList) LockResponse(org.apache.hadoop.hive.metastore.api.LockResponse) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo) OpenTxnRequest(org.apache.hadoop.hive.metastore.api.OpenTxnRequest) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) LockRequest(org.apache.hadoop.hive.metastore.api.LockRequest) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) OpenTxnsResponse(org.apache.hadoop.hive.metastore.api.OpenTxnsResponse) Test(org.junit.Test)
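
The compaction above stays in "ready for cleaning" because the open transaction holds a SHARED_WRITE lock on ds=today, so the cleaner cannot yet remove files that a concurrent operation may still depend on. As a hedged sketch of how the same harness could release the block (the abortTxn call and the second startCleaner() pass are assumptions, not part of the original test):

// Hypothetical continuation: abort the blocking transaction, which releases its locks,
// then give the cleaner another pass and expect the request to reach the succeeded state.
long blockingTxn = resp.getTxn_ids().get(0);
txnHandler.abortTxn(new AbortTxnRequest(blockingTxn));
startCleaner();
ShowCompactResponse after = txnHandler.showCompact(new ShowCompactRequest());
Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, after.getCompacts().get(0).getState());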

Aggregations

ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse) - 78 usages
ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest) - 75 usages
Test (org.junit.Test) - 70 usages
ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) - 54 usages
Table (org.apache.hadoop.hive.metastore.api.Table) - 48 usages
CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest) - 41 usages
ArrayList (java.util.ArrayList) - 29 usages
Path (org.apache.hadoop.fs.Path) - 22 usages
LockComponent (org.apache.hadoop.hive.metastore.api.LockComponent) - 22 usages
LockRequest (org.apache.hadoop.hive.metastore.api.LockRequest) - 22 usages
LockResponse (org.apache.hadoop.hive.metastore.api.LockResponse) - 22 usages
Partition (org.apache.hadoop.hive.metastore.api.Partition) - 20 usages
FileSystem (org.apache.hadoop.fs.FileSystem) - 16 usages
TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore) - 15 usages
FileStatus (org.apache.hadoop.fs.FileStatus) - 14 usages
CompactionInfo (org.apache.hadoop.hive.metastore.txn.CompactionInfo) - 13 usages
CommitTxnRequest (org.apache.hadoop.hive.metastore.api.CommitTxnRequest) - 12 usages
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean) - 10 usages
AbortTxnRequest (org.apache.hadoop.hive.metastore.api.AbortTxnRequest) - 6 usages
HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint) - 6 usages