use of org.apache.hadoop.hive.metastore.api.FindNextCompactRequest in project hive by apache.
the class Worker method findNextCompactionAndExecute.
/**
* Finds the next compaction and executes it. The main thread might interrupt the execution of this method
* in case of timeout.
* @param collectGenericStats If true, the stats are regenerated for both MR and query-based compaction
* @param collectMrStats If true, the stats are regenerated for MR compaction
* @return Returns true if there was a compaction in the queue and we started working on it.
*/
@VisibleForTesting
protected Boolean findNextCompactionAndExecute(boolean collectGenericStats, boolean collectMrStats) {
// Make sure nothing escapes this run method and kills the metastore at large,
// so wrap it in a big catch Throwable statement.
PerfLogger perfLogger = SessionState.getPerfLogger(false);
String workerMetric = null;
CompactionInfo ci = null;
boolean computeStats = false;
Table t1 = null;
// If the metastore client was closed and nulled after an earlier failure, a new instance needs to be obtained here.
if (msc == null) {
try {
msc = HiveMetaStoreUtils.getHiveMetastoreClient(conf);
} catch (Exception e) {
LOG.error("Failed to connect to HMS", e);
return false;
}
}
try (CompactionTxn compactionTxn = new CompactionTxn()) {
FindNextCompactRequest findNextCompactRequest = new FindNextCompactRequest();
findNextCompactRequest.setWorkerId(workerName);
findNextCompactRequest.setWorkerVersion(runtimeVersion);
ci = CompactionInfo.optionalCompactionInfoStructToInfo(msc.findNextCompact(findNextCompactRequest));
LOG.debug("Processing compaction request " + ci);
if (ci == null) {
return false;
}
// use a null-safe comparison, since either version string may be null
if ((runtimeVersion != null || ci.initiatorVersion != null) && !Objects.equals(runtimeVersion, ci.initiatorVersion)) {
LOG.warn("Worker and Initiator versions do not match. Worker: v{}, Initiator: v{}", runtimeVersion, ci.initiatorVersion);
}
checkInterrupt();
if (MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_EXT_ON)) {
workerMetric = MetricsConstants.COMPACTION_WORKER_CYCLE + "_" + (ci.type != null ? ci.type.toString().toLowerCase() : null);
perfLogger.perfLogBegin(CLASS_NAME, workerMetric);
}
// Find the table we will be working with.
try {
t1 = resolveTable(ci);
if (t1 == null) {
LOG.info("Unable to find table " + ci.getFullTableName() + ", assuming it was dropped and moving on.");
msc.markCleaned(CompactionInfo.compactionInfoToStruct(ci));
return false;
}
} catch (MetaException e) {
// Table lookup failed; mark the request cleaned and move on.
msc.markCleaned(CompactionInfo.compactionInfoToStruct(ci));
return false;
}
checkInterrupt();
// This chicanery is to get around the fact that the table needs to be final in order to
// go into the doAs below.
final Table t = t1;
String fullTableName = TxnUtils.getFullTableName(t.getDbName(), t.getTableName());
// Find the partition we will be working with, if there is one.
Partition p;
try {
p = resolvePartition(ci);
if (p == null && ci.partName != null) {
LOG.info("Unable to find partition " + ci.getFullPartitionName() + ", assuming it was dropped and moving on.");
msc.markCleaned(CompactionInfo.compactionInfoToStruct(ci));
return false;
}
} catch (Exception e) {
// Partition lookup failed; mark the request cleaned and move on.
msc.markCleaned(CompactionInfo.compactionInfoToStruct(ci));
return false;
}
checkInterrupt();
// Find the appropriate storage descriptor
final StorageDescriptor sd = resolveStorageDescriptor(t, p);
// Check that the table or partition isn't sorted, as we don't yet support that.
if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
LOG.error("Attempt to compact sorted table " + ci.getFullTableName() + ", which is not yet supported!");
msc.markCleaned(CompactionInfo.compactionInfoToStruct(ci));
return false;
}
if (ci.runAs == null) {
ci.runAs = TxnUtils.findUserToRunAs(sd.getLocation(), t, conf);
}
checkInterrupt();
/**
* We cannot have the Worker use HiveTxnManager (which is on a ThreadLocal) since
* the Driver would then already have an open txn, but this txn would have
* multiple statements in it (for the query based compactor), which is not supported (and since
* in this case some of the statements are DDL, it will not be allowed in a
* multi-stmt txn even in the future). {@link Driver#setCompactionWriteIds(ValidWriteIdList, long)}
*/
compactionTxn.open(ci);
ValidTxnList validTxnList = msc.getValidTxns(compactionTxn.getTxnId());
// with this, the ValidWriteIdList is capped at whatever HWM the validTxnList has
final ValidCompactorWriteIdList tblValidWriteIds = TxnUtils.createValidCompactWriteIdList(msc.getValidWriteIds(Collections.singletonList(fullTableName), validTxnList.writeToString()).get(0));
LOG.debug("ValidCompactWriteIdList: " + tblValidWriteIds.writeToString());
conf.set(ValidTxnList.VALID_TXNS_KEY, validTxnList.writeToString());
ci.highestWriteId = tblValidWriteIds.getHighWatermark();
// this writes TXN_COMPONENTS to ensure that if compactorTxnId fails, we keep metadata about
// it until after any data written by it are physically removed
msc.updateCompactorState(CompactionInfo.compactionInfoToStruct(ci), compactionTxn.getTxnId());
checkInterrupt();
final StringBuilder jobName = new StringBuilder(workerName);
jobName.append("-compactor-");
jobName.append(ci.getFullPartitionName());
// Don't start compaction or cleaning if not necessary
if (isDynPartAbort(t, ci)) {
msc.markCompacted(CompactionInfo.compactionInfoToStruct(ci));
compactionTxn.wasSuccessful();
return false;
}
AcidDirectory dir = getAcidStateForWorker(ci, sd, tblValidWriteIds);
if (!isEnoughToCompact(ci.isMajorCompaction(), dir, sd)) {
if (needsCleaning(dir, sd)) {
msc.markCompacted(CompactionInfo.compactionInfoToStruct(ci));
} else {
// nothing to compact and nothing left to clean, so close out the request as cleaned
msc.markCleaned(CompactionInfo.compactionInfoToStruct(ci));
}
compactionTxn.wasSuccessful();
return false;
}
if (!ci.isMajorCompaction() && !isMinorCompactionSupported(t.getParameters(), dir)) {
ci.errorMessage = String.format("Query based Minor compaction is not possible for full acid tables having raw format (non-acid) data in them. Compaction type: %s, Partition: %s, Compaction id: %d", ci.type.toString(), ci.getFullPartitionName(), ci.id);
LOG.error(ci.errorMessage);
try {
msc.markRefused(CompactionInfo.compactionInfoToStruct(ci));
} catch (Throwable tr) {
LOG.error("Caught an exception while trying to mark compaction {} as failed: {}", ci, tr);
}
return false;
}
checkInterrupt();
try {
failCompactionIfSetForTest();
/*
First try to run compaction via HiveQL queries.
Compaction for MM tables happens here, or run compaction for Crud tables if query-based compaction is enabled.
todo Find a more generic approach to collecting files in the same logical bucket to compact within the same
task (currently we're using Tez split grouping).
*/
QueryCompactor queryCompactor = QueryCompactorFactory.getQueryCompactor(t, conf, ci);
computeStats = (queryCompactor == null && collectMrStats) || collectGenericStats;
LOG.info("Starting " + ci.type.toString() + " compaction for " + ci.getFullPartitionName() + ", id:" + ci.id + " in " + compactionTxn + " with compute stats set to " + computeStats);
if (queryCompactor != null) {
LOG.info("Will compact id: " + ci.id + " with query-based compactor class: " + queryCompactor.getClass().getName());
queryCompactor.runCompaction(conf, t, p, sd, tblValidWriteIds, ci, dir);
} else {
LOG.info("Will compact id: " + ci.id + " via MR job");
runCompactionViaMrJob(ci, t, p, sd, tblValidWriteIds, jobName, dir);
}
LOG.info("Completed " + ci.type.toString() + " compaction for " + ci.getFullPartitionName() + " in " + compactionTxn + ", marking as compacted.");
msc.markCompacted(CompactionInfo.compactionInfoToStruct(ci));
compactionTxn.wasSuccessful();
AcidMetricService.updateMetricsFromWorker(ci.dbname, ci.tableName, ci.partName, ci.type, dir.getCurrentDirectories().size(), dir.getDeleteDeltas().size(), conf, msc);
} catch (Throwable e) {
LOG.error("Caught exception while trying to compact " + ci + ". Marking failed to avoid repeated failures", e);
final CompactionType ctype = ci.type;
markFailed(ci, e.getMessage());
if (runJobAsSelf(ci.runAs)) {
cleanupResultDirs(sd, tblValidWriteIds, ctype, dir);
} else {
LOG.info("Cleaning as user " + ci.runAs);
UserGroupInformation ugi = UserGroupInformation.createProxyUser(ci.runAs, UserGroupInformation.getLoginUser());
ugi.doAs((PrivilegedExceptionAction<Void>) () -> {
cleanupResultDirs(sd, tblValidWriteIds, ctype, dir);
return null;
});
try {
FileSystem.closeAllForUGI(ugi);
} catch (IOException ex) {
LOG.error("Could not clean up file-system handles for UGI: " + ugi, e);
}
}
}
} catch (TException | IOException t) {
LOG.error("Caught an exception in the main loop of compactor worker " + workerName, t);
markFailed(ci, t.getMessage());
if (msc != null) {
msc.close();
msc = null;
}
} catch (Throwable t) {
LOG.error("Caught an exception in the main loop of compactor worker " + workerName, t);
} finally {
if (workerMetric != null && MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_EXT_ON)) {
perfLogger.perfLogEnd(CLASS_NAME, workerMetric);
}
}
if (computeStats) {
StatsUpdater.gatherStats(ci, conf, runJobAsSelf(ci.runAs) ? ci.runAs : t1.getOwner(), CompactorUtil.getCompactorJobQueueName(conf, ci, t1));
}
return true;
}
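For reference, the FindNextCompactRequest handshake at the top of the method above reduces to a few lines. This is a minimal sketch, assuming an already initialized metastore client (msc) and the worker's name and version fields; the surrounding compaction transaction, interrupt checks and error handling are omitted.
FindNextCompactRequest request = new FindNextCompactRequest();
// identify this worker so the metastore can record who picked the request up
request.setWorkerId(workerName);
request.setWorkerVersion(runtimeVersion);
// the returned struct converts to null when the compaction queue is empty
CompactionInfo next = CompactionInfo.optionalCompactionInfoStructToInfo(msc.findNextCompact(request));
if (next == null) {
// nothing queued; the worker skips this cycle
}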
use of org.apache.hadoop.hive.metastore.api.FindNextCompactRequest in project hive by apache.
the class TestCompactionTxnHandler method aFindNextCompactRequest.
private static FindNextCompactRequest aFindNextCompactRequest(String workerId, String workerVersion) {
FindNextCompactRequest request = new FindNextCompactRequest();
request.setWorkerId(workerId);
request.setWorkerVersion(workerVersion);
return request;
}
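A typical call site for this helper simply hands the built request to the TxnStore; for example (the worker id and version values here are illustrative, matching the ones used in the other tests):
CompactionInfo ci = txnHandler.findNextToCompact(aFindNextCompactRequest("fred", "4.0.0"));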
use of org.apache.hadoop.hive.metastore.api.FindNextCompactRequest in project hive by apache.
the class TestCleaner method cleanupAfterMajorTableCompactionWithLongRunningQuery.
@Test
public void cleanupAfterMajorTableCompactionWithLongRunningQuery() throws Exception {
Table t = newTable("default", "camtc", false);
addBaseFile(t, null, 20L, 20);
addDeltaFile(t, null, 21L, 22L, 2);
addDeltaFile(t, null, 23L, 24L, 2);
addBaseFile(t, null, 25L, 25, 26);
burnThroughTransactions("default", "camtc", 25);
CompactionRequest rqst = new CompactionRequest("default", "camtc", CompactionType.MAJOR);
txnHandler.compact(rqst);
FindNextCompactRequest findNextCompactRequest = new FindNextCompactRequest();
findNextCompactRequest.setWorkerId("fred");
findNextCompactRequest.setWorkerVersion("4.0.0");
CompactionInfo ci = txnHandler.findNextToCompact(findNextCompactRequest);
ci.runAs = System.getProperty("user.name");
long compactTxn = openTxn(TxnType.COMPACTION);
ValidTxnList validTxnList = TxnCommonUtils.createValidReadTxnList(txnHandler.getOpenTxns(Collections.singletonList(TxnType.READ_ONLY)), compactTxn);
GetValidWriteIdsRequest validWriteIdsRqst = new GetValidWriteIdsRequest(Collections.singletonList(ci.getFullTableName()));
validWriteIdsRqst.setValidTxnList(validTxnList.writeToString());
ValidCompactorWriteIdList tblValidWriteIds = TxnUtils.createValidCompactWriteIdList(txnHandler.getValidWriteIds(validWriteIdsRqst).getTblValidWriteIds().get(0));
ci.highestWriteId = tblValidWriteIds.getHighWatermark();
txnHandler.updateCompactorState(ci, compactTxn);
txnHandler.markCompacted(ci);
// Open a query during compaction
long longQuery = openTxn();
txnHandler.commitTxn(new CommitTxnRequest(compactTxn));
startCleaner();
// The long running query should prevent the cleanup
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
Assert.assertEquals(1, rsp.getCompactsSize());
Assert.assertEquals(TxnStore.CLEANING_RESPONSE, rsp.getCompacts().get(0).getState());
// Check that the files are not removed
List<Path> paths = getDirectories(conf, t, null);
Assert.assertEquals(4, paths.size());
// After the long running query commits, cleaning can proceed
txnHandler.commitTxn(new CommitTxnRequest(longQuery));
Thread.sleep(MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.TXN_OPENTXN_TIMEOUT, TimeUnit.MILLISECONDS));
startCleaner();
rsp = txnHandler.showCompact(new ShowCompactRequest());
Assert.assertEquals(1, rsp.getCompactsSize());
Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
// Check that the files are removed
paths = getDirectories(conf, t, null);
Assert.assertEquals(1, paths.size());
Assert.assertEquals("base_25_v26", paths.get(0).getName());
}
use of org.apache.hadoop.hive.metastore.api.FindNextCompactRequest in project hive by apache.
the class TestHiveMetaStoreTxns method testGetLatestCommittedCompactionInfo.
@Test
public void testGetLatestCommittedCompactionInfo() throws Exception {
final String dbName = "mydb";
final String tblName = "mytable";
Database db = new DatabaseBuilder().setName(dbName).build(conf);
db.unsetCatalogName();
client.createDatabase(db);
Table tbl = new TableBuilder().setDbName(dbName).setTableName(tblName).addCol("id", "int").addCol("name", "string").setType(TableType.MANAGED_TABLE.name()).build(conf);
client.createTable(tbl);
tbl = client.getTable(dbName, tblName);
client.compact2(tbl.getDbName(), tbl.getTableName(), null, CompactionType.MINOR, new HashMap<>());
FindNextCompactRequest compactRequest = new FindNextCompactRequest();
compactRequest.setWorkerId("myworker");
OptionalCompactionInfoStruct optionalCi = client.findNextCompact(compactRequest);
client.markCleaned(optionalCi.getCi());
GetLatestCommittedCompactionInfoRequest rqst = new GetLatestCommittedCompactionInfoRequest();
// Test invalid inputs
final String invalidTblName = "invalid";
rqst.setDbname(dbName);
Assert.assertThrows(MetaException.class, () -> client.getLatestCommittedCompactionInfo(rqst));
rqst.setTablename(invalidTblName);
GetLatestCommittedCompactionInfoResponse response = client.getLatestCommittedCompactionInfo(rqst);
Assert.assertNotNull(response);
Assert.assertEquals(0, response.getCompactionsSize());
// Test normal inputs
rqst.setTablename(tblName);
response = client.getLatestCommittedCompactionInfo(rqst);
Assert.assertNotNull(response);
Assert.assertEquals(1, response.getCompactionsSize());
CompactionInfoStruct lci = response.getCompactions().get(0);
Assert.assertEquals(1, lci.getId());
Assert.assertNull(lci.getPartitionname());
Assert.assertEquals(CompactionType.MINOR, lci.getType());
}
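The test above only sets the worker id on the request. A worker that also reports its version, as Worker.findNextCompactionAndExecute does, would populate the request like this (a sketch with illustrative values):
FindNextCompactRequest versionedRequest = new FindNextCompactRequest();
versionedRequest.setWorkerId("myworker");
// the worker version is recorded in the compaction queue alongside the worker id
versionedRequest.setWorkerVersion("4.0.0");
OptionalCompactionInfoStruct optionalCiWithVersion = client.findNextCompact(versionedRequest);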
use of org.apache.hadoop.hive.metastore.api.FindNextCompactRequest in project hive by apache.
the class TestInitiator method aFindNextCompactRequest.
private static FindNextCompactRequest aFindNextCompactRequest(String workerId, String workerVersion) {
FindNextCompactRequest request = new FindNextCompactRequest();
request.setWorkerId(workerId);
request.setWorkerVersion(workerVersion);
return request;
}