Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
The class BasicStatsTask, method aggregateStats: gathers basic (file-level) statistics for a table or each of its partitions, using a Warehouse built from the session configuration to collect file status, and then publishes the results to the metastore.
private int aggregateStats(Hive db) {
  StatsAggregator statsAggregator = null;
  int ret = 0;
  StatsCollectionContext scc = null;
  EnvironmentContext environmentContext = null;
  environmentContext = new EnvironmentContext();
  environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
  try {
    // Stats setup:
    final Warehouse wh = new Warehouse(conf);
    if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) {
      try {
        scc = getContext();
        statsAggregator = createStatsAggregator(scc, conf);
      } catch (HiveException e) {
        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
          throw e;
        }
        console.printError(ErrorMsg.STATS_SKIPPING_BY_ERROR.getErrorCodedMsg(e.toString()));
      }
    }
    List<Partition> partitions = getPartitionsList(db);
    String tableFullName = table.getDbName() + "." + table.getTableName();
    List<Partish> partishes = new ArrayList<>();
    if (partitions == null) {
      Partish p;
      partishes.add(p = new Partish.PTable(table));
      BasicStatsProcessor basicStatsProcessor = new BasicStatsProcessor(p, work, conf, followedColStats);
      basicStatsProcessor.collectFileStatus(wh, conf);
      Table res = (Table) basicStatsProcessor.process(statsAggregator);
      if (res == null) {
        return 0;
      }
      db.alterTable(tableFullName, res, environmentContext, true);
      TransactionalStatsProcessor transactionalStatsProcessor = new TransactionalStatsProcessor(db, p);
      transactionalStatsProcessor.process(statsAggregator);
      if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
        console.printInfo("Table " + tableFullName + " stats: [" + toString(p.getPartParameters()) + ']');
      }
      LOG.info("Table " + tableFullName + " stats: [" + toString(p.getPartParameters()) + ']');
    } else {
      // Partitioned table:
      // Need to get the old stats of the partition
      // and update the table stats based on the old and new stats.
      List<Partition> updates = new ArrayList<Partition>();
      final ExecutorService pool = buildBasicStatsExecutor();
      final List<Future<Void>> futures = Lists.newLinkedList();
      List<BasicStatsProcessor> processors = Lists.newLinkedList();
      List<TransactionalStatsProcessor> transactionalStatsProcessors = Lists.newLinkedList();
      try {
        for (final Partition partn : partitions) {
          Partish p;
          BasicStatsProcessor bsp = new BasicStatsProcessor(p = new Partish.PPart(table, partn), work, conf, followedColStats);
          processors.add(bsp);
          transactionalStatsProcessors.add(new TransactionalStatsProcessor(db, p));
          futures.add(pool.submit(new Callable<Void>() {
            @Override
            public Void call() throws Exception {
              bsp.collectFileStatus(wh, conf);
              return null;
            }
          }));
        }
        pool.shutdown();
        for (Future<Void> future : futures) {
          future.get();
        }
      } catch (InterruptedException e) {
        LOG.debug("Cancelling " + futures.size() + " file stats lookup tasks");
        // cancel other futures
        for (Future future : futures) {
          future.cancel(true);
        }
        // Fail the query if the stats are supposed to be reliable
        if (work.isStatsReliable()) {
          ret = 1;
        }
      } finally {
        if (pool != null) {
          pool.shutdownNow();
        }
        LOG.debug("Finished getting file stats of all partitions!");
      }
      for (BasicStatsProcessor basicStatsProcessor : processors) {
        Object res = basicStatsProcessor.process(statsAggregator);
        if (res == null) {
          LOG.info("Partition " + basicStatsProcessor.partish.getPartition().getSpec() + " stats: [0]");
          continue;
        }
        updates.add((Partition) res);
        if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
          console.printInfo("Partition " + basicStatsProcessor.partish.getPartition().getSpec() + " stats: [" + toString(basicStatsProcessor.partish.getPartParameters()) + ']');
        }
        LOG.info("Partition " + basicStatsProcessor.partish.getPartition().getSpec() + " stats: [" + toString(basicStatsProcessor.partish.getPartParameters()) + ']');
      }
      if (!updates.isEmpty()) {
        db.alterPartitions(tableFullName, updates, environmentContext, true);
      }
      for (TransactionalStatsProcessor transactionalStatsProcessor : transactionalStatsProcessors) {
        transactionalStatsProcessor.process(statsAggregator);
      }
      if (work.isStatsReliable() && updates.size() != processors.size()) {
        LOG.info("Stats should be reliable... however it seems like there were some issues => ret 1");
        ret = 1;
      }
    }
  } catch (Exception e) {
    console.printInfo("[Warning] could not update stats.", "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
    // Fail the query if the stats are supposed to be reliable
    if (work.isStatsReliable()) {
      ret = 1;
    }
  } finally {
    if (statsAggregator != null) {
      statsAggregator.closeConnection(scc);
    }
  }
  // anything else indicates failure
  return ret;
}
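In the partitioned branch above, the per-partition file scans are fanned out to an executor; the caller then blocks on the futures and cancels the remainder if it is interrupted. The following is a minimal, self-contained sketch of that pattern only. The ParallelScanSketch class, its scanPartition helper, and the string partition names are illustrative stand-ins for BasicStatsProcessor.collectFileStatus(wh, conf) and the real Partition objects, not part of the Hive code.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class ParallelScanSketch {

  // Hypothetical stand-in for BasicStatsProcessor.collectFileStatus(wh, conf).
  static Void scanPartition(String partitionName) {
    System.out.println("scanning " + partitionName);
    return null;
  }

  public static void main(String[] args) throws Exception {
    List<String> partitions = List.of("p=1", "p=2", "p=3");
    ExecutorService pool = Executors.newFixedThreadPool(2);
    List<Future<Void>> futures = new ArrayList<>();
    try {
      for (String partition : partitions) {
        futures.add(pool.submit(() -> scanPartition(partition)));
      }
      pool.shutdown();                 // stop accepting work; queued scans still run
      for (Future<Void> future : futures) {
        future.get();                  // wait for each scan and surface failures
      }
    } catch (InterruptedException e) {
      for (Future<Void> future : futures) {
        future.cancel(true);           // cancel the remaining scans, as the Hive code does
      }
      Thread.currentThread().interrupt();
    } finally {
      pool.shutdownNow();
    }
  }
}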
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
The class CreateMaterializedViewOperation, method execute: creates a materialized view and uses a Warehouse to resolve the view's default storage path when recording lineage.
@Override
public int execute() throws HiveException {
  Table oldview = context.getDb().getTable(desc.getViewName(), false);
  if (oldview != null) {
    if (desc.getIfNotExists()) {
      return 0;
    }
    // Materialized View already exists, thus we should be replacing
    throw new HiveException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(desc.getViewName()));
  } else {
    // We create new view
    Table tbl = desc.toTable(context.getConf());
    // We set the signature for the view if it is a materialized view
    if (tbl.isMaterializedView()) {
      Set<SourceTable> sourceTables = new HashSet<>(desc.getTablesUsed().size());
      for (TableName tableName : desc.getTablesUsed()) {
        sourceTables.add(context.getDb().getTable(tableName).createSourceTable());
      }
      MaterializedViewMetadata metadata = new MaterializedViewMetadata(MetaStoreUtils.getDefaultCatalog(context.getConf()), tbl.getDbName(), tbl.getTableName(), sourceTables, context.getConf().get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
      tbl.setMaterializedViewMetadata(metadata);
    }
    context.getDb().createTable(tbl, desc.getIfNotExists());
    DDLUtils.addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK), context.getWork().getOutputs());
    // set lineage info
    DataContainer dc = new DataContainer(tbl.getTTable());
    Map<String, String> tblProps = tbl.getTTable().getParameters();
    Path tlocation = null;
    try {
      Warehouse wh = new Warehouse(context.getConf());
      tlocation = wh.getDefaultTablePath(context.getDb().getDatabase(tbl.getDbName()), tbl.getTableName(), tblProps == null || !AcidUtils.isTablePropertyTransactional(tblProps));
    } catch (MetaException e) {
      throw new HiveException(e);
    }
    context.getQueryState().getLineageState().setLineage(tlocation, dc, tbl.getCols());
  }
  return 0;
}
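The only Warehouse call in this example resolves where the new view would live by default, so that lineage can record a location even though the DDL itself writes no data. For reference, here is a minimal standalone sketch of the same getDefaultTablePath lookup outside the operation. It assumes a reachable metastore and a thrift client (HiveMetaStoreClient); the database and table names "sales" and "orders" are purely illustrative, and the boolean third argument mirrors the two usages on this page (an isExternal flag, or "not transactional").

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;

public class DefaultTablePathSketch {

  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();
    Warehouse wh = new Warehouse(conf);
    HiveMetaStoreClient msc = new HiveMetaStoreClient(conf);
    try {
      // "sales" and "orders" are illustrative names, not taken from the examples above.
      Database salesDb = msc.getDatabase("sales");
      // Third argument: true means "treat as external / non-transactional", matching how
      // CreateMaterializedViewOperation and TaskCompiler pass it in the excerpts above.
      Path defaultPath = wh.getDefaultTablePath(salesDb, "orders", true);
      System.out.println("default location: " + defaultPath);
    } finally {
      msc.close();
    }
  }
}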
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
The class SemanticAnalyzer, method getMetaData: resolves metadata for source tables, subqueries, and destinations; for CTAS and materialized-view destinations without an explicit location it uses a Warehouse to allocate a directory under the destination database's external or managed root.
@SuppressWarnings("nls")
private void getMetaData(QB qb, ReadEntity parentInput) throws HiveException {
  LOG.info("Get metadata for source tables");
  // Go over the tables and populate the related structures.
  // We have to materialize the table alias list since we might
  // modify it in the middle for view rewrite.
  List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
  // Keep track of view alias to view name and read entity
  // For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
  // keeps track of full view name and read entity corresponding to alias V3, V3:V2, V3:V2:V1.
  // This is needed for tracking the dependencies for inputs, along with their parents.
  Map<String, Pair<String, ReadEntity>> aliasToViewInfo = new HashMap<String, Pair<String, ReadEntity>>();
  /*
   * used to capture view to SQ conversions. This is used to check for
   * recursive CTE invocations.
   */
  Map<String, String> sqAliasToCTEName = new HashMap<String, String>();
  for (String alias : tabAliases) {
    String tabName = qb.getTabNameForAlias(alias);
    String cteName = tabName.toLowerCase();
    // Get table details from tabNameToTabObject cache
    Table tab = aliasToCTEs.containsKey(tabName) ? null : getTableObjectByName(tabName, false);
    if (tab != null) {
      Table newTab = tab.makeCopy();
      tab = newTab;
    }
    if (tab == null || tab.getDbName().equals(SessionState.get().getCurrentDatabase())) {
      Table materializedTab = ctx.getMaterializedTable(cteName);
      if (materializedTab == null) {
        // we first look for this alias from CTE, and then from catalog.
        CTEClause cte = findCTEFromName(qb, cteName);
        if (cte != null) {
          if (!cte.materialize) {
            addCTEAsSubQuery(qb, cteName, alias);
            sqAliasToCTEName.put(alias, cteName);
            continue;
          }
          tab = materializeCTE(cteName, cte);
        }
      } else {
        tab = materializedTab;
      }
    }
    if (tab == null) {
      if (tabName.equals(DUMMY_DATABASE + "." + DUMMY_TABLE)) {
        continue;
      }
      ASTNode src = qb.getParseInfo().getSrcForAlias(alias);
      if (null != src) {
        if (src.getChildCount() == 3) {
          throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg() + " '" + src.getChild(2).getText() + "'");
        }
        throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_TABLE.getMsg(), src));
      } else {
        throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
      }
    }
    Pair<String, String> asOf = qb.getAsOfForAlias(alias);
    if (asOf != null) {
      if (!Optional.ofNullable(tab.getStorageHandler()).map(HiveStorageHandler::isTimeTravelAllowed).orElse(false)) {
        throw new SemanticException(ErrorMsg.TIME_TRAVEL_NOT_ALLOWED, alias);
      }
      tab.setAsOfVersion(asOf.getLeft());
      tab.setAsOfTimestamp(asOf.getRight());
    }
    if (tab.isView()) {
      if (qb.getParseInfo().isAnalyzeCommand()) {
        throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
      }
      String fullViewName = tab.getFullyQualifiedName();
      // Prevent view cycles
      if (viewsExpanded.contains(fullViewName)) {
        throw new SemanticException("Recursive view " + fullViewName + " detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") + " -> " + fullViewName + ").");
      }
      replaceViewReferenceWithDefinition(qb, tab, tabName, alias);
      // If the view is Inside another view, it should have at least one parent
      if (qb.isInsideView() && parentInput == null) {
        parentInput = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
      }
      ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView());
      viewInput = PlanUtils.addInput(inputs, viewInput);
      aliasToViewInfo.put(alias, Pair.of(fullViewName, viewInput));
      String aliasId = getAliasId(alias, qb);
      if (aliasId != null) {
        aliasId = aliasId.replace(SemanticAnalyzer.SUBQUERY_TAG_1, "").replace(SemanticAnalyzer.SUBQUERY_TAG_2, "");
      }
      viewAliasToInput.put(aliasId, viewInput);
      continue;
    }
    if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
      throw new SemanticException(generateErrorMessage(qb.getParseInfo().getSrcForAlias(alias), ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
    }
    qb.getMetaData().setSrcForAlias(alias, tab);
    if (qb.getParseInfo().isAnalyzeCommand()) {
      // allow partial partition specification for nonscan since noscan is fast.
      TableSpec ts = new TableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan);
      if (ts.specType == SpecType.DYNAMIC_PARTITION) {
        // dynamic partitions
        try {
          ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
        } catch (HiveException e) {
          throw new SemanticException(generateErrorMessage(qb.getParseInfo().getSrcForAlias(alias), "Cannot get partitions for " + ts.partSpec), e);
        }
      }
      tab.setTableSpec(ts);
      qb.getParseInfo().addTableSpec(alias, ts);
    }
    ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
    // Temporary tables created during the execution are not the input sources
    if (!PlanUtils.isValuesTempTable(alias)) {
      PlanUtils.addInput(inputs, new ReadEntity(tab, parentViewInfo, parentViewInfo == null), mergeIsDirect);
    }
  }
  LOG.info("Get metadata for subqueries");
  // Go over the subqueries and getMetaData for these
  for (String alias : qb.getSubqAliases()) {
    boolean wasView = aliasToViewInfo.containsKey(alias);
    boolean wasCTE = sqAliasToCTEName.containsKey(alias);
    ReadEntity newParentInput = null;
    if (wasView) {
      viewsExpanded.add(aliasToViewInfo.get(alias).getLeft());
      newParentInput = aliasToViewInfo.get(alias).getRight();
    } else if (wasCTE) {
      ctesExpanded.add(sqAliasToCTEName.get(alias));
    }
    QBExpr qbexpr = qb.getSubqForAlias(alias);
    getMetaData(qbexpr, newParentInput);
    if (wasView) {
      viewsExpanded.remove(viewsExpanded.size() - 1);
    } else if (wasCTE) {
      ctesExpanded.remove(ctesExpanded.size() - 1);
    }
  }
  RowFormatParams rowFormatParams = new RowFormatParams();
  StorageFormat storageFormat = new StorageFormat(conf);
  LOG.info("Get metadata for destination tables");
  // Go over all the destination structures and populate the related
  // metadata
  QBParseInfo qbp = qb.getParseInfo();
  for (String name : qbp.getClauseNamesForDest()) {
    ASTNode ast = qbp.getDestForClause(name);
    switch (ast.getToken().getType()) {
      case HiveParser.TOK_TAB:
        {
          TableSpec ts = new TableSpec(db, conf, ast);
          if (ts.tableHandle.isView() || (mvRebuildMode == MaterializationRebuildMode.NONE && ts.tableHandle.isMaterializedView())) {
            throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
          }
          Class<?> outputFormatClass = ts.tableHandle.getOutputFormatClass();
          if (!ts.tableHandle.isNonNative() && !HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
            throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg(), ast, "The class is " + outputFormatClass.toString()));
          }
          boolean isTableWrittenTo = qb.getParseInfo().isInsertIntoTable(ts.tableHandle.getDbName(), ts.tableHandle.getTableName());
          isTableWrittenTo |= (qb.getParseInfo().getInsertOverwriteTables().get(getUnescapedName((ASTNode) ast.getChild(0), ts.tableHandle.getDbName()).toLowerCase()) != null);
          assert isTableWrittenTo : "Inconsistent data structure detected: we are writing to " + ts.tableHandle + " in " + name + " but it's not in isInsertIntoTable() or getInsertOverwriteTables()";
          // Disallow update and delete on non-acid tables
          boolean isFullAcid = AcidUtils.isFullAcidTable(ts.tableHandle);
          if ((updating(name) || deleting(name)) && !isFullAcid) {
            if (!AcidUtils.isInsertOnlyTable(ts.tableHandle)) {
              // here, it means the table itself doesn't support it.
              throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, ts.getTableName().getTable());
            } else {
              throw new SemanticException(ErrorMsg.ACID_OP_ON_INSERTONLYTRAN_TABLE, ts.getTableName().getTable());
            }
          }
          // but whether the table itself is partitioned is not known.
          if (ts.specType != SpecType.STATIC_PARTITION) {
            // This is a table or dynamic partition
            qb.getMetaData().setDestForAlias(name, ts.tableHandle);
            // has dynamic as well as static partitions
            if (ts.partSpec != null && ts.partSpec.size() > 0) {
              qb.getMetaData().setPartSpecForAlias(name, ts.partSpec);
            }
          } else {
            // This is a partition
            qb.getMetaData().setDestForAlias(name, ts.partHandle);
          }
          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
            // Add the table spec for the destination table.
            qb.getParseInfo().addTableSpec(ts.getTableName().getTable().toLowerCase(), ts);
          }
          break;
        }
      case HiveParser.TOK_DIR:
        {
          // This is a dfs file
          String fname = stripQuotes(ast.getChild(0).getText());
          if ((!qb.getParseInfo().getIsSubQ()) && (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) {
            if (qb.isCTAS() || qb.isMaterializedView()) {
              qb.setIsQuery(false);
              ctx.setResDir(null);
              ctx.setResFile(null);
              Path location;
              // If the CTAS query does specify a location, use the table location, else use the db location
              if (qb.isMaterializedView() && qb.getViewDesc() != null && qb.getViewDesc().getLocation() != null) {
                location = new Path(qb.getViewDesc().getLocation());
              } else if (qb.isCTAS() && qb.getTableDesc() != null && qb.getTableDesc().getLocation() != null) {
                location = new Path(qb.getTableDesc().getLocation());
              } else {
                // allocate a temporary output dir on the location of the table
                String tableName = getUnescapedName((ASTNode) ast.getChild(0));
                String[] names = Utilities.getDbTableName(tableName);
                try {
                  Warehouse wh = new Warehouse(conf);
                  // Use destination table's db location.
                  String destTableDb = qb.getTableDesc() != null ? qb.getTableDesc().getDatabaseName() : null;
                  if (destTableDb == null) {
                    destTableDb = names[0];
                  }
                  boolean useExternal = false;
                  if (qb.isMaterializedView()) {
                    useExternal = !AcidUtils.isTransactionalView(qb.getViewDesc()) && !makeAcid();
                  } else {
                    useExternal = (qb.getTableDesc() == null || qb.getTableDesc().isTemporary() || qb.getTableDesc().isExternal() || !makeAcid());
                  }
                  if (useExternal) {
                    location = wh.getDatabaseExternalPath(db.getDatabase(destTableDb));
                  } else {
                    location = wh.getDatabaseManagedPath(db.getDatabase(destTableDb));
                  }
                } catch (MetaException e) {
                  throw new SemanticException(e);
                }
              }
              try {
                CreateTableDesc tblDesc = qb.getTableDesc();
                if (tblDesc != null && tblDesc.isTemporary() && AcidUtils.isInsertOnlyTable(tblDesc.getTblProps(), true)) {
                  fname = FileUtils.makeQualified(location, conf).toString();
                } else {
                  fname = ctx.getExtTmpPathRelTo(FileUtils.makeQualified(location, conf)).toString();
                }
              } catch (Exception e) {
                throw new SemanticException(generateErrorMessage(ast, "Error creating temporary folder on: " + location.toString()), e);
              }
              if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
                TableSpec ts = new TableSpec(db, conf, this.ast);
                // Add the table spec for the destination table.
                qb.getParseInfo().addTableSpec(ts.getTableName().getTable().toLowerCase(), ts);
              }
            } else {
              // This is the only place where isQuery is set to true; it defaults to false.
              qb.setIsQuery(true);
              Path stagingPath = getStagingDirectoryPathname(qb);
              fname = stagingPath.toString();
              ctx.setResDir(stagingPath);
            }
          }
          boolean isDfsFile = true;
          if (ast.getChildCount() >= 2 && ast.getChild(1).getText().toLowerCase().equals("local")) {
            isDfsFile = false;
          }
          // Set the destination for the SELECT query inside the CTAS
          qb.getMetaData().setDestForAlias(name, fname, isDfsFile);
          CreateTableDesc directoryDesc = new CreateTableDesc();
          boolean directoryDescIsSet = false;
          int numCh = ast.getChildCount();
          for (int num = 1; num < numCh; num++) {
            ASTNode child = (ASTNode) ast.getChild(num);
            if (child != null) {
              if (storageFormat.fillStorageFormat(child)) {
                directoryDesc.setInputFormat(storageFormat.getInputFormat());
                directoryDesc.setOutputFormat(storageFormat.getOutputFormat());
                directoryDesc.setSerName(storageFormat.getSerde());
                directoryDescIsSet = true;
                continue;
              }
              switch (child.getToken().getType()) {
                case HiveParser.TOK_TABLEROWFORMAT:
                  rowFormatParams.analyzeRowFormat(child);
                  directoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
                  directoryDesc.setLineDelim(rowFormatParams.lineDelim);
                  directoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
                  directoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
                  directoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
                  directoryDesc.setNullFormat(rowFormatParams.nullFormat);
                  directoryDescIsSet = true;
                  break;
                case HiveParser.TOK_TABLESERIALIZER:
                  ASTNode serdeChild = (ASTNode) child.getChild(0);
                  storageFormat.setSerde(unescapeSQLString(serdeChild.getChild(0).getText()));
                  directoryDesc.setSerName(storageFormat.getSerde());
                  if (serdeChild.getChildCount() > 1) {
                    directoryDesc.setSerdeProps(new HashMap<String, String>());
                    readProps((ASTNode) serdeChild.getChild(1).getChild(0), directoryDesc.getSerdeProps());
                  }
                  directoryDescIsSet = true;
                  break;
              }
            }
          }
          if (directoryDescIsSet) {
            qb.setDirectoryDesc(directoryDesc);
          }
          break;
        }
      default:
        throw new SemanticException(generateErrorMessage(ast, "Unknown Token Type " + ast.getToken().getType()));
    }
  }
}
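Within the TOK_DIR branch, the Warehouse calls that matter are getDatabaseExternalPath and getDatabaseManagedPath: the temporary output directory for a CTAS or materialized view without an explicit location is allocated under the destination database's external or managed root, depending on whether the target will be transactional. The following condensed sketch isolates just that decision; the class name, the helper name, and the isTransactionalTarget flag are hypothetical replacements for the makeAcid()/isExternal checks above, not Hive code.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;

final class CtasScratchRootSketch {

  // Hypothetical helper condensing the location choice above: transactional targets
  // stage under the database's managed root, everything else (temporary, external,
  // non-ACID) under its external root.
  static Path chooseScratchRoot(Warehouse wh, Hive db, String destTableDb, boolean isTransactionalTarget)
      throws MetaException, HiveException {
    Database database = db.getDatabase(destTableDb);
    return isTransactionalTarget
        ? wh.getDatabaseManagedPath(database)
        : wh.getDatabaseExternalPath(database);
  }
}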
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
The class TaskCompiler, method getDefaultCtasLocation: computes the default location for a CTAS or materialized-view target by asking a Warehouse for the default table path in the destination database.
private Path getDefaultCtasLocation(final ParseContext pCtx) throws SemanticException {
  try {
    String protoName = null;
    boolean isExternal = false;
    if (pCtx.getQueryProperties().isCTAS()) {
      protoName = pCtx.getCreateTable().getDbTableName();
      isExternal = pCtx.getCreateTable().isExternal();
    } else if (pCtx.getQueryProperties().isMaterializedView()) {
      protoName = pCtx.getCreateViewDesc().getViewName();
    }
    String[] names = Utilities.getDbTableName(protoName);
    if (!db.databaseExists(names[0])) {
      throw new SemanticException("ERROR: The database " + names[0] + " does not exist.");
    }
    Warehouse wh = new Warehouse(conf);
    return wh.getDefaultTablePath(db.getDatabase(names[0]), names[1], isExternal);
  } catch (HiveException e) {
    throw new SemanticException(e);
  } catch (MetaException e) {
    throw new SemanticException(e);
  }
}
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
The class StorageBasedAuthorizationProvider, method initWh: lazily instantiates the Warehouse on the client side and fails fast when it is missing inside the metastore.
/**
 * Make sure that the warehouse variable is set up properly.
 * @throws MetaException if unable to instantiate
 */
private void initWh() throws MetaException, HiveException {
  if (wh == null) {
    if (!isRunFromMetaStore) {
      // Note, although HiveProxy has a method that allows us to check if we're being
      // called from the metastore or from the client, we don't have an initialized HiveProxy
      // till we explicitly initialize it as being from the client side. So, we have a
      // chicken-and-egg problem. So, we now track whether or not we're running from client-side
      // in the SBAP itself.
      hive_db = new HiveProxy(Hive.get(getConf(), StorageBasedAuthorizationProvider.class));
      this.wh = new Warehouse(getConf());
      if (this.wh == null) {
        // If wh is still null after just having initialized it, bail out - something's very wrong.
        throw new IllegalStateException("Unable to initialize Warehouse from clientside.");
      }
    } else {
      // this means handler.getWh() is returning null. Error out.
      throw new IllegalStateException("Uninitialized Warehouse from MetastoreHandler");
    }
  }
}
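The provider builds its Warehouse lazily: on the client side it constructs one from the current configuration, while inside the metastore it expects the handler to have supplied one and treats a missing instance as a hard error. Below is a minimal sketch of the same lazy-initialization shape; LazyWarehouseHolder and its warehouse() accessor are illustrative names, not part of the provider.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;

final class LazyWarehouseHolder {

  private final Configuration conf;
  private final boolean runFromMetaStore;
  // Expected to be set externally (by a metastore handler) when runFromMetaStore is true.
  private Warehouse wh;

  LazyWarehouseHolder(Configuration conf, boolean runFromMetaStore) {
    this.conf = conf;
    this.runFromMetaStore = runFromMetaStore;
  }

  synchronized Warehouse warehouse() throws MetaException {
    if (wh == null) {
      if (runFromMetaStore) {
        // Mirrors the excerpt: inside the metastore a missing Warehouse is a hard error.
        throw new IllegalStateException("Uninitialized Warehouse from MetastoreHandler");
      }
      // Client side: instantiate from the configuration, as initWh() does.
      wh = new Warehouse(conf);
    }
    return wh;
  }
}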