Use of org.apache.hadoop.hive.metastore.Warehouse in project flink by apache:
the HiveParserSemanticAnalyzer class, method getMetaData.
@SuppressWarnings("nls")
private void getMetaData(HiveParserQB qb, ReadEntity parentInput) throws HiveException {
LOG.info("Get metadata for source tables");
// Go over the tables and populate the related structures. We have to materialize the
// table alias list since we might modify it in the middle for a view rewrite.
List<String> tabAliases = new ArrayList<>(qb.getTabAliases());
// Keep track of view alias to view name and read entity.
// E.g., for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T, this
// keeps track of the full view name and read entity corresponding to the aliases
// V3, V3:V2, and V3:V2:V1. This is needed for tracking the dependencies for inputs,
// along with their parents.
Map<String, ObjectPair<String, ReadEntity>> aliasToViewInfo = new HashMap<>();
// Used to capture view-to-subquery conversions; this is checked to detect recursive
// CTE invocations.
Map<String, String> sqAliasToCTEName = new HashMap<>();
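// Resolve each table alias in order: first as a CTE, then as a view, then as a regular table.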
for (String alias : tabAliases) {
String tabName = qb.getTabNameForAlias(alias);
String cteName = tabName.toLowerCase();
Table tab = db.getTable(tabName, false);
// We first look for this alias as a CTE, and then in the catalog; a non-materialized
// CTE shadows a table of the same name in the current database.
if (tab == null || tab.getDbName().equals(SessionState.get().getCurrentDatabase())) {
HiveParserBaseSemanticAnalyzer.CTEClause cte = findCTEFromName(qb, cteName);
if (cte != null) {
if (!cte.materialize) {
addCTEAsSubQuery(qb, cteName, alias);
sqAliasToCTEName.put(alias, cteName);
continue;
}
throw new SemanticException("Materializing CTE is not supported at the moment");
}
}
if (tab == null) {
HiveParserASTNode src = qb.getParseInfo().getSrcForAlias(alias);
if (null != src) {
throw new SemanticException(HiveParserErrorMsg.getMsg(ErrorMsg.INVALID_TABLE, src));
} else {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
}
}
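// View references are expanded inline; the expansion chain is tracked so cycles can be detected.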
if (tab.isView()) {
if (qb.getParseInfo().isAnalyzeCommand()) {
throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
}
String fullViewName = tab.getDbName() + "." + tab.getTableName();
// Prevent view cycles
if (viewsExpanded.contains(fullViewName)) {
throw new SemanticException("Recursive view " + fullViewName + " detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") + " -> " + fullViewName + ").");
}
replaceViewReferenceWithDefinition(qb, tab, tabName, alias);
// If the view is inside another view, it should have at least one parent
if (qb.isInsideView() && parentInput == null) {
parentInput = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
}
ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView());
viewInput = PlanUtils.addInput(inputs, viewInput);
aliasToViewInfo.put(alias, new ObjectPair<>(fullViewName, viewInput));
String aliasId = getAliasId(alias, qb);
if (aliasId != null) {
aliasId = aliasId.replace(SUBQUERY_TAG_1, "").replace(SUBQUERY_TAG_2, "");
}
viewAliasToInput.put(aliasId, viewInput);
continue;
}
if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
throw new SemanticException(HiveParserUtils.generateErrorMessage(qb.getParseInfo().getSrcForAlias(alias), ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
}
qb.getMetaData().setSrcForAlias(alias, tab);
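// For ANALYZE commands, resolve the table/partition spec being analyzed.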
if (qb.getParseInfo().isAnalyzeCommand()) {
// Allow partial partition specification for noscan, since noscan is fast.
TableSpec ts = new TableSpec(db, conf, (HiveParserASTNode) ast.getChild(0), true, this.noscan, frameworkConfig, cluster);
if (ts.specType == SpecType.DYNAMIC_PARTITION) {
// dynamic partitions
try {
ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
} catch (HiveException e) {
throw new SemanticException(HiveParserUtils.generateErrorMessage(qb.getParseInfo().getSrcForAlias(alias), "Cannot get partitions for " + ts.partSpec), e);
}
}
// validate partial scan command
HiveParserQBParseInfo qbpi = qb.getParseInfo();
if (qbpi.isPartialScanAnalyzeCommand()) {
Class<? extends InputFormat> inputFormatClass = null;
switch(ts.specType) {
case TABLE_ONLY:
case DYNAMIC_PARTITION:
inputFormatClass = ts.tableHandle.getInputFormatClass();
break;
case STATIC_PARTITION:
inputFormatClass = ts.partHandle.getInputFormatClass();
break;
default:
assert false;
}
if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass.equals(OrcInputFormat.class))) {
throw new SemanticException("ANALYZE TABLE PARTIALSCAN doesn't support non-RCfile.");
}
}
qb.getParseInfo().addTableSpec(alias, ts);
}
ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
// Temporary tables created during the execution are not the input sources
if (!HiveParserUtils.isValuesTempTable(alias)) {
HiveParserUtils.addInput(inputs, new ReadEntity(tab, parentViewInfo, parentViewInfo == null), mergeIsDirect);
}
}
LOG.info("Get metadata for subqueries");
// Go over the subqueries and getMetaData for these
for (String alias : qb.getSubqAliases()) {
boolean wasView = aliasToViewInfo.containsKey(alias);
boolean wasCTE = sqAliasToCTEName.containsKey(alias);
ReadEntity newParentInput = null;
if (wasView) {
viewsExpanded.add(aliasToViewInfo.get(alias).getFirst());
newParentInput = aliasToViewInfo.get(alias).getSecond();
} else if (wasCTE) {
ctesExpanded.add(sqAliasToCTEName.get(alias));
}
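// Recurse into the subquery, passing the view's read entity (if any) as the parent;
// the expansion stack is popped again after the recursive call.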
HiveParserQBExpr qbexpr = qb.getSubqForAlias(alias);
getMetaData(qbexpr, newParentInput);
if (wasView) {
viewsExpanded.remove(viewsExpanded.size() - 1);
} else if (wasCTE) {
ctesExpanded.remove(ctesExpanded.size() - 1);
}
}
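// Row format and storage format are only filled in for directory destinations below.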
HiveParserBaseSemanticAnalyzer.HiveParserRowFormatParams rowFormatParams = new HiveParserBaseSemanticAnalyzer.HiveParserRowFormatParams();
HiveParserStorageFormat storageFormat = new HiveParserStorageFormat(conf);
LOG.info("Get metadata for destination tables");
// Go over all the destination structures and populate the related metadata
HiveParserQBParseInfo qbp = qb.getParseInfo();
for (String name : qbp.getClauseNamesForDest()) {
HiveParserASTNode ast = qbp.getDestForClause(name);
switch(ast.getToken().getType()) {
case HiveASTParser.TOK_TAB:
{
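// Destination is a table or a partition (INSERT INTO / INSERT OVERWRITE TABLE).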
TableSpec ts = new TableSpec(db, conf, ast, frameworkConfig, cluster);
if (ts.tableHandle.isView() || hiveShim.isMaterializedView(ts.tableHandle)) {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
Class<?> outputFormatClass = ts.tableHandle.getOutputFormatClass();
if (!ts.tableHandle.isNonNative() && !HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
throw new SemanticException(HiveParserErrorMsg.getMsg(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE, ast, "The class is " + outputFormatClass.toString()));
}
boolean isTableWrittenTo = qb.getParseInfo().isInsertIntoTable(ts.tableHandle.getDbName(), ts.tableHandle.getTableName());
isTableWrittenTo |= (qb.getParseInfo().getInsertOverwriteTables().get(getUnescapedName((HiveParserASTNode) ast.getChild(0), ts.tableHandle.getDbName())) != null);
assert isTableWrittenTo : "Inconsistent data structure detected: we are writing to " + ts.tableHandle + " in " + name + " but it's not in isInsertIntoTable() or getInsertOverwriteTables()";
// A static partition spec may be absent here, but whether the table itself is partitioned is not known.
if (ts.specType != SpecType.STATIC_PARTITION) {
// This is a table or dynamic partition
qb.getMetaData().setDestForAlias(name, ts.tableHandle);
// has dynamic as well as static partitions
if (ts.partSpec != null && ts.partSpec.size() > 0) {
qb.getMetaData().setPartSpecForAlias(name, ts.partSpec);
}
} else {
// This is a partition
qb.getMetaData().setDestForAlias(name, ts.partHandle);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
break;
}
case HiveASTParser.TOK_DIR:
{
// This is a dfs file
String fname = stripQuotes(ast.getChild(0).getText());
if ((!qb.getParseInfo().getIsSubQ()) && (((HiveParserASTNode) ast.getChild(0)).getToken().getType() == HiveASTParser.TOK_TMP_FILE)) {
if (qb.isCTAS() || qb.isMaterializedView()) {
qb.setIsQuery(false);
Path location;
// If the CTAS query specifies a location, use the table location, else use the db location
if (qb.getTableDesc() != null && qb.getTableDesc().getLocation() != null) {
location = new Path(qb.getTableDesc().getLocation());
} else {
// allocate a temporary output dir on the location of the table
String tableName = getUnescapedName((HiveParserASTNode) ast.getChild(0));
String[] names = Utilities.getDbTableName(tableName);
try {
Warehouse wh = new Warehouse(conf);
// Use destination table's db location.
String destTableDb = qb.getTableDesc() != null ? qb.getTableDesc().getDatabaseName() : null;
if (destTableDb == null) {
destTableDb = names[0];
}
location = wh.getDatabasePath(db.getDatabase(destTableDb));
} catch (MetaException e) {
throw new SemanticException(e);
}
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
TableSpec ts = new TableSpec(db, conf, this.ast, frameworkConfig, cluster);
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
} else {
// This is the only place where isQuery is set to true; it defaults
// to false.
qb.setIsQuery(true);
}
}
boolean isDfsFile = true;
if (ast.getChildCount() >= 2 && ast.getChild(1).getText().toLowerCase().equals("local")) {
isDfsFile = false;
}
// Set the destination for the SELECT query inside the CTAS
qb.getMetaData().setDestForAlias(name, fname, isDfsFile);
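// Collect any ROW FORMAT / STORED AS / SERDE settings for the target directory into a CreateTableDesc.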
CreateTableDesc directoryDesc = new CreateTableDesc();
boolean directoryDescIsSet = false;
int numCh = ast.getChildCount();
for (int num = 1; num < numCh; num++) {
HiveParserASTNode child = (HiveParserASTNode) ast.getChild(num);
if (child != null) {
if (storageFormat.fillStorageFormat(child)) {
directoryDesc.setOutputFormat(storageFormat.getOutputFormat());
directoryDesc.setSerName(storageFormat.getSerde());
directoryDescIsSet = true;
continue;
}
switch(child.getToken().getType()) {
case HiveASTParser.TOK_TABLEROWFORMAT:
rowFormatParams.analyzeRowFormat(child);
directoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
directoryDesc.setLineDelim(rowFormatParams.lineDelim);
directoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
directoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
directoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
directoryDesc.setNullFormat(rowFormatParams.nullFormat);
directoryDescIsSet = true;
break;
case HiveASTParser.TOK_TABLESERIALIZER:
HiveParserASTNode serdeChild = (HiveParserASTNode) child.getChild(0);
storageFormat.setSerde(unescapeSQLString(serdeChild.getChild(0).getText()));
directoryDesc.setSerName(storageFormat.getSerde());
if (serdeChild.getChildCount() > 1) {
directoryDesc.setSerdeProps(new HashMap<>());
readProps((HiveParserASTNode) serdeChild.getChild(1).getChild(0), directoryDesc.getSerdeProps());
}
directoryDescIsSet = true;
break;
}
}
}
if (directoryDescIsSet) {
qb.setDirectoryDesc(directoryDesc);
}
break;
}
default:
throw new SemanticException(HiveParserUtils.generateErrorMessage(ast, "Unknown Token Type " + ast.getToken().getType()));
}
}
}
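To make the Warehouse call above concrete, here is a minimal, self-contained sketch of resolving a database's storage path, mirroring what the CTAS branch does when no explicit location is available. This is an illustration under assumptions (a configured HiveConf with a reachable metastore, and the example database name "default"), not code from the Flink source.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class WarehousePathSketch {
    public static void main(String[] args) throws HiveException, MetaException {
        // Assumption: hive-site.xml (metastore URIs, warehouse dir) is on the classpath.
        HiveConf conf = new HiveConf();
        Hive db = Hive.get(conf);
        // Warehouse maps metastore objects to their filesystem locations.
        Warehouse wh = new Warehouse(conf);
        // "default" is an example name; getMetaData above uses the destination table's db.
        Database destDb = db.getDatabase("default");
        Path location = wh.getDatabasePath(destDb); // e.g. hdfs://.../user/hive/warehouse
        System.out.println("Database path: " + location);
    }
}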