Search in sources :

Example 1 with TableSpec

Use of org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.TableSpec in the Apache Flink project.

The getMetaData method of the HiveParserSemanticAnalyzer class.

/**
 * Populates the metadata of a query block in three passes, logging the start of each pass.
 *
 * <ol>
 *   <li><b>Source tables:</b> every table alias of {@code qb} is resolved through
 *       {@code db.getTable}. An alias that matches a non-materialized CTE is turned into a
 *       sub-query, a view is replaced by its definition (after a cycle check against
 *       {@code viewsExpanded}), and any other table is registered as a source of {@code qb} and
 *       added to {@code inputs} as a {@link ReadEntity}. For ANALYZE commands a
 *       {@link TableSpec} is additionally built, validated and attached to the parse info.
 *   <li><b>Sub-queries:</b> {@code getMetaData} is invoked for every sub-query alias. While a
 *       sub-query that originated from a view or a CTE is being processed, its name is kept in
 *       {@code viewsExpanded} / {@code ctesExpanded}.
 *   <li><b>Destinations:</b> every destination clause is inspected. {@code TOK_TAB}
 *       destinations are validated and registered as a table or partition; {@code TOK_DIR}
 *       destinations are registered as a file/directory, optionally together with a
 *       {@link CreateTableDesc} describing storage format, row format and serde.
 * </ol>
 *
 * @param qb the query block whose aliases, sub-queries and destination clauses are processed
 * @param parentInput the read entity used as parent of the {@link ReadEntity} created for a
 *     view found in this block; may be {@code null}. When it is {@code null} and {@code qb} is
 *     inside a view, it is looked up via {@code PlanUtils.getParentViewInfo} and the looked-up
 *     value is then reused for the remaining aliases of this call.
 * @throws HiveException declared by this method; the validation failures detected here
 *     (unknown table, recursive view, ANALYZE on a view, unsupported input/output format, DML
 *     against a view, unknown destination token, materialized CTE, failing metastore calls)
 *     are raised as {@link SemanticException}
 */
@SuppressWarnings("nls")
private void getMetaData(HiveParserQB qb, ReadEntity parentInput) throws HiveException {
    LOG.info("Get metadata for source tables");
    // Go over the tables and populate the related structures. We have to materialize the table
    // alias list (copy it) since we might modify it in the middle for view rewrite.
    List<String> tabAliases = new ArrayList<>(qb.getTabAliases());
    // Keep track of view alias to view name and read entity
    // For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
    // keeps track of full view name and read entity corresponding to alias V3, V3:V2, V3:V2:V1.
    // This is needed for tracking the dependencies for inputs, along with their parents.
    Map<String, ObjectPair<String, ReadEntity>> aliasToViewInfo = new HashMap<>();
    // used to capture view to SQ conversions. This is used to check for recursive CTE
    // invocations.
    Map<String, String> sqAliasToCTEName = new HashMap<>();
    for (String alias : tabAliases) {
        String tabName = qb.getTabNameForAlias(alias);
        // CTEs are looked up by the lower-cased table name.
        String cteName = tabName.toLowerCase();
        // NOTE(review): the 'false' argument presumably disables throwing for a missing table,
        // since a null result is handled below - confirm against db.getTable.
        Table tab = db.getTable(tabName, false);
        // A CTE is only considered when the catalog has no such table, or when the table found
        // belongs to the current database of the session.
        if (tab == null || tab.getDbName().equals(SessionState.get().getCurrentDatabase())) {
            // we first look for this alias from CTE, and then from catalog.
            HiveParserBaseSemanticAnalyzer.CTEClause cte = findCTEFromName(qb, cteName);
            if (cte != null) {
                if (!cte.materialize) {
                    // Non-materialized CTE: treat it as a sub-query and remember the mapping so
                    // the sub-query pass below can track it in ctesExpanded.
                    addCTEAsSubQuery(qb, cteName, alias);
                    sqAliasToCTEName.put(alias, cteName);
                    continue;
                }
                throw new SemanticException("Materializing CTE is not supported at the moment");
            }
        }
        // Neither a usable CTE nor a catalog table: report an invalid table, pointing at the
        // source AST node when one is recorded for the alias.
        if (tab == null) {
            HiveParserASTNode src = qb.getParseInfo().getSrcForAlias(alias);
            if (null != src) {
                throw new SemanticException(HiveParserErrorMsg.getMsg(ErrorMsg.INVALID_TABLE, src));
            } else {
                throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
            }
        }
        if (tab.isView()) {
            // ANALYZE is rejected for views.
            if (qb.getParseInfo().isAnalyzeCommand()) {
                throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
            }
            String fullViewName = tab.getDbName() + "." + tab.getTableName();
            // Prevent view cycles
            if (viewsExpanded.contains(fullViewName)) {
                throw new SemanticException("Recursive view " + fullViewName + " detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") + " -> " + fullViewName + ").");
            }
            replaceViewReferenceWithDefinition(qb, tab, tabName, alias);
            // If the view is Inside another view, it should have at least one parent
            if (qb.isInsideView() && parentInput == null) {
                parentInput = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
            }
            // The third constructor argument is true only when this block is not inside a view.
            ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView());
            // Keep the entity returned by addInput rather than the freshly created one.
            viewInput = PlanUtils.addInput(inputs, viewInput);
            aliasToViewInfo.put(alias, new ObjectPair<>(fullViewName, viewInput));
            // The key used in viewAliasToInput is the alias id with both sub-query tags removed.
            String aliasId = getAliasId(alias, qb);
            if (aliasId != null) {
                aliasId = aliasId.replace(SUBQUERY_TAG_1, "").replace(SUBQUERY_TAG_2, "");
            }
            viewAliasToInput.put(aliasId, viewInput);
            continue;
        }
        // The table's input format class must be an org.apache.hadoop.mapred.InputFormat.
        if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
            throw new SemanticException(HiveParserUtils.generateErrorMessage(qb.getParseInfo().getSrcForAlias(alias), ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
        }
        qb.getMetaData().setSrcForAlias(alias, tab);
        if (qb.getParseInfo().isAnalyzeCommand()) {
            // allow partial partition specification for noscan since noscan is fast.
            // NOTE(review): 'ast' here is the analyzer's field; the local variable of the same
            // name is only declared inside the destination loop further down.
            TableSpec ts = new TableSpec(db, conf, (HiveParserASTNode) ast.getChild(0), true, this.noscan, frameworkConfig, cluster);
            if (ts.specType == SpecType.DYNAMIC_PARTITION) {
                // dynamic partitions: resolve the partitions matching the (partial) spec
                try {
                    ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
                } catch (HiveException e) {
                    throw new SemanticException(HiveParserUtils.generateErrorMessage(qb.getParseInfo().getSrcForAlias(alias), "Cannot get partitions for " + ts.partSpec), e);
                }
            }
            // validate partial scan command
            HiveParserQBParseInfo qbpi = qb.getParseInfo();
            if (qbpi.isPartialScanAnalyzeCommand()) {
                // Take the input format from the table handle, or from the partition handle
                // when a static partition was specified.
                Class<? extends InputFormat> inputFormatClass = null;
                switch(ts.specType) {
                    case TABLE_ONLY:
                    case DYNAMIC_PARTITION:
                        inputFormatClass = ts.tableHandle.getInputFormatClass();
                        break;
                    case STATIC_PARTITION:
                        inputFormatClass = ts.partHandle.getInputFormatClass();
                        break;
                    default:
                        // NOTE(review): with assertions disabled this falls through with a null
                        // inputFormatClass and the check below would throw a
                        // NullPointerException - confirm no other SpecType value can occur.
                        assert false;
                }
                // Only RCFile and ORC input formats are accepted.
                // NOTE(review): the message below mentions RCfile only, although ORC passes too.
                if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass.equals(OrcInputFormat.class))) {
                    throw new SemanticException("ANALYZE TABLE PARTIALSCAN doesn't support non-RCfile.");
                }
            }
            qb.getParseInfo().addTableSpec(alias, ts);
        }
        // Look up the view (if any) through which this table is reached; the table is flagged
        // via the third ReadEntity argument only when no such parent view exists.
        ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
        // Temporary tables created during the execution are not the input sources
        if (!HiveParserUtils.isValuesTempTable(alias)) {
            HiveParserUtils.addInput(inputs, new ReadEntity(tab, parentViewInfo, parentViewInfo == null), mergeIsDirect);
        }
    }
    LOG.info("Get metadata for subqueries");
    // Go over the subqueries and getMetaData for these
    for (String alias : qb.getSubqAliases()) {
        boolean wasView = aliasToViewInfo.containsKey(alias);
        boolean wasCTE = sqAliasToCTEName.containsKey(alias);
        ReadEntity newParentInput = null;
        // Record the view / CTE being expanded before descending into it; a view additionally
        // hands its read entity down as the parent input of the nested call.
        if (wasView) {
            viewsExpanded.add(aliasToViewInfo.get(alias).getFirst());
            newParentInput = aliasToViewInfo.get(alias).getSecond();
        } else if (wasCTE) {
            ctesExpanded.add(sqAliasToCTEName.get(alias));
        }
        HiveParserQBExpr qbexpr = qb.getSubqForAlias(alias);
        // NOTE(review): the argument is a HiveParserQBExpr, so this presumably resolves to an
        // overload of getMetaData declared elsewhere in the class - confirm.
        getMetaData(qbexpr, newParentInput);
        // Undo the bookkeeping done before the nested call.
        // NOTE(review): assumes viewsExpanded / ctesExpanded are lists, so that remove(int)
        // drops the last element - confirm against their declarations.
        if (wasView) {
            viewsExpanded.remove(viewsExpanded.size() - 1);
        } else if (wasCTE) {
            ctesExpanded.remove(ctesExpanded.size() - 1);
        }
    }
    // Both helpers are created once and shared by all destination clauses handled below.
    HiveParserBaseSemanticAnalyzer.HiveParserRowFormatParams rowFormatParams = new HiveParserBaseSemanticAnalyzer.HiveParserRowFormatParams();
    HiveParserStorageFormat storageFormat = new HiveParserStorageFormat(conf);
    LOG.info("Get metadata for destination tables");
    // Go over all the destination structures and populate the related metadata
    HiveParserQBParseInfo qbp = qb.getParseInfo();
    for (String name : qbp.getClauseNamesForDest()) {
        // Destination node of this clause. This local shadows the analyzer's 'ast' field, which
        // is why the field is referenced as 'this.ast' inside this loop.
        HiveParserASTNode ast = qbp.getDestForClause(name);
        switch(ast.getToken().getType()) {
            case HiveASTParser.TOK_TAB:
                {
                    TableSpec ts = new TableSpec(db, conf, ast, frameworkConfig, cluster);
                    // Writing into a view or materialized view is rejected.
                    if (ts.tableHandle.isView() || hiveShim.isMaterializedView(ts.tableHandle)) {
                        throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
                    }
                    // Native tables must use an output format implementing HiveOutputFormat.
                    Class<?> outputFormatClass = ts.tableHandle.getOutputFormatClass();
                    if (!ts.tableHandle.isNonNative() && !HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
                        throw new SemanticException(HiveParserErrorMsg.getMsg(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE, ast, "The class is " + outputFormatClass.toString()));
                    }
                    // Sanity check (only active with assertions enabled): the destination must
                    // be known to the parse info as an INSERT INTO or INSERT OVERWRITE target.
                    boolean isTableWrittenTo = qb.getParseInfo().isInsertIntoTable(ts.tableHandle.getDbName(), ts.tableHandle.getTableName());
                    isTableWrittenTo |= (qb.getParseInfo().getInsertOverwriteTables().get(getUnescapedName((HiveParserASTNode) ast.getChild(0), ts.tableHandle.getDbName())) != null);
                    assert isTableWrittenTo : "Inconsistent data structure detected: we are writing to " + ts.tableHandle + " in " + name + " but it's not in isInsertIntoTable() or getInsertOverwriteTables()";
                    // The spec type is derived from the destination node of the statement;
                    // whether the table itself is partitioned is not known at this point.
                    if (ts.specType != SpecType.STATIC_PARTITION) {
                        // This is a table or dynamic partition
                        qb.getMetaData().setDestForAlias(name, ts.tableHandle);
                        // has dynamic as well as static partitions
                        if (ts.partSpec != null && ts.partSpec.size() > 0) {
                            qb.getMetaData().setPartSpecForAlias(name, ts.partSpec);
                        }
                    } else {
                        // This is a partition
                        qb.getMetaData().setDestForAlias(name, ts.partHandle);
                    }
                    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
                        // Add the table spec for the destination table.
                        qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
                    }
                    break;
                }
            case HiveASTParser.TOK_DIR:
                {
                    // This is a dfs file
                    String fname = stripQuotes(ast.getChild(0).getText());
                    // A TOK_TMP_FILE destination outside of a sub-query is either the target of
                    // a CTAS / materialized view creation, or marks a plain query.
                    if ((!qb.getParseInfo().getIsSubQ()) && (((HiveParserASTNode) ast.getChild(0)).getToken().getType() == HiveASTParser.TOK_TMP_FILE)) {
                        if (qb.isCTAS() || qb.isMaterializedView()) {
                            qb.setIsQuery(false);
                            // NOTE(review): 'location' is assigned in both branches below but is
                            // not read again within this method.
                            Path location;
                            // If the table descriptor specifies a location, use that
                            // location, else use the db location
                            if (qb.getTableDesc() != null && qb.getTableDesc().getLocation() != null) {
                                location = new Path(qb.getTableDesc().getLocation());
                            } else {
                                // allocate a temporary output dir on the location of the table
                                String tableName = getUnescapedName((HiveParserASTNode) ast.getChild(0));
                                String[] names = Utilities.getDbTableName(tableName);
                                try {
                                    Warehouse wh = new Warehouse(conf);
                                    // Use destination table's db location.
                                    String destTableDb = qb.getTableDesc() != null ? qb.getTableDesc().getDatabaseName() : null;
                                    // Fall back to the database part of the destination name.
                                    if (destTableDb == null) {
                                        destTableDb = names[0];
                                    }
                                    location = wh.getDatabasePath(db.getDatabase(destTableDb));
                                } catch (MetaException e) {
                                    throw new SemanticException(e);
                                }
                            }
                            if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
                                // Built from the analyzer's field 'this.ast', not from the local
                                // destination node 'ast'.
                                TableSpec ts = new TableSpec(db, conf, this.ast, frameworkConfig, cluster);
                                // Add the table spec for the destination table.
                                qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
                            }
                        } else {
                            // This is the only place where isQuery is set to true; it defaults
                            // to false.
                            qb.setIsQuery(true);
                        }
                    }
                    // The destination counts as a dfs file unless the second child of the
                    // destination node is the keyword "local" (compared case-insensitively).
                    boolean isDfsFile = true;
                    if (ast.getChildCount() >= 2 && ast.getChild(1).getText().toLowerCase().equals("local")) {
                        isDfsFile = false;
                    }
                    // Set the destination for the SELECT query inside the CTAS
                    qb.getMetaData().setDestForAlias(name, fname, isDfsFile);
                    // Collect storage format, row format and serde settings from the remaining
                    // children (index 1 onwards); the descriptor is only attached to qb when at
                    // least one of them was present.
                    CreateTableDesc directoryDesc = new CreateTableDesc();
                    boolean directoryDescIsSet = false;
                    int numCh = ast.getChildCount();
                    for (int num = 1; num < numCh; num++) {
                        HiveParserASTNode child = (HiveParserASTNode) ast.getChild(num);
                        if (child != null) {
                            // fillStorageFormat returning true means the child was consumed as a
                            // storage format clause; copy its output format and serde over.
                            if (storageFormat.fillStorageFormat(child)) {
                                directoryDesc.setOutputFormat(storageFormat.getOutputFormat());
                                directoryDesc.setSerName(storageFormat.getSerde());
                                directoryDescIsSet = true;
                                continue;
                            }
                            // Children of any other token type are ignored by this switch.
                            switch(child.getToken().getType()) {
                                case HiveASTParser.TOK_TABLEROWFORMAT:
                                    // Row format: copy all delimiters, escape and null format.
                                    rowFormatParams.analyzeRowFormat(child);
                                    directoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
                                    directoryDesc.setLineDelim(rowFormatParams.lineDelim);
                                    directoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
                                    directoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
                                    directoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
                                    directoryDesc.setNullFormat(rowFormatParams.nullFormat);
                                    directoryDescIsSet = true;
                                    break;
                                case HiveASTParser.TOK_TABLESERIALIZER:
                                    // Serde: first grandchild holds the serde name, an optional
                                    // second grandchild holds its properties.
                                    HiveParserASTNode serdeChild = (HiveParserASTNode) child.getChild(0);
                                    storageFormat.setSerde(unescapeSQLString(serdeChild.getChild(0).getText()));
                                    directoryDesc.setSerName(storageFormat.getSerde());
                                    if (serdeChild.getChildCount() > 1) {
                                        directoryDesc.setSerdeProps(new HashMap<String, String>());
                                        readProps((HiveParserASTNode) serdeChild.getChild(1).getChild(0), directoryDesc.getSerdeProps());
                                    }
                                    directoryDescIsSet = true;
                                    break;
                            }
                        }
                    }
                    if (directoryDescIsSet) {
                        qb.setDirectoryDesc(directoryDesc);
                    }
                    break;
                }
            default:
                // Any other destination token type is not supported.
                throw new SemanticException(HiveParserUtils.generateErrorMessage(ast, "Unknown Token Type " + ast.getToken().getType()));
        }
    }
}
Also used : Warehouse(org.apache.hadoop.hive.metastore.Warehouse) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HiveOutputFormat(org.apache.hadoop.hive.ql.io.HiveOutputFormat) HiveParserBaseSemanticAnalyzer.unescapeSQLString(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.unescapeSQLString) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) Path(org.apache.hadoop.fs.Path) TableSpec(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.TableSpec) Table(org.apache.hadoop.hive.ql.metadata.Table) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) InputFormat(org.apache.hadoop.mapred.InputFormat) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) ObjectPair(org.apache.hadoop.hive.common.ObjectPair)

Aggregations

ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 TableSpec (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.TableSpec)1 HiveParserBaseSemanticAnalyzer.unescapeSQLString (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.unescapeSQLString)1 Path (org.apache.hadoop.fs.Path)1 ObjectPair (org.apache.hadoop.hive.common.ObjectPair)1 Warehouse (org.apache.hadoop.hive.metastore.Warehouse)1 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)1 ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity)1 HiveOutputFormat (org.apache.hadoop.hive.ql.io.HiveOutputFormat)1 RCFileInputFormat (org.apache.hadoop.hive.ql.io.RCFileInputFormat)1 OrcInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 Table (org.apache.hadoop.hive.ql.metadata.Table)1 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)1 CreateTableDesc (org.apache.hadoop.hive.ql.plan.CreateTableDesc)1 InputFormat (org.apache.hadoop.mapred.InputFormat)1