
Example 96 with Node

use of org.apache.hadoop.hive.ql.lib.Node in project hive by apache.

the class LineageInfo method getLineageInfo.

/**
   * Parses the given query and gathers its lineage info.
   *
   * @param query the HiveQL query to analyze
   * @throws ParseException
   * @throws SemanticException
   */
public void getLineageInfo(String query) throws ParseException, SemanticException {
    /*
     * Get the AST tree
     */
    ASTNode tree = ParseUtils.parse(query, null);
    while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
        tree = (ASTNode) tree.getChild(0);
    }
    /*
     * initialize Event Processor and dispatcher.
     */
    inputTableList.clear();
    OutputTableList.clear();
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack. The dispatcher
    // generates the plan from the operator tree
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(this, rules, null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of top-level ("top op") nodes to start the walk from
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(tree);
    ogw.startWalking(topNodes, null);
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Rule(org.apache.hadoop.hive.ql.lib.Rule) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) LinkedHashMap(java.util.LinkedHashMap)
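
For context, a minimal driver for this method might look like the following. It assumes the surrounding LineageInfo class exposes the two table sets the walker populates; the accessor names are illustrative and not confirmed by the snippet above.

public static void main(String[] args) throws Exception {
    LineageInfo lep = new LineageInfo();
    // Parse a query and let the walker populate the input/output table sets.
    lep.getLineageInfo("INSERT OVERWRITE TABLE dest SELECT s.key FROM src s");
    for (String tab : lep.getInputTableList()) {
        System.out.println("InputTable=" + tab);
    }
    for (String tab : lep.getOutputTableList()) {
        System.out.println("OutputTable=" + tab);
    }
}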

Example 97 with Node

use of org.apache.hadoop.hive.ql.lib.Node in project hive by apache.

the class SemanticAnalyzer method parseStreamTables.

private void parseStreamTables(QBJoinTree joinTree, QB qb) {
    List<String> streamAliases = joinTree.getStreamAliases();
    for (Node hintNode : qb.getParseInfo().getHints().getChildren()) {
        ASTNode hint = (ASTNode) hintNode;
        if (hint.getChild(0).getType() == HintParser.TOK_STREAMTABLE) {
            for (int i = 0; i < hint.getChild(1).getChildCount(); i++) {
                if (streamAliases == null) {
                    streamAliases = new ArrayList<String>();
                }
                streamAliases.add(hint.getChild(1).getChild(i).getText());
            }
        }
    }
    joinTree.setStreamAliases(streamAliases);
}
Also used : Node(org.apache.hadoop.hive.ql.lib.Node) RelNode(org.apache.calcite.rel.RelNode)
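
As a reference point, the hint tree walked here originates from a query of roughly this shape; the TOK_STREAMTABLE node's second child holds the alias list that the loop copies into streamAliases. The query text is an illustrative sketch, not taken from the snippet.

// HintParser turns the comment-style hint into hint nodes; for TOK_STREAMTABLE,
// child(1) carries the aliases that the loop above adds to streamAliases.
String sql = "SELECT /*+ STREAMTABLE(a) */ a.val, b.val "
    + "FROM a JOIN b ON (a.key = b.key)";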

Example 98 with Node

use of org.apache.hadoop.hive.ql.lib.Node in project hive by apache.

the class SemanticAnalyzer method genValuesTempTable.

/**
   * Generate a temp table out of a values clause
   * See also {@link #preProcessForInsert(ASTNode, QB)}
   */
private ASTNode genValuesTempTable(ASTNode originalFrom, QB qb) throws SemanticException {
    Path dataDir = null;
    if (!qb.getEncryptedTargetTablePaths().isEmpty()) {
        //currently only Insert into T values(...) is supported thus only 1 values clause
        //and only 1 target table are possible.  If/when support for
        //select ... from values(...) is added an insert statement may have multiple
        //encrypted target tables.
        dataDir = ctx.getMRTmpPath(qb.getEncryptedTargetTablePaths().get(0).toUri());
    }
    // Pick a name for the table
    SessionState ss = SessionState.get();
    String tableName = VALUES_TMP_TABLE_NAME_PREFIX + ss.getNextValuesTempTableSuffix();
    // Step 1, parse the values clause we were handed
    List<? extends Node> fromChildren = originalFrom.getChildren();
    // First child should be the virtual table ref
    ASTNode virtualTableRef = (ASTNode) fromChildren.get(0);
    assert virtualTableRef.getToken().getType() == HiveParser.TOK_VIRTUAL_TABREF : "Expected first child of TOK_VIRTUAL_TABLE to be TOK_VIRTUAL_TABREF but was " + virtualTableRef.getName();
    List<? extends Node> virtualTableRefChildren = virtualTableRef.getChildren();
    // First child of this should be the table name.  If it's anonymous,
    // then we don't have a table name.
    ASTNode tabName = (ASTNode) virtualTableRefChildren.get(0);
    if (tabName.getToken().getType() != HiveParser.TOK_ANONYMOUS) {
        // you need to parse this list of column names and build it into the table
        throw new SemanticException(ErrorMsg.VALUES_TABLE_CONSTRUCTOR_NOT_SUPPORTED.getMsg());
    }
    // The second child of the TOK_VIRTUAL_TABLE should be TOK_VALUES_TABLE
    ASTNode valuesTable = (ASTNode) fromChildren.get(1);
    assert valuesTable.getToken().getType() == HiveParser.TOK_VALUES_TABLE : "Expected second child of TOK_VIRTUAL_TABLE to be TOK_VALUES_TABLE but was " + valuesTable.getName();
    // Each of the children of TOK_VALUES_TABLE will be a TOK_VALUE_ROW
    List<? extends Node> valuesTableChildren = valuesTable.getChildren();
    // Now that we're going to start reading through the rows, open a file to write the rows to.
    // If we leave this method before creating the temporary table we need to be sure to clean up
    // this file.
    Path tablePath = null;
    FileSystem fs = null;
    FSDataOutputStream out = null;
    try {
        if (dataDir == null) {
            tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf);
        } else {
            //if target table of insert is encrypted, make sure temporary table data is stored
            //similarly encrypted
            tablePath = Warehouse.getDnsPath(new Path(dataDir, tableName), conf);
        }
        fs = tablePath.getFileSystem(conf);
        fs.mkdirs(tablePath);
        Path dataFile = new Path(tablePath, "data_file");
        out = fs.create(dataFile);
        List<FieldSchema> fields = new ArrayList<FieldSchema>();
        boolean firstRow = true;
        for (Node n : valuesTableChildren) {
            ASTNode valuesRow = (ASTNode) n;
            assert valuesRow.getToken().getType() == HiveParser.TOK_VALUE_ROW : "Expected child of TOK_VALUES_TABLE to be TOK_VALUE_ROW but was " + valuesRow.getName();
            // Each of the children of this should be a literal
            List<? extends Node> valuesRowChildren = valuesRow.getChildren();
            boolean isFirst = true;
            int nextColNum = 1;
            for (Node n1 : valuesRowChildren) {
                ASTNode value = (ASTNode) n1;
                if (firstRow) {
                    fields.add(new FieldSchema("tmp_values_col" + nextColNum++, "string", ""));
                }
                if (isFirst)
                    isFirst = false;
                else
                    // separate fields with the default TextFile delimiter (ctrl-A)
                    writeAsText("\u0001", out);
                writeAsText(unparseExprForValuesClause(value), out);
            }
            writeAsText("\n", out);
            firstRow = false;
        }
        // Step 2, create a temp table, using the created file as the data
        StorageFormat format = new StorageFormat(conf);
        format.processStorageFormat("TextFile");
        Table table = db.newTable(tableName);
        table.setSerializationLib(format.getSerde());
        table.setFields(fields);
        table.setDataLocation(tablePath);
        table.getTTable().setTemporary(true);
        table.setStoredAsSubDirectories(false);
        table.setInputFormatClass(format.getInputFormat());
        table.setOutputFormatClass(format.getOutputFormat());
        db.createTable(table, false);
    } catch (Exception e) {
        String errMsg = ErrorMsg.INSERT_CANNOT_CREATE_TEMP_FILE.getMsg() + e.getMessage();
        LOG.error(errMsg);
        // Try to delete the file
        if (fs != null && tablePath != null) {
            try {
                fs.delete(tablePath, false);
            } catch (IOException swallowIt) {
                // best-effort cleanup; the SemanticException below carries the real error
            }
        }
        throw new SemanticException(errMsg, e);
    } finally {
        IOUtils.closeStream(out);
    }
    // Step 3, return a new subtree with a from clause built around that temp table
    // The form of the tree is TOK_TABREF->TOK_TABNAME->identifier(tablename)
    Token t = new ClassicToken(HiveParser.TOK_TABREF);
    ASTNode tabRef = new ASTNode(t);
    t = new ClassicToken(HiveParser.TOK_TABNAME);
    ASTNode tabNameNode = new ASTNode(t);
    tabRef.addChild(tabNameNode);
    t = new ClassicToken(HiveParser.Identifier, tableName);
    ASTNode identifier = new ASTNode(t);
    tabNameNode.addChild(identifier);
    return tabRef;
}
Also used : Path(org.apache.hadoop.fs.Path) SessionState(org.apache.hadoop.hive.ql.session.SessionState) Table(org.apache.hadoop.hive.ql.metadata.Table) ClassicToken(org.antlr.runtime.ClassicToken) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Node(org.apache.hadoop.hive.ql.lib.Node) RelNode(org.apache.calcite.rel.RelNode) ArrayList(java.util.ArrayList) Token(org.antlr.runtime.Token) CommonToken(org.antlr.runtime.CommonToken) IOException(java.io.IOException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) PatternSyntaxException(java.util.regex.PatternSyntaxException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(java.security.AccessControlException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream)
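
The write loop above serializes each TOK_VALUE_ROW as one line of ctrl-A separated text, which the TextFile temp table later reads back with the default SerDe field delimiter. Below is a standalone sketch of that layout, under the assumption that the delimiter is the usual \u0001; class and method names are illustrative.

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

final class ValuesRowWriter {
    // Mirrors the row-writing loop: fields joined by ctrl-A, one line per row.
    static void writeRow(OutputStream out, String... cols) throws IOException {
        boolean isFirst = true;
        for (String col : cols) {
            if (isFirst) {
                isFirst = false;
            } else {
                out.write('\u0001');  // field separator
            }
            out.write(col.getBytes(StandardCharsets.UTF_8));
        }
        out.write('\n');  // row separator
    }
}

For INSERT INTO t VALUES (1, 'a'), (2, 'b') this produces two lines, 1^Aa and 2^Ab, matching the tmp_values_col1/tmp_values_col2 string schema built in the loop.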

Example 99 with Node

use of org.apache.hadoop.hive.ql.lib.Node in project hive by apache.

the class SemanticAnalyzer method genUniqueJoinTree.

private QBJoinTree genUniqueJoinTree(QB qb, ASTNode joinParseTree, Map<String, Operator> aliasToOpInfo) throws SemanticException {
    QBJoinTree joinTree = new QBJoinTree();
    joinTree.setNoOuterJoin(false);
    joinTree.setExpressions(new ArrayList<ArrayList<ASTNode>>());
    joinTree.setFilters(new ArrayList<ArrayList<ASTNode>>());
    joinTree.setFiltersForPushing(new ArrayList<ArrayList<ASTNode>>());
    // Create joinTree structures to fill them up later
    ArrayList<String> rightAliases = new ArrayList<String>();
    ArrayList<String> leftAliases = new ArrayList<String>();
    ArrayList<String> baseSrc = new ArrayList<String>();
    ArrayList<Boolean> preserved = new ArrayList<Boolean>();
    boolean lastPreserved = false;
    int cols = -1;
    for (int i = 0; i < joinParseTree.getChildCount(); i++) {
        ASTNode child = (ASTNode) joinParseTree.getChild(i);
        switch(child.getToken().getType()) {
            case HiveParser.TOK_TABREF:
                // Handle a table - populate aliases appropriately:
                // leftAliases should contain the first table, rightAliases should
                // contain all other tables and baseSrc should contain all tables
                String tableName = getUnescapedUnqualifiedTableName((ASTNode) child.getChild(0));
                String alias = child.getChildCount() == 1 ? tableName : unescapeIdentifier(child.getChild(child.getChildCount() - 1).getText().toLowerCase());
                if (i == 0) {
                    leftAliases.add(alias);
                    joinTree.setLeftAlias(alias);
                } else {
                    rightAliases.add(alias);
                }
                joinTree.getAliasToOpInfo().put(getModifiedAlias(qb, alias), aliasToOpInfo.get(alias));
                joinTree.setId(qb.getId());
                baseSrc.add(alias);
                preserved.add(lastPreserved);
                lastPreserved = false;
                break;
            case HiveParser.TOK_EXPLIST:
                if (cols == -1 && child.getChildCount() != 0) {
                    cols = child.getChildCount();
                } else if (child.getChildCount() != cols) {
                    throw new SemanticException("Tables with different or invalid " + "number of keys in UNIQUEJOIN");
                }
                ArrayList<ASTNode> expressions = new ArrayList<ASTNode>();
                ArrayList<ASTNode> filt = new ArrayList<ASTNode>();
                ArrayList<ASTNode> filters = new ArrayList<ASTNode>();
                for (Node exp : child.getChildren()) {
                    expressions.add((ASTNode) exp);
                }
                joinTree.getExpressions().add(expressions);
                joinTree.getFilters().add(filt);
                joinTree.getFiltersForPushing().add(filters);
                break;
            case HiveParser.KW_PRESERVE:
                lastPreserved = true;
                break;
            case HiveParser.TOK_SUBQUERY:
                throw new SemanticException("Subqueries are not supported in UNIQUEJOIN");
            default:
                throw new SemanticException("Unexpected UNIQUEJOIN structure");
        }
    }
    joinTree.setBaseSrc(baseSrc.toArray(new String[0]));
    joinTree.setLeftAliases(leftAliases.toArray(new String[0]));
    joinTree.setRightAliases(rightAliases.toArray(new String[0]));
    JoinCond[] condn = new JoinCond[preserved.size()];
    for (int i = 0; i < condn.length; i++) {
        condn[i] = new JoinCond(preserved.get(i));
    }
    joinTree.setJoinCond(condn);
    if ((qb.getParseInfo().getHints() != null) && !(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez"))) {
        LOG.info("STREAMTABLE hint honored.");
        parseStreamTables(joinTree, qb);
    }
    return joinTree;
}
Also used : Node(org.apache.hadoop.hive.ql.lib.Node) RelNode(org.apache.calcite.rel.RelNode) ArrayList(java.util.ArrayList) MutableBoolean(org.apache.commons.lang.mutable.MutableBoolean) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
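
For orientation, the parse tree this method consumes comes from Hive's UNIQUEJOIN syntax, roughly of the shape below (an adapted sketch, not copied from a Hive test). Each PRESERVE keyword arrives as KW_PRESERVE and flips lastPreserved, each table reference as TOK_TABREF, and each parenthesized key list as TOK_EXPLIST, which is why all key lists must have the same column count.

String sql = "FROM UNIQUEJOIN PRESERVE t1 a (a.key), PRESERVE t2 b (b.key), t3 c (c.key) "
    + "SELECT a.key, b.key, c.key";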

Example 100 with Node

use of org.apache.hadoop.hive.ql.lib.Node in project hive by apache.

the class SemanticAnalyzer method preProcessForInsert.

/**
   * This will walk the AST of an INSERT statement and assemble a list of target tables
   * which are in an HDFS encryption zone.  This is needed to make sure that
   * the data from the values clause of Insert ... select values(...) is stored securely.
   * See also {@link #genValuesTempTable(ASTNode, QB)}
   * @throws SemanticException
   */
private void preProcessForInsert(ASTNode node, QB qb) throws SemanticException {
    try {
        if (!(node != null && node.getToken() != null && node.getToken().getType() == HiveParser.TOK_QUERY)) {
            return;
        }
        for (Node child : node.getChildren()) {
            //(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME T1)))
            if (((ASTNode) child).getToken().getType() != HiveParser.TOK_INSERT) {
                continue;
            }
            ASTNode n = (ASTNode) ((ASTNode) child).getFirstChildWithType(HiveParser.TOK_INSERT_INTO);
            if (n == null)
                continue;
            n = (ASTNode) n.getFirstChildWithType(HiveParser.TOK_TAB);
            if (n == null)
                continue;
            n = (ASTNode) n.getFirstChildWithType(HiveParser.TOK_TABNAME);
            if (n == null)
                continue;
            String[] dbTab = getQualifiedTableName(n);
            Table t = db.getTable(dbTab[0], dbTab[1]);
            Path tablePath = t.getPath();
            if (isPathEncrypted(tablePath)) {
                qb.addEncryptedTargetTablePath(tablePath);
            }
        }
    } catch (Exception ex) {
        throw new SemanticException(ex);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Table(org.apache.hadoop.hive.ql.metadata.Table) Node(org.apache.hadoop.hive.ql.lib.Node) RelNode(org.apache.calcite.rel.RelNode) IOException(java.io.IOException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) PatternSyntaxException(java.util.regex.PatternSyntaxException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(java.security.AccessControlException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException)
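
The chained getFirstChildWithType calls above implement a fixed-path descent through the AST that bails out as soon as any hop is missing. The same pattern, factored into a small helper for illustration only (not a Hive API; getFirstChildWithType comes from the ANTLR Tree base class that ASTNode extends):

private static ASTNode descend(ASTNode root, int... tokenTypes) {
    ASTNode cur = root;
    for (int type : tokenTypes) {
        if (cur == null) {
            return null;
        }
        cur = (ASTNode) cur.getFirstChildWithType(type);
    }
    return cur;
}
// e.g. descend(child, HiveParser.TOK_INSERT_INTO, HiveParser.TOK_TAB, HiveParser.TOK_TABNAME)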

Aggregations

Node (org.apache.hadoop.hive.ql.lib.Node): 103
ArrayList (java.util.ArrayList): 87
Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher): 78
DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 71
GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker): 70
LinkedHashMap (java.util.LinkedHashMap): 60
NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor): 59
Rule (org.apache.hadoop.hive.ql.lib.Rule): 58
DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker): 56
RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp): 50
HashMap (java.util.HashMap): 18
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 14
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 13
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 11
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 11
TaskGraphWalker (org.apache.hadoop.hive.ql.lib.TaskGraphWalker): 10
TypeRule (org.apache.hadoop.hive.ql.lib.TypeRule): 10
List (java.util.List): 8
RelNode (org.apache.calcite.rel.RelNode): 8
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 7