
Example 1 with SemanticAnalyzer

Use of org.apache.hadoop.hive.ql.parse.SemanticAnalyzer in project hive by apache.

From class QTestUtil, method init():

public void init() throws Exception {
    // Create remote dirs once.
    if (mr != null) {
        createRemoteDirs();
    }
    testWarehouse = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
    // Start the session under the MR engine, then restore the configured engine.
    String execEngine = conf.get("hive.execution.engine");
    conf.set("hive.execution.engine", "mr");
    SessionState.start(conf);
    conf.set("hive.execution.engine", execEngine);
    db = Hive.get(conf);
    drv = new Driver(conf);
    drv.init();
    pd = new ParseDriver();
    sem = new SemanticAnalyzer(queryState);
}
Also used: ParseDriver(org.apache.hadoop.hive.ql.parse.ParseDriver) CliDriver(org.apache.hadoop.hive.cli.CliDriver) SemanticAnalyzer(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer)
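After init() returns, the ParseDriver and SemanticAnalyzer fields are ready to drive a query through parsing and analysis. A minimal follow-up sketch, assuming the same fields (conf, pd, sem) and the Hive APIs shown in these examples; the query text is illustrative:

// Hedged sketch: parse a query and analyze it with the fields set up in init().
Context ctx = new Context(conf);
ASTNode tree = pd.parse("SELECT key, value FROM src", ctx);  // illustrative query
sem.analyze(tree, ctx);
// After analysis, the analyzer exposes the read/write entity sets:
System.out.println("inputs: " + sem.getInputs());
System.out.println("outputs: " + sem.getOutputs());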

Example 2 with SemanticAnalyzer

Use of org.apache.hadoop.hive.ql.parse.SemanticAnalyzer in project hive by apache.

From class RewriteParseContextGenerator, method generateOperatorTree():

/**
   * Parses the input {@link String} command and generates an operator tree.
   * @param queryState the query state carrying the Hive configuration
   * @param command the query string to parse
   * @return the root operator of the generated tree
   * @throws SemanticException if parsing or semantic analysis fails
   */
public static Operator<? extends OperatorDesc> generateOperatorTree(QueryState queryState, String command) throws SemanticException {
    Operator<? extends OperatorDesc> operatorTree;
    try {
        Context ctx = new Context(queryState.getConf());
        ASTNode tree = ParseUtils.parse(command, ctx);
        BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
        assert (sem instanceof SemanticAnalyzer);
        operatorTree = doSemanticAnalysis((SemanticAnalyzer) sem, tree, ctx);
        LOG.info("Sub-query Semantic Analysis Completed");
    } catch (IOException e) {
        LOG.error("IOException in generating the operator " + "tree for input command - " + command + " ", e);
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    } catch (ParseException e) {
        LOG.error("ParseException in generating the operator " + "tree for input command - " + command + " ", e);
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    } catch (SemanticException e) {
        LOG.error("SemanticException in generating the operator " + "tree for input command - " + command + " ", e);
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    }
    return operatorTree;
}
Also used: ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Context(org.apache.hadoop.hive.ql.Context) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) SemanticAnalyzer(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer) IOException(java.io.IOException) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
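A caller supplies a QueryState and the command text. A minimal usage sketch; note that how a QueryState is constructed varies across Hive versions, so the constructor below is an assumption:

// Sketch: generate an operator tree for a sub-query and inspect the root.
HiveConf conf = new HiveConf();
QueryState queryState = new QueryState(conf);  // assumption: construction is version-dependent
Operator<? extends OperatorDesc> root =
        RewriteParseContextGenerator.generateOperatorTree(queryState, "SELECT count(*) FROM src");
System.out.println("root operator: " + root.getName());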

Example 3 with SemanticAnalyzer

Use of org.apache.hadoop.hive.ql.parse.SemanticAnalyzer in project hive by apache.

From class Driver, method doAuthorization():

/**
   * Do authorization using post semantic analysis information in the semantic analyzer
   * The original command is also passed so that authorization interface can provide
   * more useful information in logs.
   * @param sem SemanticAnalyzer used to parse input query
   * @param command input query
   * @throws HiveException
   * @throws AuthorizationException
   */
public static void doAuthorization(HiveOperation op, BaseSemanticAnalyzer sem, String command) throws HiveException, AuthorizationException {
    SessionState ss = SessionState.get();
    Hive db = sem.getDb();
    Set<ReadEntity> additionalInputs = new HashSet<ReadEntity>();
    for (Entity e : sem.getInputs()) {
        if (e.getType() == Entity.Type.PARTITION) {
            additionalInputs.add(new ReadEntity(e.getTable()));
        }
    }
    Set<WriteEntity> additionalOutputs = new HashSet<WriteEntity>();
    for (WriteEntity e : sem.getOutputs()) {
        if (e.getType() == Entity.Type.PARTITION) {
            additionalOutputs.add(new WriteEntity(e.getTable(), e.getWriteType()));
        }
    }
    // The following union operation returns a view that traverses the
    // first set once, and then each element of the second set, in order,
    // that is not contained in the first. It therefore never replaces an
    // element of the first set, which preserves the WriteType of each
    // WriteEntity in the first set for the outputs list.
    Set<ReadEntity> inputs = Sets.union(sem.getInputs(), additionalInputs);
    Set<WriteEntity> outputs = Sets.union(sem.getOutputs(), additionalOutputs);
    if (ss.isAuthorizationModeV2()) {
        // get mapping of tables to columns used
        ColumnAccessInfo colAccessInfo = sem.getColumnAccessInfo();
        // colAccessInfo is set only in case of SemanticAnalyzer
        Map<String, List<String>> selectTab2Cols = colAccessInfo != null ? colAccessInfo.getTableToColumnAccessMap() : null;
        Map<String, List<String>> updateTab2Cols = sem.getUpdateColumnAccessInfo() != null ? sem.getUpdateColumnAccessInfo().getTableToColumnAccessMap() : null;
        doAuthorizationV2(ss, op, inputs, outputs, command, selectTab2Cols, updateTab2Cols);
        return;
    }
    if (op == null) {
        throw new HiveException("Operation should not be null");
    }
    HiveAuthorizationProvider authorizer = ss.getAuthorizer();
    if (op.equals(HiveOperation.CREATEDATABASE)) {
        authorizer.authorize(op.getInputRequiredPrivileges(), op.getOutputRequiredPrivileges());
    } else if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.CREATETABLE)) {
        authorizer.authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null, HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
    } else {
        if (op.equals(HiveOperation.IMPORT)) {
            ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
            if (!isa.existsTable()) {
                authorizer.authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null, HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
            }
        }
    }
    if (outputs != null && outputs.size() > 0) {
        for (WriteEntity write : outputs) {
            if (write.isDummy() || write.isPathType()) {
                continue;
            }
            if (write.getType() == Entity.Type.DATABASE) {
                if (!op.equals(HiveOperation.IMPORT)) {
                    // We skip DB check for import here because we already handle it above
                    // as a CTAS check.
                    authorizer.authorize(write.getDatabase(), null, op.getOutputRequiredPrivileges());
                }
                continue;
            }
            if (write.getType() == WriteEntity.Type.PARTITION) {
                Partition part = db.getPartition(write.getTable(), write.getPartition().getSpec(), false);
                if (part != null) {
                    authorizer.authorize(write.getPartition(), null, op.getOutputRequiredPrivileges());
                    continue;
                }
            }
            if (write.getTable() != null) {
                authorizer.authorize(write.getTable(), null, op.getOutputRequiredPrivileges());
            }
        }
    }
    if (inputs != null && inputs.size() > 0) {
        Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>();
        Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>();
        // determine if partition-level privileges should be checked for input tables
        Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
        for (ReadEntity read : inputs) {
            if (read.isDummy() || read.isPathType() || read.getType() == Entity.Type.DATABASE) {
                continue;
            }
            Table tbl = read.getTable();
            if ((read.getPartition() != null) || (tbl != null && tbl.isPartitioned())) {
                String tblName = tbl.getTableName();
                if (tableUsePartLevelAuth.get(tblName) == null) {
                    boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE".equalsIgnoreCase(tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))));
                    if (usePartLevelPriv) {
                        tableUsePartLevelAuth.put(tblName, Boolean.TRUE);
                    } else {
                        tableUsePartLevelAuth.put(tblName, Boolean.FALSE);
                    }
                }
            }
        }
        // column authorization is checked through table scan operators.
        getTablePartitionUsedColumns(op, sem, tab2Cols, part2Cols, tableUsePartLevelAuth);
        // cache the results for table authorization
        Set<String> tableAuthChecked = new HashSet<String>();
        for (ReadEntity read : inputs) {
            // if the read is not direct, we do not need to check its authorization.
            if (read.isDummy() || read.isPathType() || !read.isDirect()) {
                continue;
            }
            if (read.getType() == Entity.Type.DATABASE) {
                authorizer.authorize(read.getDatabase(), op.getInputRequiredPrivileges(), null);
                continue;
            }
            Table tbl = read.getTable();
            if (tbl.isView() && sem instanceof SemanticAnalyzer) {
                tab2Cols.put(tbl, sem.getColumnAccessInfo().getTableToColumnAccessMap().get(tbl.getCompleteName()));
            }
            if (read.getPartition() != null) {
                Partition partition = read.getPartition();
                tbl = partition.getTable();
                // use partition level authorization
                if (Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
                    List<String> cols = part2Cols.get(partition);
                    if (cols != null && cols.size() > 0) {
                        authorizer.authorize(partition.getTable(), partition, cols, op.getInputRequiredPrivileges(), null);
                    } else {
                        authorizer.authorize(partition, op.getInputRequiredPrivileges(), null);
                    }
                    continue;
                }
            }
            // if we reach here, a table-level authorization check is needed; it may
            // already have been done for this table because of another of its
            // partitions
            if (tbl != null && !tableAuthChecked.contains(tbl.getTableName()) && !(Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName())))) {
                List<String> cols = tab2Cols.get(tbl);
                if (cols != null && cols.size() > 0) {
                    authorizer.authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null);
                } else {
                    authorizer.authorize(tbl, op.getInputRequiredPrivileges(), null);
                }
                tableAuthChecked.add(tbl.getTableName());
            }
        }
    }
}
Also used: HiveAuthorizationProvider(org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider) SessionState(org.apache.hadoop.hive.ql.session.SessionState) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Entity(org.apache.hadoop.hive.ql.hooks.Entity) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ImportSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) List(java.util.List) LinkedList(java.util.LinkedList) LinkedHashSet(java.util.LinkedHashSet) HashSet(java.util.HashSet) Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.ql.metadata.Table) SemanticAnalyzer(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) ColumnAccessInfo(org.apache.hadoop.hive.ql.parse.ColumnAccessInfo) Hive(org.apache.hadoop.hive.ql.metadata.Hive)
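The Sets.union comment in the method is worth pinning down: Guava's union is an unmodifiable view that iterates the first set, then only those elements of the second set not contained in the first, so an entity already in sem.getOutputs() keeps its WriteType. A minimal sketch with strings standing in for the entity types:

// Demonstrates Guava Sets.union precedence on equal elements.
Set<String> first = Sets.newHashSet("a", "b");
Set<String> second = Sets.newHashSet("b", "c");
// Iterates "a" and "b" from the first set, then "c"; the "b" from the
// second set is never visited, so the first set's instance wins.
for (String s : Sets.union(first, second)) {
    System.out.println(s);
}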

Example 4 with SemanticAnalyzer

Use of org.apache.hadoop.hive.ql.parse.SemanticAnalyzer in project hive by apache.

From class Driver, method getTablePartitionUsedColumns():

private static void getTablePartitionUsedColumns(HiveOperation op, BaseSemanticAnalyzer sem, Map<Table, List<String>> tab2Cols, Map<Partition, List<String>> part2Cols, Map<String, Boolean> tableUsePartLevelAuth) throws HiveException {
    // table to columns mapping (tab2Cols)
    if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) {
        SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
        ParseContext parseCtx = querySem.getParseContext();
        for (Map.Entry<String, TableScanOperator> topOpMap : querySem.getParseContext().getTopOps().entrySet()) {
            TableScanOperator tableScanOp = topOpMap.getValue();
            if (!tableScanOp.isInsideView()) {
                Table tbl = tableScanOp.getConf().getTableMetadata();
                List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
                List<FieldSchema> columns = tbl.getCols();
                List<String> cols = new ArrayList<String>();
                for (int i = 0; i < neededColumnIds.size(); i++) {
                    cols.add(columns.get(neededColumnIds.get(i)).getName());
                }
                // table permission
                if (tbl.isPartitioned() && Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
                    String alias_id = topOpMap.getKey();
                    PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp, parseCtx, alias_id);
                    Set<Partition> parts = partsList.getPartitions();
                    for (Partition part : parts) {
                        List<String> existingCols = part2Cols.get(part);
                        if (existingCols == null) {
                            existingCols = new ArrayList<String>();
                        }
                        existingCols.addAll(cols);
                        part2Cols.put(part, existingCols);
                    }
                } else {
                    List<String> existingCols = tab2Cols.get(tbl);
                    if (existingCols == null) {
                        existingCols = new ArrayList<String>();
                    }
                    existingCols.addAll(cols);
                    tab2Cols.put(tbl, existingCols);
                }
            }
        }
    }
}
Also used: Partition(org.apache.hadoop.hive.ql.metadata.Partition) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) SemanticAnalyzer(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) ImportSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)
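The heart of this method is mapping each TableScanOperator's needed column IDs onto the table schema. That step can be shown standalone; the lists below are illustrative stand-ins for tbl.getCols() and tableScanOp.getNeededColumnIDs():

// Mirrors the inner loop of getTablePartitionUsedColumns with plain lists.
List<String> columnNames = Arrays.asList("key", "value", "ds");  // stand-in for tbl.getCols()
List<Integer> neededColumnIds = Arrays.asList(0, 2);             // stand-in for getNeededColumnIDs()
List<String> cols = new ArrayList<>();
for (int id : neededColumnIds) {
    cols.add(columnNames.get(id));
}
System.out.println(cols);  // [key, ds]: only these columns need read privileges checked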

Example 5 with SemanticAnalyzer

Use of org.apache.hadoop.hive.ql.parse.SemanticAnalyzer in project phoenix by apache.

From class HiveTestUtil, method init():

public void init() throws Exception {
    testWarehouse = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
    conf.setBoolVar(HiveConf.ConfVars.SUBMITLOCALTASKVIACHILD, false);
    // Start the session under the MR engine, then restore the configured engine.
    String execEngine = conf.get("hive.execution.engine");
    conf.set("hive.execution.engine", "mr");
    SessionState.start(conf);
    conf.set("hive.execution.engine", execEngine);
    db = Hive.get(conf);
    pd = new ParseDriver();
    sem = new SemanticAnalyzer(conf);
}
Also used: ParseDriver(org.apache.hadoop.hive.ql.parse.ParseDriver) SemanticAnalyzer(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer)
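Note the constructor difference from Example 1: this older API takes the HiveConf directly, where newer Hive versions take a QueryState. A usage sketch against this variant, assuming the fields initialized above; the query text is illustrative:

// Hedged sketch: parse and analyze with the HiveConf-based analyzer.
Context ctx = new Context(conf);
ASTNode tree = pd.parse("SELECT * FROM t1", ctx);  // illustrative query
sem.analyze(tree, ctx);
List<FieldSchema> schema = sem.getResultSchema();  // result schema is available after analysis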

Aggregations

BaseSemanticAnalyzer (org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer): 7 uses
SemanticAnalyzer (org.apache.hadoop.hive.ql.parse.SemanticAnalyzer): 7 uses
ParseDriver (org.apache.hadoop.hive.ql.parse.ParseDriver): 4 uses
ArrayList (java.util.ArrayList): 2 uses
HashMap (java.util.HashMap): 2 uses
LinkedHashMap (java.util.LinkedHashMap): 2 uses
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 2 uses
Table (org.apache.hadoop.hive.ql.metadata.Table): 2 uses
ImportSemanticAnalyzer (org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer): 2 uses
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 2 uses
PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList): 2 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 1 use
IOException (java.io.IOException): 1 use
HashSet (java.util.HashSet): 1 use
LinkedHashSet (java.util.LinkedHashSet): 1 use
LinkedList (java.util.LinkedList): 1 use
List (java.util.List): 1 use
Map (java.util.Map): 1 use
CliDriver (org.apache.hadoop.hive.cli.CliDriver): 1 use
ValidTxnList (org.apache.hadoop.hive.common.ValidTxnList): 1 use