
Example 1 with ColumnAccessInfo

use of org.apache.hadoop.hive.ql.parse.ColumnAccessInfo in project hive by apache.

the class Driver method doAuthorization.

/**
 * Perform authorization using the post-semantic-analysis information in the
 * semantic analyzer. The original command is also passed so that the
 * authorization interface can provide more useful information in logs.
 * @param op HiveOperation being authorized
 * @param sem semantic analyzer used to parse the input query
 * @param command the input query
 * @throws HiveException
 * @throws AuthorizationException
 */
public static void doAuthorization(HiveOperation op, BaseSemanticAnalyzer sem, String command) throws HiveException, AuthorizationException {
    SessionState ss = SessionState.get();
    Hive db = sem.getDb();
    Set<ReadEntity> additionalInputs = new HashSet<ReadEntity>();
    for (Entity e : sem.getInputs()) {
        if (e.getType() == Entity.Type.PARTITION) {
            additionalInputs.add(new ReadEntity(e.getTable()));
        }
    }
    Set<WriteEntity> additionalOutputs = new HashSet<WriteEntity>();
    for (WriteEntity e : sem.getOutputs()) {
        if (e.getType() == Entity.Type.PARTITION) {
            additionalOutputs.add(new WriteEntity(e.getTable(), e.getWriteType()));
        }
    }
    // The following Sets.union call returns a view that iterates over the
    // first set once and then over each element of the second set, in order,
    // that is not already contained in the first. It therefore never replaces
    // anything in the first set, which preserves the WriteType of the
    // WriteEntity objects in the first set in the case of the outputs list.
    Set<ReadEntity> inputs = Sets.union(sem.getInputs(), additionalInputs);
    Set<WriteEntity> outputs = Sets.union(sem.getOutputs(), additionalOutputs);
    if (ss.isAuthorizationModeV2()) {
        // get mapping of tables to columns used
        ColumnAccessInfo colAccessInfo = sem.getColumnAccessInfo();
        // colAccessInfo is set only in case of SemanticAnalyzer
        Map<String, List<String>> selectTab2Cols = colAccessInfo != null ? colAccessInfo.getTableToColumnAccessMap() : null;
        Map<String, List<String>> updateTab2Cols = sem.getUpdateColumnAccessInfo() != null ? sem.getUpdateColumnAccessInfo().getTableToColumnAccessMap() : null;
        doAuthorizationV2(ss, op, inputs, outputs, command, selectTab2Cols, updateTab2Cols);
        return;
    }
    if (op == null) {
        throw new HiveException("Operation should not be null");
    }
    HiveAuthorizationProvider authorizer = ss.getAuthorizer();
    if (op.equals(HiveOperation.CREATEDATABASE)) {
        authorizer.authorize(op.getInputRequiredPrivileges(), op.getOutputRequiredPrivileges());
    } else if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.CREATETABLE)) {
        authorizer.authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null, HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
    } else {
        if (op.equals(HiveOperation.IMPORT)) {
            ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
            if (!isa.existsTable()) {
                authorizer.authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null, HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
            }
        }
    }
    if (outputs != null && outputs.size() > 0) {
        for (WriteEntity write : outputs) {
            if (write.isDummy() || write.isPathType()) {
                continue;
            }
            if (write.getType() == Entity.Type.DATABASE) {
                if (!op.equals(HiveOperation.IMPORT)) {
                    // We skip DB check for import here because we already handle it above
                    // as a CTAS check.
                    authorizer.authorize(write.getDatabase(), null, op.getOutputRequiredPrivileges());
                }
                continue;
            }
            if (write.getType() == WriteEntity.Type.PARTITION) {
                Partition part = db.getPartition(write.getTable(), write.getPartition().getSpec(), false);
                if (part != null) {
                    authorizer.authorize(write.getPartition(), null, op.getOutputRequiredPrivileges());
                    continue;
                }
            }
            if (write.getTable() != null) {
                authorizer.authorize(write.getTable(), null, op.getOutputRequiredPrivileges());
            }
        }
    }
    if (inputs != null && inputs.size() > 0) {
        Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>();
        Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>();
        // determine if partition level privileges should be checked for input tables
        Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
        for (ReadEntity read : inputs) {
            if (read.isDummy() || read.isPathType() || read.getType() == Entity.Type.DATABASE) {
                continue;
            }
            Table tbl = read.getTable();
            if ((read.getPartition() != null) || (tbl != null && tbl.isPartitioned())) {
                String tblName = tbl.getTableName();
                if (tableUsePartLevelAuth.get(tblName) == null) {
                    boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE".equalsIgnoreCase(tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))));
                    if (usePartLevelPriv) {
                        tableUsePartLevelAuth.put(tblName, Boolean.TRUE);
                    } else {
                        tableUsePartLevelAuth.put(tblName, Boolean.FALSE);
                    }
                }
            }
        }
        // column authorization is checked through table scan operators.
        getTablePartitionUsedColumns(op, sem, tab2Cols, part2Cols, tableUsePartLevelAuth);
        // cache the results for table authorization
        Set<String> tableAuthChecked = new HashSet<String>();
        for (ReadEntity read : inputs) {
            // if the read is not direct, we do not need to check its authorization.
            if (read.isDummy() || read.isPathType() || !read.isDirect()) {
                continue;
            }
            if (read.getType() == Entity.Type.DATABASE) {
                authorizer.authorize(read.getDatabase(), op.getInputRequiredPrivileges(), null);
                continue;
            }
            Table tbl = read.getTable();
            if (tbl.isView() && sem instanceof SemanticAnalyzer) {
                tab2Cols.put(tbl, sem.getColumnAccessInfo().getTableToColumnAccessMap().get(tbl.getCompleteName()));
            }
            if (read.getPartition() != null) {
                Partition partition = read.getPartition();
                tbl = partition.getTable();
                // use partition level authorization
                if (Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
                    List<String> cols = part2Cols.get(partition);
                    if (cols != null && cols.size() > 0) {
                        authorizer.authorize(partition.getTable(), partition, cols, op.getInputRequiredPrivileges(), null);
                    } else {
                        authorizer.authorize(partition, op.getInputRequiredPrivileges(), null);
                    }
                    continue;
                }
            }
            // table-level authorization (used when partition-level privileges are not enabled)
            if (tbl != null && !tableAuthChecked.contains(tbl.getTableName()) && !(Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName())))) {
                List<String> cols = tab2Cols.get(tbl);
                if (cols != null && cols.size() > 0) {
                    authorizer.authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null);
                } else {
                    authorizer.authorize(tbl, op.getInputRequiredPrivileges(), null);
                }
                tableAuthChecked.add(tbl.getTableName());
            }
        }
    }
}
Also used : HiveAuthorizationProvider(org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider) SessionState(org.apache.hadoop.hive.ql.session.SessionState) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Entity(org.apache.hadoop.hive.ql.hooks.Entity) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ImportSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) List(java.util.List) LinkedList(java.util.LinkedList) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) LinkedHashSet(java.util.LinkedHashSet) HashSet(java.util.HashSet) Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.ql.metadata.Table) SemanticAnalyzer(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) ImportSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer) ColumnAccessInfo(org.apache.hadoop.hive.ql.parse.ColumnAccessInfo) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Hive(org.apache.hadoop.hive.ql.metadata.Hive)
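The Sets.union call in this method is doing subtle work: the Guava view iterates the first set and then only those elements of the second set not already present, so the WriteEntity instances from sem.getOutputs() (and their WriteType) win over the synthesized table-level entities. A minimal sketch of that Guava behavior, with hypothetical class and field names (Entry stands in for WriteEntity and is not a Hive class):

public class UnionPreservesFirstSet {

    /** Hypothetical stand-in for WriteEntity: equality ignores the "type" field. */
    static final class Entry {
        final String name;
        final String type;
        Entry(String name, String type) { this.name = name; this.type = type; }
        @Override public boolean equals(Object o) {
            return o instanceof Entry && ((Entry) o).name.equals(name);
        }
        @Override public int hashCode() { return java.util.Objects.hash(name); }
        @Override public String toString() { return name + "/" + type; }
    }

    public static void main(String[] args) {
        java.util.Set<Entry> outputs =
                com.google.common.collect.ImmutableSet.of(new Entry("t1", "INSERT"));
        java.util.Set<Entry> additional =
                com.google.common.collect.ImmutableSet.of(new Entry("t1", "DDL"));
        // The union view yields the first set's instance for equal elements,
        // so "t1/INSERT" survives and the additional "t1/DDL" entry is dropped.
        System.out.println(com.google.common.collect.Sets.union(outputs, additional));
        // prints: [t1/INSERT]
    }
}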

Example 2 with ColumnAccessInfo

use of org.apache.hadoop.hive.ql.parse.ColumnAccessInfo in project hive by apache.

the class ShowPartitionAnalyzer method analyzeInternal.

@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
    ctx.setResFile(ctx.getLocalTmpPath());
    String tableName = getUnescapedName((ASTNode) ast.getChild(0));
    List<Map<String, String>> partSpecs = getPartitionSpecs(getTable(tableName), ast);
    assert (partSpecs.size() <= 1);
    Map<String, String> partSpec = (partSpecs.size() > 0) ? partSpecs.get(0) : null;
    Table table = getTable(HiveTableName.of(tableName));
    inputs.add(new ReadEntity(table));
    setColumnAccessInfo(new ColumnAccessInfo());
    table.getPartColNames().forEach(col -> getColumnAccessInfo().add(table.getCompleteName(), col));
    ExprNodeDesc filter = getShowPartitionsFilter(table, ast);
    String orderBy = getShowPartitionsOrder(table, ast);
    short limit = getShowPartitionsLimit(ast);
    ShowPartitionsDesc desc = new ShowPartitionsDesc(tableName, ctx.getResFile(), partSpec, filter, orderBy, limit);
    Task<DDLWork> task = TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc));
    rootTasks.add(task);
    task.setFetchSource(true);
    setFetchTask(createFetchTask(ShowPartitionsDesc.SCHEMA));
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Table(org.apache.hadoop.hive.ql.metadata.Table) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashMap(java.util.HashMap) Map(java.util.Map) ColumnAccessInfo(org.apache.hadoop.hive.ql.parse.ColumnAccessInfo)

Example 3 with ColumnAccessInfo

use of org.apache.hadoop.hive.ql.parse.ColumnAccessInfo in project hive by apache.

the class HiveRelFieldTrimmer method trimFields.

public TrimResult trimFields(HiveTableScan tableAccessRel, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
    final TrimResult result = super.trimFields(tableAccessRel, fieldsUsed, extraFields);
    final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
    if (columnAccessInfo != null) {
        // Store information about the columns accessed by the table scan so that
        // only this information needs to be sent on for column masking
        final RelOptHiveTable tab = (RelOptHiveTable) tableAccessRel.getTable();
        final String qualifiedName = tab.getHiveTableMD().getCompleteName();
        final List<FieldSchema> allCols = tab.getHiveTableMD().getAllCols();
        final boolean insideView = tableAccessRel.isInsideView();
        fieldsUsed.asList().stream().filter(idx -> idx < tab.getNoOfNonVirtualCols()).forEach(idx -> {
            if (insideView) {
                columnAccessInfo.addIndirect(qualifiedName, allCols.get(idx).getName());
            } else {
                columnAccessInfo.add(qualifiedName, allCols.get(idx).getName());
            }
        });
    }
    if (fetchStats) {
        fetchColStats(result.getKey(), tableAccessRel, fieldsUsed, extraFields);
    }
    return result;
}
Also used : ColumnAccessInfo(org.apache.hadoop.hive.ql.parse.ColumnAccessInfo) Mappings(org.apache.calcite.util.mapping.Mappings) MappingType(org.apache.calcite.util.mapping.MappingType) LoggerFactory(org.slf4j.LoggerFactory) IntPair(org.apache.calcite.util.mapping.IntPair) HiveProject(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject) HiveTableScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan) RexUtil(org.apache.calcite.rex.RexUtil) CorrelationId(org.apache.calcite.rel.core.CorrelationId) RexNode(org.apache.calcite.rex.RexNode) RelBuilder(org.apache.calcite.tools.RelBuilder) Map(java.util.Map) HiveAggregate(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate) HiveSortExchange(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) TableFunctionScan(org.apache.calcite.rel.core.TableFunctionScan) SqlKind(org.apache.calcite.sql.SqlKind) RexLiteral(org.apache.calcite.rex.RexLiteral) Set(java.util.Set) RelFieldCollation(org.apache.calcite.rel.RelFieldCollation) List(java.util.List) RelMetadataQuery(org.apache.calcite.rel.metadata.RelMetadataQuery) RelCollation(org.apache.calcite.rel.RelCollation) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RexCorrelVariable(org.apache.calcite.rex.RexCorrelVariable) RexTableInputRef(org.apache.calcite.rex.RexTableInputRef) RexCall(org.apache.calcite.rex.RexCall) Project(org.apache.calcite.rel.core.Project) TableScan(org.apache.calcite.rel.core.TableScan) Iterables(com.google.common.collect.Iterables) RexFieldAccess(org.apache.calcite.rex.RexFieldAccess) HiveTableFunctionScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan) HashMap(java.util.HashMap) Ord(org.apache.calcite.linq4j.Ord) RelOptUtil(org.apache.calcite.plan.RelOptUtil) ArrayList(java.util.ArrayList) RelOptTable(org.apache.calcite.plan.RelOptTable) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Pair(org.apache.calcite.util.Pair) Mapping(org.apache.calcite.util.mapping.Mapping) RexPermuteInputsShuttle(org.apache.calcite.rex.RexPermuteInputsShuttle) DruidQuery(org.apache.calcite.adapter.druid.DruidQuery) LinkedHashSet(java.util.LinkedHashSet) RelDataType(org.apache.calcite.rel.type.RelDataType) Logger(org.slf4j.Logger) RexBuilder(org.apache.calcite.rex.RexBuilder) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) Table(org.apache.hadoop.hive.ql.metadata.Table) RelNode(org.apache.calcite.rel.RelNode) Aggregate(org.apache.calcite.rel.core.Aggregate) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) HiveCalciteUtil(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil) RexVisitor(org.apache.calcite.rex.RexVisitor) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) RelDistribution(org.apache.calcite.rel.RelDistribution) AggregateCall(org.apache.calcite.rel.core.AggregateCall) CorrelationReferenceFinder(org.apache.calcite.sql2rel.CorrelationReferenceFinder) Collections(java.util.Collections) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ColumnAccessInfo(org.apache.hadoop.hive.ql.parse.ColumnAccessInfo)
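Outside the planner, the ColumnAccessInfo calls used above reduce to a small API: add for direct column reads, addIndirect for columns reached through a view, and getTableToColumnAccessMap() for the table-to-columns map that doAuthorization in Example 1 hands to doAuthorizationV2. A rough standalone sketch of that pattern; the class name, helper method, and table/column names are made up, and the exact handling of indirect accesses inside ColumnAccessInfo is not shown here:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;

public class ColumnAccessSketch {

    /** Record the used columns of a scan, the way trimFields does above. */
    static void recordUsedColumns(ColumnAccessInfo info, String qualifiedName,
                                  List<String> allCols, List<Integer> usedIdx,
                                  boolean insideView) {
        for (int idx : usedIdx) {
            if (insideView) {
                // column reached through a view: recorded as an indirect access
                info.addIndirect(qualifiedName, allCols.get(idx));
            } else {
                // column read directly from the table
                info.add(qualifiedName, allCols.get(idx));
            }
        }
    }

    public static void main(String[] args) {
        ColumnAccessInfo info = new ColumnAccessInfo();
        recordUsedColumns(info, "default.src", Arrays.asList("key", "value", "ds"),
                Arrays.asList(0, 2), /* insideView = */ false);
        // Same kind of map that Example 1 passes to doAuthorizationV2 as selectTab2Cols.
        System.out.println(info.getTableToColumnAccessMap());
    }
}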

Example 4 with ColumnAccessInfo

use of org.apache.hadoop.hive.ql.parse.ColumnAccessInfo in project flink by apache.

the class HiveParserCalcitePlanner method logicalPlan.

private RelNode logicalPlan() {
    if (semanticAnalyzer.columnAccessInfo == null) {
        semanticAnalyzer.columnAccessInfo = new ColumnAccessInfo();
    }
    subqueryId = 0;
    relToRowResolver.clear();
    relToHiveColNameCalcitePosMap.clear();
    try {
        RelNode plan = genLogicalPlan(getQB(), true, null, null);
        if (createViewInfo != null) {
            semanticAnalyzer.resultSchema = HiveParserUtils.convertRowSchemaToResultSetSchema(relToRowResolver.get(plan), false);
            HiveParserUtils.saveViewDefinition(semanticAnalyzer.resultSchema, createViewInfo, semanticAnalyzer.ctx.getTokenRewriteStream(), semanticAnalyzer.unparseTranslator, semanticAnalyzer.getConf());
        } else if (ctasCols != null) {
            // CTAS doesn't allow specifying col list, so we set it according to result schema
            semanticAnalyzer.resultSchema = HiveParserUtils.convertRowSchemaToResultSetSchema(relToRowResolver.get(plan), false);
            ctasCols.addAll(semanticAnalyzer.resultSchema);
        }
        return plan;
    } catch (SemanticException e) {
        throw new RuntimeException(e);
    }
}
Also used : RelNode(org.apache.calcite.rel.RelNode) ColumnAccessInfo(org.apache.hadoop.hive.ql.parse.ColumnAccessInfo) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 5 with ColumnAccessInfo

use of org.apache.hadoop.hive.ql.parse.ColumnAccessInfo in project hive by apache.

the class HiveRelFieldTrimmer method trimFields.

/**
 * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
 * {@link org.apache.calcite.rel.logical.LogicalProject}.
 */
public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
    // set columnAccessInfo for ViewColumnAuthorization
    final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
    final Map<HiveProject, Table> viewProjectToTableSchema = VIEW_PROJECT_TO_TABLE_SCHEMA.get();
    if (columnAccessInfo != null && viewProjectToTableSchema != null && viewProjectToTableSchema.containsKey(project)) {
        for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
            if (fieldsUsed.get(ord.i)) {
                Table tab = viewProjectToTableSchema.get(project);
                columnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName());
            }
        }
    }
    return super.trimFields(project, fieldsUsed, extraFields);
}
Also used : RelOptTable(org.apache.calcite.plan.RelOptTable) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveProject(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject) ColumnAccessInfo(org.apache.hadoop.hive.ql.parse.ColumnAccessInfo) RexNode(org.apache.calcite.rex.RexNode)
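The Ord.zip call above is Calcite's linq4j helper for pairing each list element with its index (ord.i is the index, ord.e the element), which is how the loop maps fieldsUsed bit positions back to the view's column names. A small sketch of that helper on its own; the class and column names are illustrative:

import java.util.Arrays;
import java.util.List;
import org.apache.calcite.linq4j.Ord;

public class OrdZipSketch {
    public static void main(String[] args) {
        List<String> cols = Arrays.asList("key", "value", "ds");
        // Ord.zip pairs each element with its position: (0, key), (1, value), (2, ds)
        for (Ord<String> ord : Ord.zip(cols)) {
            System.out.println(ord.i + " -> " + ord.e);
        }
    }
}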

Aggregations

ColumnAccessInfo (org.apache.hadoop.hive.ql.parse.ColumnAccessInfo)6 Table (org.apache.hadoop.hive.ql.metadata.Table)4 HashMap (java.util.HashMap)3 List (java.util.List)3 Map (java.util.Map)3 ArrayList (java.util.ArrayList)2 LinkedHashMap (java.util.LinkedHashMap)2 LinkedHashSet (java.util.LinkedHashSet)2 RelOptTable (org.apache.calcite.plan.RelOptTable)2 RelNode (org.apache.calcite.rel.RelNode)2 RexNode (org.apache.calcite.rex.RexNode)2 ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity)2 ImmutableList (com.google.common.collect.ImmutableList)1 Iterables (com.google.common.collect.Iterables)1 Lists (com.google.common.collect.Lists)1 Collections (java.util.Collections)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1 Set (java.util.Set)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1