
Example 51 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class ColumnAccessAnalyzer method analyzeColumnAccess.

public ColumnAccessInfo analyzeColumnAccess(ColumnAccessInfo columnAccessInfo) throws SemanticException {
    if (columnAccessInfo == null) {
        columnAccessInfo = new ColumnAccessInfo();
    }
    Collection<TableScanOperator> topOps = pGraphContext.getTopOps().values();
    for (TableScanOperator top : topOps) {
        // if a table is inside a view, we do not care about its authorization.
        if (!top.isInsideView()) {
            Table table = top.getConf().getTableMetadata();
            String tableName = table.getCompleteName();
            List<String> referenced = top.getReferencedColumns();
            for (String column : referenced) {
                columnAccessInfo.add(tableName, column);
            }
            if (table.isPartitioned()) {
                PrunedPartitionList parts = pGraphContext.getPrunedPartitions(table.getTableName(), top);
                if (parts.getReferredPartCols() != null) {
                    for (String partKey : parts.getReferredPartCols()) {
                        columnAccessInfo.add(tableName, partKey);
                    }
                }
            }
        }
    }
    return columnAccessInfo;
}
Also used: TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table)
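For context, a hedged usage sketch (not from the Hive sources) of how the analyzer might be driven and its result consumed. It assumes ColumnAccessAnalyzer can be constructed from a populated ParseContext and that ColumnAccessInfo exposes its table-to-columns map via getTableToColumnAccessMap(); the helper name dumpColumnAccess and the logging loop are purely illustrative.

// Hypothetical helper, not part of the Hive sources.
private void dumpColumnAccess(ParseContext pctx) throws SemanticException {
    // run the analysis; passing null lets analyzeColumnAccess allocate the result
    ColumnAccessInfo info = new ColumnAccessAnalyzer(pctx).analyzeColumnAccess(null);
    // assumed accessor: table name -> list of referenced column names
    for (Map.Entry<String, List<String>> e : info.getTableToColumnAccessMap().entrySet()) {
        LOG.debug("table " + e.getKey() + " is read for columns " + e.getValue());
    }
}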

Example 52 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class RewriteQueryUsingAggregateIndexCtx method replaceTableScanProcess.

/**
   * Replaces the original TableScanOperator with a new TableScanOperator whose
   * descriptor and metadata scan over the index table rather than the original
   * table.
   */
private void replaceTableScanProcess(TableScanOperator scanOperator) throws SemanticException {
    RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
    String alias = rewriteQueryCtx.getAlias();
    // Need to remove the original TableScanOperators from these data structures
    // and add new ones
    HashMap<String, TableScanOperator> topOps = rewriteQueryCtx.getParseContext().getTopOps();
    // remove original TableScanOperator
    topOps.remove(alias);
    String indexTableName = rewriteQueryCtx.getIndexName();
    Table indexTableHandle = null;
    try {
        indexTableHandle = rewriteQueryCtx.getHiveDb().getTable(indexTableName);
    } catch (HiveException e) {
        LOG.error("Error while getting the table handle for index table.");
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    }
    // construct a new descriptor for the index table scan
    TableScanDesc indexTableScanDesc = new TableScanDesc(indexTableHandle);
    indexTableScanDesc.setGatherStats(false);
    String k = MetaStoreUtils.encodeTableName(indexTableName) + Path.SEPARATOR;
    indexTableScanDesc.setStatsAggPrefix(k);
    scanOperator.setConf(indexTableScanDesc);
    // Construct the new RowResolver for the new TableScanOperator
    ArrayList<ColumnInfo> sigRS = new ArrayList<ColumnInfo>();
    try {
        StructObjectInspector rowObjectInspector = (StructObjectInspector) indexTableHandle.getDeserializer().getObjectInspector();
        StructField field = rowObjectInspector.getStructFieldRef(rewriteQueryCtx.getIndexKey());
        sigRS.add(new ColumnInfo(field.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()), indexTableName, false));
    } catch (SerDeException e) {
        LOG.error("Error while creating the RowResolver for new TableScanOperator.");
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    }
    RowSchema rs = new RowSchema(sigRS);
    // Set row resolver for new table
    String newAlias = indexTableName;
    int index = alias.lastIndexOf(":");
    if (index >= 0) {
        newAlias = alias.substring(0, index) + ":" + indexTableName;
    }
    // The scan operator now points to the other table
    scanOperator.getConf().setAlias(newAlias);
    scanOperator.setAlias(indexTableName);
    topOps.put(newAlias, scanOperator);
    rewriteQueryCtx.getParseContext().setTopOps(topOps);
    ColumnPrunerProcFactory.setupNeededColumns(scanOperator, rs, Arrays.asList(new FieldNode(rewriteQueryCtx.getIndexKey())));
}
Also used: TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FieldNode(org.apache.hadoop.hive.ql.optimizer.FieldNode) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
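One detail worth isolating is the alias rewrite near the end: a subquery-qualified alias keeps its prefix, and only the last component is swapped for the index table's name. A standalone restatement of that logic (illustrative helper, not in the Hive sources; the sample aliases in the comments are hypothetical):

// Mirrors the lastIndexOf(":") handling above.
private static String rewriteAlias(String alias, String indexTableName) {
    int index = alias.lastIndexOf(":");
    if (index >= 0) {
        // qualified alias: "sq1:t1" becomes "sq1:<indexTableName>"
        return alias.substring(0, index) + ":" + indexTableName;
    }
    // unqualified alias is replaced outright
    return indexTableName;
}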

Example 53 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class ExprProcFactory method findSourceColumn.

private static boolean findSourceColumn(LineageCtx lctx, Predicate cond, String tabAlias, String alias) {
    for (Map.Entry<String, TableScanOperator> topOpMap : lctx.getParseCtx().getTopOps().entrySet()) {
        TableScanOperator tableScanOp = topOpMap.getValue();
        Table tbl = tableScanOp.getConf().getTableMetadata();
        if (tbl.getTableName().equals(tabAlias) || tabAlias.equals(tableScanOp.getConf().getAlias())) {
            for (FieldSchema column : tbl.getCols()) {
                if (column.getName().equals(alias)) {
                    TableAliasInfo table = new TableAliasInfo();
                    table.setTable(tbl.getTTable());
                    table.setAlias(tabAlias);
                    BaseColumnInfo colInfo = new BaseColumnInfo();
                    colInfo.setColumn(column);
                    colInfo.setTabAlias(table);
                    cond.getBaseCols().add(colInfo);
                    return true;
                }
            }
        }
    }
    return false;
}
Also used: TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) TableAliasInfo(org.apache.hadoop.hive.ql.hooks.LineageInfo.TableAliasInfo) BaseColumnInfo(org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
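Reduced to plain strings, the matching rule at the heart of this method is the predicate below (an illustrative reduction, not the Hive API; parameter names are hypothetical): a scan matches when the condition's table alias equals either the table's name or the alias the scan was registered under.

// Illustrative reduction of the tbl.getTableName() / getConf().getAlias() match above.
private static boolean scanMatches(String tabAlias, String tableName, String scanAlias) {
    return tableName.equals(tabAlias) || tabAlias.equals(scanAlias);
}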

Example 54 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class CommonJoinTaskDispatcher method mergeMapJoinTaskIntoItsChildMapRedTask.

/*
   * A task and its child task have been converted from a join to a mapjoin.
   * See if the two tasks can be merged.
   */
private void mergeMapJoinTaskIntoItsChildMapRedTask(MapRedTask mapJoinTask, Configuration conf) throws SemanticException {
    // Step 1: Check if mapJoinTask has a single child.
    // If so, check if we can merge mapJoinTask into that child.
    if (mapJoinTask.getChildTasks() == null || mapJoinTask.getChildTasks().size() > 1) {
        // No child task, or more than one child task, in which case we don't want to do anything.
        return;
    }
    Task<? extends Serializable> childTask = mapJoinTask.getChildTasks().get(0);
    if (!(childTask instanceof MapRedTask)) {
        // Nothing to do if it is not a MapReduce task.
        return;
    }
    MapRedTask childMapRedTask = (MapRedTask) childTask;
    MapWork mapJoinMapWork = mapJoinTask.getWork().getMapWork();
    MapWork childMapWork = childMapRedTask.getWork().getMapWork();
    Map<String, Operator<? extends OperatorDesc>> mapJoinAliasToWork = mapJoinMapWork.getAliasToWork();
    if (mapJoinAliasToWork.size() > 1) {
        // Do not merge if the MapredWork of MapJoin has multiple input aliases.
        return;
    }
    Entry<String, Operator<? extends OperatorDesc>> mapJoinAliasToWorkEntry = mapJoinAliasToWork.entrySet().iterator().next();
    String mapJoinAlias = mapJoinAliasToWorkEntry.getKey();
    TableScanOperator mapJoinTaskTableScanOperator = OperatorUtils.findSingleOperator(mapJoinAliasToWorkEntry.getValue(), TableScanOperator.class);
    if (mapJoinTaskTableScanOperator == null) {
        throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() + " operator as the work associated with alias " + mapJoinAlias + ". Found a " + mapJoinAliasToWork.get(mapJoinAlias).getName() + " operator.");
    }
    FileSinkOperator mapJoinTaskFileSinkOperator = OperatorUtils.findSingleOperator(mapJoinTaskTableScanOperator, FileSinkOperator.class);
    if (mapJoinTaskFileSinkOperator == null) {
        throw new SemanticException("Cannot find the " + FileSinkOperator.getOperatorName() + " operator at the last operator of the MapJoin Task.");
    }
    // The mapJoinTaskFileSinkOperator writes to a different directory
    Path childMRPath = mapJoinTaskFileSinkOperator.getConf().getDirName();
    List<String> childMRAliases = childMapWork.getPathToAliases().get(childMRPath);
    if (childMRAliases == null || childMRAliases.size() != 1) {
        return;
    }
    String childMRAlias = childMRAliases.get(0);
    // Sanity check to make sure there is no alias conflict after merge.
    for (Entry<Path, ArrayList<String>> entry : childMapWork.getPathToAliases().entrySet()) {
        Path path = entry.getKey();
        List<String> aliases = entry.getValue();
        if (path.equals(childMRPath)) {
            continue;
        }
        if (aliases.contains(mapJoinAlias)) {
            // alias conflict should not happen here.
            return;
        }
    }
    MapredLocalWork mapJoinLocalWork = mapJoinMapWork.getMapRedLocalWork();
    MapredLocalWork childLocalWork = childMapWork.getMapRedLocalWork();
    if ((mapJoinLocalWork != null && mapJoinLocalWork.getBucketMapjoinContext() != null) || (childLocalWork != null && childLocalWork.getBucketMapjoinContext() != null)) {
        // Right now, we do not handle the case where either task uses a bucketed map join.
        // We should relax this constraint with a follow-up jira.
        return;
    }
    // Check that the total size of the local tables after the merge is under the limit.
    if (!isLocalTableTotalSizeUnderLimitAfterMerge(conf, mapJoinLocalWork, childLocalWork)) {
        // Do not merge.
        return;
    }
    TableScanOperator childMRTaskTableScanOperator = OperatorUtils.findSingleOperator(childMapWork.getAliasToWork().get(childMRAlias.toString()), TableScanOperator.class);
    if (childMRTaskTableScanOperator == null) {
        throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() + " operator as the work associated with alias " + childMRAlias + ". Found a " + childMapWork.getAliasToWork().get(childMRAlias).getName() + " operator.");
    }
    List<Operator<? extends OperatorDesc>> parentsInMapJoinTask = mapJoinTaskFileSinkOperator.getParentOperators();
    List<Operator<? extends OperatorDesc>> childrenInChildMRTask = childMRTaskTableScanOperator.getChildOperators();
    if (parentsInMapJoinTask.size() > 1 || childrenInChildMRTask.size() > 1) {
        // Do not merge if we do not know how to connect two operator trees.
        return;
    }
    // Step 2: Merge mapJoinTask into the Map-side of its child.
    // Step 2.1: Connect the operator trees of two MapRedTasks.
    Operator<? extends OperatorDesc> parentInMapJoinTask = parentsInMapJoinTask.get(0);
    Operator<? extends OperatorDesc> childInChildMRTask = childrenInChildMRTask.get(0);
    parentInMapJoinTask.replaceChild(mapJoinTaskFileSinkOperator, childInChildMRTask);
    childInChildMRTask.replaceParent(childMRTaskTableScanOperator, parentInMapJoinTask);
    // Step 2.2: Replace the corresponding part of childMRWork's MapWork.
    GenMapRedUtils.replaceMapWork(mapJoinAlias, childMRAlias.toString(), mapJoinMapWork, childMapWork);
    // Step 2.3: Fill up stuff in local work
    if (mapJoinLocalWork != null) {
        if (childLocalWork == null) {
            childMapWork.setMapRedLocalWork(mapJoinLocalWork);
        } else {
            childLocalWork.getAliasToFetchWork().putAll(mapJoinLocalWork.getAliasToFetchWork());
            childLocalWork.getAliasToWork().putAll(mapJoinLocalWork.getAliasToWork());
        }
    }
    // Step 2.4: Remove this MapJoin task
    List<Task<? extends Serializable>> parentTasks = mapJoinTask.getParentTasks();
    mapJoinTask.setParentTasks(null);
    mapJoinTask.setChildTasks(null);
    childMapRedTask.getParentTasks().remove(mapJoinTask);
    if (parentTasks != null) {
        childMapRedTask.getParentTasks().addAll(parentTasks);
        for (Task<? extends Serializable> parentTask : parentTasks) {
            parentTask.getChildTasks().remove(mapJoinTask);
            if (!parentTask.getChildTasks().contains(childMapRedTask)) {
                parentTask.getChildTasks().add(childMapRedTask);
            }
        }
    } else {
        if (physicalContext.getRootTasks().contains(mapJoinTask)) {
            physicalContext.removeFromRootTask(mapJoinTask);
            if (childMapRedTask.getParentTasks() != null && childMapRedTask.getParentTasks().size() == 0 && !physicalContext.getRootTasks().contains(childMapRedTask)) {
                physicalContext.addToRootTask(childMapRedTask);
            }
        }
    }
    if (childMapRedTask.getParentTasks().size() == 0) {
        childMapRedTask.setParentTasks(null);
    }
}
Also used: LateralViewForwardOperator(org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Path(org.apache.hadoop.fs.Path) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) Serializable(java.io.Serializable) ArrayList(java.util.ArrayList) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
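The splice in Step 2.1 is the heart of the merge. Assuming replaceChild and replaceParent each swap a single entry of an operator's adjacency list in place, the surgery reduces to the sketch below (plain Java, illustrative only, not the Hive implementation):

// Swap oldOp for newOp in a neighbour list, in place.
private static <T> void replaceInList(java.util.List<T> neighbours, T oldOp, T newOp) {
    int i = neighbours.indexOf(oldOp);
    if (i >= 0) {
        neighbours.set(i, newOp);
    }
}
// Step 2.1 then amounts to (field names hypothetical):
//   replaceInList(parentInMapJoinTask.children, mapJoinTaskFileSinkOperator, childInChildMRTask);
//   replaceInList(childInChildMRTask.parents, childMRTaskTableScanOperator, parentInMapJoinTask);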

Example 55 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class RewriteGBUsingIndex method shouldApplyOptimization.

/**
   * We traverse the current operator tree to check for conditions in which the
   * optimization cannot be applied.
   *
   * At the end, we check if all conditions have passed for rewrite. If yes, we
   * determine if the index is usable for rewrite. Else, we log the condition
   * which did not meet the rewrite criterion.
   *
   * @return true if at least one operator tree qualifies for the rewrite
   * @throws SemanticException
   */
boolean shouldApplyOptimization() throws SemanticException {
    Map<Table, List<Index>> tableToIndex = getIndexesForRewrite();
    if (tableToIndex.isEmpty()) {
        LOG.debug("No Valid Index Found to apply Rewrite, " + "skipping " + getName() + " optimization");
        return false;
    }
    /*
     * This code iterates over each TableScanOperator from the topOps map from ParseContext.
     * For each operator tree originating from this top TableScanOperator, we determine
     * if the optimization can be applied. If yes, we add the name of the top table to
     * the tsOpToProcess to apply rewrite later on.
     * */
    for (Map.Entry<String, TableScanOperator> entry : parseContext.getTopOps().entrySet()) {
        String alias = entry.getKey();
        TableScanOperator topOp = entry.getValue();
        Table table = topOp.getConf().getTableMetadata();
        List<Index> indexes = tableToIndex.get(table);
        if (indexes.isEmpty()) {
            continue;
        }
        if (table.isPartitioned()) {
            // if the base table has partitions, we need to check whether the index
            // is built on all partitions. If not, then we do not apply the optimization
            if (!checkIfIndexBuiltOnAllTablePartitions(topOp, indexes)) {
                LOG.debug("Index is not built for all table partitions, " + "skipping " + getName() + " optimization");
                continue;
            }
        }
        // check if the rewrite can be applied to the operator tree
        // if there are no partitions on the base table
        checkIfRewriteCanBeApplied(alias, topOp, table, indexes);
    }
    return !tsOpToProcess.isEmpty();
}
Also used: TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) ArrayList(java.util.ArrayList) List(java.util.List) Index(org.apache.hadoop.hive.metastore.api.Index) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
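The per-table gate above reduces to a small predicate; an illustrative restatement over booleans (not the Hive API, method name hypothetical):

// A scan stays a rewrite candidate only if candidate indexes exist and, when
// the table is partitioned, the index is built on every partition.
private static boolean mayRewrite(boolean hasIndexes, boolean isPartitioned, boolean indexOnAllPartitions) {
    if (!hasIndexes) {
        return false;
    }
    return !isPartitioned || indexOnAllPartitions;
}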

Aggregations

TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 88
Operator (org.apache.hadoop.hive.ql.exec.Operator): 35
ArrayList (java.util.ArrayList): 33
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 28
Table (org.apache.hadoop.hive.ql.metadata.Table): 21
HashMap (java.util.HashMap): 20
Path (org.apache.hadoop.fs.Path): 20
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 20
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 19
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 19
LinkedHashMap (java.util.LinkedHashMap): 18
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 18
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 18
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 15
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 15
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 15
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 14
Map (java.util.Map): 13
AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator): 12
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 12