Example 26 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in the Apache Hive project.

The class CheckTableAccessHook, method run:

public void run(HookContext hookContext) {
    HiveConf conf = hookContext.getConf();
    if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
        return;
    }
    QueryPlan plan = hookContext.getQueryPlan();
    if (plan == null) {
        return;
    }
    TableAccessInfo tableAccessInfo = plan.getTableAccessInfo();
    if (tableAccessInfo == null || tableAccessInfo.getOperatorToTableAccessMap() == null || tableAccessInfo.getOperatorToTableAccessMap().isEmpty()) {
        return;
    }
    LogHelper console = SessionState.getConsole();
    Map<Operator<? extends OperatorDesc>, Map<String, List<String>>> operatorToTableAccessMap = tableAccessInfo.getOperatorToTableAccessMap();
    // Must be deterministic order map for consistent q-test output across Java versions
    Map<String, String> outputOrderedMap = new LinkedHashMap<String, String>();
    for (Map.Entry<Operator<? extends OperatorDesc>, Map<String, List<String>>> tableAccess : operatorToTableAccessMap.entrySet()) {
        StringBuilder perOperatorInfo = new StringBuilder();
        perOperatorInfo.append("Operator:").append(tableAccess.getKey().getOperatorId()).append("\n");
        for (Map.Entry<String, List<String>> entry : tableAccess.getValue().entrySet()) {
            perOperatorInfo.append("Table:").append(entry.getKey()).append("\n");
            perOperatorInfo.append("Keys:").append(StringUtils.join(entry.getValue(), ',')).append("\n");
        }
        outputOrderedMap.put(tableAccess.getKey().getOperatorId(), perOperatorInfo.toString());
    }
    for (String perOperatorInfo : outputOrderedMap.values()) {
        console.printError(perOperatorInfo);
    }
}
Also used: Operator(org.apache.hadoop.hive.ql.exec.Operator) LogHelper(org.apache.hadoop.hive.ql.session.SessionState.LogHelper) QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) LinkedHashMap(java.util.LinkedHashMap) TableAccessInfo(org.apache.hadoop.hive.ql.parse.TableAccessInfo) HiveConf(org.apache.hadoop.hive.conf.HiveConf) List(java.util.List) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) Map(java.util.Map)
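
A hook like this only fires when table-key collection is enabled and the class is registered as a post-execution hook. A minimal setup sketch, assuming a hypothetical org.example.CheckTableAccessHook as the fully qualified hook class (HIVE_STATS_COLLECT_TABLEKEYS and POSTEXECHOOKS are real HiveConf settings):

import org.apache.hadoop.hive.conf.HiveConf;

public class HookSetup {
    public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // enable collection of table key access info, checked at the top of run()
        conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS, true);
        // register the post-execution hook; the package here is a hypothetical stand-in
        conf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, "org.example.CheckTableAccessHook");
    }
}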

Example 27 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in the Apache Hive project.

The class LlapInputFormat, method findTsOp:

static TableScanOperator findTsOp(MapWork mapWork) throws HiveException {
    if (mapWork.getAliasToWork() == null) {
        throw new HiveException("Unexpected - aliasToWork is missing; " + NONVECTOR_SETTING_MESSAGE);
    }
    Iterator<Operator<?>> ops = mapWork.getAliasToWork().values().iterator();
    TableScanOperator tableScanOperator = null;
    while (ops.hasNext()) {
        Operator<?> op = ops.next();
        if (op instanceof TableScanOperator) {
            if (tableScanOperator != null) {
                throw new HiveException("Unexpected - more than one TSOP; " + NONVECTOR_SETTING_MESSAGE);
            }
            tableScanOperator = (TableScanOperator) op;
        }
    }
    return tableScanOperator;
}
Also used: TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
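
findTsOp is the classic "expect exactly one match" scan. A self-contained sketch of the same pattern with plain Java types (findSingle is illustrative, not a Hive API):

import java.util.Collection;

public final class SingleMatch {

    // Return the only element of the given type, or null if none is present;
    // fail fast on duplicates, mirroring findTsOp's single-TableScanOperator contract.
    static <T> T findSingle(Collection<?> items, Class<T> type) {
        T found = null;
        for (Object item : items) {
            if (type.isInstance(item)) {
                if (found != null) {
                    throw new IllegalStateException("more than one " + type.getSimpleName());
                }
                found = type.cast(item);
            }
        }
        return found;
    }
}

Returning null rather than throwing when nothing matches mirrors the original, which leaves it to the caller to decide whether a missing TableScanOperator is an error.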

Example 28 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in the Apache Hive project.

The class MapredLocalTask, method initializeOperators:

private void initializeOperators(Map<FetchOperator, JobConf> fetchOpJobConfMap) throws HiveException {
    for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : work.getAliasToWork().entrySet()) {
        LOG.debug("initializeOperators: " + entry.getKey() + ", children = " + entry.getValue().getChildOperators());
    }
    // create a fetch operator for each alias; these are used to initialize all the operators
    for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
        if (entry.getValue() == null) {
            continue;
        }
        JobConf jobClone = new JobConf(job);
        TableScanOperator ts = (TableScanOperator) work.getAliasToWork().get(entry.getKey());
        // push down projections
        ColumnProjectionUtils.appendReadColumns(jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
        // push down filters and as-of information
        HiveInputFormat.pushFiltersAndAsOf(jobClone, ts, null);
        AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().isTranscationalTable(), ts.getConf().getAcidOperationalProperties());
        AcidUtils.setValidWriteIdList(jobClone, ts.getConf());
        // create a fetch operator
        FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
        fetchOpJobConfMap.put(fetchOp, jobClone);
        fetchOperators.put(entry.getKey(), fetchOp);
        l4j.info("fetchoperator for " + entry.getKey() + " created");
    }
    // initialize all forward operators
    for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
        // get the forward op
        String alias = entry.getKey();
        Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
        // pass the execution context into all the operators
        forwardOp.passExecContext(execContext);
        // All the operators need to be initialized before processing
        FetchOperator fetchOp = entry.getValue();
        JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
        if (jobConf == null) {
            jobConf = job;
        }
        // initialize the forward operator
        ObjectInspector objectInspector = fetchOp.getOutputObjectInspector();
        forwardOp.initialize(jobConf, new ObjectInspector[] { objectInspector });
        l4j.info("fetchoperator for " + entry.getKey() + " initialized");
    }
}
Also used: FetchOperator(org.apache.hadoop.hive.ql.exec.FetchOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) Map(java.util.Map) HashMap(java.util.HashMap) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JobConf(org.apache.hadoop.mapred.JobConf)
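
Note that each alias above gets its own JobConf clone, so column projections and filters pushed down for one table do not leak into another. A minimal sketch of that copy-per-alias pattern (the config key is hypothetical; new JobConf(Configuration) is the standard Hadoop copy constructor):

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.mapred.JobConf;

public class PerAliasConf {
    public static void main(String[] args) {
        JobConf base = new JobConf();
        Map<String, JobConf> perAlias = new HashMap<>();
        for (String alias : List.of("src", "dst")) {
            // independent copy per alias: mutations stay local to the clone
            JobConf clone = new JobConf(base);
            clone.set("example.pushdown.columns", alias); // hypothetical key
            perAlias.put(alias, clone);
        }
    }
}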

Example 29 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in the Apache Hive project.

The class VectorMapOperator, method internalSetChildren:

/*
   * Create information for vector map operator.
   * The member oneRootOperator has been set.
   */
private void internalSetChildren(Configuration hconf) throws Exception {
    // The setupPartitionContextVars uses the prior read type to flush the prior deserializerBatch,
    // so set it here to none.
    currentReadType = VectorMapOperatorReadType.NONE;
    batchContext = conf.getVectorizedRowBatchCtx();
    /*
     * Use a different batch for vectorized Input File Format readers so they can do their work
     * overlapped with the row-collection work that vector/row deserialization does.  This allows
     * the partitions to mix modes (e.g. it lets us flush the previously batched rows on a file change).
     */
    vectorizedInputFileFormatBatch = batchContext.createVectorizedRowBatch();
    conf.setVectorizedRowBatch(vectorizedInputFileFormatBatch);
    /*
     * This batch is used by vector/row deserializer readers.
     */
    deserializerBatch = batchContext.createVectorizedRowBatch();
    batchCounter = 0;
    dataColumnCount = batchContext.getDataColumnCount();
    partitionColumnCount = batchContext.getPartitionColumnCount();
    partitionValues = new Object[partitionColumnCount];
    virtualColumnCount = batchContext.getVirtualColumnCount();
    rowIdentifierColumnNum = batchContext.findVirtualColumnNum(VirtualColumn.ROWID);
    hasRowIdentifier = (rowIdentifierColumnNum != -1);
    dataColumnNums = batchContext.getDataColumnNums();
    Preconditions.checkState(dataColumnNums != null);
    // Form a truncated boolean include array for our vector/row deserializers.
    determineDataColumnsToIncludeTruncated();
    /*
     * Create table related objects
     */
    final String[] rowColumnNames = batchContext.getRowColumnNames();
    final TypeInfo[] rowColumnTypeInfos = batchContext.getRowColumnTypeInfos();
    tableStructTypeInfo = TypeInfoFactory.getStructTypeInfo(Arrays.asList(rowColumnNames), Arrays.asList(rowColumnTypeInfos));
    tableStandardStructObjectInspector = (StandardStructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);
    tableRowTypeInfos = batchContext.getRowColumnTypeInfos();
    /*
     * NOTE: We do not alter the projectedColumns / projectionSize of the batches to just be
     * the included columns (+ partition columns).
     *
     * For now, we need to model the object inspector rows because there are still several
     * vectorized operators that use them.
     *
     * We need to continue to model the Object[] as having null objects for not included columns
     * until the following has been fixed:
     *    o When we have to output a STRUCT for AVG we switch to row GroupBy operators.
     *    o Some variations of VectorMapOperator, VectorReduceSinkOperator, VectorFileSinkOperator
     *      use the row super class to process rows.
     */
    /*
     * The Vectorizer class enforces that there is only one TableScanOperator, so
     * we don't need the more complicated multiple root operator mapping that MapOperator has.
     */
    fileToPartitionContextMap = new HashMap<>();
    // Temporary map so we only create one partition context per PartitionDesc.
    HashMap<PartitionDesc, VectorPartitionContext> partitionContextMap = new HashMap<PartitionDesc, VectorPartitionContext>();
    for (Map.Entry<Path, List<String>> entry : conf.getPathToAliases().entrySet()) {
        Path path = entry.getKey();
        PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path);
        VectorPartitionContext vectorPartitionContext;
        if (!partitionContextMap.containsKey(partDesc)) {
            vectorPartitionContext = createAndInitPartitionContext(partDesc, hconf);
            partitionContextMap.put(partDesc, vectorPartitionContext);
        } else {
            vectorPartitionContext = partitionContextMap.get(partDesc);
        }
        fileToPartitionContextMap.put(path, vectorPartitionContext);
    }
    // Create a list containing the single root operator.
    List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
    children.add(oneRootOperator);
    setChildOperators(children);
}
Also used: Path(org.apache.hadoop.fs.Path) Operator(org.apache.hadoop.hive.ql.exec.Operator) AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) VectorPartitionDesc(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc) List(java.util.List) Map(java.util.Map) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
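
The partition-context loop above is a canonicalization pattern: many paths, one shared context per distinct PartitionDesc. A generic, self-contained equivalent using computeIfAbsent (plain Java types stand in for Hive's; the paths and partition names are made up):

import java.util.HashMap;
import java.util.Map;

public class CanonicalContexts {
    public static void main(String[] args) {
        Map<String, String> pathToPartition = Map.of(
                "/warehouse/t/p=1/f0", "p=1",
                "/warehouse/t/p=1/f1", "p=1",
                "/warehouse/t/p=2/f0", "p=2");
        Map<String, Object> contextByPartition = new HashMap<>();
        Map<String, Object> contextByPath = new HashMap<>();
        // one shared context object per distinct partition, reused across its paths
        pathToPartition.forEach((path, part) ->
                contextByPath.put(path, contextByPartition.computeIfAbsent(part, p -> new Object())));
    }
}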

Example 30 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in the Apache Hive project.

The class TaskQueue, method finished:

public void finished(TaskRunner runner) {
    if (statsTasks.isEmpty() || !(runner.getTask() instanceof MapRedTask)) {
        return;
    }
    MapRedTask mapredTask = (MapRedTask) runner.getTask();
    MapWork mapWork = mapredTask.getWork().getMapWork();
    ReduceWork reduceWork = mapredTask.getWork().getReduceWork();
    List<Operator> operators = new ArrayList<Operator>(mapWork.getAliasToWork().values());
    if (reduceWork != null) {
        operators.add(reduceWork.getReducer());
    }
    final List<String> statKeys = new ArrayList<String>(1);
    NodeUtils.iterate(operators, FileSinkOperator.class, new Function<FileSinkOperator>() {

        @Override
        public void apply(FileSinkOperator fsOp) {
            if (fsOp.getConf().isGatherStats()) {
                statKeys.add(fsOp.getConf().getStatsAggPrefix());
            }
        }
    });
    for (String statKey : statKeys) {
        if (statsTasks.containsKey(statKey)) {
            statsTasks.get(statKey).getWork().setSourceTask(mapredTask);
        } else {
            LOG.debug("There is no correspoing statTask for: " + statKey);
        }
    }
}
Also used: MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) ArrayList(java.util.ArrayList) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork)
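
NodeUtils.iterate applies the callback only to operators of the requested class. A flat, self-contained sketch of that type-filtered visit (the real NodeUtils also descends into child operators, which this sketch omits):

import java.util.List;
import java.util.function.Consumer;

public final class TypedVisit {

    // Apply fn to every element of the requested type; everything else is skipped.
    static <T> void iterate(List<?> nodes, Class<T> clazz, Consumer<T> fn) {
        for (Object node : nodes) {
            if (clazz.isInstance(node)) {
                fn.accept(clazz.cast(node));
            }
        }
    }
}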

Aggregations (types co-occurring with Operator, by usage count)

Operator (org.apache.hadoop.hive.ql.exec.Operator): 215
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 167
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 156
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 134
ArrayList (java.util.ArrayList): 123
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 119
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 118
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 107
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 103
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 97
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 85
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 85
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 79
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 71
HashMap (java.util.HashMap): 65
LinkedHashMap (java.util.LinkedHashMap): 64
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 62
LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator): 60
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 59
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 52