Search in sources :

Example 6 with OperatorDesc

use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

the class CheckTableAccessHook method run.

/**
 * Hook entry point that reads the table access information attached to the query plan and
 * builds, for each operator, a text block of "Keys:" lines from the operator's map values.
 *
 * NOTE(review): this excerpt is truncated. Closing braces, the statements guarded by the
 * three checks below and the body of the final loop are not shown, so what happens when a
 * check matches and how the collected text is emitted cannot be confirmed from here.
 *
 * @param hookContext supplies the HiveConf and the QueryPlan inspected by this hook
 */
public void run(HookContext hookContext) {
    HiveConf conf = hookContext.getConf();
    // Check for the HIVE_STATS_COLLECT_TABLEKEYS setting being off
    // (guarded statements missing from this excerpt; presumably an early return).
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS) == false) {
    QueryPlan plan = hookContext.getQueryPlan();
    // Check for a missing plan (guarded statements missing from this excerpt).
    if (plan == null) {
    TableAccessInfo tableAccessInfo = hookContext.getQueryPlan().getTableAccessInfo();
    // Check for absent or empty table access information (guarded statements missing from this excerpt).
    if (tableAccessInfo == null || tableAccessInfo.getOperatorToTableAccessMap() == null || tableAccessInfo.getOperatorToTableAccessMap().isEmpty()) {
    LogHelper console = SessionState.getConsole();
    Map<Operator<? extends OperatorDesc>, Map<String, List<String>>> operatorToTableAccessMap = tableAccessInfo.getOperatorToTableAccessMap();
    // Must be deterministic order map for consistent q-test output across Java versions
    Map<String, String> outputOrderedMap = new LinkedHashMap<String, String>();
    for (Map.Entry<Operator<? extends OperatorDesc>, Map<String, List<String>>> tableAccess : operatorToTableAccessMap.entrySet()) {
        StringBuilder perOperatorInfo = new StringBuilder();
        for (Map.Entry<String, List<String>> entry : tableAccess.getValue().entrySet()) {
            // One "Keys:" line per inner map entry, with the list values joined by commas.
            perOperatorInfo.append("Keys:").append(StringUtils.join(entry.getValue(), ',')).append("\n");
        // The collected text is stored under the operator id.
        outputOrderedMap.put(tableAccess.getKey().getOperatorId(), perOperatorInfo.toString());
    // NOTE(review): the body of this loop is missing from the excerpt; presumably it emits
    // each text block through the console obtained above - TODO confirm.
    for (String perOperatorInfo : outputOrderedMap.values()) {
Also used : Operator(org.apache.hadoop.hive.ql.exec.Operator) LogHelper(org.apache.hadoop.hive.ql.session.SessionState.LogHelper) QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) LinkedHashMap(java.util.LinkedHashMap) TableAccessInfo(org.apache.hadoop.hive.ql.parse.TableAccessInfo) HiveConf(org.apache.hadoop.hive.conf.HiveConf) List(java.util.List) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 7 with OperatorDesc

use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

the class ExplainTask method outputPlan.

/**
 * Produces the explain output for {@code work}, driven by {@link Explain} annotations found on
 * the class of {@code work} and on its public methods. Operators are handled separately: their
 * conf is explained first and then their child operators are visited recursively.
 *
 * NOTE(review): this excerpt is damaged. Closing braces and several statements are missing,
 * and some expressions have lost their operands (for example {@code if ( != null && {} and
 * {@code invokeFlag =;}). The comments below describe only what the visible lines show.
 *
 * @param work the object to explain
 * @param out stream for the textual output; it is null-checked before use
 * @param extended forwarded to nested calls; also selects between two invokeFlag assignments
 *        whose right-hand sides are missing from this excerpt
 * @param jsonOutput when true, a JSONObject is built and returned
 * @param indent base indentation; nested output uses 0 when jsonOutput is true
 * @param appendToHeader text printed after the class-level display name when not null or empty
 * @return the JSON built for {@code work} when jsonOutput is true, otherwise null
 * @throws Exception declared by the signature
 */
private JSONObject outputPlan(Object work, PrintStream out, boolean extended, boolean jsonOutput, int indent, String appendToHeader) throws Exception {
    // Check if work has an explain annotation
    Annotation note = AnnotationUtils.getAnnotation(work.getClass(), Explain.class);
    String keyJSONObject = null;
    if (note instanceof Explain) {
        Explain xpl_note = (Explain) note;
        boolean invokeFlag = false;
        // NOTE(review): the operands of this condition and the right-hand sides of the
        // invokeFlag assignments below are missing from this excerpt.
        if ( != null && {
            invokeFlag =;
        } else {
            if (extended) {
                invokeFlag =;
            } else {
                invokeFlag =;
        if (invokeFlag) {
            Vectorization vectorization = xpl_note.vectorization();
            if ( != null && {
                // The EXPLAIN VECTORIZATION option was specified.
                final boolean desireOnly =;
                final VectorizationDetailLevel desiredVecDetailLevel =;
                // NOTE(review): no break or default lines are visible in this switch; they were
                // presumably lost together with the closing braces - TODO confirm upstream.
                switch(vectorization) {
                    case NON_VECTORIZED:
                        // Display all non-vectorized leaf objects unless ONLY.
                        if (desireOnly) {
                            invokeFlag = false;
                    case SUMMARY:
                    case OPERATOR:
                    case EXPRESSION:
                    case DETAIL:
                        if (vectorization.rank < desiredVecDetailLevel.rank) {
                            // This detail not desired.
                            invokeFlag = false;
                    case SUMMARY_PATH:
                    case OPERATOR_PATH:
                        if (desireOnly) {
                            if (vectorization.rank < desiredVecDetailLevel.rank) {
                                // Suppress headers and all objects below.
                                invokeFlag = false;
                        throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
            } else {
                // Do not display vectorization objects.
                switch(vectorization) {
                    case SUMMARY:
                    case OPERATOR:
                    case EXPRESSION:
                    case DETAIL:
                        invokeFlag = false;
                    case NON_VECTORIZED:
                        // No action.
                    case SUMMARY_PATH:
                    case OPERATOR_PATH:
                        // Always include headers since they contain non-vectorized objects, too.
                        throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
        if (invokeFlag) {
            // The class-level display name becomes the key that wraps the returned JSON.
            keyJSONObject = xpl_note.displayName();
            if (out != null) {
                if (appendToHeader != null && !appendToHeader.isEmpty()) {
                    out.println(xpl_note.displayName() + appendToHeader);
                } else {
    JSONObject json = jsonOutput ? new JSONObject(new LinkedHashMap<>()) : null;
    // conf and then the children
    if (work instanceof Operator) {
        Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>) work;
        if (operator.getConf() != null) {
            // In logical mode the operator id is appended to the header of the conf output.
            String appender = isLogical ? " (" + operator.getOperatorId() + ")" : "";
            JSONObject jsonOut = outputPlan(operator.getConf(), out, extended, jsonOutput, jsonOutput ? 0 : indent, appender);
            // NOTE(review): the operands of this condition are missing from this excerpt.
            if ( != null && ( || {
                if (jsonOut != null && jsonOut.length() > 0) {
                    // The operator id is stored inside the first named entry of the conf JSON.
                    ((JSONObject) jsonOut.get(JSONObject.getNames(jsonOut)[0])).put("OperatorId:", operator.getOperatorId());
                    if (! && && operator instanceof ReduceSinkOperator) {
                        List<String> outputOperators = ((ReduceSinkOperator) operator).getConf().getOutputOperators();
                        if (outputOperators != null) {
                            ((JSONObject) jsonOut.get(JSONObject.getNames(jsonOut)[0])).put(OUTPUT_OPERATORS, Arrays.toString(outputOperators.toArray()));
            if (jsonOutput) {
                json = jsonOut;
        // Children are visited unless the operator is already in visitedOps while in logical mode.
        if (!visitedOps.contains(operator) || !isLogical) {
            if (operator.getChildOperators() != null) {
                int cindent = jsonOutput ? 0 : indent + 2;
                for (Operator<? extends OperatorDesc> op : operator.getChildOperators()) {
                    JSONObject jsonOut = outputPlan(op, out, extended, jsonOutput, cindent);
                    if (jsonOutput) {
                        // Each child's JSON is accumulated under "children" of the first named entry.
                        ((JSONObject) json.get(JSONObject.getNames(json)[0])).accumulate("children", jsonOut);
        if (jsonOutput) {
            return json;
        return null;
    // We look at all methods that generate values for explain
    Method[] methods = work.getClass().getMethods();
    // Sorted with MethodComparator before being visited.
    Arrays.sort(methods, new MethodComparator());
    for (Method m : methods) {
        int prop_indents = jsonOutput ? 0 : indent + 2;
        note = AnnotationUtils.getAnnotation(m, Explain.class);
        if (note instanceof Explain) {
            Explain xpl_note = (Explain) note;
            boolean invokeFlag = false;
            // NOTE(review): same damage as in the class-level check above; the operands and
            // right-hand sides are missing from this excerpt.
            if ( != null && {
                invokeFlag =;
            } else {
                if (extended) {
                    invokeFlag =;
                } else {
                    invokeFlag =;
            if (invokeFlag) {
                Vectorization vectorization = xpl_note.vectorization();
                if ( != null && {
                    // The EXPLAIN VECTORIZATION option was specified.
                    final boolean desireOnly =;
                    final VectorizationDetailLevel desiredVecDetailLevel =;
                    // NOTE(review): no break or default lines are visible in this switch either.
                    switch(vectorization) {
                        case NON_VECTORIZED:
                            // Display all non-vectorized leaf objects unless ONLY.
                            if (desireOnly) {
                                invokeFlag = false;
                        case SUMMARY:
                        case OPERATOR:
                        case EXPRESSION:
                        case DETAIL:
                            if (vectorization.rank < desiredVecDetailLevel.rank) {
                                // This detail not desired.
                                invokeFlag = false;
                        case SUMMARY_PATH:
                        case OPERATOR_PATH:
                            if (desireOnly) {
                                if (vectorization.rank < desiredVecDetailLevel.rank) {
                                    // Suppress headers and all objects below.
                                    invokeFlag = false;
                            throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
                } else {
                    // Do not display vectorization objects.
                    switch(vectorization) {
                        case SUMMARY:
                        case OPERATOR:
                        case EXPRESSION:
                        case DETAIL:
                            invokeFlag = false;
                        case NON_VECTORIZED:
                            // No action.
                        case SUMMARY_PATH:
                        case OPERATOR_PATH:
                            // Always include headers since they contain non-vectorized objects, too.
                            throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
            if (invokeFlag) {
                Object val = null;
                try {
                    // Call the annotated method on work to obtain the value to explain.
                    val = m.invoke(work);
                } catch (InvocationTargetException ex) {
                    // Ignore the exception, this may be caused by external jars
                    val = null;
                // NOTE(review): the statement guarded by this null check is missing from the excerpt.
                if (val == null) {
                String header = null;
                boolean skipHeader = xpl_note.skipHeader();
                boolean emptyHeader = false;
                // A non-empty display name yields "<indent><name>:"; otherwise the header is only indentation.
                if (!xpl_note.displayName().equals("")) {
                    header = indentString(prop_indents) + xpl_note.displayName() + ":";
                } else {
                    emptyHeader = true;
                    prop_indents = indent;
                    header = indentString(prop_indents);
                // Try the output as a primitive object
                if (isPrintable(val)) {
                    if (out != null && shouldPrint(xpl_note, val)) {
                        if (!skipHeader) {
                            out.print(" ");
                    if (jsonOutput && shouldPrint(xpl_note, val)) {
                        json.put(header, val.toString());
                // Indentation passed to nested output; it stays 0 for JSON output.
                int ind = 0;
                if (!jsonOutput) {
                    if (!skipHeader) {
                        ind = prop_indents + 2;
                    } else {
                        ind = indent;
                // Try this as a map
                if (val instanceof Map) {
                    // Go through the map and print out the stuff
                    Map<?, ?> mp = (Map<?, ?>) val;
                    if (out != null && !skipHeader && mp != null && !mp.isEmpty()) {
                    JSONObject jsonOut = outputMap(mp, !skipHeader && !emptyHeader, out, extended, jsonOutput, ind);
                    if (jsonOutput && !mp.isEmpty()) {
                        json.put(header, jsonOut);
                // Try this as a list
                if (val instanceof List || val instanceof Set) {
                    // A Set is copied into a new ArrayList so it can be passed to outputList.
                    List l = val instanceof List ? (List) val : new ArrayList((Set) val);
                    if (out != null && !skipHeader && l != null && !l.isEmpty()) {
                    JSONArray jsonOut = outputList(l, out, !skipHeader && !emptyHeader, extended, jsonOutput, ind);
                    if (jsonOutput && !l.isEmpty()) {
                        json.put(header, jsonOut);
                // Finally check if it is serializable
                try {
                    if (!skipHeader && out != null) {
                    JSONObject jsonOut = outputPlan(val, out, extended, jsonOutput, ind);
                    if (jsonOutput && jsonOut != null && jsonOut.length() != 0) {
                        if (!skipHeader) {
                            json.put(header, jsonOut);
                        } else {
                            // With the header skipped, the nested entries are copied directly into json.
                            for (String k : JSONObject.getNames(jsonOut)) {
                                json.put(k, jsonOut.get(k));
                } catch (ClassCastException ce) {
                // Ignore
    if (jsonOutput) {
        // When a class-level display name was recorded, the JSON is wrapped under that key.
        if (keyJSONObject != null) {
            JSONObject ret = new JSONObject(new LinkedHashMap<>());
            ret.put(keyJSONObject, json);
            return ret;
        return json;
    return null;
Also used : VectorGroupByOperator(org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator) Set(java.util.Set) HashSet(java.util.HashSet) StringSet(org.apache.hadoop.hive.conf.Validator.StringSet) ArrayList(java.util.ArrayList) Vectorization(org.apache.hadoop.hive.ql.plan.Explain.Vectorization) LinkedHashMap(java.util.LinkedHashMap) List(java.util.List) ArrayList(java.util.ArrayList) Explain(org.apache.hadoop.hive.ql.plan.Explain) JSONArray(org.json.JSONArray) Method(java.lang.reflect.Method) Annotation(java.lang.annotation.Annotation) InvocationTargetException(java.lang.reflect.InvocationTargetException) VectorizationDetailLevel(org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel) JSONObject(org.json.JSONObject) JSONObject(org.json.JSONObject) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TreeMap(java.util.TreeMap)

Example 8 with OperatorDesc

use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

the class MapredLocalTask method initializeOperators.

/**
 * Creates one FetchOperator per entry of the work's alias-to-fetch-work map, each with its own
 * JobConf clone, and then initializes the operator registered under the same alias with the
 * fetch operator's output ObjectInspector.
 *
 * NOTE(review): this excerpt is truncated. Closing braces and the statement guarded by the
 * null check on the fetch work are missing, and two lines end with the remains of calls
 * whose receiver was lost (presumably logging calls - TODO confirm).
 *
 * @param fetchOpJobConfMap filled with the JobConf clone created for each FetchOperator and
 *        read back when the forward operators are initialized
 * @throws HiveException declared by the signature
 */
private void initializeOperators(Map<FetchOperator, JobConf> fetchOpJobConfMap) throws HiveException {
    for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : work.getAliasToWork().entrySet()) {
        LOG.debug("initializeOperators: " + entry.getKey() + ", children = " + entry.getValue().getChildOperators());
    // this mapper operator is used to initialize all the operators
    for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
        // NOTE(review): the statement guarded by this check is missing from the excerpt.
        if (entry.getValue() == null) {
        // Each fetch operator gets its own copy of the job configuration.
        JobConf jobClone = new JobConf(job);
        // The operator registered under this alias is cast to a TableScanOperator.
        TableScanOperator ts = (TableScanOperator) work.getAliasToWork().get(entry.getKey());
        // push down projections
        ColumnProjectionUtils.appendReadColumns(jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
        // push down filters
        HiveInputFormat.pushFilters(jobClone, ts);
        AcidUtils.setTransactionalTableScan(jobClone, ts.getConf().isAcidTable());
        AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().getAcidOperationalProperties());
        // create a fetch operator
        FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
        fetchOpJobConfMap.put(fetchOp, jobClone);
        fetchOperators.put(entry.getKey(), fetchOp);"fetchoperator for " + entry.getKey() + " created");
    // initialize all forward operator
    for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
        // get the forward op
        String alias = entry.getKey();
        Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
        // put the exe context into all the operators
        // All the operators need to be initialized before process
        FetchOperator fetchOp = entry.getValue();
        JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
        // Fall back to the shared job configuration when no clone was recorded for this fetch operator.
        if (jobConf == null) {
            jobConf = job;
        // initialize the forward operator
        ObjectInspector objectInspector = fetchOp.getOutputObjectInspector();
        forwardOp.initialize(jobConf, new ObjectInspector[] { objectInspector });"fetchoperator for " + entry.getKey() + " initialized");
Also used : FetchOperator(org.apache.hadoop.hive.ql.exec.FetchOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) Map(java.util.Map) HashMap(java.util.HashMap) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JobConf(org.apache.hadoop.mapred.JobConf) FetchOperator(org.apache.hadoop.hive.ql.exec.FetchOperator)

Example 9 with OperatorDesc

use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

the class SparkMapRecordHandler method init.

/**
 * Initializes this record handler: creates the map operator (vectorized or not, depending on
 * the MapWork), initializes it, wires the output collector into its children and initializes
 * the dummy parent operators of the map-side local work.
 *
 * NOTE(review): this excerpt is truncated. Closing braces and the statement guarded by the
 * {@code localWork == null} check are missing, and one comment line has the remains of a
 * stripped call fused onto it.
 *
 * @param job job configuration; stored in {@code jc} and used to look up the MapWork
 * @param output collector handed to the map operator's child operators
 * @param reporter passed through to {@code super.init}
 * @throws Exception declared by the signature; an OutOfMemoryError is rethrown as is and any
 *         other Throwable raised in the try block is wrapped in a RuntimeException
 */
public <K, V> void init(JobConf job, OutputCollector<K, V> output, Reporter reporter) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
    super.init(job, output, reporter);
    isLogInfoEnabled = LOG.isInfoEnabled();
    try {
        jc = job;
        execContext = new ExecMapperContext(jc);
        // create map and fetch operators
        MapWork mrwork = Utilities.getMapWork(job);
        CompilationOpContext runtimeCtx = new CompilationOpContext();
        // The MapWork's vector mode decides which map operator implementation is created.
        if (mrwork.getVectorMode()) {
            mo = new VectorMapOperator(runtimeCtx);
        } else {
            mo = new MapOperator(runtimeCtx);
        // initialize map operator
        mo.initialize(jc, null);
        // initialize map local work
        localWork = mrwork.getMapRedLocalWork();
        MapredContext.init(true, new JobConf(jc));
        OperatorUtils.setChildrenCollector(mo.getChildOperators(), output);
        // NOTE(review): the statement guarded by this check is missing from the excerpt;
        // presumably nothing more is done without local work - TODO confirm.
        if (localWork == null) {
        //The following code is for mapjoin
        //initialize all the dummy ops"Initializing dummy operator");
        List<Operator<? extends OperatorDesc>> dummyOps = localWork.getDummyParentOp();
        for (Operator<? extends OperatorDesc> dummyOp : dummyOps) {
            dummyOp.initialize(jc, null);
    } catch (Throwable e) {
        // Record the failure before rethrowing.
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Map operator initialization failed: " + e, e);
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
Also used : Operator(org.apache.hadoop.hive.ql.exec.Operator) MapOperator(org.apache.hadoop.hive.ql.exec.MapOperator) AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) VectorMapOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator) ExecMapperContext(org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext) VectorMapOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator) MapOperator(org.apache.hadoop.hive.ql.exec.MapOperator) AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) VectorMapOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 10 with OperatorDesc

use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

the class VectorMapOperator method internalSetChildren.

/**
 * Create information for vector map operator.
 * The member oneRootOperator has been set.
 */
private void internalSetChildren(Configuration hconf) throws Exception {
    // Overview: resets the read type, creates the two row batches from the vectorized row batch
    // context, caches column counts and table type information, and fills
    // fileToPartitionContextMap with one VectorPartitionContext per distinct PartitionDesc.
    // hconf is only passed on to createAndInitPartitionContext in the visible lines.
    //
    // NOTE(review): this excerpt is damaged. Lines that begin with " * " are block-comment text
    // whose delimiters were lost, closing braces are missing, and the method is cut off after
    // the children list is created.

    // The setupPartitionContextVars uses the prior read type to flush the prior deserializerBatch,
    // so set it here to none.
    currentReadType = VectorMapOperatorReadType.NONE;
    batchContext = conf.getVectorizedRowBatchCtx();
     * Use a different batch for vectorized Input File Format readers so they can do their work
     * overlapped with work of the row collection that vector/row deserialization does.  This allows
     * the partitions to mix modes (e.g. for us to flush the previously batched rows on file change).
    vectorizedInputFileFormatBatch = batchContext.createVectorizedRowBatch();
     * This batch is used by vector/row deserializer readers.
    deserializerBatch = batchContext.createVectorizedRowBatch();
    batchCounter = 0;
    dataColumnCount = batchContext.getDataColumnCount();
    partitionColumnCount = batchContext.getPartitionColumnCount();
    // One slot per partition column.
    partitionValues = new Object[partitionColumnCount];
    dataColumnNums = batchContext.getDataColumnNums();
    Preconditions.checkState(dataColumnNums != null);
    // Form a truncated boolean include array for our vector/row deserializers.
     * Create table related objects
    final String[] rowColumnNames = batchContext.getRowColumnNames();
    final TypeInfo[] rowColumnTypeInfos = batchContext.getRowColumnTypeInfos();
    // Struct type info built from the row column names and types, and the object inspector derived from it.
    tableStructTypeInfo = TypeInfoFactory.getStructTypeInfo(Arrays.asList(rowColumnNames), Arrays.asList(rowColumnTypeInfos));
    tableStandardStructObjectInspector = (StandardStructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);
    tableRowTypeInfos = batchContext.getRowColumnTypeInfos();
     * NOTE: We do not alter the projectedColumns / projectionSize of the batches to just be
     * the included columns (+ partition columns).
     * For now, we need to model the object inspector rows because there are still several
     * vectorized operators that use them.
     * We need to continue to model the Object[] as having null objects for not included columns
     * until the following has been fixed:
     *    o When we have to output a STRUCT for AVG we switch to row GroupBy operators.
     *    o Some variations of VectorMapOperator, VectorReduceSinkOperator, VectorFileSinkOperator
     *      use the row super class to process rows.
     * The Vectorizer class enforces that there is only one TableScanOperator, so
     * we don't need the more complicated multiple root operator mapping that MapOperator has.
    fileToPartitionContextMap = new HashMap<String, VectorPartitionContext>();
    // Temporary map so we only create one partition context entry.
    HashMap<PartitionDesc, VectorPartitionContext> partitionContextMap = new HashMap<PartitionDesc, VectorPartitionContext>();
    for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
        Path path = entry.getKey();
        PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path);
        VectorPartitionContext vectorPartitionContext;
        // Reuse the context already created for this PartitionDesc, otherwise create and remember one.
        if (!partitionContextMap.containsKey(partDesc)) {
            vectorPartitionContext = createAndInitPartitionContext(partDesc, hconf);
            partitionContextMap.put(partDesc, vectorPartitionContext);
        } else {
            vectorPartitionContext = partitionContextMap.get(partDesc);
        // The per-file map is keyed by the string form of the path.
        fileToPartitionContextMap.put(path.toString(), vectorPartitionContext);
    // Create list of one.
    List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
Also used : Path(org.apache.hadoop.fs.Path) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) VectorPartitionDesc(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc) HashMap(java.util.HashMap) Map(java.util.Map) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)


OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)78 Operator (org.apache.hadoop.hive.ql.exec.Operator)65 ArrayList (java.util.ArrayList)47 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)41 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)36 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)32 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)30 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)29 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)22 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)21 Path (org.apache.hadoop.fs.Path)20 LinkedHashMap (java.util.LinkedHashMap)18 HashMap (java.util.HashMap)16 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)16 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)16 Serializable ( Task (org.apache.hadoop.hive.ql.exec.Task)15 List (java.util.List)14 Map (java.util.Map)13 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)13