Example 41 with OperatorDesc

Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

The class VectorMapOperator, method internalSetChildren.

/*
 * Create information for vector map operator.
 * The member oneRootOperator has been set.
 */
private void internalSetChildren(Configuration hconf) throws Exception {
    // The setupPartitionContextVars uses the prior read type to flush the prior deserializerBatch,
    // so set it here to none.
    currentReadType = VectorMapOperatorReadType.NONE;
    batchContext = conf.getVectorizedRowBatchCtx();
    /*
     * Use a different batch for vectorized Input File Format readers so their work can be
     * overlapped with the row collection work that vector/row deserialization does.  This allows
     * the partitions to mix modes (e.g. it lets us flush the previously batched rows on a file change).
     */
    vectorizedInputFileFormatBatch = batchContext.createVectorizedRowBatch();
    conf.setVectorizedRowBatch(vectorizedInputFileFormatBatch);
    /*
     * This batch is used by vector/row deserializer readers.
     */
    deserializerBatch = batchContext.createVectorizedRowBatch();
    batchCounter = 0;
    dataColumnCount = batchContext.getDataColumnCount();
    partitionColumnCount = batchContext.getPartitionColumnCount();
    partitionValues = new Object[partitionColumnCount];
    virtualColumnCount = batchContext.getVirtualColumnCount();
    rowIdentifierColumnNum = batchContext.findVirtualColumnNum(VirtualColumn.ROWID);
    hasRowIdentifier = (rowIdentifierColumnNum != -1);
    dataColumnNums = batchContext.getDataColumnNums();
    Preconditions.checkState(dataColumnNums != null);
    // Form a truncated boolean include array for our vector/row deserializers.
    determineDataColumnsToIncludeTruncated();
    /*
     * Create table related objects
     */
    final String[] rowColumnNames = batchContext.getRowColumnNames();
    final TypeInfo[] rowColumnTypeInfos = batchContext.getRowColumnTypeInfos();
    tableStructTypeInfo = TypeInfoFactory.getStructTypeInfo(Arrays.asList(rowColumnNames), Arrays.asList(rowColumnTypeInfos));
    tableStandardStructObjectInspector = (StandardStructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);
    tableRowTypeInfos = batchContext.getRowColumnTypeInfos();
    /*
     * NOTE: We do not alter the projectedColumns / projectionSize of the batches to just be
     * the included columns (+ partition columns).
     *
     * For now, we need to model the object inspector rows because there are still several
     * vectorized operators that use them.
     *
     * We need to continue to model the Object[] as having null objects for not included columns
     * until the following has been fixed:
     *    o When we have to output a STRUCT for AVG we switch to row GroupBy operators.
     *    o Some variations of VectorMapOperator, VectorReduceSinkOperator, VectorFileSinkOperator
     *      use the row super class to process rows.
     */
    /*
     * The Vectorizer class enforces that there is only one TableScanOperator, so
     * we don't need the more complicated multiple root operator mapping that MapOperator has.
     */
    fileToPartitionContextMap = new HashMap<String, VectorPartitionContext>();
    // Temporary map so we only create one partition context entry.
    HashMap<PartitionDesc, VectorPartitionContext> partitionContextMap = new HashMap<PartitionDesc, VectorPartitionContext>();
    for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
        Path path = entry.getKey();
        PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path);
        VectorPartitionContext vectorPartitionContext;
        if (!partitionContextMap.containsKey(partDesc)) {
            vectorPartitionContext = createAndInitPartitionContext(partDesc, hconf);
            partitionContextMap.put(partDesc, vectorPartitionContext);
        } else {
            vectorPartitionContext = partitionContextMap.get(partDesc);
        }
        fileToPartitionContextMap.put(path.toString(), vectorPartitionContext);
    }
    // Create list of one.
    List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
    children.add(oneRootOperator);
    setChildOperators(children);
}
Also used : Path(org.apache.hadoop.fs.Path) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) VectorPartitionDesc(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc) Map(java.util.Map) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
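
A detail worth calling out here is the temporary partitionContextMap keyed by PartitionDesc: it guarantees that all paths sharing one partition descriptor reuse a single VectorPartitionContext instead of re-running createAndInitPartitionContext. Below is a minimal, self-contained sketch of the same canonicalize-through-a-temporary-map idiom; PartitionSpec, Context, and createContext are hypothetical stand-ins, not Hive classes.

import java.util.HashMap;
import java.util.Map;

public class ContextCanonicalizer {

    // Hypothetical stand-ins for PartitionDesc / VectorPartitionContext.
    static class PartitionSpec { }
    static class Context { }

    static Context createContext(PartitionSpec spec) {
        // Expensive per-partition setup would live here.
        return new Context();
    }

    /** Maps each path to a context, creating only one context per distinct spec. */
    public static Map<String, Context> mapPathsToContexts(Map<String, PartitionSpec> pathToSpec) {
        Map<String, Context> pathToContext = new HashMap<>();
        // Temporary map so each distinct spec yields exactly one context,
        // mirroring partitionContextMap above.
        Map<PartitionSpec, Context> canonical = new HashMap<>();
        for (Map.Entry<String, PartitionSpec> e : pathToSpec.entrySet()) {
            Context ctx = canonical.computeIfAbsent(e.getValue(), ContextCanonicalizer::createContext);
            pathToContext.put(e.getKey(), ctx);
        }
        return pathToContext;
    }
}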

Example 42 with OperatorDesc

Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

The class SparkMapRecordHandler, method init.

@Override
public <K, V> void init(JobConf job, OutputCollector<K, V> output, Reporter reporter) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
    super.init(job, output, reporter);
    try {
        jc = job;
        execContext = new ExecMapperContext(jc);
        // create map and fetch operators
        MapWork mrwork = Utilities.getMapWork(job);
        CompilationOpContext runtimeCtx = new CompilationOpContext();
        if (mrwork.getVectorMode()) {
            mo = new VectorMapOperator(runtimeCtx);
        } else {
            mo = new MapOperator(runtimeCtx);
        }
        mo.setConf(mrwork);
        // initialize map operator
        mo.initialize(jc, null);
        mo.setChildren(job);
        LOG.info(mo.dump(0));
        // initialize map local work
        localWork = mrwork.getMapRedLocalWork();
        execContext.setLocalWork(localWork);
        MapredContext.init(true, new JobConf(jc));
        MapredContext.get().setReporter(reporter);
        mo.passExecContext(execContext);
        mo.initializeLocalWork(jc);
        mo.initializeMapOperator(jc);
        mo.setReporter(rp);
        if (localWork == null) {
            return;
        }
        // The following code is for mapjoin
        // initialize all the dummy ops
        LOG.info("Initializing dummy operator");
        List<Operator<? extends OperatorDesc>> dummyOps = localWork.getDummyParentOp();
        for (Operator<? extends OperatorDesc> dummyOp : dummyOps) {
            dummyOp.setExecContext(execContext);
            dummyOp.initialize(jc, null);
        }
    } catch (Throwable e) {
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Map operator initialization failed: " + e, e);
        }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
Also used : Operator(org.apache.hadoop.hive.ql.exec.Operator) MapOperator(org.apache.hadoop.hive.ql.exec.MapOperator) AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) VectorMapOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator) ExecMapperContext(org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
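
One detail worth noting in this handler is the catch block: an OutOfMemoryError is rethrown as-is instead of being wrapped, because allocating a new exception object while the heap is exhausted can itself fail. A minimal sketch of that idiom, independent of the Hive classes above (SafeInit and Initializer are hypothetical names):

public final class SafeInit {

    /** A hypothetical stand-in for any initialization step that may throw. */
    interface Initializer {
        void run() throws Exception;
    }

    /** Runs an initializer, wrapping failures except OutOfMemoryError. */
    public static void initOrFail(Initializer init) {
        try {
            init.run();
        } catch (Throwable t) {
            if (t instanceof OutOfMemoryError) {
                // Don't allocate a new exception when the heap is already exhausted.
                throw (OutOfMemoryError) t;
            }
            throw new RuntimeException("Initialization failed: " + t, t);
        }
    }
}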

Example 43 with OperatorDesc

Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

The class MapredLocalTask, method initializeOperators.

private void initializeOperators(Map<FetchOperator, JobConf> fetchOpJobConfMap) throws HiveException {
    for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : work.getAliasToWork().entrySet()) {
        LOG.debug("initializeOperators: " + entry.getKey() + ", children = " + entry.getValue().getChildOperators());
    }
    // this mapper operator is used to initialize all the operators
    for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
        if (entry.getValue() == null) {
            continue;
        }
        JobConf jobClone = new JobConf(job);
        TableScanOperator ts = (TableScanOperator) work.getAliasToWork().get(entry.getKey());
        // push down projections
        ColumnProjectionUtils.appendReadColumns(jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
        // push down filters
        HiveInputFormat.pushFilters(jobClone, ts, null);
        AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().isTranscationalTable(), ts.getConf().getAcidOperationalProperties());
        AcidUtils.setValidWriteIdList(jobClone, ts.getConf());
        // create a fetch operator
        FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
        fetchOpJobConfMap.put(fetchOp, jobClone);
        fetchOperators.put(entry.getKey(), fetchOp);
        l4j.info("fetchoperator for " + entry.getKey() + " created");
    }
    // initialize all forward operators
    for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
        // get the forward op
        String alias = entry.getKey();
        Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
        // put the exe context into all the operators
        forwardOp.passExecContext(execContext);
        // All the operators need to be initialized before process
        FetchOperator fetchOp = entry.getValue();
        JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
        if (jobConf == null) {
            jobConf = job;
        }
        // initialize the forward operator
        ObjectInspector objectInspector = fetchOp.getOutputObjectInspector();
        forwardOp.initialize(jobConf, new ObjectInspector[] { objectInspector });
        l4j.info("fetchoperator for " + entry.getKey() + " initialized");
    }
}
Also used : FetchOperator(org.apache.hadoop.hive.ql.exec.FetchOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) Map(java.util.Map) HashMap(java.util.HashMap) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JobConf(org.apache.hadoop.mapred.JobConf)
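
Note how initializeOperators clones the JobConf once per alias before pushing down projections and filters, so per-table settings cannot leak between fetch operators. A minimal sketch of the same isolation idiom using a plain Hadoop Configuration copy; the property keys are hypothetical, not real Hive settings:

import org.apache.hadoop.conf.Configuration;

public class PerTableConf {

    /** Returns a per-table copy of the base conf with table-specific overrides. */
    public static Configuration cloneForTable(Configuration base, String tableName, String readColumnIds) {
        // Copy-construct so mutations stay local to this table's reader,
        // mirroring `new JobConf(job)` in initializeOperators above.
        Configuration perTable = new Configuration(base);
        perTable.set("example.read.table", tableName);          // hypothetical key
        perTable.set("example.read.column.ids", readColumnIds); // hypothetical key
        return perTable;
    }
}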

Example 44 with OperatorDesc

Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

The class MapOperator, method setChildren.

public void setChildren(Configuration hconf) throws Exception {
    List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
    Map<String, Configuration> tableNameToConf = cloneConfsForNestedColPruning(hconf);
    Map<TableDesc, StructObjectInspector> convertedOI = getConvertedOI(tableNameToConf);
    for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
        Path onefile = entry.getKey();
        List<String> aliases = entry.getValue();
        PartitionDesc partDesc = conf.getPathToPartitionInfo().get(onefile);
        TableDesc tableDesc = partDesc.getTableDesc();
        Configuration newConf = tableNameToConf.get(tableDesc.getTableName());
        for (String alias : aliases) {
            Operator<? extends OperatorDesc> op = conf.getAliasToWork().get(alias);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Adding alias " + alias + " to work list for file " + onefile);
            }
            Map<Operator<?>, MapOpCtx> contexts = opCtxMap.get(onefile.toString());
            if (contexts == null) {
                opCtxMap.put(onefile.toString(), contexts = new LinkedHashMap<Operator<?>, MapOpCtx>());
            }
            if (contexts.containsKey(op)) {
                continue;
            }
            MapOpCtx context = new MapOpCtx(alias, op, partDesc);
            StructObjectInspector tableRowOI = convertedOI.get(partDesc.getTableDesc());
            contexts.put(op, initObjectInspector(newConf, context, tableRowOI));
            if (!children.contains(op)) {
                op.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(1));
                op.getParentOperators().add(this);
                children.add(op);
            }
        }
    }
    initOperatorContext(children);
    // we found all the operators that we are supposed to process.
    setChildOperators(children);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) HashMap(java.util.HashMap) Map(java.util.Map) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
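
The interesting structure in setChildren is the two-level map (file path to operator to context) whose inner maps are created lazily on first use, plus the containsKey guard that keeps an operator from being registered twice for the same file. A minimal, self-contained sketch of that nested-map idiom; the paths, operator names, and integer contexts are placeholders:

import java.util.LinkedHashMap;
import java.util.Map;

public class NestedMapDemo {
    public static void main(String[] args) {
        // file path -> (operator name -> context id), insertion-ordered like opCtxMap
        Map<String, Map<String, Integer>> opCtxMap = new LinkedHashMap<>();
        String[][] rows = {
            { "/warehouse/t/p=1", "TS_0" },
            { "/warehouse/t/p=1", "TS_0" },   // duplicate operator: skipped below
            { "/warehouse/t/p=2", "TS_1" },
        };
        int nextCtx = 0;
        for (String[] row : rows) {
            // Lazily create the inner map, like opCtxMap.put(...) on first sight of a path.
            Map<String, Integer> contexts =
                opCtxMap.computeIfAbsent(row[0], p -> new LinkedHashMap<>());
            if (contexts.containsKey(row[1])) {
                continue;  // mirrors the containsKey guard in setChildren
            }
            contexts.put(row[1], nextCtx++);
        }
        System.out.println(opCtxMap);  // {/warehouse/t/p=1={TS_0=0}, /warehouse/t/p=2={TS_1=1}}
    }
}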

Example 45 with OperatorDesc

Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

The class OperatorFactory, method getVectorOperator.

public static <T extends OperatorDesc> Operator<T> getVectorOperator(Class<? extends Operator<?>> opClass, CompilationOpContext cContext, T conf, VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
    Constructor<? extends Operator<?>> constructor;
    try {
        constructor = opClass.getDeclaredConstructor(CompilationOpContext.class, OperatorDesc.class, VectorizationContext.class, VectorDesc.class);
    } catch (Exception e) {
        e.printStackTrace();
        throw new HiveException("Constructor " + opClass.getSimpleName() + "(CompilationOpContext, OperatorDesc, VectorizationContext, VectorDesc) not found", e);
    }
    try {
        vectorDesc.setVectorOp(opClass);
        Operator<T> op = (Operator<T>) constructor.newInstance(cContext, conf, vContext, vectorDesc);
        return op;
    } catch (Exception e) {
        e.printStackTrace();
        throw new HiveException("Error encountered calling constructor " + opClass.getSimpleName() + "(CompilationOpContext, OperatorDesc, VectorizationContext, VectorDesc)", e);
    }
}
Also used : SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) VectorSparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSparkPartitionPruningSinkOperator) VectorFilterOperator(org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator) VectorReduceSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator) VectorGroupByOperator(org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorAppMasterEventOperator(org.apache.hadoop.hive.ql.exec.vector.VectorAppMasterEventOperator) VectorSelectOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator) VectorSMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator) VectorPTFOperator(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator) VectorFileSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator) VectorSparkHashTableSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSparkHashTableSinkOperator) VectorLimitOperator(org.apache.hadoop.hive.ql.exec.vector.VectorLimitOperator) VectorReduceSinkCommonOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) AbstractVectorDesc(org.apache.hadoop.hive.ql.plan.AbstractVectorDesc) VectorDesc(org.apache.hadoop.hive.ql.plan.VectorDesc) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
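
getVectorOperator builds operators reflectively: it looks up a declared constructor by its exact parameter types and instantiates through it, converting lookup and invocation failures into a checked exception. A minimal, self-contained sketch of that reflective-factory idiom; Widget and FancyWidget are hypothetical stand-ins for the operator hierarchy:

import java.lang.reflect.Constructor;

public class ReflectiveFactory {

    // Hypothetical stand-ins for the operator class hierarchy.
    public static class Widget {
        final String name;
        public Widget(String name) { this.name = name; }
    }
    public static class FancyWidget extends Widget {
        public FancyWidget(String name) { super(name); }
    }

    /** Instantiates any Widget subclass that exposes a (String) constructor. */
    public static Widget create(Class<? extends Widget> cls, String name) {
        try {
            // Look up the constructor by its exact parameter types, as getVectorOperator does.
            Constructor<? extends Widget> ctor = cls.getDeclaredConstructor(String.class);
            return ctor.newInstance(name);
        } catch (ReflectiveOperationException e) {
            throw new IllegalStateException(
                "Constructor " + cls.getSimpleName() + "(String) not found or failed", e);
        }
    }

    public static void main(String[] args) {
        Widget w = create(FancyWidget.class, "demo");
        System.out.println(w.getClass().getSimpleName());  // FancyWidget
    }
}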

Aggregations

OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 87 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 70 usages
ArrayList (java.util.ArrayList): 50 usages
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 44 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 41 usages
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 36 usages
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 31 usages
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 30 usages
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 27 usages
Path (org.apache.hadoop.fs.Path): 21 usages
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 21 usages
LinkedHashMap (java.util.LinkedHashMap): 18 usages
Serializable (java.io.Serializable): 17 usages
Task (org.apache.hadoop.hive.ql.exec.Task): 17 usages
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 17 usages
HashMap (java.util.HashMap): 16 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 16 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 16 usages
List (java.util.List): 15 usages
Map (java.util.Map): 14 usages