Search in sources :

Example 71 with OperatorDesc

use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

the class ExecMapper method configure.

/**
 * Sets up this mapper for the given job: creates the execution context, builds and
 * initializes the root map operator from the job's {@code MapWork}, and, when map-side
 * local work is present, initializes its dummy parent operators.
 *
 * @param job the job configuration this mapper runs under
 * @throws OutOfMemoryError rethrown unchanged if raised during initialization
 * @throws RuntimeException wrapping any other failure raised during initialization
 */
@Override
public void configure(JobConf job) {
    execContext = new ExecMapperContext(job);
    // Classpath logging is best-effort: any failure (for instance a class loader that is
    // not a URLClassLoader) is reported at info level and otherwise ignored.
    try {
        URLClassLoader confLoader = (URLClassLoader) job.getClassLoader();
        l4j.info("conf classpath = " + Arrays.asList(confLoader.getURLs()));
        URLClassLoader threadLoader = (URLClassLoader) Thread.currentThread().getContextClassLoader();
        l4j.info("thread classpath = " + Arrays.asList(threadLoader.getURLs()));
    } catch (Exception e) {
        l4j.info("cannot get classpath: " + e.getMessage());
    }
    setDone(false);
    try {
        jc = job;
        execContext.setJc(jc);

        // Pick the vectorized or row-mode map operator according to the plan.
        MapWork mapWork = Utilities.getMapWork(job);
        CompilationOpContext opContext = new CompilationOpContext();
        mo = mapWork.getVectorMode() ? new VectorMapOperator(opContext) : new MapOperator(opContext);
        mo.setConf(mapWork);

        // Initialize the map operator, attach its children and log the resulting tree.
        mo.initialize(job, null);
        mo.setChildren(job);
        l4j.info(mo.dump(0));

        // Hand the (possibly null) local work to the execution context before the
        // operator receives that context.
        localWork = mapWork.getMapRedLocalWork();
        execContext.setLocalWork(localWork);
        MapredContext.init(true, new JobConf(jc));
        mo.passExecContext(execContext);
        mo.initializeLocalWork(jc);
        mo.initializeMapOperator(jc);

        if (localWork != null) {
            // Mapjoin only: every dummy parent operator gets the execution context
            // and is initialized.
            l4j.info("Initializing dummy operator");
            for (Operator<? extends OperatorDesc> dummyParent : localWork.getDummyParentOp()) {
                dummyParent.passExecContext(execContext);
                dummyParent.initialize(jc, null);
            }
        }
    } catch (Throwable t) {
        abort = true;
        if (t instanceof OutOfMemoryError) {
            // Already out of memory: rethrow the same instance instead of allocating a wrapper.
            throw (OutOfMemoryError) t;
        }
        throw new RuntimeException("Map operator initialization failed", t);
    }
}
Also used : MapOperator(org.apache.hadoop.hive.ql.exec.MapOperator) VectorMapOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) VectorMapOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator) IOException(java.io.IOException) MapOperator(org.apache.hadoop.hive.ql.exec.MapOperator) VectorMapOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator) AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 72 with OperatorDesc

use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

the class HashTableLoader method loadDirectly.

/**
 * Fills {@code mapJoinTables} by forwarding the direct-fetch operators registered for
 * {@code joinOp} into a temporary hash-table sink.
 *
 * <p>Does nothing when no direct-fetch operators are registered for {@code joinOp}.
 * Only slots whose corresponding sink parent operator is non-null are written; the array
 * returned by the sink is cleared afterwards.
 *
 * @param mapJoinTables destination array, indexed by the sink's parent position
 * @param inputFileName input file name handed to the local task's {@code startForward}
 * @throws Exception if the local task or the sink fails
 */
private void loadDirectly(MapJoinTableContainer[] mapJoinTables, String inputFileName) throws Exception {
    MapredLocalWork work = context.getLocalWork();
    List<Operator<?>> fetchOps = work.getDirectFetchOp().get(joinOp);
    if (CollectionUtils.isEmpty(fetchOps)) {
        return;
    }

    JobConf jobConf = new JobConf(hconf);
    MapredLocalTask task = new MapredLocalTask(work, jobConf, false);

    // Wire a temporary sink underneath every non-null fetch operator.
    HashTableSinkOperator tempSink = new TemporaryHashSinkOperator(new CompilationOpContext(), desc);
    List<Operator<? extends OperatorDesc>> sinkParents =
            new ArrayList<Operator<? extends OperatorDesc>>(fetchOps);
    tempSink.setParentOperators(sinkParents);
    for (Operator<?> fetchOp : fetchOps) {
        if (fetchOp == null) {
            continue;
        }
        fetchOp.setChildOperators(Arrays.<Operator<? extends OperatorDesc>>asList(tempSink));
    }

    task.setExecContext(context);
    task.startForward(inputFileName);

    // Copy out the tables that have a real parent, then clear the sink-provided array.
    MapJoinTableContainer[] loaded = tempSink.getMapJoinTables();
    for (int pos = 0; pos < tempSink.getNumParent(); pos++) {
        if (tempSink.getParentOperators().get(pos) != null) {
            mapJoinTables[pos] = loaded[pos];
        }
    }
    Arrays.fill(loaded, null);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) HashTableSinkOperator(org.apache.hadoop.hive.ql.exec.HashTableSinkOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) TemporaryHashSinkOperator(org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator) HashTableSinkOperator(org.apache.hadoop.hive.ql.exec.HashTableSinkOperator) TemporaryHashSinkOperator(org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 73 with OperatorDesc

use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

the class OperatorFactory method getAndMakeChild.

/**
 * Creates the operator matching {@code conf}, assigns it the given row schema, and links
 * it as a child of every operator in {@code oplist}.
 *
 * <p>If {@code oplist} is empty, the new operator is returned without any parent or
 * child links being set.
 *
 * @param ctx compilation context passed to {@code get} when creating the operator
 * @param conf descriptor whose runtime class selects the operator; also set as its conf
 * @param rwsch schema set on the new operator
 * @param oplist operators that become the parents of the new operator
 * @return the newly created operator
 */
public static <T extends OperatorDesc> Operator<T> getAndMakeChild(CompilationOpContext ctx, T conf, RowSchema rwsch, Operator[] oplist) {
    Operator<T> created = get(ctx, (Class<T>) conf.getClass());
    created.setConf(conf);
    created.setSchema(rwsch);
    if (oplist.length == 0) {
        return created;
    }
    // For each given operator: register the new operator as its child and remember it
    // as a parent of the new operator.
    List<Operator<? extends OperatorDesc>> parents = new ArrayList<Operator<? extends OperatorDesc>>();
    for (int i = 0; i < oplist.length; i++) {
        Operator parentOp = oplist[i];
        List<Operator> siblings = parentOp.getChildOperators();
        siblings.add(created);
        parentOp.setChildOperators(siblings);
        parents.add(parentOp);
    }
    created.setParentOperators(parents);
    return created;
}
Also used : SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) VectorSparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSparkPartitionPruningSinkOperator) VectorFilterOperator(org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator) VectorReduceSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator) VectorGroupByOperator(org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorAppMasterEventOperator(org.apache.hadoop.hive.ql.exec.vector.VectorAppMasterEventOperator) VectorSelectOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator) VectorSMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator) VectorPTFOperator(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator) VectorFileSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator) VectorSparkHashTableSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSparkHashTableSinkOperator) VectorLimitOperator(org.apache.hadoop.hive.ql.exec.vector.VectorLimitOperator) VectorReduceSinkCommonOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator) ArrayList(java.util.ArrayList) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 74 with OperatorDesc

use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

the class OperatorFactory method getAndMakeChild.

/**
 * Creates the operator matching {@code conf} and links it as a child of {@code oplist0}
 * and of every operator in {@code oplist}.
 *
 * <p>The compilation context for the new operator is taken from {@code oplist0}.
 *
 * @param conf descriptor whose runtime class selects the operator; also set as its conf
 * @param oplist0 first parent operator; supplies the compilation context
 * @param oplist any further parent operators
 * @return the newly created operator, with {@code oplist0} followed by {@code oplist}
 *         as its parents
 */
public static <T extends OperatorDesc> Operator<T> getAndMakeChild(T conf, Operator oplist0, Operator... oplist) {
    Operator<T> created = get(oplist0.getCompilationOpContext(), (Class<T>) conf.getClass());
    created.setConf(conf);

    List<Operator<? extends OperatorDesc>> parents = new ArrayList<Operator<? extends OperatorDesc>>();

    // The mandatory first parent.
    List<Operator> siblings = oplist0.getChildOperators();
    siblings.add(created);
    oplist0.setChildOperators(siblings);
    parents.add(oplist0);

    // The optional remaining parents, in the order given.
    for (Operator extraParent : oplist) {
        siblings = extraParent.getChildOperators();
        siblings.add(created);
        extraParent.setChildOperators(siblings);
        parents.add(extraParent);
    }

    created.setParentOperators(parents);
    return created;
}
Also used : SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) VectorSparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSparkPartitionPruningSinkOperator) VectorFilterOperator(org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator) VectorReduceSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator) VectorGroupByOperator(org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorAppMasterEventOperator(org.apache.hadoop.hive.ql.exec.vector.VectorAppMasterEventOperator) VectorSelectOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator) VectorSMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator) VectorPTFOperator(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator) VectorFileSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator) VectorSparkHashTableSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSparkHashTableSinkOperator) VectorLimitOperator(org.apache.hadoop.hive.ql.exec.vector.VectorLimitOperator) VectorReduceSinkCommonOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator) ArrayList(java.util.ArrayList) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 75 with OperatorDesc

use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

the class OperatorFactory method getAndMakeChild.

/**
 * Creates the operator matching {@code conf} and links it as a child of every operator
 * in {@code oplist}.
 *
 * <p>If {@code oplist} is empty, the new operator is returned without any parent or
 * child links being set. The new operator is appended directly to the list returned by
 * each parent's {@code getChildOperators()}.
 *
 * @param cContext compilation context passed to {@code get} when creating the operator
 * @param conf descriptor whose runtime class selects the operator; also set as its conf
 * @param oplist operators that become the parents of the new operator
 * @return the newly created operator
 */
public static <T extends OperatorDesc> Operator<T> getAndMakeChild(CompilationOpContext cContext, T conf, List<Operator<? extends OperatorDesc>> oplist) {
    Operator<T> created = get(cContext, (Class<T>) conf.getClass());
    created.setConf(conf);
    if (oplist.isEmpty()) {
        return created;
    }
    // For each given operator: append the new operator to its child list and remember
    // it as a parent of the new operator.
    List<Operator<? extends OperatorDesc>> parents = new ArrayList<Operator<? extends OperatorDesc>>();
    for (Operator parentOp : oplist) {
        List<Operator> siblings = parentOp.getChildOperators();
        siblings.add(created);
        parents.add(parentOp);
    }
    created.setParentOperators(parents);
    return created;
}
Also used : SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) VectorSparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSparkPartitionPruningSinkOperator) VectorFilterOperator(org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator) VectorReduceSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator) VectorGroupByOperator(org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorAppMasterEventOperator(org.apache.hadoop.hive.ql.exec.vector.VectorAppMasterEventOperator) VectorSelectOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator) VectorSMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator) VectorPTFOperator(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator) VectorFileSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator) VectorSparkHashTableSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSparkHashTableSinkOperator) VectorLimitOperator(org.apache.hadoop.hive.ql.exec.vector.VectorLimitOperator) VectorReduceSinkCommonOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator) ArrayList(java.util.ArrayList) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Aggregations

OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)87 Operator (org.apache.hadoop.hive.ql.exec.Operator)70 ArrayList (java.util.ArrayList)50 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)44 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)41 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)36 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)31 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)30 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)27 Path (org.apache.hadoop.fs.Path)21 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)21 LinkedHashMap (java.util.LinkedHashMap)18 Serializable (java.io.Serializable)17 Task (org.apache.hadoop.hive.ql.exec.Task)17 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)17 HashMap (java.util.HashMap)16 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)16 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)16 List (java.util.List)15 Map (java.util.Map)14