Example 26 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From the class SerializationUtilities, method cloneOperatorTree:

/**
   * Clones an operator tree by round-tripping it through plan
   * serialization. Expensive; do not use unless necessary.
   * @param roots The roots of the tree to clone.
   * @return The cloned tree roots.
   */
public static List<Operator<?>> cloneOperatorTree(List<Operator<?>> roots) {
    ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
    CompilationOpContext ctx = roots.isEmpty() ? null : roots.get(0).getCompilationOpContext();
    serializePlan(roots, baos, true);
    @SuppressWarnings("unchecked") List<Operator<?>> result = deserializePlan(new ByteArrayInputStream(baos.toByteArray()), roots.getClass(), true);
    // Restore the shared CompilationOpContext on every cloned operator,
    // breadth-first; the context does not survive serialization.
    LinkedList<Operator<?>> newOps = new LinkedList<>(result);
    while (!newOps.isEmpty()) {
        Operator<?> newOp = newOps.poll();
        newOp.setCompilationOpContext(ctx);
        List<Operator<?>> children = newOp.getChildOperators();
        if (children != null) {
            newOps.addAll(children);
        }
    }
    return result;
}
Also used: VectorFileSinkOperator(org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator) ByteArrayInputStream(java.io.ByteArrayInputStream) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) ByteArrayOutputStream(java.io.ByteArrayOutputStream) LinkedList(java.util.LinkedList)
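
As a usage sketch: a minimal, hedged example of calling cloneOperatorTree on a trivial one-operator tree. The class name and the scenario are invented for illustration; the API calls (OperatorFactory.get, setConf, getCompilationOpContext, cloneOperatorTree) all appear in the snippets on this page.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.plan.SelectDesc;

public class CloneTreeSketch {
    public static void main(String[] args) {
        // A trivial one-operator "tree"; a real caller would pass plan roots.
        CompilationOpContext ctx = new CompilationOpContext();
        Operator<SelectDesc> root = OperatorFactory.get(ctx, SelectDesc.class);
        root.setConf(new SelectDesc());
        List<Operator<?>> roots = Arrays.<Operator<?>>asList(root);
        List<Operator<?>> cloned = SerializationUtilities.cloneOperatorTree(roots);
        // The clones are new objects, but they share the original context,
        // which cloneOperatorTree restored by hand after deserialization.
        assert cloned.get(0) != root;
        assert cloned.get(0).getCompilationOpContext() == ctx;
    }
}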

Example 27 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From the class BaseScalarUdfTest, method testUdf:

/**
   * Drives the test. It takes the data from getBaseTable() and feeds it
   * through a SELECT operator with a COLLECT operator attached afterward.
   * Each row produced by the COLLECT operator is compared to
   * getExpectedResult(); if every row matches, the method completes
   * without an assertion failure.
   * @throws HiveException
   */
public final void testUdf() throws HiveException {
    InspectableObject[] data = getBaseTable();
    List<ExprNodeDesc> expressionList = getExpressionList();
    SelectDesc selectCtx = new SelectDesc(expressionList, OperatorTestUtils.createOutputColumnNames(expressionList));
    // SELECT operator that evaluates the expressions under test.
    Operator<SelectDesc> op = OperatorFactory.get(new CompilationOpContext(), SelectDesc.class);
    op.setConf(selectCtx);
    // COLLECT operator attached as the child, capturing the SELECT's output rows.
    CollectDesc cd = new CollectDesc(Integer.valueOf(10));
    CollectOperator cdop = (CollectOperator) OperatorFactory.getAndMakeChild(cd, op);
    op.initialize(new JobConf(OperatorTestUtils.class), new ObjectInspector[] { data[0].oi });
    OperatorTestUtils.assertResults(op, cdop, data, getExpectedResult());
}
Also used: InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) CollectDesc(org.apache.hadoop.hive.ql.plan.CollectDesc) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) CollectOperator(org.apache.hadoop.hive.ql.exec.CollectOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) JobConf(org.apache.hadoop.mapred.JobConf)
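
A hypothetical concrete subclass, to show how the three hooks the driver calls might be filled in. The method signatures are inferred from how testUdf() uses them, the single-column constant scenario is invented, and the class is assumed to sit next to BaseScalarUdfTest:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ConstantExprTest extends BaseScalarUdfTest {

    // Row shape shared by input and expected output: one string column.
    private static final ObjectInspector ROW_OI =
        ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("col1"),
            Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaStringObjectInspector));

    @Override
    public InspectableObject[] getBaseTable() {
        // A single input row; its value is irrelevant to a constant expression.
        return new InspectableObject[] {
            new InspectableObject(Arrays.asList("ignored"), ROW_OI) };
    }

    @Override
    public List<ExprNodeDesc> getExpressionList() {
        // SELECT a string constant.
        return Arrays.<ExprNodeDesc>asList(new ExprNodeConstantDesc("hello"));
    }

    @Override
    public InspectableObject[] getExpectedResult() {
        // One output row holding the constant.
        return new InspectableObject[] {
            new InspectableObject(Arrays.asList("hello"), ROW_OI) };
    }
}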

Example 28 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From the class HashTableLoader, method loadDirectly:

private void loadDirectly(MapJoinTableContainer[] mapJoinTables, String inputFileName) throws Exception {
    MapredLocalWork localWork = context.getLocalWork();
    List<Operator<?>> directWorks = localWork.getDirectFetchOp().get(joinOp);
    if (directWorks == null || directWorks.isEmpty()) {
        return;
    }
    JobConf job = new JobConf(hconf);
    MapredLocalTask localTask = new MapredLocalTask(localWork, job, false);
    // Temporary sink, created at runtime with a fresh CompilationOpContext,
    // that accumulates the hash tables fed by the direct-fetch operators.
    HashTableSinkOperator sink = new TemporaryHashSinkOperator(new CompilationOpContext(), desc);
    sink.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(directWorks));
    for (Operator<?> operator : directWorks) {
        if (operator != null) {
            operator.setChildOperators(Arrays.<Operator<? extends OperatorDesc>>asList(sink));
        }
    }
    localTask.setExecContext(context);
    localTask.startForward(inputFileName);
    MapJoinTableContainer[] tables = sink.getMapJoinTables();
    // Hand each table built by the sink back to the caller, but only for
    // parents that are actually wired up.
    for (int i = 0; i < sink.getNumParent(); i++) {
        if (sink.getParentOperators().get(i) != null) {
            mapJoinTables[i] = tables[i];
        }
    }
    // Clear the sink's references so the containers are owned by the caller.
    Arrays.fill(tables, null);
}
Also used: MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) HashTableSinkOperator(org.apache.hadoop.hive.ql.exec.HashTableSinkOperator) TemporaryHashSinkOperator(org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator) MapredLocalTask(org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
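
The notable move above is the two-sided hand-wiring of the temporary sink under each direct-fetch operator. A stripped-down sketch of just that wiring, with plain SELECT operators standing in for the real fetch side and hash-table sink (class name and scenario invented):

import java.util.ArrayList;
import java.util.Arrays;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;

public class WiringSketch {
    public static void main(String[] args) {
        CompilationOpContext ctx = new CompilationOpContext();
        Operator<SelectDesc> fetch = OperatorFactory.get(ctx, SelectDesc.class);
        Operator<SelectDesc> sink = OperatorFactory.get(ctx, SelectDesc.class);
        // Same pattern as loadDirectly(): the sink records its parents, and
        // each parent is rewired so the sink is its only child.
        sink.setParentOperators(
            new ArrayList<Operator<? extends OperatorDesc>>(Arrays.asList(fetch)));
        fetch.setChildOperators(Arrays.<Operator<? extends OperatorDesc>>asList(sink));
    }
}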

Example 29 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From the class ExecMapper, method configure:

@Override
public void configure(JobConf job) {
    execContext = new ExecMapperContext(job);
    // Log the configured and thread classpaths up front; useful when
    // debugging class-loading problems.
    try {
        l4j.info("conf classpath = " + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
        l4j.info("thread classpath = " + Arrays.asList(((URLClassLoader) Thread.currentThread().getContextClassLoader()).getURLs()));
    } catch (Exception e) {
        l4j.info("cannot get classpath: " + e.getMessage());
    }
    setDone(false);
    try {
        jc = job;
        execContext.setJc(jc);
        // create map and fetch operators
        MapWork mrwork = Utilities.getMapWork(job);
        CompilationOpContext runtimeCtx = new CompilationOpContext();
        if (mrwork.getVectorMode()) {
            mo = new VectorMapOperator(runtimeCtx);
        } else {
            mo = new MapOperator(runtimeCtx);
        }
        mo.setConf(mrwork);
        // initialize map operator
        mo.initialize(job, null);
        mo.setChildren(job);
        l4j.info(mo.dump(0));
        // initialize map local work
        localWork = mrwork.getMapRedLocalWork();
        execContext.setLocalWork(localWork);
        MapredContext.init(true, new JobConf(jc));
        mo.passExecContext(execContext);
        mo.initializeLocalWork(jc);
        mo.initializeMapOperator(jc);
        if (localWork == null) {
            return;
        }
        // The following code is for mapjoin:
        // initialize all the dummy parent operators.
        l4j.info("Initializing dummy operator");
        List<Operator<? extends OperatorDesc>> dummyOps = localWork.getDummyParentOp();
        for (Operator<? extends OperatorDesc> dummyOp : dummyOps) {
            dummyOp.passExecContext(execContext);
            dummyOp.initialize(jc, null);
        }
    } catch (Throwable e) {
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Map operator initialization failed", e);
        }
    }
}
Also used: Operator(org.apache.hadoop.hive.ql.exec.Operator) AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) MapOperator(org.apache.hadoop.hive.ql.exec.MapOperator) VectorMapOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator) IOException(java.io.IOException) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
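
The one decision configure() makes up front is vectorized versus row-mode execution. A minimal sketch of just that branch, factored into a helper (the helper and class names are invented; AbstractMapOperator as the common supertype is suggested by the imports above):

import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.AbstractMapOperator;
import org.apache.hadoop.hive.ql.exec.MapOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator;
import org.apache.hadoop.hive.ql.plan.MapWork;

public class MapOperatorChoice {
    // Mirrors the branch in configure(): a fresh CompilationOpContext per
    // task, then a vectorized or row-mode map operator depending on the plan.
    static AbstractMapOperator newMapOperator(MapWork mrwork) {
        CompilationOpContext runtimeCtx = new CompilationOpContext();
        return mrwork.getVectorMode()
                ? new VectorMapOperator(runtimeCtx)
                : new MapOperator(runtimeCtx);
    }
}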

Aggregations

CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 40 usages
ArrayList (java.util.ArrayList): 25 usages
JobConf (org.apache.hadoop.mapred.JobConf): 12 usages
FakeCaptureOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator): 10 usages
GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc): 10 usages
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 10 usages
VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc): 10 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 8 usages
HashMap (java.util.HashMap): 7 usages
LinkedHashMap (java.util.LinkedHashMap): 7 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 7 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 7 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5 usages
HashSet (java.util.HashSet): 5 usages
Path (org.apache.hadoop.fs.Path): 5 usages
ByteArrayInputStream (java.io.ByteArrayInputStream): 4 usages
Set (java.util.Set): 4 usages
Configuration (org.apache.hadoop.conf.Configuration): 4 usages
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 4 usages
Test (org.junit.Test): 4 usages