
Example 31 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From class HashTableLoader, method loadDirectly:

private void loadDirectly(MapJoinTableContainer[] mapJoinTables, String inputFileName) throws Exception {
    MapredLocalWork localWork = context.getLocalWork();
    List<Operator<?>> directWorks = localWork.getDirectFetchOp().get(joinOp);
    if (directWorks == null || directWorks.isEmpty()) {
        return;
    }
    JobConf job = new JobConf(hconf);
    MapredLocalTask localTask = new MapredLocalTask(localWork, job, false);
    HashTableSinkOperator sink = new TemporaryHashSinkOperator(new CompilationOpContext(), desc);
    sink.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(directWorks));
    for (Operator<?> operator : directWorks) {
        if (operator != null) {
            operator.setChildOperators(Arrays.<Operator<? extends OperatorDesc>>asList(sink));
        }
    }
    localTask.setExecContext(context);
    localTask.startForward(inputFileName);
    MapJoinTableContainer[] tables = sink.getMapJoinTables();
    for (int i = 0; i < sink.getNumParent(); i++) {
        if (sink.getParentOperators().get(i) != null) {
            mapJoinTables[i] = tables[i];
        }
    }
    Arrays.fill(tables, null);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) VectorizationOperator(org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator) HashTableSinkOperator(org.apache.hadoop.hive.ql.exec.HashTableSinkOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) TemporaryHashSinkOperator(org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator) MapredLocalTask(org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
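
Note that the snippet creates a fresh CompilationOpContext for an operator instantiated at runtime, outside normal query compilation. A minimal sketch of what the context provides, under the assumption (based on how Operator constructors use it elsewhere in Hive) that its main job is handing out unique operator ids through a nextOperatorId() accessor:

// Sketch only; nextOperatorId() is the assumed accessor described above.
CompilationOpContext ctx = new CompilationOpContext();
// Each call hands out a fresh id, so the temporary sink's id cannot
// collide with ids already assigned to operators in the compiled plan.
int first = ctx.nextOperatorId();
int second = ctx.nextOperatorId();
assert first != second;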

Example 32 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From class ExecMapper, method configure:

@Override
public void configure(JobConf job) {
    execContext = new ExecMapperContext(job);
    // Allocate the bean at the beginning -
    try {
        l4j.info("conf classpath = " + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
        l4j.info("thread classpath = " + Arrays.asList(((URLClassLoader) Thread.currentThread().getContextClassLoader()).getURLs()));
    } catch (Exception e) {
        l4j.info("cannot get classpath: " + e.getMessage());
    }
    setDone(false);
    try {
        jc = job;
        execContext.setJc(jc);
        // create map and fetch operators
        MapWork mrwork = Utilities.getMapWork(job);
        CompilationOpContext runtimeCtx = new CompilationOpContext();
        if (mrwork.getVectorMode()) {
            mo = new VectorMapOperator(runtimeCtx);
        } else {
            mo = new MapOperator(runtimeCtx);
        }
        mo.setConf(mrwork);
        // initialize map operator
        mo.initialize(job, null);
        mo.setChildren(job);
        l4j.info(mo.dump(0));
        // initialize map local work
        localWork = mrwork.getMapRedLocalWork();
        execContext.setLocalWork(localWork);
        MapredContext.init(true, new JobConf(jc));
        mo.passExecContext(execContext);
        mo.initializeLocalWork(jc);
        mo.initializeMapOperator(jc);
        if (localWork == null) {
            return;
        }
        // The following code is for mapjoin
        // initialize all the dummy ops
        l4j.info("Initializing dummy operator");
        List<Operator<? extends OperatorDesc>> dummyOps = localWork.getDummyParentOp();
        for (Operator<? extends OperatorDesc> dummyOp : dummyOps) {
            dummyOp.passExecContext(execContext);
            dummyOp.initialize(jc, null);
        }
    } catch (Throwable e) {
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Map operator initialization failed", e);
        }
    }
}
Also used : MapOperator(org.apache.hadoop.hive.ql.exec.MapOperator) VectorMapOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) IOException(java.io.IOException) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
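
A single CompilationOpContext is shared by whichever map operator flavor the plan requires, so all operator ids for the task come from one sequence. A minimal sketch of that branch in isolation, assuming the same constructors as above and that AbstractMapOperator (imported by this class) is the common supertype:

CompilationOpContext runtimeCtx = new CompilationOpContext();
AbstractMapOperator mapOp = mrwork.getVectorMode()
    // vectorized path: processes batches of rows
    ? new VectorMapOperator(runtimeCtx)
    // row-mode path: processes one row at a time
    : new MapOperator(runtimeCtx);
mapOp.setConf(mrwork);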

Example 33 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From class HashTableLoader, method loadDirectly:

private void loadDirectly(MapJoinTableContainer[] mapJoinTables, String inputFileName) throws Exception {
    MapredLocalWork localWork = context.getLocalWork();
    List<Operator<?>> directWorks = localWork.getDirectFetchOp().get(joinOp);
    if (CollectionUtils.isEmpty(directWorks)) {
        return;
    }
    JobConf job = new JobConf(hconf);
    MapredLocalTask localTask = new MapredLocalTask(localWork, job, false);
    HashTableSinkOperator sink = new TemporaryHashSinkOperator(new CompilationOpContext(), desc);
    sink.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(directWorks));
    for (Operator<?> operator : directWorks) {
        if (operator != null) {
            operator.setChildOperators(Arrays.<Operator<? extends OperatorDesc>>asList(sink));
        }
    }
    localTask.setExecContext(context);
    localTask.startForward(inputFileName);
    MapJoinTableContainer[] tables = sink.getMapJoinTables();
    for (int i = 0; i < sink.getNumParent(); i++) {
        if (sink.getParentOperators().get(i) != null) {
            mapJoinTables[i] = tables[i];
        }
    }
    Arrays.fill(tables, null);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) HashTableSinkOperator(org.apache.hadoop.hive.ql.exec.HashTableSinkOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) TemporaryHashSinkOperator(org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 34 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From class TestFileSinkOperator, method getFileSink:

private FileSinkOperator getFileSink(AcidUtils.Operation writeType, boolean dynamic, long writeId) throws IOException, HiveException {
    TableDesc tableDesc = null;
    switch (writeType) {
        case DELETE:
        case UPDATE:
        case INSERT:
            tableDesc = acidTableDescriptor;
            break;
        case NOT_ACID:
            tableDesc = nonAcidTableDescriptor;
            break;
    }
    FileSinkDesc desc = null;
    if (dynamic) {
        ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>(1);
        partCols.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, PARTCOL_NAME, "a", true));
        Map<String, String> partColMap = new LinkedHashMap<String, String>(1);
        partColMap.put(PARTCOL_NAME, null);
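        // Hedged reading of this four-arg constructor (tbl, partSpec,
        // defaultPartName, maxParts): "Sunday" is the default partition
        // name and 100 caps the number of dynamic partitions.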
        DynamicPartitionCtx dpCtx = new DynamicPartitionCtx(null, partColMap, "Sunday", 100);
        // todo: does this need the finalDestination?
        desc = new FileSinkDesc(basePath, tableDesc, false, 1, false, false, 1, 1, partCols, dpCtx, null, null, false, false);
    } else {
        desc = new FileSinkDesc(basePath, tableDesc, false);
    }
    desc.setWriteType(writeType);
    desc.setGatherStats(true);
    if (writeId > 0) {
        desc.setTableWriteId(writeId);
    }
    if (writeType != AcidUtils.Operation.NOT_ACID) {
        desc.setTableWriteId(1L);
    }
    FileSinkOperator op = (FileSinkOperator) OperatorFactory.get(new CompilationOpContext(), FileSinkDesc.class);
    op.setConf(desc);
    op.initialize(jc, new ObjectInspector[] { inspector });
    return op;
}
Also used : FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) DynamicPartitionCtx(org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LinkedHashMap(java.util.LinkedHashMap)

Example 35 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From class TestOperators, method testScriptOperatorEnvVarsProcessing:

/**
 *  When ScriptOperator runs an external script, it passes the job configuration to it
 *  as environment variables. But environment variables are subject to system limits,
 *  so the job configuration properties have to be checked first. This test verifies
 *  that behavior.
 */
public void testScriptOperatorEnvVarsProcessing() throws Throwable {
    try {
        ScriptOperator scriptOperator = new ScriptOperator(new CompilationOpContext());
        // Environment Variables name
        assertEquals("a_b_c", scriptOperator.safeEnvVarName("a.b.c"));
        assertEquals("a_b_c", scriptOperator.safeEnvVarName("a-b-c"));
        // Environment Variables short values
        assertEquals("value", scriptOperator.safeEnvVarValue("value", "name", false));
        assertEquals("value", scriptOperator.safeEnvVarValue("value", "name", true));
        // Environment Variables long values
        char[] array = new char[20 * 1024 + 1];
        Arrays.fill(array, 'a');
        String hugeEnvVar = new String(array);
        assertEquals(20 * 1024 + 1, hugeEnvVar.length());
        assertEquals(20 * 1024 + 1, scriptOperator.safeEnvVarValue(hugeEnvVar, "name", false).length());
        assertEquals(20 * 1024, scriptOperator.safeEnvVarValue(hugeEnvVar, "name", true).length());
        // Full test
        Configuration hconf = new JobConf(ScriptOperator.class);
        hconf.set("name", hugeEnvVar);
        Map<String, String> env = new HashMap<String, String>();
        HiveConf.setBoolVar(hconf, HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV, false);
        scriptOperator.addJobConfToEnvironment(hconf, env);
        assertEquals(20 * 1024 + 1, env.get("name").length());
        HiveConf.setBoolVar(hconf, HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV, true);
        scriptOperator.addJobConfToEnvironment(hconf, env);
        assertEquals(20 * 1024, env.get("name").length());
        System.out.println("Script Operator Environment Variables processing ok");
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) JobConf(org.apache.hadoop.mapred.JobConf)
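
A minimal sketch of the contract the test exercises, assuming the same ScriptOperator helpers shown above: characters that are illegal in POSIX environment variable names are rewritten to underscores, and oversized values are truncated only when HIVESCRIPTTRUNCATEENV is enabled.

ScriptOperator op = new ScriptOperator(new CompilationOpContext());
// Dots and dashes from Hadoop-style property names are not legal in env
// var names, so both are mapped to underscores (illustrative property name).
String safe = op.safeEnvVarName("mapreduce.job-name");
assert safe.equals("mapreduce_job_name");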

Aggregations

CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 44 usages
ArrayList (java.util.ArrayList): 27 usages
GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc): 12 usages
VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc): 12 usages
JobConf (org.apache.hadoop.mapred.JobConf): 12 usages
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 11 usages
FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator): 10 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 9 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 8 usages
HashMap (java.util.HashMap): 7 usages
LinkedHashMap (java.util.LinkedHashMap): 7 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 7 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 6 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5 usages
HashSet (java.util.HashSet): 5 usages
Configuration (org.apache.hadoop.conf.Configuration): 5 usages
Path (org.apache.hadoop.fs.Path): 5 usages
VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext): 5 usages
ByteArrayInputStream (java.io.ByteArrayInputStream): 4 usages
Set (java.util.Set): 4 usages