Example 6 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class TestDynamicPartitionPruner, method testMultipleSourcesOrdering2. The test marks two source vertices as processed, then delivers all expected pruning events (four for v1, three for v2) and verifies that the pruner thread completes without error.

@Test(timeout = 5000)
public void testMultipleSourcesOrdering2() throws InterruptedException, SerDeException {
    InputInitializerContext mockInitContext = mock(InputInitializerContext.class);
    doReturn(2).when(mockInitContext).getVertexNumTasks("v1");
    doReturn(3).when(mockInitContext).getVertexNumTasks("v2");
    MapWork mapWork = createMockMapWork(new TestSource("v1", 2), new TestSource("v2", 1));
    DynamicPartitionPruner pruner = new DynamicPartitionPrunerForEventTesting(mockInitContext, mapWork);
    PruneRunnable pruneRunnable = new PruneRunnable(pruner);
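    // Run the pruner on its own thread; the test thread below feeds it vertex signals and events.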
    Thread t = new Thread(pruneRunnable);
    t.start();
    try {
        pruneRunnable.start();
        InputInitializerEvent eventV1 = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
        eventV1.setSourceVertexName("v1");
        InputInitializerEvent eventV2 = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
        eventV2.setSourceVertexName("v2");
        // Mark both source vertices processed before delivering any events.
        // v1 expects 2 tasks x 2 expressions = 4 events; v2 expects 3 tasks x 1 expression = 3 events.
        pruner.processVertex("v1");
        pruner.processVertex("v2");
        pruner.addEvent(eventV1);
        pruner.addEvent(eventV1);
        pruner.addEvent(eventV1);
        pruner.addEvent(eventV1);
        pruner.addEvent(eventV2);
        pruner.addEvent(eventV2);
        pruner.addEvent(eventV2);
        pruneRunnable.awaitEnd();
        assertFalse(pruneRunnable.inError.get());
    } finally {
        t.interrupt();
        t.join();
    }
}
Also used: InputInitializerEvent (org.apache.tez.runtime.api.events.InputInitializerEvent), MapWork (org.apache.hadoop.hive.ql.plan.MapWork), InputInitializerContext (org.apache.tez.runtime.api.InputInitializerContext), Test (org.junit.Test)
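
The test relies on two small helpers defined elsewhere in TestDynamicPartitionPruner that this page does not show. A minimal sketch consistent with how they are used above; apart from TestSource(vertexName, numExpressions), start(), awaitEnd(), and inError, every name here is an assumption:

private static class TestSource {
    final String vertexName;
    final int numExpressions;   // expressions registered per source vertex

    TestSource(String vertexName, int numExpressions) {
        this.vertexName = vertexName;
        this.numExpressions = numExpressions;
    }
}

private static class PruneRunnable implements Runnable {
    // Assumed imports: java.util.concurrent.CountDownLatch,
    // java.util.concurrent.atomic.AtomicBoolean
    final DynamicPartitionPruner pruner;
    final CountDownLatch started = new CountDownLatch(1);
    final CountDownLatch ended = new CountDownLatch(1);
    final AtomicBoolean inError = new AtomicBoolean(false);

    PruneRunnable(DynamicPartitionPruner pruner) {
        this.pruner = pruner;
    }

    void start() {
        started.countDown();   // release the pruner thread
    }

    void awaitEnd() throws InterruptedException {
        ended.await();         // block until prune() has returned or failed
    }

    @Override
    public void run() {
        try {
            started.await();
            // Assumption: prune() blocks until all expected events have arrived.
            pruner.prune();
        } catch (Exception e) {
            inError.set(true);
        } finally {
            ended.countDown();
        }
    }
}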

Example 7 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class TestDynamicPartitionPruner, method createMockMapWork. The helper builds a mock MapWork whose four event-source maps (table descriptors, column names, column types, and partition-key expressions) each hold one entry per expression of every test source.

private MapWork createMockMapWork(TestSource... testSources) {
    MapWork mapWork = mock(MapWork.class);
    Map<String, List<TableDesc>> tableMap = new HashMap<>();
    Map<String, List<String>> columnMap = new HashMap<>();
    Map<String, List<String>> typeMap = new HashMap<>();
    Map<String, List<ExprNodeDesc>> exprMap = new HashMap<>();
    int count = 0;
    for (TestSource testSource : testSources) {
        for (int i = 0; i < testSource.numExpressions; i++) {
            List<TableDesc> tableDescList = tableMap.get(testSource.vertexName);
            if (tableDescList == null) {
                tableDescList = new LinkedList<>();
                tableMap.put(testSource.vertexName, tableDescList);
            }
            tableDescList.add(mock(TableDesc.class));
            List<String> columnList = columnMap.get(testSource.vertexName);
            if (columnList == null) {
                columnList = new LinkedList<>();
                columnMap.put(testSource.vertexName, columnList);
            }
            columnList.add(testSource.vertexName + "c_" + count + "_" + i);
            List<String> typeList = typeMap.get(testSource.vertexName);
            if (typeList == null) {
                typeList = new LinkedList<>();
                typeMap.put(testSource.vertexName, typeList);
            }
            typeList.add("string");
            List<ExprNodeDesc> exprNodeDescList = exprMap.get(testSource.vertexName);
            if (exprNodeDescList == null) {
                exprNodeDescList = new LinkedList<>();
                exprMap.put(testSource.vertexName, exprNodeDescList);
            }
            exprNodeDescList.add(mock(ExprNodeDesc.class));
        }
        count++;
    }
    doReturn(tableMap).when(mapWork).getEventSourceTableDescMap();
    doReturn(columnMap).when(mapWork).getEventSourceColumnNameMap();
    doReturn(exprMap).when(mapWork).getEventSourcePartKeyExprMap();
    doReturn(typeMap).when(mapWork).getEventSourceColumnTypeMap();
    return mapWork;
}
Also used: HashMap (java.util.HashMap), MapWork (org.apache.hadoop.hive.ql.plan.MapWork), List (java.util.List), LinkedList (java.util.LinkedList), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
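
The four get-or-create blocks predate Map.computeIfAbsent; on Java 8+ each collapses to a single line. A behavior-preserving sketch of the same loop, using the maps, count, and mocks declared above:

// Equivalent loop body using Map.computeIfAbsent (Java 8+).
for (TestSource testSource : testSources) {
    String v = testSource.vertexName;
    for (int i = 0; i < testSource.numExpressions; i++) {
        tableMap.computeIfAbsent(v, k -> new LinkedList<>()).add(mock(TableDesc.class));
        columnMap.computeIfAbsent(v, k -> new LinkedList<>()).add(v + "c_" + count + "_" + i);
        typeMap.computeIfAbsent(v, k -> new LinkedList<>()).add("string");
        exprMap.computeIfAbsent(v, k -> new LinkedList<>()).add(mock(ExprNodeDesc.class));
    }
    count++;
}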

Example 8 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class VectorizedColumnReaderTestBase, method initialVectorizedRowBatchCtx. The helper builds a vector-mode MapWork, attaches a VectorizedRowBatchCtx to it, and stores it in the configuration for downstream readers to pick up.

protected void initialVectorizedRowBatchCtx(Configuration conf) throws HiveException {
    MapWork mapWork = new MapWork();
    VectorizedRowBatchCtx rbCtx = new VectorizedRowBatchCtx();
    rbCtx.init(createStructObjectInspector(conf), new String[0]);
    mapWork.setVectorMode(true);
    mapWork.setVectorizedRowBatchCtx(rbCtx);
    Utilities.setMapWork(conf, mapWork);
}
Also used: VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx), MapWork (org.apache.hadoop.hive.ql.plan.MapWork)
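
Utilities.setMapWork serializes the plan into the configuration, so code that later receives the same Configuration can recover it with Utilities.getMapWork. A minimal illustrative round trip, assuming a Configuration that already carries whatever schema createStructObjectInspector expects; the assertions are not part of the test base:

Configuration conf = new Configuration();
initialVectorizedRowBatchCtx(conf);

// Read the plan back out of the configuration.
MapWork recovered = Utilities.getMapWork(conf);
assert recovered.getVectorMode();
assert recovered.getVectorizedRowBatchCtx() != null;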

Example 9 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class TestGenTezWork, method setUp. The fixture wires a TableScanOperator -> ReduceSinkOperator -> FileSinkOperator chain and creates a GenTezWork processor whose setupMapWork override simply registers the root operator under the alias "foo".

/**
   * @throws java.lang.Exception
   */
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
    // Init conf
    final HiveConf conf = new HiveConf(SemanticAnalyzer.class);
    SessionState.start(conf);
    // Init parse context
    final ParseContext pctx = new ParseContext();
    pctx.setContext(new Context(conf));
    ctx = new GenTezProcContext(conf, pctx, Collections.EMPTY_LIST, new ArrayList<Task<? extends Serializable>>(), Collections.EMPTY_SET, Collections.EMPTY_SET);
    proc = new GenTezWork(new GenTezUtils() {

        @Override
        protected void setupMapWork(MapWork mapWork, GenTezProcContext context, PrunedPartitionList partitions, TableScanOperator root, String alias) throws SemanticException {
            LinkedHashMap<String, Operator<? extends OperatorDesc>> map = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
            map.put("foo", root);
            mapWork.setAliasToWork(map);
            return;
        }
    });
    CompilationOpContext cCtx = new CompilationOpContext();
    fs = new FileSinkOperator(cCtx);
    fs.setConf(new FileSinkDesc());
    rs = new ReduceSinkOperator(cCtx);
    rs.setConf(new ReduceSinkDesc());
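    // Give the ReduceSink a minimal key-serialization descriptor.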
    TableDesc tableDesc = new TableDesc();
    tableDesc.setProperties(new Properties());
    rs.getConf().setKeySerializeInfo(tableDesc);
    ts = new TableScanOperator(cCtx);
    ts.setConf(new TableScanDesc(null));
    ts.getChildOperators().add(rs);
    rs.getParentOperators().add(ts);
    rs.getChildOperators().add(fs);
    fs.getParentOperators().add(rs);
    ctx.preceedingWork = null;
    ctx.currentRootOperator = ts;
}
Also used: Context (org.apache.hadoop.hive.ql.Context), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator), Operator (org.apache.hadoop.hive.ql.exec.Operator), FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc), ArrayList (java.util.ArrayList), TableScanDesc (org.apache.hadoop.hive.ql.plan.TableScanDesc), Properties (java.util.Properties), LinkedHashMap (java.util.LinkedHashMap), MapWork (org.apache.hadoop.hive.ql.plan.MapWork), HiveConf (org.apache.hadoop.hive.conf.HiveConf), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc), OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc), Before (org.junit.Before)
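
With the fixture wired, the test methods (not shown on this page) drive proc directly. Assuming GenTezWork follows Hive's NodeProcessor contract (org.apache.hadoop.hive.ql.lib.NodeProcessor), an invocation might look roughly like this; the exact arguments used in TestGenTezWork may differ:

// Hypothetical invocation: process(current node, traversal stack, context, prior outputs).
// Assumed imports: java.util.Stack, org.apache.hadoop.hive.ql.lib.Node.
Stack<Node> stack = new Stack<>();
stack.push(ts);
stack.push(rs);
proc.process(rs, stack, ctx, (Object[]) null);
// The overridden setupMapWork registers the root under alias "foo",
// so the generated MapWork's aliasToWork should map "foo" -> ts.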

Example 10 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class SparkMapRecordHandler, method init. The handler deserializes the MapWork from the job configuration, creates a vectorized or row-mode map operator to match the plan, wires up the map-local work, and initializes the operator tree.

@Override
public <K, V> void init(JobConf job, OutputCollector<K, V> output, Reporter reporter) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
    super.init(job, output, reporter);
    isLogInfoEnabled = LOG.isInfoEnabled();
    try {
        jc = job;
        execContext = new ExecMapperContext(jc);
        // create map and fetch operators
        MapWork mrwork = Utilities.getMapWork(job);
        CompilationOpContext runtimeCtx = new CompilationOpContext();
        if (mrwork.getVectorMode()) {
            mo = new VectorMapOperator(runtimeCtx);
        } else {
            mo = new MapOperator(runtimeCtx);
        }
        mo.setConf(mrwork);
        // initialize map operator
        mo.initialize(jc, null);
        mo.setChildren(job);
        LOG.info(mo.dump(0));
        // initialize map local work
        localWork = mrwork.getMapRedLocalWork();
        execContext.setLocalWork(localWork);
        MapredContext.init(true, new JobConf(jc));
        MapredContext.get().setReporter(reporter);
        mo.passExecContext(execContext);
        mo.initializeLocalWork(jc);
        mo.initializeMapOperator(jc);
        OperatorUtils.setChildrenCollector(mo.getChildOperators(), output);
        mo.setReporter(rp);
        if (localWork == null) {
            return;
        }
        // The following code is for map join: initialize all the dummy parent operators.
        LOG.info("Initializing dummy operator");
        List<Operator<? extends OperatorDesc>> dummyOps = localWork.getDummyParentOp();
        for (Operator<? extends OperatorDesc> dummyOp : dummyOps) {
            dummyOp.setExecContext(execContext);
            dummyOp.initialize(jc, null);
        }
    } catch (Throwable e) {
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Map operator initialization failed: " + e, e);
        }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
Also used: Operator (org.apache.hadoop.hive.ql.exec.Operator), MapOperator (org.apache.hadoop.hive.ql.exec.MapOperator), AbstractMapOperator (org.apache.hadoop.hive.ql.exec.AbstractMapOperator), VectorMapOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator), ExecMapperContext (org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext), MapWork (org.apache.hadoop.hive.ql.plan.MapWork), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), JobConf (org.apache.hadoop.mapred.JobConf), OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)
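
Two details of this handler are worth noting. The getVectorMode() check lets a single Spark record handler serve both vectorized and row-mode plans from the same serialized MapWork. And the catch block deliberately rethrows OutOfMemoryError unwrapped: constructing a new RuntimeException under memory pressure could itself fail, as the inline comment indicates.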

Aggregations

MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 65
ArrayList (java.util.ArrayList): 20
Path (org.apache.hadoop.fs.Path): 20
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 19
Operator (org.apache.hadoop.hive.ql.exec.Operator): 17
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 16
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 14
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 12
ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork): 12
Test (org.junit.Test): 12
Task (org.apache.hadoop.hive.ql.exec.Task): 11
JobConf (org.apache.hadoop.mapred.JobConf): 11
Serializable (java.io.Serializable): 10
LinkedHashMap (java.util.LinkedHashMap): 10
BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork): 10
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 9
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 9
List (java.util.List): 8
ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask): 8