Example 21 with BaseWork

Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

From class TestTezTask, method setUp:

@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
    utils = mock(DagUtils.class);
    fs = mock(FileSystem.class);
    path = mock(Path.class);
    when(path.getFileSystem(any(Configuration.class))).thenReturn(fs);
    when(utils.getTezDir(any(Path.class))).thenReturn(path);
    when(utils.createVertex(any(JobConf.class), any(BaseWork.class), any(Path.class),
            any(FileSystem.class), any(Context.class), anyBoolean(), any(TezWork.class),
            any(VertexType.class), any(Map.class))).thenAnswer(new Answer<Vertex>() {

        @Override
        public Vertex answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            return Vertex.create(((BaseWork) args[1]).getName(), mock(ProcessorDescriptor.class), 0, mock(Resource.class));
        }
    });
    when(utils.createEdge(any(JobConf.class), any(Vertex.class), any(Vertex.class),
            any(TezEdgeProperty.class), any(BaseWork.class), any(TezWork.class))).thenAnswer(new Answer<Edge>() {

        @Override
        public Edge answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            return Edge.create((Vertex) args[1], (Vertex) args[2], mock(EdgeProperty.class));
        }
    });
    work = new TezWork("", null);
    mws = new MapWork[] { new MapWork(), new MapWork() };
    rws = new ReduceWork[] { new ReduceWork(), new ReduceWork() };
    work.addAll(mws);
    work.addAll(rws);
    int i = 0;
    for (BaseWork w : work.getAllWork()) {
        w.setName("Work " + (++i));
    }
    op = mock(Operator.class);
    LinkedHashMap<String, Operator<? extends OperatorDesc>> map = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
    map.put("foo", op);
    mws[0].setAliasToWork(map);
    mws[1].setAliasToWork(map);
    LinkedHashMap<Path, ArrayList<String>> pathMap = new LinkedHashMap<>();
    ArrayList<String> aliasList = new ArrayList<String>();
    aliasList.add("foo");
    pathMap.put(new Path("foo"), aliasList);
    mws[0].setPathToAliases(pathMap);
    mws[1].setPathToAliases(pathMap);
    rws[0].setReducer(op);
    rws[1].setReducer(op);
    TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
    work.connect(mws[0], rws[0], edgeProp);
    work.connect(mws[1], rws[0], edgeProp);
    work.connect(rws[0], rws[1], edgeProp);
    task = new TezTask(utils);
    task.setWork(work);
    task.setConsole(mock(LogHelper.class));
    QueryPlan mockQueryPlan = mock(QueryPlan.class);
    doReturn(UUID.randomUUID().toString()).when(mockQueryPlan).getQueryId();
    task.setQueryPlan(mockQueryPlan);
    conf = new JobConf();
    appLr = createResource("foo.jar");
    HiveConf hiveConf = new HiveConf();
    hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
    SessionState.start(hiveConf);
    session = mock(TezClient.class);
    sessionState = mock(TezSessionState.class);
    when(sessionState.getSession()).thenReturn(session);
    when(sessionState.reopen()).thenReturn(sessionState);
    when(session.submitDAG(any(DAG.class))).thenThrow(new SessionNotRunning("")).thenReturn(mock(DAGClient.class));
}
Also used: Operator (org.apache.hadoop.hive.ql.exec.Operator), Vertex (org.apache.tez.dag.api.Vertex), Configuration (org.apache.hadoop.conf.Configuration), LogHelper (org.apache.hadoop.hive.ql.session.SessionState.LogHelper), TezEdgeProperty (org.apache.hadoop.hive.ql.plan.TezEdgeProperty), ArrayList (java.util.ArrayList), QueryPlan (org.apache.hadoop.hive.ql.QueryPlan), LinkedHashMap (java.util.LinkedHashMap), TezClient (org.apache.tez.client.TezClient), SessionNotRunning (org.apache.tez.dag.api.SessionNotRunning), FileSystem (org.apache.hadoop.fs.FileSystem), HiveConf (org.apache.hadoop.hive.conf.HiveConf), JobConf (org.apache.hadoop.mapred.JobConf), BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork), Path (org.apache.hadoop.fs.Path), Context (org.apache.hadoop.hive.ql.Context), ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork), MapWork (org.apache.hadoop.hive.ql.plan.MapWork), InvocationOnMock (org.mockito.invocation.InvocationOnMock), DAGClient (org.apache.tez.dag.api.client.DAGClient), Map (java.util.Map), HashMap (java.util.HashMap), Edge (org.apache.tez.dag.api.Edge), OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc), TezWork (org.apache.hadoop.hive.ql.plan.TezWork), VertexType (org.apache.hadoop.hive.ql.plan.TezWork.VertexType), Before (org.junit.Before)
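The setUp above wires a small diamond-shaped DAG (two MapWorks feeding rws[0], which feeds rws[1]) and stubs DagUtils so vertex and edge creation return lightweight mocks. The last stub is worth isolating: submitDAG is programmed to fail once with SessionNotRunning and then succeed, so a test built on this fixture can exercise the session reopen-and-retry path. Below is a minimal, self-contained sketch of that consecutive-stubbing pattern; the class name and main-method harness are ours, but every Mockito and Tez call appears in the fixture above.

import static org.mockito.Mockito.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.SessionNotRunning;
import org.apache.tez.dag.api.client.DAGClient;

public class ConsecutiveStubbingSketch {
    public static void main(String[] args) throws Exception {
        TezClient session = mock(TezClient.class);
        // First call fails, second call succeeds -- the same stubbing as in setUp above.
        when(session.submitDAG(any(DAG.class)))
                .thenThrow(new SessionNotRunning(""))
                .thenReturn(mock(DAGClient.class));

        DAG dag = mock(DAG.class);
        try {
            session.submitDAG(dag);
        } catch (SessionNotRunning expected) {
            // TezTask would reopen the session here and retry.
        }
        // The retry returns the mocked DAGClient instead of throwing.
        System.out.println(session.submitDAG(dag) != null); // prints: true
    }
}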

Example 22 with BaseWork

Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

From class LocalHiveSparkClient, method refreshLocalResources:

/**
 * At this point a single SparkContext is used by more than one thread, so keep this
 * method synchronized.
 *
 * This method can't remove a jar/resource from the SparkContext. That looks like an
 * issue we have to live with until multiple SparkContexts are supported in a single JVM.
 */
private synchronized void refreshLocalResources(SparkWork sparkWork, HiveConf conf) {
    // add hive-exec jar
    addJars((new JobConf(this.getClass())).getJar());
    // add aux jars
    addJars(conf.getAuxJars());
    addJars(SessionState.get() == null ? null : SessionState.get().getReloadableAuxJars());
    // add added jars
    String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDJARS, addedJars);
    addJars(addedJars);
    // add plugin module jars on demand
    // jobConf will hold all the configuration for hadoop, tez, and hive
    JobConf jobConf = new JobConf(conf);
    jobConf.set(MR_JAR_PROPERTY, "");
    for (BaseWork work : sparkWork.getAllWork()) {
        work.configureJobConf(jobConf);
    }
    addJars(jobConf.get(MR_JAR_PROPERTY));
    // add added files
    String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDFILES, addedFiles);
    addResources(addedFiles);
    // add added archives
    String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDARCHIVES, addedArchives);
    addResources(addedArchives);
}
Also used: JobConf (org.apache.hadoop.mapred.JobConf), BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)
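The method collects jar paths from several sources (the hive-exec jar, aux jars, session-added jars, and whatever each BaseWork appends to MR_JAR_PROPERTY via configureJobConf) and hands each comma-separated list to addJars. The body of addJars is not shown above; the sketch below is one plausible implementation, assuming the client holds a JavaSparkContext (the field name sc and the splitting logic are assumptions, not the Hive source).

import org.apache.spark.api.java.JavaSparkContext;

class JarRegistrationSketch {
    private final JavaSparkContext sc;

    JarRegistrationSketch(JavaSparkContext sc) {
        this.sc = sc;
    }

    // Splits a comma-separated jar list (e.g. the value accumulated in
    // MR_JAR_PROPERTY) and registers each entry with the shared SparkContext.
    void addJars(String addedJars) {
        if (addedJars == null || addedJars.trim().isEmpty()) {
            return; // nothing was registered for this source
        }
        for (String jar : addedJars.split(",")) {
            if (!jar.trim().isEmpty()) {
                sc.addJar(jar.trim());
            }
        }
    }
}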

Example 23 with BaseWork

Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

From class RemoteHiveSparkClient, method refreshLocalResources:

private void refreshLocalResources(SparkWork sparkWork, HiveConf conf) throws IOException {
    // add hive-exec jar
    addJars((new JobConf(this.getClass())).getJar());
    // add aux jars
    addJars(conf.getAuxJars());
    addJars(SessionState.get() == null ? null : SessionState.get().getReloadableAuxJars());
    // add added jars
    String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDJARS, addedJars);
    addJars(addedJars);
    // add plugin module jars on demand
    // jobConf will hold all the configuration for hadoop, tez, and hive
    JobConf jobConf = new JobConf(conf);
    jobConf.set(MR_JAR_PROPERTY, "");
    for (BaseWork work : sparkWork.getAllWork()) {
        work.configureJobConf(jobConf);
    }
    addJars(jobConf.get(MR_JAR_PROPERTY));
    // remove the location of container tokens
    conf.unset(MR_CREDENTIALS_LOCATION_PROPERTY);
    // add added files
    String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDFILES, addedFiles);
    addResources(addedFiles);
    // add added archives
    String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDARCHIVES, addedArchives);
    addResources(addedArchives);
}
Also used: JobConf (org.apache.hadoop.mapred.JobConf), BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)
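This remote variant is almost identical to the local one; the notable extra step is conf.unset(MR_CREDENTIALS_LOCATION_PROPERTY), presumably so a stale container-token path from a previous MR context is not shipped to the remote driver. A tiny demonstration of what unset does, assuming the property resolves to the standard MR key mapreduce.job.credentials.binary (that key name is an assumption, not confirmed by the snippet):

import org.apache.hadoop.conf.Configuration;

public class UnsetSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        // Hypothetical stale value; the key name is assumed.
        conf.set("mapreduce.job.credentials.binary", "/tmp/container_tokens");
        conf.unset("mapreduce.job.credentials.binary");
        System.out.println(conf.get("mapreduce.job.credentials.binary")); // prints: null
    }
}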

Example 24 with BaseWork

Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

From class SparkPlanGenerator, method generate:

public SparkPlan generate(SparkWork sparkWork) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
    SparkPlan sparkPlan = new SparkPlan(this.sc.sc());
    cloneToWork = sparkWork.getCloneToWork();
    workToTranMap.clear();
    workToParentWorkTranMap.clear();
    try {
        for (BaseWork work : sparkWork.getAllWork()) {
            perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
            SparkTran tran = generate(work, sparkWork);
            SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
            sparkPlan.addTran(tran);
            sparkPlan.connect(parentTran, tran);
            workToTranMap.put(work, tran);
            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
        }
    } finally {
        // clear all ThreadLocal cached MapWork/ReduceWork after plan generation,
        // as this may be executed in a pool thread.
        Utilities.clearWorkMap(jobConf);
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
    return sparkPlan;
}
Also used: BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)
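The loop translates each BaseWork into a SparkTran, connects it to its parent's translation, and memoizes the result in workToTranMap. For this to work, getAllWork() must yield works with parents before children. The schematic below restates the pattern with hypothetical Translator and plan types (none of these names are from Hive) to make that invariant explicit.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Hypothetical stand-ins: W plays the role of BaseWork, T the role of SparkTran.
interface Translator<W, T> {
    T translate(W work);
    List<W> parentsOf(W work);
    void connect(T parent, T child);
}

class PlanBuilderSketch {
    static <W, T> Map<W, T> build(List<W> worksParentsFirst, Translator<W, T> tr) {
        Map<W, T> workToTran = new HashMap<>();
        for (W work : worksParentsFirst) {
            T tran = tr.translate(work);
            for (W parent : tr.parentsOf(work)) {
                // Safe lookup: the parent was translated in an earlier iteration,
                // which is why the input must list parents before children.
                tr.connect(workToTran.get(parent), tran);
            }
            workToTran.put(work, tran);
        }
        return workToTran;
    }
}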

Example 25 with BaseWork

Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

From class DagUtils, method createEdgeProperty:

/*
   * Helper function to create an edge property from an edge type.
   */
private EdgeProperty createEdgeProperty(Vertex w, TezEdgeProperty edgeProp, Configuration conf, BaseWork work, TezWork tezWork) throws IOException {
    MRHelpers.translateMRConfToTez(conf);
    String keyClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS);
    String valClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS);
    String partitionerClassName = conf.get("mapred.partitioner.class");
    Map<String, String> partitionerConf;
    EdgeType edgeType = edgeProp.getEdgeType();
    switch(edgeType) {
        case BROADCAST_EDGE:
            UnorderedKVEdgeConfig et1Conf = UnorderedKVEdgeConfig.newBuilder(keyClass, valClass)
                    .setFromConfiguration(conf)
                    .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null)
                    .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
                    .build();
            return et1Conf.createDefaultBroadcastEdgeProperty();
        case CUSTOM_EDGE:
            assert partitionerClassName != null;
            partitionerConf = createPartitionerConf(partitionerClassName, conf);
            UnorderedPartitionedKVEdgeConfig et2Conf = UnorderedPartitionedKVEdgeConfig
                    .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf)
                    .setFromConfiguration(conf)
                    .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null)
                    .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
                    .build();
            EdgeManagerPluginDescriptor edgeDesc = EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName());
            CustomEdgeConfiguration edgeConf = new CustomEdgeConfiguration(edgeProp.getNumBuckets(), null);
            DataOutputBuffer dob = new DataOutputBuffer();
            edgeConf.write(dob);
            byte[] userPayload = dob.getData();
            edgeDesc.setUserPayload(UserPayload.create(ByteBuffer.wrap(userPayload)));
            return et2Conf.createDefaultCustomEdgeProperty(edgeDesc);
        case CUSTOM_SIMPLE_EDGE:
            assert partitionerClassName != null;
            partitionerConf = createPartitionerConf(partitionerClassName, conf);
            UnorderedPartitionedKVEdgeConfig et3Conf = UnorderedPartitionedKVEdgeConfig
                    .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf)
                    .setFromConfiguration(conf)
                    .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null)
                    .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
                    .build();
            return et3Conf.createDefaultEdgeProperty();
        case ONE_TO_ONE_EDGE:
            UnorderedKVEdgeConfig et4Conf = UnorderedKVEdgeConfig.newBuilder(keyClass, valClass).setFromConfiguration(conf).setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null).setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null).build();
            return et4Conf.createDefaultOneToOneEdgeProperty();
        case XPROD_EDGE:
            EdgeManagerPluginDescriptor edgeManagerDescriptor = EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
            List<String> crossProductSources = new ArrayList<>();
            for (BaseWork parentWork : tezWork.getParents(work)) {
                if (EdgeType.XPROD_EDGE == tezWork.getEdgeType(parentWork, work)) {
                    crossProductSources.add(parentWork.getName());
                }
            }
            CartesianProductConfig cpConfig = new CartesianProductConfig(crossProductSources);
            edgeManagerDescriptor.setUserPayload(cpConfig.toUserPayload(new TezConfiguration(conf)));
            UnorderedPartitionedKVEdgeConfig cpEdgeConf = UnorderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass, ValueHashPartitioner.class.getName()).build();
            return cpEdgeConf.createDefaultCustomEdgeProperty(edgeManagerDescriptor);
        case SIMPLE_EDGE:
        // fallthrough
        default:
            assert partitionerClassName != null;
            partitionerConf = createPartitionerConf(partitionerClassName, conf);
            OrderedPartitionedKVEdgeConfig et5Conf = OrderedPartitionedKVEdgeConfig
                    .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf)
                    .setFromConfiguration(conf)
                    .setKeySerializationClass(TezBytesWritableSerialization.class.getName(),
                            TezBytesComparator.class.getName(), null)
                    .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
                    .build();
            return et5Conf.createDefaultEdgeProperty();
    }
}
Also used: OrderedPartitionedKVEdgeConfig (org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig), ArrayList (java.util.ArrayList), MRPartitioner (org.apache.tez.mapreduce.partition.MRPartitioner), EdgeType (org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType), TezBytesComparator (org.apache.tez.runtime.library.common.comparator.TezBytesComparator), UnorderedKVEdgeConfig (org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig), EdgeManagerPluginDescriptor (org.apache.tez.dag.api.EdgeManagerPluginDescriptor), CartesianProductEdgeManager (org.apache.tez.runtime.library.cartesianproduct.CartesianProductEdgeManager), DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer), UnorderedPartitionedKVEdgeConfig (org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig), TezBytesWritableSerialization (org.apache.tez.runtime.library.common.serializer.TezBytesWritableSerialization), CartesianProductConfig (org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig), BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork), TezConfiguration (org.apache.tez.dag.api.TezConfiguration)
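Each switch branch builds a Tez edge configuration for the corresponding Hive edge type. The BROADCAST_EDGE branch is the simplest; isolated below as a small helper, every builder call is taken verbatim from the method above, and only the helper and class names are ours.

import org.apache.hadoop.conf.Configuration;
import org.apache.tez.dag.api.EdgeProperty;
import org.apache.tez.runtime.library.common.serializer.TezBytesWritableSerialization;
import org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig;

class BroadcastEdgeSketch {
    // Builds the broadcast edge property exactly as the BROADCAST_EDGE case does:
    // Tez bytes-based serialization for both key and value, config taken from conf.
    static EdgeProperty broadcastEdge(String keyClass, String valClass, Configuration conf) {
        UnorderedKVEdgeConfig cfg = UnorderedKVEdgeConfig.newBuilder(keyClass, valClass)
                .setFromConfiguration(conf)
                .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null)
                .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
                .build();
        return cfg.createDefaultBroadcastEdgeProperty();
    }
}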

Aggregations

BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork): 54
ArrayList (java.util.ArrayList): 16
Operator (org.apache.hadoop.hive.ql.exec.Operator): 14
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 14
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 11
ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork): 11
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 10
LinkedList (java.util.LinkedList): 9
HashTableDummyOperator (org.apache.hadoop.hive.ql.exec.HashTableDummyOperator): 9
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 9
TezWork (org.apache.hadoop.hive.ql.plan.TezWork): 9
List (java.util.List): 8
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 8
JobConf (org.apache.hadoop.mapred.JobConf): 8
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 7
SparkEdgeProperty (org.apache.hadoop.hive.ql.plan.SparkEdgeProperty): 7
SparkWork (org.apache.hadoop.hive.ql.plan.SparkWork): 7
CommonMergeJoinOperator (org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator): 6
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 6
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 6