
Example 1 with BaseWork

use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

In the class TestTezTask, method setUp:

@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
    utils = mock(DagUtils.class);
    fs = mock(FileSystem.class);
    path = mock(Path.class);
    when(path.getFileSystem(any(Configuration.class))).thenReturn(fs);
    when(utils.getTezDir(any(Path.class))).thenReturn(path);
    when(utils.createVertex(any(JobConf.class), any(BaseWork.class), any(Path.class), any(LocalResource.class), any(List.class), any(FileSystem.class), any(Context.class), anyBoolean(), any(TezWork.class), any(VertexType.class))).thenAnswer(new Answer<Vertex>() {

        @Override
        public Vertex answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            return Vertex.create(((BaseWork) args[1]).getName(), mock(ProcessorDescriptor.class), 0, mock(Resource.class));
        }
    });
    when(utils.createEdge(any(JobConf.class), any(Vertex.class), any(Vertex.class), any(TezEdgeProperty.class), any(VertexType.class))).thenAnswer(new Answer<Edge>() {

        @Override
        public Edge answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            return Edge.create((Vertex) args[1], (Vertex) args[2], mock(EdgeProperty.class));
        }
    });
    work = new TezWork("", null);
    mws = new MapWork[] { new MapWork(), new MapWork() };
    rws = new ReduceWork[] { new ReduceWork(), new ReduceWork() };
    work.addAll(mws);
    work.addAll(rws);
    int i = 0;
    for (BaseWork w : work.getAllWork()) {
        w.setName("Work " + (++i));
    }
    op = mock(Operator.class);
    LinkedHashMap<String, Operator<? extends OperatorDesc>> map = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
    map.put("foo", op);
    mws[0].setAliasToWork(map);
    mws[1].setAliasToWork(map);
    LinkedHashMap<Path, ArrayList<String>> pathMap = new LinkedHashMap<>();
    ArrayList<String> aliasList = new ArrayList<String>();
    aliasList.add("foo");
    pathMap.put(new Path("foo"), aliasList);
    mws[0].setPathToAliases(pathMap);
    mws[1].setPathToAliases(pathMap);
    rws[0].setReducer(op);
    rws[1].setReducer(op);
    TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
    work.connect(mws[0], rws[0], edgeProp);
    work.connect(mws[1], rws[0], edgeProp);
    work.connect(rws[0], rws[1], edgeProp);
    task = new TezTask(utils);
    task.setWork(work);
    task.setConsole(mock(LogHelper.class));
    QueryPlan mockQueryPlan = mock(QueryPlan.class);
    doReturn(UUID.randomUUID().toString()).when(mockQueryPlan).getQueryId();
    task.setQueryPlan(mockQueryPlan);
    conf = new JobConf();
    appLr = mock(LocalResource.class);
    HiveConf hiveConf = new HiveConf();
    hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
    SessionState.start(hiveConf);
    session = mock(TezClient.class);
    sessionState = mock(TezSessionState.class);
    when(sessionState.getSession()).thenReturn(session);
    when(session.submitDAG(any(DAG.class))).thenThrow(new SessionNotRunning("")).thenReturn(mock(DAGClient.class));
}
Also used : Operator(org.apache.hadoop.hive.ql.exec.Operator) Vertex(org.apache.tez.dag.api.Vertex) Configuration(org.apache.hadoop.conf.Configuration) LogHelper(org.apache.hadoop.hive.ql.session.SessionState.LogHelper) TezEdgeProperty(org.apache.hadoop.hive.ql.plan.TezEdgeProperty) ArrayList(java.util.ArrayList) QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) LinkedHashMap(java.util.LinkedHashMap) TezClient(org.apache.tez.client.TezClient) SessionNotRunning(org.apache.tez.dag.api.SessionNotRunning) FileSystem(org.apache.hadoop.fs.FileSystem) List(java.util.List) ArrayList(java.util.ArrayList) HiveConf(org.apache.hadoop.hive.conf.HiveConf) JobConf(org.apache.hadoop.mapred.JobConf) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) Path(org.apache.hadoop.fs.Path) Context(org.apache.hadoop.hive.ql.Context) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) InvocationOnMock(org.mockito.invocation.InvocationOnMock) DAGClient(org.apache.tez.dag.api.client.DAGClient) Edge(org.apache.tez.dag.api.Edge) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) TezWork(org.apache.hadoop.hive.ql.plan.TezWork) VertexType(org.apache.hadoop.hive.ql.plan.TezWork.VertexType) Before(org.junit.Before)
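
The setUp above wires two MapWork and two ReduceWork units into a single TezWork graph connected by SIMPLE_EDGEs. As a quick orientation, here is a minimal sketch of how that shape could be asserted from the same test class; it relies only on getAllWork() and getParents(), both of which also appear in Example 2 below, while the method name and the usual JUnit static imports are assumptions rather than part of the original test.

@Test
public void sketchGraphShape() {
    // two MapWork and two ReduceWork units were registered via work.addAll(...)
    assertEquals(4, work.getAllWork().size());
    // each work.connect(parent, child, edgeProp) call above becomes a parent link
    assertTrue(work.getParents(rws[0]).contains(mws[0]));
    assertTrue(work.getParents(rws[0]).contains(mws[1]));
    assertTrue(work.getParents(rws[1]).contains(rws[0]));
}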

Example 2 with BaseWork

use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

In the class TestGenTezWork, method testCreateReduce:

@Test
public void testCreateReduce() throws SemanticException {
    // create map
    proc.process(rs, null, ctx, (Object[]) null);
    // create reduce
    proc.process(fs, null, ctx, (Object[]) null);
    TezWork work = ctx.currentTask.getWork();
    assertEquals(work.getAllWork().size(), 2);
    BaseWork w = work.getAllWork().get(1);
    assertTrue(w instanceof ReduceWork);
    assertTrue(work.getParents(w).contains(work.getAllWork().get(0)));
    ReduceWork rw = (ReduceWork) w;
    // need to make sure names are set for tez to connect things right
    assertNotNull(w.getName());
    // map work should start with our ts op
    assertSame(rw.getReducer(), fs);
    // should have severed the ties
    assertEquals(fs.getParentOperators().size(), 0);
}
Also used : ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) TezWork(org.apache.hadoop.hive.ql.plan.TezWork) Test(org.junit.Test)

Example 3 with BaseWork

use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

In the class LocalHiveSparkClient, method refreshLocalResources:

/**
   * At this point a single SparkContext is shared by more than one thread, so this
   * method is synchronized.
   *
   * This method can't remove a jar/resource from the SparkContext; that is an
   * issue we have to live with until multiple SparkContexts are supported in a single JVM.
   */
private synchronized void refreshLocalResources(SparkWork sparkWork, HiveConf conf) {
    // add hive-exec jar
    addJars((new JobConf(this.getClass())).getJar());
    // add aux jars
    addJars(conf.getAuxJars());
    addJars(SessionState.get() == null ? null : SessionState.get().getReloadableAuxJars());
    // add added jars
    String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDJARS, addedJars);
    addJars(addedJars);
    // add plugin module jars on demand
    // jobConf will hold all the configuration for hadoop, tez, and hive
    JobConf jobConf = new JobConf(conf);
    jobConf.set(MR_JAR_PROPERTY, "");
    for (BaseWork work : sparkWork.getAllWork()) {
        work.configureJobConf(jobConf);
    }
    addJars(conf.get(MR_JAR_PROPERTY));
    // add added files
    String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDFILES, addedFiles);
    addResources(addedFiles);
    // add added archives
    String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDARCHIVES, addedArchives);
    addResources(addedArchives);
}
Also used : JobConf(org.apache.hadoop.mapred.JobConf) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)

Example 4 with BaseWork

use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

In the class RemoteHiveSparkClient, method refreshLocalResources:

private void refreshLocalResources(SparkWork sparkWork, HiveConf conf) throws IOException {
    // add hive-exec jar
    addJars((new JobConf(this.getClass())).getJar());
    // add aux jars
    addJars(conf.getAuxJars());
    addJars(SessionState.get() == null ? null : SessionState.get().getReloadableAuxJars());
    // add added jars
    String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDJARS, addedJars);
    addJars(addedJars);
    // add plugin module jars on demand
    // jobConf will hold all the configuration for hadoop, tez, and hive
    JobConf jobConf = new JobConf(conf);
    jobConf.set(MR_JAR_PROPERTY, "");
    for (BaseWork work : sparkWork.getAllWork()) {
        work.configureJobConf(jobConf);
    }
    addJars(conf.get(MR_JAR_PROPERTY));
    // add added files
    String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDFILES, addedFiles);
    addResources(addedFiles);
    // add added archives
    String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDARCHIVES, addedArchives);
    addResources(addedArchives);
}
Also used : JobConf(org.apache.hadoop.mapred.JobConf) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)
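
Examples 3 and 4 repeat the same "plugin jars on demand" step: each BaseWork gets a chance to append the jars it needs to a scratch JobConf keyed by MR_JAR_PROPERTY, and the accumulated value is then passed to addJars. Purely as an illustration of that shared step (the helper below does not exist in Hive and its name is hypothetical; note also that the excerpts read the property back from conf, whereas this sketch reads it from the jobConf the loop populated), it could be written as:

private static String collectWorkJars(SparkWork sparkWork, HiveConf conf) {
    // jobConf will hold all the configuration for hadoop, tez, and hive
    JobConf jobConf = new JobConf(conf);
    jobConf.set(MR_JAR_PROPERTY, "");
    // each work unit appends whatever jars its operators need
    for (BaseWork work : sparkWork.getAllWork()) {
        work.configureJobConf(jobConf);
    }
    // comma-separated jar list; empty if no work unit added anything
    return jobConf.get(MR_JAR_PROPERTY);
}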

Example 5 with BaseWork

use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

In the class Utilities, method getBaseWork:

/**
   * Returns the Map or Reduce plan.
   * Side effect: the BaseWork returned is also placed in the gWorkMap.
   * @param conf the job configuration
   * @param name the plan name to look up
   * @return the BaseWork for the supplied name; returns null if name is null
   * @throws RuntimeException if the configuration files are not proper or if the plan cannot be loaded
   */
private static BaseWork getBaseWork(Configuration conf, String name) {
    Path path = null;
    InputStream in = null;
    Kryo kryo = SerializationUtilities.borrowKryo();
    try {
        String engine = HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_ENGINE);
        if (engine.equals("spark")) {
            // TODO Add jar into current thread context classloader as it may be invoked by Spark driver inside
            // threads, should be unnecessary while SPARK-5377 is resolved.
            String addedJars = conf.get(HIVE_ADDED_JARS);
            if (addedJars != null && !addedJars.isEmpty()) {
                ClassLoader loader = Thread.currentThread().getContextClassLoader();
                ClassLoader newLoader = addToClassPath(loader, addedJars.split(";"));
                Thread.currentThread().setContextClassLoader(newLoader);
                kryo.setClassLoader(newLoader);
            }
        }
        path = getPlanPath(conf, name);
        LOG.info("PLAN PATH = " + path);
        if (path == null) {
            // Map/reduce plan may not be generated
            return null;
        }
        BaseWork gWork = gWorkMap.get(conf).get(path);
        if (gWork == null) {
            Path localPath = path;
            LOG.debug("local path = " + localPath);
            final long serializedSize;
            final String planMode;
            if (HiveConf.getBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN)) {
                LOG.debug("Loading plan from string: " + path.toUri().getPath());
                String planString = conf.getRaw(path.toUri().getPath());
                if (planString == null) {
                    LOG.info("Could not find plan string in conf");
                    return null;
                }
                serializedSize = planString.length();
                planMode = "RPC";
                byte[] planBytes = Base64.decodeBase64(planString);
                in = new ByteArrayInputStream(planBytes);
                in = new InflaterInputStream(in);
            } else {
                LOG.debug("Open file to read in plan: " + localPath);
                FileSystem fs = localPath.getFileSystem(conf);
                in = fs.open(localPath);
                serializedSize = fs.getFileStatus(localPath).getLen();
                planMode = "FILE";
            }
            if (MAP_PLAN_NAME.equals(name)) {
                if (ExecMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
                    gWork = SerializationUtilities.deserializePlan(kryo, in, MapWork.class);
                } else if (MergeFileMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
                    gWork = SerializationUtilities.deserializePlan(kryo, in, MergeFileWork.class);
                } else if (ColumnTruncateMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
                    gWork = SerializationUtilities.deserializePlan(kryo, in, ColumnTruncateWork.class);
                } else if (PartialScanMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
                    gWork = SerializationUtilities.deserializePlan(kryo, in, PartialScanWork.class);
                } else {
                    throw new RuntimeException("unable to determine work from configuration ." + MAPRED_MAPPER_CLASS + " was " + conf.get(MAPRED_MAPPER_CLASS));
                }
            } else if (REDUCE_PLAN_NAME.equals(name)) {
                if (ExecReducer.class.getName().equals(conf.get(MAPRED_REDUCER_CLASS))) {
                    gWork = SerializationUtilities.deserializePlan(kryo, in, ReduceWork.class);
                } else {
                    throw new RuntimeException("unable to determine work from configuration ." + MAPRED_REDUCER_CLASS + " was " + conf.get(MAPRED_REDUCER_CLASS));
                }
            } else if (name.contains(MERGE_PLAN_NAME)) {
                if (name.startsWith(MAPNAME)) {
                    gWork = SerializationUtilities.deserializePlan(kryo, in, MapWork.class);
                } else if (name.startsWith(REDUCENAME)) {
                    gWork = SerializationUtilities.deserializePlan(kryo, in, ReduceWork.class);
                } else {
                    throw new RuntimeException("Unknown work type: " + name);
                }
            }
            LOG.info("Deserialized plan (via {}) - name: {} size: {}", planMode, gWork.getName(), humanReadableByteCount(serializedSize));
            gWorkMap.get(conf).put(path, gWork);
        } else if (LOG.isDebugEnabled()) {
            LOG.debug("Found plan in cache for name: " + name);
        }
        return gWork;
    } catch (FileNotFoundException fnf) {
        // happens. e.g.: no reduce work.
        LOG.debug("No plan file found: " + path + "; " + fnf.getMessage());
        return null;
    } catch (Exception e) {
        String msg = "Failed to load plan: " + path;
        LOG.error("Failed to load plan: " + path, e);
        throw new RuntimeException(msg, e);
    } finally {
        SerializationUtilities.releaseKryo(kryo);
        if (in != null) {
            try {
                in.close();
            } catch (IOException cantBlameMeForTrying) {
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ByteArrayInputStream(java.io.ByteArrayInputStream) InflaterInputStream(java.util.zip.InflaterInputStream) InputStream(java.io.InputStream) InflaterInputStream(java.util.zip.InflaterInputStream) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) SQLFeatureNotSupportedException(java.sql.SQLFeatureNotSupportedException) SQLTransientException(java.sql.SQLTransientException) SQLException(java.sql.SQLException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) EOFException(java.io.EOFException) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) ByteArrayInputStream(java.io.ByteArrayInputStream) FileSystem(org.apache.hadoop.fs.FileSystem) URLClassLoader(java.net.URLClassLoader) ColumnTruncateMapper(org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateMapper) ColumnTruncateWork(org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateWork) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) ExecMapper(org.apache.hadoop.hive.ql.exec.mr.ExecMapper) Kryo(com.esotericsoftware.kryo.Kryo)
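
When HIVE_RPC_QUERY_PLAN is set, the branch above recovers the plan from a conf string by Base64-decoding it, inflating it, and handing the stream to Kryo. As a sketch of the string format that read path implies (this is not the Hive writer code; the class and method names below are made up for illustration), the plan string would be produced roughly like this:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.DeflaterOutputStream;
import org.apache.commons.codec.binary.Base64;

class PlanStringSketch {
    // Inverse of the read path above: deflate the Kryo-serialized plan bytes,
    // then Base64-encode the compressed bytes into a string suitable for the conf.
    static String encodePlan(byte[] serializedPlan) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (DeflaterOutputStream dos = new DeflaterOutputStream(bos)) {
            dos.write(serializedPlan);
        }
        return Base64.encodeBase64String(bos.toByteArray());
    }
}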

Aggregations

BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork): 43 uses
Operator (org.apache.hadoop.hive.ql.exec.Operator): 12 uses
ArrayList (java.util.ArrayList): 11 uses
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 11 uses
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 10 uses
ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork): 10 uses
HashTableDummyOperator (org.apache.hadoop.hive.ql.exec.HashTableDummyOperator): 9 uses
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 9 uses
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 8 uses
TezWork (org.apache.hadoop.hive.ql.plan.TezWork): 8 uses
LinkedList (java.util.LinkedList): 7 uses
List (java.util.List): 7 uses
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 7 uses
SparkWork (org.apache.hadoop.hive.ql.plan.SparkWork): 7 uses
JobConf (org.apache.hadoop.mapred.JobConf): 7 uses
Path (org.apache.hadoop.fs.Path): 6 uses
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 6 uses
SparkEdgeProperty (org.apache.hadoop.hive.ql.plan.SparkEdgeProperty): 6 uses
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 6 uses
TezEdgeProperty (org.apache.hadoop.hive.ql.plan.TezEdgeProperty): 6 uses