
Example 1 with JobContext

Use of org.apache.hive.spark.client.JobContext in project hive by apache.

From the class TestHiveSparkClient, the method testSetJobGroupAndDescription below compiles a query into a SparkTask, runs RemoteHiveSparkClient.JobStatusJob against a mocked JobContext backed by a local JavaSparkContext, and verifies that the Spark job description contains the query and the job group id contains the query id:

@Test
public void testSetJobGroupAndDescription() throws Exception {
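    // Point HiveConf at the local-mode Spark hive-site.xml shipped with the test data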
    String confDir = "../data/conf/spark/local/hive-site.xml";
    HiveConf.setHiveSiteLocation(new File(confDir).toURI().toURL());
    HiveConf conf = new HiveConf();
    // Set to false because we don't launch a job using LocalHiveSparkClient so the
    // hive-kryo-registrator jar is never added to the classpath
    conf.setBoolVar(HiveConf.ConfVars.SPARK_OPTIMIZE_SHUFFLE_SERDE, false);
    conf.set("spark.local.dir", Paths.get(System.getProperty("test.tmp.dir"), "TestHiveSparkClient-local-dir").toString());
    SessionState.start(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path tmpDir = new Path("TestHiveSparkClient-tmp");
    IDriver driver = null;
    JavaSparkContext sc = null;
    try {
        driver = DriverFactory.newDriver(conf);
        driver.run("create table test (col int)");
        String query = "select * from test order by col";
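        // Compile the query without running it so the plan and its SparkTask are generated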
        ((ReExecDriver) driver).compile(query, true);
        List<SparkTask> sparkTasks = Utilities.getSparkTasks(driver.getPlan().getRootTasks());
        Assert.assertEquals(1, sparkTasks.size());
        SparkTask sparkTask = sparkTasks.get(0);
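        // Record the query text as the job name; the assertions below expect it in the Spark job description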
        conf.set(MRJobConfig.JOB_NAME, query);
        JobConf jobConf = new JobConf(conf);
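        // Stand up a local Spark context for the mocked JobContext to hand back to the job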
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("local");
        sparkConf.setAppName("TestHiveSparkClient-app");
        sc = new JavaSparkContext(sparkConf);
        byte[] jobConfBytes = KryoSerializer.serializeJobConf(jobConf);
        byte[] scratchDirBytes = KryoSerializer.serialize(tmpDir);
        byte[] sparkWorkBytes = KryoSerializer.serialize(sparkTask.getWork());
        RemoteHiveSparkClient.JobStatusJob job = new RemoteHiveSparkClient.JobStatusJob(jobConfBytes, scratchDirBytes, sparkWorkBytes);
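        // Mock the JobContext so job.call() runs against the local JavaSparkContext instead of a remote driver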
        JobContext mockJobContext = mock(JobContext.class);
        when(mockJobContext.sc()).thenReturn(sc);
        job.call(mockJobContext);
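        // The job should have tagged the context with the query (description) and the query id (job group)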
        Assert.assertTrue(sc.getLocalProperty("spark.job.description").contains(query));
        Assert.assertTrue(sc.getLocalProperty("spark.jobGroup.id").contains(sparkTask.getWork().getQueryId()));
    } finally {
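        // Clean up: drop the test table, stop the local Spark context, and remove the scratch directory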
        if (driver != null) {
            driver.run("drop table if exists test");
            driver.destroy();
        }
        if (sc != null) {
            sc.close();
        }
        if (fs.exists(tmpDir)) {
            fs.delete(tmpDir, true);
        }
    }
}
Also used : Path (org.apache.hadoop.fs.Path), ReExecDriver (org.apache.hadoop.hive.ql.reexec.ReExecDriver), FileSystem (org.apache.hadoop.fs.FileSystem), IDriver (org.apache.hadoop.hive.ql.IDriver), HiveConf (org.apache.hadoop.hive.conf.HiveConf), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), JobContext (org.apache.hive.spark.client.JobContext), File (java.io.File), JobConf (org.apache.hadoop.mapred.JobConf), SparkConf (org.apache.spark.SparkConf), Test (org.junit.Test)
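JobContext is the handle a Hive-on-Spark Job receives when it runs inside the Spark driver; the only method exercised above is sc(), which exposes the underlying JavaSparkContext. As a minimal sketch of how that handle is typically consumed, the hypothetical CountingJob below implements the same org.apache.hive.spark.client.Job interface that JobStatusJob implements and uses the JobContext purely to reach the Spark context (the class name and the row-counting logic are illustrative, not part of the Hive code base):

import java.util.Arrays;

import org.apache.hive.spark.client.Job;
import org.apache.hive.spark.client.JobContext;

// Hypothetical Job implementation: the JobContext is used only to obtain the JavaSparkContext.
public class CountingJob implements Job<Long> {

    @Override
    public Long call(JobContext jc) throws Exception {
        // jc.sc() returns the driver-side JavaSparkContext, which the test above supplies via a mock.
        return jc.sc().parallelize(Arrays.asList(1, 2, 3)).count();
    }
}

The test above exercises the same pattern with Mockito: mock(JobContext.class) plus when(mockJobContext.sc()).thenReturn(sc) is enough for job.call(...) to operate on the local context.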

Aggregations

File (java.io.File): 1, FileSystem (org.apache.hadoop.fs.FileSystem): 1, Path (org.apache.hadoop.fs.Path): 1, HiveConf (org.apache.hadoop.hive.conf.HiveConf): 1, IDriver (org.apache.hadoop.hive.ql.IDriver): 1, ReExecDriver (org.apache.hadoop.hive.ql.reexec.ReExecDriver): 1, JobConf (org.apache.hadoop.mapred.JobConf): 1, JobContext (org.apache.hive.spark.client.JobContext): 1, SparkConf (org.apache.spark.SparkConf): 1, JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 1, Test (org.junit.Test): 1