Use of org.apache.hive.spark.counter.SparkCounters in project hive by apache.
The example below is the execute method of the LocalHiveSparkClient class.
@Override
public SparkJobRef execute(DriverContext driverContext, SparkWork sparkWork) throws Exception {
  Context ctx = driverContext.getCtx();
  HiveConf hiveConf = (HiveConf) ctx.getConf();
  refreshLocalResources(sparkWork, hiveConf);
  JobConf jobConf = new JobConf(hiveConf);

  // Create a temporary scratch dir.
  Path emptyScratchDir = ctx.getMRTmpPath();
  FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
  fs.mkdirs(emptyScratchDir);

  // Update the credential provider location; the password for the credential
  // provider is already set in the sparkConf in HiveSparkClientFactory.
  HiveConfUtil.updateJobCredentialProviders(jobConf);
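
  // Register the counters required by this SparkWork so their values can be collected while the job runs.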
  SparkCounters sparkCounters = new SparkCounters(sc);
  Map<String, List<String>> prefixes = sparkWork.getRequiredCounterPrefix();
  if (prefixes != null) {
    for (String group : prefixes.keySet()) {
      for (String counterName : prefixes.get(group)) {
        sparkCounters.createCounter(group, counterName);
      }
    }
  }
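
  // The reporter carries the counters into the generated plan so the Spark tasks can update them as they run.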
  SparkReporter sparkReporter = new SparkReporter(sparkCounters);

  // Generate the Spark plan.
  SparkPlanGenerator gen = new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter);
  SparkPlan plan = gen.generate(sparkWork);

  if (driverContext.isShutdown()) {
    throw new HiveException("Operation is cancelled.");
  }

  // Execute the generated plan.
  JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph();
  // Use the Spark RDD async action to submit the job, since it is currently the only way to get the jobId.
  JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance());

  // Since the RDD graph is always submitted through a single foreach action, it triggers exactly one job.
  int jobId = future.jobIds().get(0);

  LocalSparkJobStatus sparkJobStatus = new LocalSparkJobStatus(
      sc, jobId, jobMetricsListener, sparkCounters, plan.getCachedRDDIds(), future);
  return new LocalSparkJobRef(Integer.toString(jobId), hiveConf, sparkJobStatus, sc);
}
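
For comparison, here is a minimal standalone sketch of the SparkCounters calls used above. The local JavaSparkContext setup and the increment call are assumptions added for illustration (in the real flow the counters are declared in execute and updated from Spark tasks through SparkReporter); verify the increment signature against your Hive version.

import org.apache.hive.spark.counter.SparkCounters;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SparkCountersSketch {
  public static void main(String[] args) {
    // Hypothetical local Spark context; LocalHiveSparkClient manages its own `sc`.
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("spark-counters-sketch");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    SparkCounters counters = new SparkCounters(jsc);

    // Same declaration pattern as in execute(): counter group plus counter name.
    counters.createCounter("HIVE", "RECORDS_IN");

    // Assumed signature: increment(group, counterName, delta). In Hive this is
    // normally driven by SparkReporter while tasks process rows.
    counters.increment("HIVE", "RECORDS_IN", 1L);

    jsc.stop();
  }
}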