Use of org.apache.spark.scheduler.ActiveJob in project zeppelin by apache.
The class SparkInterpreter, method getProgress:
@Override
public int getProgress(InterpreterContext context) {
  String jobGroup = Utils.buildJobGroupId(context);
  int completedTasks = 0;
  int totalTasks = 0;

  DAGScheduler scheduler = sc.dagScheduler();
  if (scheduler == null) {
    return 0;
  }
  HashSet<ActiveJob> jobs = scheduler.activeJobs();
  if (jobs == null || jobs.size() == 0) {
    return 0;
  }

  Iterator<ActiveJob> it = jobs.iterator();
  while (it.hasNext()) {
    ActiveJob job = it.next();
    // Only count jobs that belong to this interpreter context's job group.
    String g = (String) job.properties().get("spark.jobGroup.id");
    if (jobGroup.equals(g)) {
      // progressInfo[0] = total tasks, progressInfo[1] = completed tasks
      int[] progressInfo = null;
      try {
        // Fetch the job's final stage via reflection, then count its tasks
        // using the code path that matches the running Spark version.
        Object finalStage = job.getClass().getMethod("finalStage").invoke(job);
        if (sparkVersion.getProgress1_0()) {
          progressInfo = getProgressFromStage_1_0x(sparkListener, finalStage);
        } else {
          progressInfo = getProgressFromStage_1_1x(sparkListener, finalStage);
        }
      } catch (IllegalAccessException | IllegalArgumentException
          | InvocationTargetException | NoSuchMethodException | SecurityException e) {
        logger.error("Can't get progress info", e);
        return 0;
      }
      totalTasks += progressInfo[0];
      completedTasks += progressInfo[1];
    }
  }

  if (totalTasks == 0) {
    return 0;
  }
  return completedTasks * 100 / totalTasks;
}
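The lookup on "spark.jobGroup.id" only matches jobs that were tagged with the same group id before execution. A minimal sketch of that pairing, assuming a local JavaSparkContext; the group id string and class name here are illustrative, whereas Zeppelin derives the real id from the InterpreterContext via Utils.buildJobGroupId(context):

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class JobGroupProgressSketch {
  public static void main(String[] args) {
    JavaSparkContext jsc =
        new JavaSparkContext(new SparkConf().setAppName("job-group-demo").setMaster("local[*]"));

    // Hypothetical group id; Zeppelin builds the real one from the InterpreterContext.
    String jobGroup = "zeppelin-demo-paragraph-1";

    // setJobGroup records the id in the submitted jobs' properties under
    // "spark.jobGroup.id", which is the key getProgress() reads from each ActiveJob.
    jsc.setJobGroup(jobGroup, "demo paragraph");

    // Any job triggered from here on carries the group id and is counted by getProgress().
    long count = jsc.parallelize(Arrays.asList(1, 2, 3, 4)).count();
    System.out.println("count = " + count);

    jsc.stop();
  }
}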
Use of org.apache.spark.scheduler.ActiveJob in project OpenLineage by OpenLineage.
The class OpenLineageSparkListener, method onJobStart:
/**
 * Called by the SparkListener when a job starts.
 */
@Override
public void onJobStart(SparkListenerJobStart jobStart) {
  // Resolve the ActiveJob for this job id from the DAGScheduler, if a SparkContext is available.
  Optional<ActiveJob> activeJob =
      asJavaOptional(SparkSession.getDefaultSession().map(sparkContextFromSession).orElse(activeSparkContext))
          .flatMap(ctx -> Optional.ofNullable(ctx.dagScheduler())
              .map(ds -> ds.jobIdToActiveJob().get(jobStart.jobId())))
          .flatMap(ScalaConversionUtils::asJavaOptional);

  // Record which stages belong to this job so their metrics can be aggregated later.
  Set<Integer> stages = ScalaConversionUtils.fromSeq(jobStart.stageIds()).stream()
      .map(Integer.class::cast)
      .collect(Collectors.toSet());
  jobMetrics.addJobStages(jobStart.jobId(), stages);

  // Prefer the SQL execution id from the job start properties; fall back to the ActiveJob's
  // properties; otherwise create an execution context keyed by the job id alone.
  ExecutionContext context = Optional.ofNullable(getSqlExecutionId(jobStart.properties()))
      .map(Optional::of)
      .orElseGet(() ->
          asJavaOptional(SparkSession.getDefaultSession().map(sparkContextFromSession).orElse(activeSparkContext))
              .flatMap(ctx -> Optional.ofNullable(ctx.dagScheduler())
                  .map(ds -> ds.jobIdToActiveJob().get(jobStart.jobId()))
                  .flatMap(ScalaConversionUtils::asJavaOptional))
              .map(job -> getSqlExecutionId(job.properties())))
      .map(id -> {
        long executionId = Long.parseLong(id);
        return getExecutionContext(jobStart.jobId(), executionId);
      })
      .orElseGet(() -> getExecutionContext(jobStart.jobId()));

  // Set it in the rddExecutionRegistry so jobEnd is called.
  rddExecutionRegistry.put(jobStart.jobId(), context);
  activeJob.ifPresent(context::setActiveJob);
  context.start(jobStart);
}
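For context, a listener like this is normally wired in through Spark's standard spark.extraListeners mechanism rather than registered by hand. A minimal sketch, assuming the OpenLineage agent jar is on the driver classpath and that io.openlineage.spark.agent.OpenLineageSparkListener is the packaged listener class; the exact class name and the spark.openlineage.* transport settings should be taken from the OpenLineage documentation:

import org.apache.spark.sql.SparkSession;

public class OpenLineageListenerSetup {
  public static void main(String[] args) {
    // spark.extraListeners is a standard Spark option: Spark instantiates the named
    // classes and registers them, so onJobStart() above fires for every job.
    // The listener class name below is an assumption about the agent packaging.
    SparkSession spark = SparkSession.builder()
        .appName("openlineage-demo")
        .master("local[*]")
        .config("spark.extraListeners", "io.openlineage.spark.agent.OpenLineageSparkListener")
        .getOrCreate();

    // Any action now triggers onJobStart, which resolves the ActiveJob and its stages as above.
    spark.range(100).count();

    spark.stop();
  }
}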