Use of org.apache.spark.scheduler.ResultStage in project OpenLineage by OpenLineage.
From the class RddExecutionContext, the method printStages:
private void printStages(String prefix, Stage stage) {
  if (stage instanceof ResultStage) {
    // The cast result is currently unused; the check only confirms the stage type.
    ResultStage resultStage = (ResultStage) stage;
  }
  printRDDs(
      prefix + "(stageId:" + stage.id() + ")-(" + stage.getClass().getSimpleName() + ")- RDD: ",
      stage.rdd());
  // Recurse into parent stages, extending the prefix one level per hop in the stage graph.
  Collection<Stage> parents = asJavaCollection(stage.parents());
  for (Stage parent : parents) {
    printStages(prefix + " \\ ", parent);
  }
}
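The printRDDs helper called above is not shown in this excerpt. A minimal sketch of what such a recursive printer could look like, assuming it only logs each RDD and then walks its dependencies (the method name matches the call above; the body is an assumption, not the actual OpenLineage implementation):

// Hypothetical sketch: log the RDD at the current prefix, then recurse into its
// dependencies (org.apache.spark.Dependency), mirroring how printStages walks parents.
private void printRDDs(String prefix, RDD<?> rdd) {
  log.info("{}{}", prefix, rdd);
  for (Dependency<?> dep : asJavaCollection(rdd.dependencies())) {
    printRDDs(prefix + " \\ ", dep.rdd());
  }
}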
Use of org.apache.spark.scheduler.ResultStage in project OpenLineage by OpenLineage.
From the class RddExecutionContext, the method setActiveJob:
@Override
public void setActiveJob(ActiveJob activeJob) {
  RDD<?> finalRDD = activeJob.finalStage().rdd();
  this.jobSuffix = nameRDD(finalRDD);
  Set<RDD<?>> rdds = Rdds.flattenRDDs(finalRDD);
  this.inputs = findInputs(rdds);
  Configuration jc = new JobConf();
  if (activeJob.finalStage() instanceof ResultStage) {
    Function2<TaskContext, Iterator<?>, ?> fn = ((ResultStage) activeJob.finalStage()).func();
    try {
      // Reflectively pull the Hadoop write configuration out of the result function's closure.
      Field f = getConfigField(fn);
      f.setAccessible(true);
      HadoopMapRedWriteConfigUtil configUtil =
          Optional.of(f.get(fn))
              .filter(HadoopMapRedWriteConfigUtil.class::isInstance)
              .map(HadoopMapRedWriteConfigUtil.class::cast)
              .orElseThrow(
                  () ->
                      new NoSuchFieldException(
                          "Field is not instance of HadoopMapRedWriteConfigUtil"));
      Field confField = HadoopMapRedWriteConfigUtil.class.getDeclaredField("conf");
      confField.setAccessible(true);
      SerializableJobConf conf = (SerializableJobConf) confField.get(configUtil);
      jc = conf.value();
    } catch (IllegalAccessException | NoSuchFieldException nfe) {
      log.warn("Unable to access job conf from RDD", nfe);
    }
    log.info("Found job conf from RDD {}", jc);
  } else {
    jc = OpenLineageSparkListener.getConfigForRDD(finalRDD);
  }
  this.outputs = findOutputs(finalRDD, jc);
}
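The getConfigField helper used in the try block is also not shown here. A minimal sketch of what such a reflective lookup could look like, assuming it simply scans the declared fields of the result function's closure for one whose type is HadoopMapRedWriteConfigUtil (the signature and body are assumptions, not the actual OpenLineage code):

// Hypothetical sketch: find the closure field that holds the Hadoop write configuration.
private Field getConfigField(Function2<TaskContext, Iterator<?>, ?> fn)
    throws NoSuchFieldException {
  for (Field field : fn.getClass().getDeclaredFields()) {
    if (HadoopMapRedWriteConfigUtil.class.isAssignableFrom(field.getType())) {
      return field;
    }
  }
  throw new NoSuchFieldException(
      "No field of type HadoopMapRedWriteConfigUtil on " + fn.getClass().getName());
}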