use of org.apache.spark.scheduler.Stage in project OpenLineage by OpenLineage.
the class OpenLineageRunEventBuilder method buildRun.
/**
 * Builds a {@link RunEvent} for a stage-submitted listener event.
 *
 * <p>Looks up the {@code Stage} stored in {@code stageMap} under the event's stage id, then
 * hands {@code populateRun} a node list made of the event's stage info, the stage itself, and
 * everything {@code Rdds.flattenRDDs} returns for the stage's RDD.
 *
 * <p>NOTE(review): if {@code stageMap} holds no entry for the stage id, the lookup presumably
 * yields {@code null} and {@code rdd()} will throw a NullPointerException; confirm callers
 * always register the stage first.
 *
 * @param parentRunFacet optional parent run facet, passed through to {@code populateRun}
 * @param runEventBuilder run event builder, passed through to {@code populateRun}
 * @param jobBuilder job builder, passed through to {@code populateRun}
 * @param event the stage-submitted event identifying the stage
 * @return whatever {@code populateRun} produces for the collected nodes
 */
RunEvent buildRun(
    Optional<ParentRunFacet> parentRunFacet,
    RunEventBuilder runEventBuilder,
    JobBuilder jobBuilder,
    SparkListenerStageSubmitted event) {
  Stage submittedStage = stageMap.get(event.stageInfo().stageId());
  RDD<?> stageRdd = submittedStage.rdd();

  // Order matters to match the original list: stage info, stage, then the flattened RDDs.
  List<Object> lineageNodes = new ArrayList<>();
  lineageNodes.add(event.stageInfo());
  lineageNodes.add(submittedStage);
  lineageNodes.addAll(Rdds.flattenRDDs(stageRdd));

  return populateRun(parentRunFacet, runEventBuilder, jobBuilder, lineageNodes);
}
use of org.apache.spark.scheduler.Stage in project OpenLineage by OpenLineage.
the class OpenLineageRunEventBuilder method buildRun.
/**
 * Builds a {@link RunEvent} for a stage-completed listener event.
 *
 * <p>Fetches the {@code Stage} stored in {@code stageMap} under the event's stage id and passes
 * {@code populateRun} a node list containing, in order: the event's stage info, the stage, and
 * the elements returned by {@code Rdds.flattenRDDs} for the stage's RDD.
 *
 * <p>NOTE(review): a stage id missing from {@code stageMap} presumably yields {@code null},
 * which makes the {@code rdd()} call throw a NullPointerException; verify that completed stages
 * are always still present in the map.
 *
 * @param parentRunFacet optional parent run facet, forwarded unchanged
 * @param runEventBuilder run event builder, forwarded unchanged
 * @param jobBuilder job builder, forwarded unchanged
 * @param event the stage-completed event identifying the stage
 * @return the result of {@code populateRun} for the collected nodes
 */
RunEvent buildRun(
    Optional<ParentRunFacet> parentRunFacet,
    RunEventBuilder runEventBuilder,
    JobBuilder jobBuilder,
    SparkListenerStageCompleted event) {
  Stage completedStage = stageMap.get(event.stageInfo().stageId());
  RDD<?> stageRdd = completedStage.rdd();

  // Same ordering as the original: stage info first, then the stage, then the flattened RDDs.
  List<Object> lineageNodes = new ArrayList<>();
  lineageNodes.add(event.stageInfo());
  lineageNodes.add(completedStage);
  lineageNodes.addAll(Rdds.flattenRDDs(stageRdd));

  return populateRun(parentRunFacet, runEventBuilder, jobBuilder, lineageNodes);
}
use of org.apache.spark.scheduler.Stage in project OpenLineage by OpenLineage.
the class RddExecutionContext method printStages.
/**
 * Recursively prints a stage and all of its parent stages.
 *
 * <p>For the given stage, calls {@code printRDDs} with a label built from {@code prefix}, the
 * stage id and the stage's simple class name, together with the stage's RDD. It then recurses
 * into every parent stage with {@code " \\ "} appended to the prefix, so each ancestry level
 * gets a longer prefix.
 *
 * @param prefix text prepended to the label printed for this stage
 * @param stage the stage to print; its parents are visited recursively
 */
private void printStages(String prefix, Stage stage) {
  // The former `instanceof ResultStage` branch only cast into an unused local and had no
  // effect, so it has been dropped.
  String label =
      prefix + "(stageId:" + stage.id() + ")-(" + stage.getClass().getSimpleName() + ")- RDD: ";
  printRDDs(label, stage.rdd());

  Collection<Stage> parentStages = asJavaCollection(stage.parents());
  for (Stage parentStage : parentStages) {
    printStages(prefix + " \\ ", parentStage);
  }
}
Aggregations