Search in sources :

Example 1 with Stage

use of org.apache.spark.scheduler.Stage in project OpenLineage by OpenLineage.

the class OpenLineageRunEventBuilder method buildRun.

/**
 * Builds a {@link RunEvent} for a stage-submitted listener event.
 *
 * <p>Looks up the {@link Stage} in {@code stageMap} by the event's stage id, gathers the
 * event's stage info, the stage itself, and everything returned by
 * {@code Rdds.flattenRDDs} for the stage's RDD, then delegates to {@code populateRun}.
 *
 * <p>NOTE(review): if {@code stageMap} has no entry for the stage id, the {@code rdd()}
 * call below would fail on a null reference; confirm callers guarantee registration.
 *
 * @param parentRunFacet optional parent run facet, forwarded to {@code populateRun}
 * @param runEventBuilder run event builder, forwarded to {@code populateRun}
 * @param jobBuilder job builder, forwarded to {@code populateRun}
 * @param event the stage-submitted event whose stage info identifies the stage
 * @return the result of {@code populateRun} for the collected nodes
 */
RunEvent buildRun(Optional<ParentRunFacet> parentRunFacet, RunEventBuilder runEventBuilder, JobBuilder jobBuilder, SparkListenerStageSubmitted event) {
    Stage submittedStage = stageMap.get(event.stageInfo().stageId());
    RDD<?> stageRdd = submittedStage.rdd();

    // Order matters to downstream consumers: stage info first, then the stage, then its RDDs.
    List<Object> runNodes = new ArrayList<>();
    runNodes.add(event.stageInfo());
    runNodes.add(submittedStage);
    runNodes.addAll(Rdds.flattenRDDs(stageRdd));

    return populateRun(parentRunFacet, runEventBuilder, jobBuilder, runNodes);
}
Also used : ArrayList(java.util.ArrayList) Stage(org.apache.spark.scheduler.Stage)

Example 2 with Stage

use of org.apache.spark.scheduler.Stage in project OpenLineage by OpenLineage.

the class OpenLineageRunEventBuilder method buildRun.

/**
 * Builds a {@link RunEvent} for a stage-completed listener event.
 *
 * <p>Resolves the {@link Stage} from {@code stageMap} using the event's stage id, then
 * assembles a node list holding the event's stage info, the stage, and the output of
 * {@code Rdds.flattenRDDs} for the stage's RDD, and hands it to {@code populateRun}.
 *
 * <p>NOTE(review): a missing {@code stageMap} entry would make the {@code rdd()} call
 * fail on a null reference; verify the stage is always registered before completion.
 *
 * @param parentRunFacet optional parent run facet, forwarded to {@code populateRun}
 * @param runEventBuilder run event builder, forwarded to {@code populateRun}
 * @param jobBuilder job builder, forwarded to {@code populateRun}
 * @param event the stage-completed event whose stage info identifies the stage
 * @return the result of {@code populateRun} for the collected nodes
 */
RunEvent buildRun(Optional<ParentRunFacet> parentRunFacet, RunEventBuilder runEventBuilder, JobBuilder jobBuilder, SparkListenerStageCompleted event) {
    Stage completedStage = stageMap.get(event.stageInfo().stageId());
    RDD<?> completedRdd = completedStage.rdd();

    // Node order is preserved: stage info, the stage, then the flattened RDDs.
    List<Object> collected = new ArrayList<>();
    collected.add(event.stageInfo());
    collected.add(completedStage);
    collected.addAll(Rdds.flattenRDDs(completedRdd));

    return populateRun(parentRunFacet, runEventBuilder, jobBuilder, collected);
}
Also used : ArrayList(java.util.ArrayList) Stage(org.apache.spark.scheduler.Stage)

Example 3 with Stage

use of org.apache.spark.scheduler.Stage in project OpenLineage by OpenLineage.

the class RddExecutionContext method printStages.

/**
 * Walks a stage and its ancestors, handing each stage's RDD to {@code printRDDs}.
 *
 * <p>For the given stage, builds a label from the prefix, the stage id and the stage's
 * simple class name, and passes it with the stage's RDD to {@code printRDDs}. It then
 * recurses into every parent stage with the prefix extended by {@code " \ "}.
 *
 * @param prefix text prepended to the label for this stage; grows by one segment per level
 * @param stage the stage to describe; its parents are visited recursively
 */
private void printStages(String prefix, Stage stage) {
    // The former instanceof/cast to ResultStage only declared an unused local, so it was dropped.
    String label = prefix + "(stageId:" + stage.id() + ")-(" + stage.getClass().getSimpleName() + ")- RDD: ";
    printRDDs(label, stage.rdd());

    Collection<Stage> parents = asJavaCollection(stage.parents());
    for (Stage parent : parents) {
        printStages(prefix + " \\ ", parent);
    }
}
Also used : Stage(org.apache.spark.scheduler.Stage) ResultStage(org.apache.spark.scheduler.ResultStage)

Aggregations

Stage (org.apache.spark.scheduler.Stage): 3, ArrayList (java.util.ArrayList): 2, ResultStage (org.apache.spark.scheduler.ResultStage): 1