Usage example of org.apache.spark.sql.execution.SparkPlan in the OpenLineage project.
From the class SparkSQLExecutionContext, method buildJob.
/**
 * Builds an OpenLineage job descriptor for the given query execution.
 *
 * <p>The job name is {@code <app_name>.<node_name>}, where both parts are the
 * snake_cased Spark application name and the executed plan's root node name.
 *
 * @param queryExecution the Spark query execution whose plan names the job
 * @return a job builder populated with the configured namespace and derived name
 */
protected OpenLineage.JobBuilder buildJob(QueryExecution queryExecution) {
  SparkContext sparkContext = queryExecution.executedPlan().sparkContext();
  SparkPlan node = queryExecution.executedPlan();
  // Unwrap SparkPlan from WholeStageCodegen, as that's not a descriptive or helpful job name
  if (node instanceof WholeStageCodegenExec) {
    node = ((WholeStageCodegenExec) node).child();
  }
  return openLineage
      .newJobBuilder()
      .namespace(this.eventEmitter.getJobNamespace())
      .name(toSnakeCase(sparkContext.appName()) + "." + toSnakeCase(node.nodeName()));
}

/**
 * Converts a camelCase identifier to lower snake_case using the shared
 * {@code CAMEL_TO_SNAKE_CASE} pattern (e.g. {@code "appName"} -> {@code "app_name"}).
 */
private static String toSnakeCase(String name) {
  return name.replaceAll(CAMEL_TO_SNAKE_CASE, "_$1").toLowerCase(Locale.ROOT);
}
Usage example of org.apache.spark.sql.execution.SparkPlan in the OpenLineage project.
From the class OpenLineageSparkListenerTest, method testSqlEventWithJobEventEmitsOnce.
@Test
public void testSqlEventWithJobEventEmitsOnce() {
  // Stub out the Spark session/context pair and the query execution under test.
  SparkSession session = mock(SparkSession.class);
  SparkContext context = mock(SparkContext.class);
  EventEmitter eventEmitter = mock(EventEmitter.class);
  QueryExecution queryExecution = mock(QueryExecution.class);
  SparkPlan executedPlan = mock(SparkPlan.class);
  LogicalPlan logicalQuery = UnresolvedRelation$.MODULE$.apply(TableIdentifier.apply("tableName"));

  when(session.sparkContext()).thenReturn(context);
  when(context.appName()).thenReturn("appName");
  // Optimized plan is a file-sink command so the output-dataset visitor has something to match.
  when(queryExecution.optimizedPlan())
      .thenReturn(
          new InsertIntoHadoopFsRelationCommand(
              new Path("file:///tmp/dir"),
              null,
              false,
              Seq$.MODULE$.empty(),
              Option.empty(),
              null,
              Map$.MODULE$.empty(),
              logicalQuery,
              SaveMode.Overwrite,
              Option.empty(),
              Option.empty(),
              Seq$.MODULE$.<String>empty()));
  when(queryExecution.executedPlan()).thenReturn(executedPlan);
  when(executedPlan.sparkContext()).thenReturn(context);
  when(executedPlan.nodeName()).thenReturn("execute");

  OpenLineageContext olContext =
      OpenLineageContext.builder()
          .sparkSession(Optional.of(session))
          .sparkContext(session.sparkContext())
          .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
          .queryExecution(queryExecution)
          .build();
  olContext
      .getOutputDatasetQueryPlanVisitors()
      .add(new InsertIntoHadoopFsRelationVisitor(olContext));

  ExecutionContext executionContext =
      new StaticExecutionContextFactory(eventEmitter)
          .createSparkSQLExecutionContext(1L, eventEmitter, queryExecution, olContext);

  // Fire both the SQL-execution start and the job start for the same execution.
  executionContext.start(
      new SparkListenerSQLExecutionStart(
          1L,
          "",
          "",
          "",
          new SparkPlanInfo(
              "name", "string", Seq$.MODULE$.empty(), Map$.MODULE$.empty(), Seq$.MODULE$.empty()),
          1L));
  executionContext.start(
      new SparkListenerJobStart(0, 2L, Seq$.MODULE$.<StageInfo>empty(), new Properties()));

  // One event per start call: the emitter must have been invoked exactly twice.
  ArgumentCaptor<OpenLineage.RunEvent> lineageEvent =
      ArgumentCaptor.forClass(OpenLineage.RunEvent.class);
  verify(eventEmitter, times(2)).emit(lineageEvent.capture());
}
Aggregations