Usage example of io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor in the OpenLineage project (by OpenLineage).
Example 1: the getOutputVisitors method of the BaseVisitorFactory class.
@Override
public List<PartialFunction<LogicalPlan, List<OpenLineage.OutputDataset>>> getOutputVisitors(
    OpenLineageContext context) {
  // Output datasets are built through the output-side DatasetFactory.
  DatasetFactory<OpenLineage.OutputDataset> outputFactory =
      DatasetFactory.output(context.getOpenLineage());

  // Seed the result with the visitors shared between the input and output sides,
  // then append the output-only command visitors.
  List<PartialFunction<LogicalPlan, List<OpenLineage.OutputDataset>>> visitors =
      new ArrayList<>(getCommonVisitors(context, outputFactory));

  visitors.add(new InsertIntoDataSourceDirVisitor(context));
  visitors.add(new InsertIntoDataSourceVisitor(context));
  visitors.add(new InsertIntoHadoopFsRelationVisitor(context));
  visitors.add(new CreateDataSourceTableAsSelectCommandVisitor(context));
  visitors.add(new InsertIntoDirVisitor(context));

  // Hive visitors are registered only when the Hive classes are present on the
  // classpath, so the agent also works on Hive-less Spark deployments.
  if (InsertIntoHiveTableVisitor.hasHiveClasses()) {
    visitors.add(new InsertIntoHiveTableVisitor(context));
    visitors.add(new InsertIntoHiveDirVisitor(context));
    visitors.add(new CreateHiveTableAsSelectCommandVisitor(context));
  }
  // Same classpath guard for the optimized CTAS command variant.
  if (OptimizedCreateHiveTableAsSelectCommandVisitor.hasClasses()) {
    visitors.add(new OptimizedCreateHiveTableAsSelectCommandVisitor(context));
  }

  visitors.add(new CreateDataSourceTableCommandVisitor(context));
  visitors.add(new LoadDataCommandVisitor(context));
  visitors.add(new AlterTableRenameCommandVisitor(context));
  visitors.add(new AlterTableAddColumnsCommandVisitor(context));
  visitors.add(new CreateTableCommandVisitor(context));
  visitors.add(new DropTableCommandVisitor(context));
  visitors.add(new TruncateTableCommandVisitor(context));
  return visitors;
}
Usage example of io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor in the OpenLineage project (by OpenLineage).
Example 2: the testSqlEventWithJobEventEmitsOnce method of the OpenLineageSparkListenerTest class.
@Test
public void testSqlEventWithJobEventEmitsOnce() {
  // Mocked Spark runtime collaborators.
  SparkSession session = mock(SparkSession.class);
  SparkContext context = mock(SparkContext.class);
  EventEmitter eventEmitter = mock(EventEmitter.class);
  QueryExecution queryExecution = mock(QueryExecution.class);
  SparkPlan physicalPlan = mock(SparkPlan.class);

  when(session.sparkContext()).thenReturn(context);
  when(context.appName()).thenReturn("appName");
  when(physicalPlan.sparkContext()).thenReturn(context);
  when(physicalPlan.nodeName()).thenReturn("execute");

  // Optimized plan is a write into a Hadoop FS relation whose source is an
  // unresolved table scan.
  LogicalPlan source = UnresolvedRelation$.MODULE$.apply(TableIdentifier.apply("tableName"));
  when(queryExecution.optimizedPlan())
      .thenReturn(
          new InsertIntoHadoopFsRelationCommand(
              new Path("file:///tmp/dir"),
              null,
              false,
              Seq$.MODULE$.empty(),
              Option.empty(),
              null,
              Map$.MODULE$.empty(),
              source,
              SaveMode.Overwrite,
              Option.empty(),
              Option.empty(),
              Seq$.MODULE$.<String>empty()));
  when(queryExecution.executedPlan()).thenReturn(physicalPlan);

  OpenLineageContext olContext =
      OpenLineageContext.builder()
          .sparkSession(Optional.of(session))
          .sparkContext(session.sparkContext())
          .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
          .queryExecution(queryExecution)
          .build();
  olContext
      .getOutputDatasetQueryPlanVisitors()
      .add(new InsertIntoHadoopFsRelationVisitor(olContext));

  ExecutionContext executionContext =
      new StaticExecutionContextFactory(eventEmitter)
          .createSparkSQLExecutionContext(1L, eventEmitter, queryExecution, olContext);

  // Fire both the SQL-execution start and the job start; the expectation is
  // one emitted event per start call — two in total, presumably meaning the
  // job-start path does not double-emit for the same SQL execution.
  executionContext.start(
      new SparkListenerSQLExecutionStart(
          1L,
          "",
          "",
          "",
          new SparkPlanInfo(
              "name", "string", Seq$.MODULE$.empty(), Map$.MODULE$.empty(), Seq$.MODULE$.empty()),
          1L));
  executionContext.start(
      new SparkListenerJobStart(0, 2L, Seq$.MODULE$.<StageInfo>empty(), new Properties()));

  ArgumentCaptor<OpenLineage.RunEvent> captured =
      ArgumentCaptor.forClass(OpenLineage.RunEvent.class);
  verify(eventEmitter, times(2)).emit(captured.capture());
}
Aggregations