Search in sources :

Example 1 with InsertIntoHadoopFsRelationVisitor

use of io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor in project OpenLineage by OpenLineage.

Example usage in the getOutputVisitors method of the BaseVisitorFactory class:

/**
 * Assembles the ordered list of visitors that can extract {@link OpenLineage.OutputDataset}s
 * from a {@link LogicalPlan} node.
 *
 * <p>The common visitors shared with input extraction come first, followed by the
 * write-command visitors. Hive-specific visitors are registered only when the Hive
 * classes are present on the classpath, as probed by the {@code hasHiveClasses()} /
 * {@code hasClasses()} checks.
 *
 * @param context the active OpenLineage context used to construct each visitor
 * @return a mutable list of partial functions, in registration order
 */
@Override
public List<PartialFunction<LogicalPlan, List<OpenLineage.OutputDataset>>> getOutputVisitors(OpenLineageContext context) {
    DatasetFactory<OpenLineage.OutputDataset> outputFactory = DatasetFactory.output(context.getOpenLineage());
    // Start from a mutable copy of the visitors shared between input and output handling.
    List<PartialFunction<LogicalPlan, List<OpenLineage.OutputDataset>>> visitors =
        new ArrayList<>(getCommonVisitors(context, outputFactory));
    visitors.add(new InsertIntoDataSourceDirVisitor(context));
    visitors.add(new InsertIntoDataSourceVisitor(context));
    visitors.add(new InsertIntoHadoopFsRelationVisitor(context));
    visitors.add(new CreateDataSourceTableAsSelectCommandVisitor(context));
    visitors.add(new InsertIntoDirVisitor(context));
    // Hive visitors are optional: only register them when Hive is on the classpath.
    if (InsertIntoHiveTableVisitor.hasHiveClasses()) {
        visitors.add(new InsertIntoHiveTableVisitor(context));
        visitors.add(new InsertIntoHiveDirVisitor(context));
        visitors.add(new CreateHiveTableAsSelectCommandVisitor(context));
    }
    // The optimized CTAS command exists only in some Spark distributions; probe separately.
    if (OptimizedCreateHiveTableAsSelectCommandVisitor.hasClasses()) {
        visitors.add(new OptimizedCreateHiveTableAsSelectCommandVisitor(context));
    }
    visitors.add(new CreateDataSourceTableCommandVisitor(context));
    visitors.add(new LoadDataCommandVisitor(context));
    visitors.add(new AlterTableRenameCommandVisitor(context));
    visitors.add(new AlterTableAddColumnsCommandVisitor(context));
    visitors.add(new CreateTableCommandVisitor(context));
    visitors.add(new DropTableCommandVisitor(context));
    visitors.add(new TruncateTableCommandVisitor(context));
    return visitors;
}
Also used : AlterTableRenameCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.AlterTableRenameCommandVisitor) PartialFunction(scala.PartialFunction) OptimizedCreateHiveTableAsSelectCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.OptimizedCreateHiveTableAsSelectCommandVisitor) TruncateTableCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.TruncateTableCommandVisitor) DropTableCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.DropTableCommandVisitor) ArrayList(java.util.ArrayList) CreateDataSourceTableCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.CreateDataSourceTableCommandVisitor) LoadDataCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.LoadDataCommandVisitor) CreateDataSourceTableAsSelectCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.CreateDataSourceTableAsSelectCommandVisitor) InsertIntoHiveDirVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoHiveDirVisitor) InsertIntoHiveTableVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoHiveTableVisitor) CreateTableCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.CreateTableCommandVisitor) CreateHiveTableAsSelectCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.CreateHiveTableAsSelectCommandVisitor) OptimizedCreateHiveTableAsSelectCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.OptimizedCreateHiveTableAsSelectCommandVisitor) InsertIntoDirVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoDirVisitor) InsertIntoDataSourceVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoDataSourceVisitor) AlterTableAddColumnsCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.AlterTableAddColumnsCommandVisitor) InsertIntoHadoopFsRelationVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor) InsertIntoDataSourceDirVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoDataSourceDirVisitor)

Example 2 with InsertIntoHadoopFsRelationVisitor

use of io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor in project OpenLineage by OpenLineage.

Example usage in the testSqlEventWithJobEventEmitsOnce method of the OpenLineageSparkListenerTest class:

/**
 * Verifies that when a SQL execution start is followed by a job start for the same
 * query, the listener emits exactly two lineage run events in total.
 *
 * <p>The Spark runtime is fully mocked; the optimized plan is a real
 * {@link InsertIntoHadoopFsRelationCommand} so that the registered
 * {@link InsertIntoHadoopFsRelationVisitor} matches it.
 */
@Test
public void testSqlEventWithJobEventEmitsOnce() {
    // Mock the Spark runtime pieces the execution context depends on.
    SparkSession session = mock(SparkSession.class);
    SparkContext context = mock(SparkContext.class);
    EventEmitter eventEmitter = mock(EventEmitter.class);
    QueryExecution queryExecution = mock(QueryExecution.class);
    LogicalPlan logicalQuery = UnresolvedRelation$.MODULE$.apply(TableIdentifier.apply("tableName"));
    SparkPlan physicalPlan = mock(SparkPlan.class);

    when(session.sparkContext()).thenReturn(context);
    when(context.appName()).thenReturn("appName");
    // A concrete insert command so the output-dataset visitor can match the plan.
    when(queryExecution.optimizedPlan())
        .thenReturn(
            new InsertIntoHadoopFsRelationCommand(
                new Path("file:///tmp/dir"),
                null,
                false,
                Seq$.MODULE$.empty(),
                Option.empty(),
                null,
                Map$.MODULE$.empty(),
                logicalQuery,
                SaveMode.Overwrite,
                Option.empty(),
                Option.empty(),
                Seq$.MODULE$.<String>empty()));
    when(queryExecution.executedPlan()).thenReturn(physicalPlan);
    when(physicalPlan.sparkContext()).thenReturn(context);
    when(physicalPlan.nodeName()).thenReturn("execute");

    OpenLineageContext olContext =
        OpenLineageContext.builder()
            .sparkSession(Optional.of(session))
            .sparkContext(session.sparkContext())
            .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
            .queryExecution(queryExecution)
            .build();
    olContext.getOutputDatasetQueryPlanVisitors().add(new InsertIntoHadoopFsRelationVisitor(olContext));

    ExecutionContext executionContext =
        new StaticExecutionContextFactory(eventEmitter)
            .createSparkSQLExecutionContext(1L, eventEmitter, queryExecution, olContext);

    // Fire both start events: SQL execution start, then the corresponding job start.
    executionContext.start(
        new SparkListenerSQLExecutionStart(
            1L,
            "",
            "",
            "",
            new SparkPlanInfo(
                "name", "string", Seq$.MODULE$.empty(), Map$.MODULE$.empty(), Seq$.MODULE$.empty()),
            1L));
    executionContext.start(new SparkListenerJobStart(0, 2L, Seq$.MODULE$.<StageInfo>empty(), new Properties()));

    // Exactly two emissions are expected — one per start call, no duplicates.
    ArgumentCaptor<OpenLineage.RunEvent> capturedEvents = ArgumentCaptor.forClass(OpenLineage.RunEvent.class);
    verify(eventEmitter, times(2)).emit(capturedEvents.capture());
}
Also used : Path(org.apache.hadoop.fs.Path) SparkSession(org.apache.spark.sql.SparkSession) SparkPlan(org.apache.spark.sql.execution.SparkPlan) SparkListenerJobStart(org.apache.spark.scheduler.SparkListenerJobStart) StageInfo(org.apache.spark.scheduler.StageInfo) StaticExecutionContextFactory(io.openlineage.spark.agent.lifecycle.StaticExecutionContextFactory) InsertIntoHadoopFsRelationCommand(org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand) Properties(java.util.Properties) QueryExecution(org.apache.spark.sql.execution.QueryExecution) SparkListenerSQLExecutionStart(org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart) SparkContext(org.apache.spark.SparkContext) ExecutionContext(io.openlineage.spark.agent.lifecycle.ExecutionContext) SparkPlanInfo(org.apache.spark.sql.execution.SparkPlanInfo) OpenLineage(io.openlineage.client.OpenLineage) LogicalPlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan) OpenLineageContext(io.openlineage.spark.api.OpenLineageContext) InsertIntoHadoopFsRelationVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor) Test(org.junit.jupiter.api.Test)

Aggregations

InsertIntoHadoopFsRelationVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor)2 OpenLineage (io.openlineage.client.OpenLineage)1 ExecutionContext (io.openlineage.spark.agent.lifecycle.ExecutionContext)1 StaticExecutionContextFactory (io.openlineage.spark.agent.lifecycle.StaticExecutionContextFactory)1 AlterTableAddColumnsCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.AlterTableAddColumnsCommandVisitor)1 AlterTableRenameCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.AlterTableRenameCommandVisitor)1 CreateDataSourceTableAsSelectCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.CreateDataSourceTableAsSelectCommandVisitor)1 CreateDataSourceTableCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.CreateDataSourceTableCommandVisitor)1 CreateHiveTableAsSelectCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.CreateHiveTableAsSelectCommandVisitor)1 CreateTableCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.CreateTableCommandVisitor)1 DropTableCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.DropTableCommandVisitor)1 InsertIntoDataSourceDirVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoDataSourceDirVisitor)1 InsertIntoDataSourceVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoDataSourceVisitor)1 InsertIntoDirVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoDirVisitor)1 InsertIntoHiveDirVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoHiveDirVisitor)1 InsertIntoHiveTableVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoHiveTableVisitor)1 LoadDataCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.LoadDataCommandVisitor)1 OptimizedCreateHiveTableAsSelectCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.OptimizedCreateHiveTableAsSelectCommandVisitor)1 TruncateTableCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.TruncateTableCommandVisitor)1 OpenLineageContext (io.openlineage.spark.api.OpenLineageContext)1