Search in sources :

Example 1 with CreateHiveTableAsSelectCommandVisitor

Use of io.openlineage.spark.agent.lifecycle.plan.CreateHiveTableAsSelectCommandVisitor in the OpenLineage project (by OpenLineage).

From the class BaseVisitorFactory, method getOutputVisitors:

@Override
public List<PartialFunction<LogicalPlan, List<OpenLineage.OutputDataset>>> getOutputVisitors(OpenLineageContext context) {
    // Factory that materializes OpenLineage OutputDataset instances for this context.
    DatasetFactory<OpenLineage.OutputDataset> outputFactory = DatasetFactory.output(context.getOpenLineage());
    // Start from the visitors shared between input and output handling, then
    // append the output-only command visitors in registration order.
    List<PartialFunction<LogicalPlan, List<OpenLineage.OutputDataset>>> visitors =
        new ArrayList<>(getCommonVisitors(context, outputFactory));
    visitors.add(new InsertIntoDataSourceDirVisitor(context));
    visitors.add(new InsertIntoDataSourceVisitor(context));
    visitors.add(new InsertIntoHadoopFsRelationVisitor(context));
    visitors.add(new CreateDataSourceTableAsSelectCommandVisitor(context));
    visitors.add(new InsertIntoDirVisitor(context));
    // Hive visitors are registered only when the Hive classes they target are
    // actually present on the classpath.
    if (InsertIntoHiveTableVisitor.hasHiveClasses()) {
        visitors.add(new InsertIntoHiveTableVisitor(context));
        visitors.add(new InsertIntoHiveDirVisitor(context));
        visitors.add(new CreateHiveTableAsSelectCommandVisitor(context));
    }
    // The optimized CTAS command exists only in some Spark/Hive versions.
    if (OptimizedCreateHiveTableAsSelectCommandVisitor.hasClasses()) {
        visitors.add(new OptimizedCreateHiveTableAsSelectCommandVisitor(context));
    }
    visitors.add(new CreateDataSourceTableCommandVisitor(context));
    visitors.add(new LoadDataCommandVisitor(context));
    visitors.add(new AlterTableRenameCommandVisitor(context));
    visitors.add(new AlterTableAddColumnsCommandVisitor(context));
    visitors.add(new CreateTableCommandVisitor(context));
    visitors.add(new DropTableCommandVisitor(context));
    visitors.add(new TruncateTableCommandVisitor(context));
    return visitors;
}
Also used : AlterTableRenameCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.AlterTableRenameCommandVisitor) PartialFunction(scala.PartialFunction) OptimizedCreateHiveTableAsSelectCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.OptimizedCreateHiveTableAsSelectCommandVisitor) TruncateTableCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.TruncateTableCommandVisitor) DropTableCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.DropTableCommandVisitor) ArrayList(java.util.ArrayList) CreateDataSourceTableCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.CreateDataSourceTableCommandVisitor) LoadDataCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.LoadDataCommandVisitor) CreateDataSourceTableAsSelectCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.CreateDataSourceTableAsSelectCommandVisitor) InsertIntoHiveDirVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoHiveDirVisitor) InsertIntoHiveTableVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoHiveTableVisitor) CreateTableCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.CreateTableCommandVisitor) CreateHiveTableAsSelectCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.CreateHiveTableAsSelectCommandVisitor) OptimizedCreateHiveTableAsSelectCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.OptimizedCreateHiveTableAsSelectCommandVisitor) InsertIntoDirVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoDirVisitor) InsertIntoDataSourceVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoDataSourceVisitor) AlterTableAddColumnsCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.AlterTableAddColumnsCommandVisitor) InsertIntoHadoopFsRelationVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor) InsertIntoDataSourceDirVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoDataSourceDirVisitor)

Example 2 with CreateHiveTableAsSelectCommandVisitor

Use of io.openlineage.spark.agent.lifecycle.plan.CreateHiveTableAsSelectCommandVisitor in the OpenLineage project (by OpenLineage).

From the class CreateHiveTableAsSelectCommandVisitorTest, method testCreateHiveTableAsSelectCommand:

@Test
void testCreateHiveTableAsSelectCommand() {
    // Visitor under test, wired with a minimal OpenLineageContext built from
    // the shared test Spark session.
    CreateHiveTableAsSelectCommandVisitor visitor =
        new CreateHiveTableAsSelectCommandVisitor(
            OpenLineageContext.builder()
                .sparkSession(Optional.of(session))
                .sparkContext(session.sparkContext())
                .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
                .build());
    // Schema of the Hive table being created: (key int, value string).
    StructType tableSchema =
        new StructType(
            new StructField[] {
                new StructField("key", IntegerType$.MODULE$, false, new Metadata(new HashMap<>())),
                new StructField("value", StringType$.MODULE$, false, new Metadata(new HashMap<>()))
            });
    // Schema of the JDBC relation that feeds the CTAS query.
    StructType sourceSchema =
        new StructType(
            new StructField[] {
                new StructField("key", IntegerType$.MODULE$, false, null),
                new StructField("value", StringType$.MODULE$, false, null)
            });
    // Logical plan for the SELECT side: a JDBC-backed relation with two
    // attribute references matching the source schema.
    LogicalRelation selectQuery =
        new LogicalRelation(
            new JDBCRelation(
                sourceSchema,
                new Partition[] {},
                new JDBCOptions(
                    "",
                    "temp",
                    scala.collection.immutable.Map$.MODULE$
                        .newBuilder()
                        .$plus$eq(Tuple2.apply("driver", Driver.class.getName()))
                        .result()),
                session),
            Seq$.MODULE$
                .<AttributeReference>newBuilder()
                .$plus$eq(
                    new AttributeReference(
                        "key", IntegerType$.MODULE$, false, null, ExprId.apply(1L), Seq$.MODULE$.<String>empty()))
                .$plus$eq(
                    new AttributeReference(
                        "value", StringType$.MODULE$, false, null, ExprId.apply(2L), Seq$.MODULE$.<String>empty()))
                .result(),
            Option.empty(),
            false);
    // CTAS command targeting an EXTERNAL table located at s3://bucket/directory.
    CreateHiveTableAsSelectCommand command =
        new CreateHiveTableAsSelectCommand(
            SparkUtils.catalogTable(
                TableIdentifier$.MODULE$.apply("tablename", Option.apply("db")),
                CatalogTableType.EXTERNAL(),
                CatalogStorageFormat$.MODULE$.apply(
                    Option.apply(URI.create("s3://bucket/directory")),
                    null,
                    null,
                    null,
                    false,
                    Map$.MODULE$.empty()),
                tableSchema),
            selectQuery,
            ScalaConversionUtils.fromList(Arrays.asList("key", "value")),
            SaveMode.Overwrite);
    assertThat(visitor.isDefinedAt(command)).isTrue();
    List<OpenLineage.OutputDataset> datasets = visitor.apply(command);
    assertEquals(1, datasets.size());
    OpenLineage.OutputDataset outputDataset = datasets.get(0);
    // The visitor should report a CREATE lifecycle transition, with the dataset
    // name/namespace derived from the table's s3 storage location.
    assertEquals(
        OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.CREATE,
        outputDataset.getFacets().getLifecycleStateChange().getLifecycleStateChange());
    assertEquals("directory", outputDataset.getName());
    assertEquals("s3://bucket", outputDataset.getNamespace());
}
Also used : StructType(org.apache.spark.sql.types.StructType) AttributeReference(org.apache.spark.sql.catalyst.expressions.AttributeReference) Metadata(org.apache.spark.sql.types.Metadata) JDBCRelation(org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation) Driver(org.postgresql.Driver) CreateHiveTableAsSelectCommand(org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand) LogicalRelation(org.apache.spark.sql.execution.datasources.LogicalRelation) StructField(org.apache.spark.sql.types.StructField) JDBCOptions(org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions) CreateHiveTableAsSelectCommandVisitor(io.openlineage.spark.agent.lifecycle.plan.CreateHiveTableAsSelectCommandVisitor) OpenLineage(io.openlineage.client.OpenLineage) Test(org.junit.jupiter.api.Test)

Aggregations

CreateHiveTableAsSelectCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.CreateHiveTableAsSelectCommandVisitor)2 OpenLineage (io.openlineage.client.OpenLineage)1 AlterTableAddColumnsCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.AlterTableAddColumnsCommandVisitor)1 AlterTableRenameCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.AlterTableRenameCommandVisitor)1 CreateDataSourceTableAsSelectCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.CreateDataSourceTableAsSelectCommandVisitor)1 CreateDataSourceTableCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.CreateDataSourceTableCommandVisitor)1 CreateTableCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.CreateTableCommandVisitor)1 DropTableCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.DropTableCommandVisitor)1 InsertIntoDataSourceDirVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoDataSourceDirVisitor)1 InsertIntoDataSourceVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoDataSourceVisitor)1 InsertIntoDirVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoDirVisitor)1 InsertIntoHadoopFsRelationVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor)1 InsertIntoHiveDirVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoHiveDirVisitor)1 InsertIntoHiveTableVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoHiveTableVisitor)1 LoadDataCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.LoadDataCommandVisitor)1 OptimizedCreateHiveTableAsSelectCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.OptimizedCreateHiveTableAsSelectCommandVisitor)1 TruncateTableCommandVisitor (io.openlineage.spark.agent.lifecycle.plan.TruncateTableCommandVisitor)1 ArrayList (java.util.ArrayList)1 AttributeReference (org.apache.spark.sql.catalyst.expressions.AttributeReference)1 LogicalRelation (org.apache.spark.sql.execution.datasources.LogicalRelation)1