Use of io.openlineage.spark.agent.lifecycle.plan.CreateDataSourceTableCommandVisitor in project OpenLineage by OpenLineage.
Example 1: the getOutputVisitors method of the class BaseVisitorFactory.
@Override
public List<PartialFunction<LogicalPlan, List<OpenLineage.OutputDataset>>> getOutputVisitors(
    OpenLineageContext context) {
  DatasetFactory<OpenLineage.OutputDataset> factory =
      DatasetFactory.output(context.getOpenLineage());
  // Start from the visitors shared between input and output handling.
  List<PartialFunction<LogicalPlan, List<OpenLineage.OutputDataset>>> outputCommonVisitors =
      getCommonVisitors(context, factory);
  List<PartialFunction<LogicalPlan, List<OpenLineage.OutputDataset>>> list =
      new ArrayList<>(outputCommonVisitors);

  list.add(new InsertIntoDataSourceDirVisitor(context));
  list.add(new InsertIntoDataSourceVisitor(context));
  list.add(new InsertIntoHadoopFsRelationVisitor(context));
  list.add(new CreateDataSourceTableAsSelectCommandVisitor(context));
  list.add(new InsertIntoDirVisitor(context));

  // Hive-specific visitors are registered only when Hive classes are on the classpath.
  if (InsertIntoHiveTableVisitor.hasHiveClasses()) {
    list.add(new InsertIntoHiveTableVisitor(context));
    list.add(new InsertIntoHiveDirVisitor(context));
    list.add(new CreateHiveTableAsSelectCommandVisitor(context));
  }
  if (OptimizedCreateHiveTableAsSelectCommandVisitor.hasClasses()) {
    list.add(new OptimizedCreateHiveTableAsSelectCommandVisitor(context));
  }

  list.add(new CreateDataSourceTableCommandVisitor(context));
  list.add(new LoadDataCommandVisitor(context));
  list.add(new AlterTableRenameCommandVisitor(context));
  list.add(new AlterTableAddColumnsCommandVisitor(context));
  list.add(new CreateTableCommandVisitor(context));
  list.add(new DropTableCommandVisitor(context));
  list.add(new TruncateTableCommandVisitor(context));
  return list;
}
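
The two guards above, hasHiveClasses() and hasClasses(), keep Hive-only visitors out of the list when Spark's Hive integration is absent. A common way to implement such a guard is a reflective classpath probe, sketched below; this is an assumption about the pattern, and the marker class name is illustrative, not necessarily the one the project actually checks:

// Minimal sketch of a classpath probe, assuming a Class.forName check.
static boolean hasHiveClasses() {
  try {
    // Illustrative marker class; the real implementation may probe another.
    Class.forName(
        "org.apache.spark.sql.hive.execution.InsertIntoHiveTable",
        false,
        Thread.currentThread().getContextClassLoader());
    return true;
  } catch (ClassNotFoundException e) {
    return false;
  }
}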
Use of io.openlineage.spark.agent.lifecycle.plan.CreateDataSourceTableCommandVisitor in project OpenLineage by OpenLineage.
Example 2: the testCreateDataSourceTableCommand method of the class CreateDataSourceTableCommandVisitorTest.
@Test
void testCreateDataSourceTableCommand() {
  CreateDataSourceTableCommandVisitor visitor =
      new CreateDataSourceTableCommandVisitor(SparkAgentTestExtension.newContext(session));

  // Build a CREATE TABLE ... USING command for an external table stored at
  // s3://bucket/directory with a (key INT, value STRING) schema. The nulls
  // stand in for the unused Option-typed storage-format fields.
  CreateDataSourceTableCommand command =
      new CreateDataSourceTableCommand(
          SparkUtils.catalogTable(
              TableIdentifier$.MODULE$.apply("tablename", Option.apply("db")),
              CatalogTableType.EXTERNAL(),
              CatalogStorageFormat$.MODULE$.apply(
                  Option.apply(URI.create("s3://bucket/directory")),
                  null,
                  null,
                  null,
                  false,
                  Map$.MODULE$.empty()),
              new StructType(
                  new StructField[] {
                    new StructField("key", IntegerType$.MODULE$, false, new Metadata(new HashMap<>())),
                    new StructField("value", StringType$.MODULE$, false, new Metadata(new HashMap<>()))
                  })),
          false);

  assertThat(visitor.isDefinedAt(command)).isTrue();

  List<OpenLineage.OutputDataset> datasets = visitor.apply(command);
  assertEquals(1, datasets.size());
  OpenLineage.OutputDataset outputDataset = datasets.get(0);

  // The visitor reports a CREATE lifecycle change and derives the dataset
  // coordinates from the storage location.
  assertEquals(
      OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.CREATE,
      outputDataset.getFacets().getLifecycleStateChange().getLifecycleStateChange());
  assertEquals("directory", outputDataset.getName());
  assertEquals("s3://bucket", outputDataset.getNamespace());
}
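
The last two assertions pin down how the visitor derives dataset coordinates from the table's storage location: s3://bucket/directory becomes namespace s3://bucket and name directory. Below is a minimal, self-contained sketch of that split; splitLocation is a hypothetical helper introduced for illustration, not the project's API:

import java.net.URI;

class DatasetCoordinates {
  // Hypothetical helper mirroring the convention the test asserts:
  // scheme + authority form the namespace, the path (without its leading
  // slash) forms the name.
  static String[] splitLocation(URI location) {
    String namespace = location.getScheme() + "://" + location.getAuthority();
    String name = location.getPath().replaceFirst("^/", "");
    return new String[] {namespace, name};
  }

  public static void main(String[] args) {
    String[] parts = splitLocation(URI.create("s3://bucket/directory"));
    System.out.println(parts[0]); // prints: s3://bucket
    System.out.println(parts[1]); // prints: directory
  }
}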