Example usage of io.openlineage.client.OpenLineage in the OpenLineage project.
From class SparkVersionFacetBuilderTest, method testBuild:
@Test
public void testBuild() {
  // Build a context backed by the shared test SparkContext and a client-versioned OpenLineage.
  OpenLineageContext olContext =
      OpenLineageContext.builder()
          .sparkContext(sparkContext)
          .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
          .build();
  SparkVersionFacetBuilder builder = new SparkVersionFacetBuilder(olContext);

  // Collect emitted facets by name via the BiConsumer callback.
  Map<String, RunFacet> facetsByName = new HashMap<>();
  builder.build(new SparkListenerSQLExecutionEnd(1, 1L), facetsByName::put);

  // The builder must emit a "spark_version" facet carrying the running Spark version.
  assertThat(facetsByName)
      .hasEntrySatisfying(
          "spark_version",
          facet ->
              assertThat(facet)
                  .isInstanceOf(SparkVersionFacet.class)
                  .hasFieldOrPropertyWithValue("sparkVersion", sparkContext.version()));
}
Example usage of io.openlineage.client.OpenLineage in the OpenLineage project.
From class PlanUtils3, method fromDataSourceV2Relation:
/**
 * Builds dataset instances for a {@link DataSourceV2Relation} found in a logical plan.
 *
 * <p>Requires the relation to carry both an identifier and a {@link TableCatalog}; otherwise the
 * plan node cannot be mapped to a dataset and an {@link IllegalArgumentException} is thrown.
 *
 * @param datasetFactory factory producing the concrete dataset type (input or output)
 * @param context current OpenLineage context (supplies the {@link OpenLineage} instance)
 * @param relation the DataSourceV2 relation to translate
 * @param datasetFacetsBuilder builder to which schema/datasource/provider facets are attached
 * @return a singleton list with the resolved dataset, or an empty list when no dataset
 *     identifier could be derived from the catalog
 * @throws IllegalArgumentException when the relation lacks an identifier or a table catalog
 */
public static <D extends OpenLineage.Dataset> List<D> fromDataSourceV2Relation(
    DatasetFactory<D> datasetFactory,
    OpenLineageContext context,
    DataSourceV2Relation relation,
    OpenLineage.DatasetFacetsBuilder datasetFacetsBuilder) {
  if (relation.identifier().isEmpty()) {
    throw new IllegalArgumentException("Couldn't find identifier for dataset in plan " + relation);
  }
  Identifier identifier = relation.identifier().get();

  if (relation.catalog().isEmpty() || !(relation.catalog().get() instanceof TableCatalog)) {
    throw new IllegalArgumentException("Couldn't find catalog for dataset in plan " + relation);
  }
  TableCatalog tableCatalog = (TableCatalog) relation.catalog().get();

  Map<String, String> tableProperties = relation.table().properties();
  Optional<DatasetIdentifier> di =
      PlanUtils3.getDatasetIdentifier(context, tableCatalog, identifier, tableProperties);
  if (!di.isPresent()) {
    // Catalog could not resolve a namespace/name pair — nothing to report.
    return Collections.emptyList();
  }

  OpenLineage openLineage = context.getOpenLineage();
  datasetFacetsBuilder
      .schema(PlanUtils.schemaFacet(openLineage, relation.schema()))
      .dataSource(PlanUtils.datasourceFacet(openLineage, di.get().getNamespace()));

  // Fix: use ifPresent instead of map — the facet is attached purely for its side effect,
  // and the Optional returned by map was being discarded.
  CatalogUtils3.getTableProviderFacet(tableCatalog, tableProperties)
      .ifPresent(provider -> datasetFacetsBuilder.put("tableProvider", provider));

  return Collections.singletonList(
      datasetFactory.getDataset(
          di.get().getName(), di.get().getNamespace(), datasetFacetsBuilder.build()));
}
Example usage of io.openlineage.client.OpenLineage in the OpenLineage project.
From class AbstractQueryPlanDatasetBuilderTest, method testApplyOnBuilderWithGenericArg:
@Test
public void testApplyOnBuilderWithGenericArg() {
  SparkSession session =
      SparkSession.builder()
          .config("spark.sql.warehouse.dir", "/tmp/warehouse")
          .master("local")
          .getOrCreate();

  OpenLineage openLineage = new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI);
  InputDataset expected = openLineage.newInputDataset("namespace", "the_name", null, null);
  OpenLineageContext context = createContext(session, openLineage);

  MyGenericArgInputDatasetBuilder<SparkListenerJobEnd> builder =
      new MyGenericArgInputDatasetBuilder<>(context, true, expected);
  SparkListenerJobEnd jobEnd = new SparkListenerJobEnd(1, 2, null);

  // Even though our instance of builder is parameterized with SparkListenerJobEnd, it's not
  // *compiled* with that argument, so the isDefinedAt method fails to resolve the type arg
  Assertions.assertFalse(((PartialFunction) builder).isDefinedAt(jobEnd));
}
Example usage of io.openlineage.client.OpenLineage in the OpenLineage project.
From class AlterTableDatasetBuilder, method apply:
/**
 * Translates an {@code AlterTable} plan node into an output dataset.
 *
 * <p>Loads the altered table from its catalog to obtain the current schema and properties, then
 * resolves a dataset identifier and attaches schema, datasource, and (when available) dataset
 * version facets.
 *
 * @param alterTable the ALTER TABLE logical plan node
 * @return a singleton list with the resolved output dataset, or an empty list when the table
 *     cannot be loaded or no dataset identifier can be derived
 */
@Override
public List<OpenLineage.OutputDataset> apply(AlterTable alterTable) {
  TableCatalog tableCatalog = alterTable.catalog();
  Table table;
  try {
    // Fix: reuse the tableCatalog local instead of calling alterTable.catalog() a second time.
    table = tableCatalog.loadTable(alterTable.ident());
  } catch (Exception e) {
    // Best-effort: if the table can no longer be loaded (e.g. dropped concurrently),
    // skip lineage for this node rather than failing the listener.
    return Collections.emptyList();
  }

  Optional<DatasetIdentifier> di =
      PlanUtils3.getDatasetIdentifier(context, tableCatalog, alterTable.ident(), table.properties());
  if (!di.isPresent()) {
    return Collections.emptyList();
  }

  OpenLineage openLineage = context.getOpenLineage();
  OpenLineage.DatasetFacetsBuilder builder =
      openLineage
          .newDatasetFacetsBuilder()
          .schema(PlanUtils.schemaFacet(openLineage, table.schema()))
          .dataSource(PlanUtils.datasourceFacet(openLineage, di.get().getNamespace()));

  Optional<String> datasetVersion =
      CatalogUtils3.getDatasetVersion(tableCatalog, alterTable.ident(), table.properties());
  datasetVersion.ifPresent(version -> builder.version(openLineage.newDatasetVersionDatasetFacet(version)));

  return Collections.singletonList(
      outputDataset().getDataset(di.get().getName(), di.get().getNamespace(), builder.build()));
}
Example usage of io.openlineage.client.OpenLineage in the OpenLineage project.
From class CreateReplaceDatasetBuilder, method apply:
/**
 * Translates a CREATE/REPLACE TABLE plan node into an output dataset.
 *
 * <p>Supports {@code CreateTableAsSelect}, {@code CreateV2Table}, {@code ReplaceTable}, and
 * {@code ReplaceTableAsSelect}, extracting the catalog, table properties, identifier, and schema
 * from whichever command type is present, and tagging the dataset with the matching lifecycle
 * state change (CREATE or OVERWRITE).
 *
 * @param x the logical plan node; must be one of the four supported command types
 * @return a singleton list with the resolved output dataset, or an empty list when no dataset
 *     identifier can be derived
 */
@Override
public List<OpenLineage.OutputDataset> apply(LogicalPlan x) {
  TableCatalog tableCatalog;
  Map<String, String> tableProperties;
  Identifier identifier;
  StructType schema;
  OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange lifecycleStateChange;

  if (x instanceof CreateTableAsSelect) {
    CreateTableAsSelect command = (CreateTableAsSelect) x;
    tableCatalog = command.catalog();
    tableProperties = ScalaConversionUtils.<String, String>fromMap(command.properties());
    identifier = command.tableName();
    schema = command.tableSchema();
    lifecycleStateChange = OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.CREATE;
  } else if (x instanceof CreateV2Table) {
    CreateV2Table command = (CreateV2Table) x;
    tableCatalog = command.catalog();
    tableProperties = ScalaConversionUtils.<String, String>fromMap(command.properties());
    identifier = command.tableName();
    schema = command.tableSchema();
    lifecycleStateChange = OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.CREATE;
  } else if (x instanceof ReplaceTable) {
    ReplaceTable command = (ReplaceTable) x;
    tableCatalog = command.catalog();
    tableProperties = ScalaConversionUtils.<String, String>fromMap(command.properties());
    identifier = command.tableName();
    schema = command.tableSchema();
    lifecycleStateChange =
        OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.OVERWRITE;
  } else {
    // NOTE(review): relies on the builder's isDefinedAt/type filter to guarantee x is a
    // ReplaceTableAsSelect here — an unexpected type would throw ClassCastException. Confirm
    // the dispatch guarantees this before tightening.
    ReplaceTableAsSelect command = (ReplaceTableAsSelect) x;
    tableCatalog = command.catalog();
    tableProperties = ScalaConversionUtils.<String, String>fromMap(command.properties());
    identifier = command.tableName();
    schema = command.tableSchema();
    lifecycleStateChange =
        OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.OVERWRITE;
  }

  Optional<DatasetIdentifier> di =
      PlanUtils3.getDatasetIdentifier(context, tableCatalog, identifier, tableProperties);
  if (!di.isPresent()) {
    return Collections.emptyList();
  }

  OpenLineage openLineage = context.getOpenLineage();
  OpenLineage.DatasetFacetsBuilder builder =
      openLineage
          .newDatasetFacetsBuilder()
          .schema(PlanUtils.schemaFacet(openLineage, schema))
          .lifecycleStateChange(
              openLineage.newLifecycleStateChangeDatasetFacet(lifecycleStateChange, null))
          .dataSource(PlanUtils.datasourceFacet(openLineage, di.get().getNamespace()));

  Optional<String> datasetVersion =
      CatalogUtils3.getDatasetVersion(tableCatalog, identifier, tableProperties);
  datasetVersion.ifPresent(version -> builder.version(openLineage.newDatasetVersionDatasetFacet(version)));

  // Fix: use ifPresent instead of map — the facet is attached purely for its side effect,
  // and the Optional returned by map was being discarded.
  CatalogUtils3.getTableProviderFacet(tableCatalog, tableProperties)
      .ifPresent(provider -> builder.put("tableProvider", provider));

  return Collections.singletonList(
      outputDataset().getDataset(di.get().getName(), di.get().getNamespace(), builder.build()));
}
End of aggregated usage examples.