Use of io.openlineage.spark.agent.util.DatasetIdentifier in project OpenLineage by OpenLineage.
The class InsertIntoHadoopFsRelationVisitor, method apply:
@Override
public List<OpenLineage.OutputDataset> apply(LogicalPlan x) {
  InsertIntoHadoopFsRelationCommand command = (InsertIntoHadoopFsRelationCommand) x;
  DatasetIdentifier di = PathUtils.fromURI(command.outputPath().toUri(), "file");
  OpenLineage.OutputDataset outputDataset;
  if (SaveMode.Overwrite == command.mode()) {
    // An overwrite write is reported through the OVERWRITE lifecycle state change facet.
    outputDataset =
        outputDataset()
            .getDataset(
                di,
                command.query().schema(),
                OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.OVERWRITE);
  } else {
    outputDataset = outputDataset().getDataset(di, command.query().schema());
  }
  return Collections.singletonList(outputDataset);
}
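For context, a minimal, self-contained sketch of the PathUtils.fromURI call used above, with an illustrative path; the exact printed values depend on how PathUtils normalizes the URI:

import java.net.URI;

import io.openlineage.spark.agent.util.DatasetIdentifier;
import io.openlineage.spark.agent.util.PathUtils;

public class PathUtilsSketch {
  public static void main(String[] args) {
    // Illustrative output path; the visitor above gets this from command.outputPath().toUri().
    URI outputPath = URI.create("file:///tmp/warehouse/orders");
    DatasetIdentifier di = PathUtils.fromURI(outputPath, "file");
    // A DatasetIdentifier carries the two fields every dataset event needs:
    System.out.println(di.getName());      // the path portion of the location
    System.out.println(di.getNamespace()); // the scheme (and authority, if any)
  }
}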
Use of io.openlineage.spark.agent.util.DatasetIdentifier in project OpenLineage by OpenLineage.
The class TruncateTableCommandVisitor, method apply:
@Override
public List<OutputDataset> apply(LogicalPlan x) {
  TruncateTableCommand command = (TruncateTableCommand) x;
  Optional<CatalogTable> tableOpt = catalogTableFor(command.tableName());
  if (tableOpt.isPresent()) {
    CatalogTable table = tableOpt.get();
    DatasetIdentifier datasetIdentifier = PathUtils.fromCatalogTable(table);
    DatasetFactory<OutputDataset> datasetFactory = outputDataset();
    // A truncate keeps the table but drops its data, so the event carries a
    // TRUNCATE lifecycle facet and no schema facet.
    return Collections.singletonList(
        datasetFactory.getDataset(
            datasetIdentifier,
            new OpenLineage.DatasetFacetsBuilder()
                .schema(null)
                .dataSource(PlanUtils.datasourceFacet(
                    context.getOpenLineage(), datasetIdentifier.getNamespace()))
                .lifecycleStateChange(context.getOpenLineage()
                    .newLifecycleStateChangeDatasetFacet(
                        OpenLineage.LifecycleStateChangeDatasetFacet
                            .LifecycleStateChange.TRUNCATE, null))
                .build()));
  } else {
    // table does not exist, cannot prepare an event
    return Collections.emptyList();
  }
}
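In isolation, the lifecycle facet construction looks like this; a minimal sketch assuming the openlineage-java client, with an illustrative producer URI:

import java.net.URI;

import io.openlineage.client.OpenLineage;

public class TruncateFacetSketch {
  public static void main(String[] args) {
    // The producer URI is illustrative; the visitor takes OpenLineage from its context.
    OpenLineage openLineage = new OpenLineage(URI.create("https://example.com/my-producer"));
    OpenLineage.LifecycleStateChangeDatasetFacet facet =
        openLineage.newLifecycleStateChangeDatasetFacet(
            OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.TRUNCATE,
            // null previous identifier: a truncate keeps the table's name and namespace
            null);
    System.out.println(facet.getLifecycleStateChange());
  }
}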
Use of io.openlineage.spark.agent.util.DatasetIdentifier in project OpenLineage by OpenLineage.
The class InsertIntoDataSourceDirVisitor, method apply:
@Override
public List<OpenLineage.OutputDataset> apply(LogicalPlan x) {
  InsertIntoDataSourceDirCommand command = (InsertIntoDataSourceDirCommand) x;
  // A URI is required by the InsertIntoDataSourceDirCommand, so get() is safe here.
  DatasetIdentifier di = PathUtils.fromURI(command.storage().locationUri().get(), "file");
  OpenLineage.OutputDataset outputDataset;
  if (command.overwrite()) {
    // Overwrite writes are reported through the OVERWRITE lifecycle state change facet.
    outputDataset =
        outputDataset()
            .getDataset(
                di,
                command.schema(),
                OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.OVERWRITE);
  } else {
    outputDataset = outputDataset().getDataset(di, command.schema());
  }
  return Collections.singletonList(outputDataset);
}
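For reference, the kind of statement that plans to an InsertIntoDataSourceDirCommand; a minimal sketch assuming a local Spark session, with illustrative paths:

import org.apache.spark.sql.SparkSession;

public class InsertIntoDirSketch {
  public static void main(String[] args) {
    SparkSession spark =
        SparkSession.builder().master("local[*]").appName("insert-into-dir").getOrCreate();
    // INSERT OVERWRITE DIRECTORY ... USING <format> plans to an
    // InsertIntoDataSourceDirCommand with overwrite() == true, which the
    // visitor above reports with the OVERWRITE lifecycle state change.
    spark.sql("INSERT OVERWRITE DIRECTORY '/tmp/lineage-out' USING parquet SELECT 1 AS id");
    spark.stop();
  }
}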
Use of io.openlineage.spark.agent.util.DatasetIdentifier in project OpenLineage by OpenLineage.
The class IcebergHandler, method getDatasetIdentifier:
@Override
public DatasetIdentifier getDatasetIdentifier(
    SparkSession session,
    TableCatalog tableCatalog,
    Identifier identifier,
    Map<String, String> properties) {
  SparkCatalog sparkCatalog = (SparkCatalog) tableCatalog;
  String catalogName = sparkCatalog.name();
  String prefix = String.format("spark.sql.catalog.%s", catalogName);
  Map<String, String> conf =
      ScalaConversionUtils.<String, String>fromMap(session.conf().getAll());
  log.info(conf.toString());
  // Extract the Iceberg catalog settings: keep only keys below the prefix and
  // strip "<prefix>." from each.
  Map<String, String> catalogConf =
      conf.entrySet().stream()
          .filter(x -> x.getKey().startsWith(prefix))
          .filter(x -> x.getKey().length() > prefix.length())
          .collect(
              Collectors.toMap(
                  // handle dot after prefix
                  x -> x.getKey().substring(prefix.length() + 1),
                  Map.Entry::getValue));
  log.info(catalogConf.toString());
  if (catalogConf.isEmpty() || !catalogConf.containsKey("type")) {
    throw new UnsupportedCatalogException(catalogName);
  }
  log.info(catalogConf.get("type"));
  switch (catalogConf.get("type")) {
    case "hadoop":
      return getHadoopIdentifier(catalogConf, identifier.toString());
    case "hive":
      return getHiveIdentifier(
          session, catalogConf.get(CatalogProperties.URI), identifier.toString());
    default:
      throw new UnsupportedCatalogException(catalogConf.get("type"));
  }
}
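The prefix-stripping step can be tried on its own; a self-contained sketch with illustrative settings for a catalog named iceberg:

import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;

public class CatalogConfSketch {
  public static void main(String[] args) {
    // Illustrative session configuration for an Iceberg catalog named "iceberg".
    Map<String, String> conf = new HashMap<>();
    conf.put("spark.sql.catalog.iceberg", "org.apache.iceberg.spark.SparkCatalog");
    conf.put("spark.sql.catalog.iceberg.type", "hadoop");
    conf.put("spark.sql.catalog.iceberg.warehouse", "/tmp/warehouse");

    String prefix = String.format("spark.sql.catalog.%s", "iceberg");
    // Keep only keys strictly below the prefix and strip "<prefix>." from each,
    // exactly as the handler above does, leaving e.g. "type" -> "hadoop".
    Map<String, String> catalogConf =
        conf.entrySet().stream()
            .filter(x -> x.getKey().startsWith(prefix))
            .filter(x -> x.getKey().length() > prefix.length())
            .collect(
                Collectors.toMap(
                    x -> x.getKey().substring(prefix.length() + 1), Map.Entry::getValue));
    System.out.println(catalogConf); // e.g. {type=hadoop, warehouse=/tmp/warehouse}
  }
}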
Use of io.openlineage.spark.agent.util.DatasetIdentifier in project OpenLineage by OpenLineage.
The class PlanUtils3, method fromDataSourceV2Relation:
public static <D extends OpenLineage.Dataset> List<D> fromDataSourceV2Relation(
    DatasetFactory<D> datasetFactory,
    OpenLineageContext context,
    DataSourceV2Relation relation,
    OpenLineage.DatasetFacetsBuilder datasetFacetsBuilder) {
  if (relation.identifier().isEmpty()) {
    throw new IllegalArgumentException("Couldn't find identifier for dataset in plan " + relation);
  }
  Identifier identifier = relation.identifier().get();
  if (relation.catalog().isEmpty() || !(relation.catalog().get() instanceof TableCatalog)) {
    throw new IllegalArgumentException("Couldn't find catalog for dataset in plan " + relation);
  }
  TableCatalog tableCatalog = (TableCatalog) relation.catalog().get();
  Map<String, String> tableProperties = relation.table().properties();
  Optional<DatasetIdentifier> di =
      PlanUtils3.getDatasetIdentifier(context, tableCatalog, identifier, tableProperties);
  if (!di.isPresent()) {
    return Collections.emptyList();
  }
  OpenLineage openLineage = context.getOpenLineage();
  datasetFacetsBuilder
      .schema(PlanUtils.schemaFacet(openLineage, relation.schema()))
      .dataSource(PlanUtils.datasourceFacet(openLineage, di.get().getNamespace()));
  // Attach the table provider facet when the catalog can supply one.
  CatalogUtils3.getTableProviderFacet(tableCatalog, tableProperties)
      .ifPresent(provider -> datasetFacetsBuilder.put("tableProvider", provider));
  return Collections.singletonList(
      datasetFactory.getDataset(
          di.get().getName(), di.get().getNamespace(), datasetFacetsBuilder.build()));
}
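Every snippet above follows the same pattern: resolve a DatasetIdentifier, then hand its name and namespace (plus facets) to a DatasetFactory. A minimal sketch of that split, assuming DatasetIdentifier exposes a two-argument (name, namespace) constructor; the values are illustrative:

import io.openlineage.spark.agent.util.DatasetIdentifier;

public class DatasetIdentifierSketch {
  public static void main(String[] args) {
    // Assumed (name, namespace) constructor order; values are illustrative.
    DatasetIdentifier di = new DatasetIdentifier("warehouse.orders", "hive://metastore:9083");
    // The factories above consume exactly these two fields, e.g.
    // datasetFactory.getDataset(di.getName(), di.getNamespace(), facets).
    System.out.println(di.getName() + " @ " + di.getNamespace());
  }
}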