Use of org.apache.spark.sql.sources.RelationProvider in the OpenLineage project (by OpenLineage).
Class SaveIntoDataSourceCommandVisitor, method apply.
/**
 * Builds the output datasets for the {@code SaveIntoDataSourceCommand} found in the optimized
 * plan of the current query execution.
 *
 * <p>Resolution order:
 *
 * <ol>
 *   <li>Kafka data sources are delegated to {@code KafkaRelationVisitor}.
 *   <li>Data sources whose class name contains {@code "DeltaDataSource"} and that carry a
 *       {@code "path"} option yield a single dataset built from that path.
 *   <li>Otherwise a {@code BaseRelation} is created from the provider, wrapped in a
 *       {@code LogicalRelation}, and handed to the registered output dataset visitors and
 *       builders.
 * </ol>
 *
 * <p>When the save mode is {@code SaveMode.Overwrite}, every resulting dataset is rebuilt with a
 * lifecycle state change facet set to {@code OVERWRITE}.
 *
 * @param event the listener event being processed; forwarded to the delegate
 * @return the resolved output datasets; an empty list when creating the relation fails with a
 *     {@code SQLException}
 */
@Override
public List<OpenLineage.OutputDataset> apply(SparkListenerEvent event) {
  BaseRelation relation;
  SaveIntoDataSourceCommand command =
      (SaveIntoDataSourceCommand) context.getQueryExecution().get().optimizedPlan();

  // Kafka sinks are resolved by KafkaRelationVisitor rather than by the generic
  // createRelation path below.
  if (KafkaRelationVisitor.isKafkaSource(command.dataSource())) {
    return KafkaRelationVisitor.createKafkaDatasets(
        outputDataset(),
        command.dataSource(),
        command.options(),
        command.mode(),
        command.schema());
  }

  // Delta data sources with an explicit "path" option are identified by that path alone.
  if (command.dataSource().getClass().getName().contains("DeltaDataSource")) {
    if (command.options().contains("path")) {
      URI uri = URI.create(command.options().get("path").get());
      return Collections.singletonList(
          outputDataset().getDataset(PathUtils.fromURI(uri, "file"), command.schema()));
    }
  }

  SQLContext sqlContext = context.getSparkSession().get().sqlContext();
  try {
    if (command.dataSource() instanceof RelationProvider) {
      RelationProvider p = (RelationProvider) command.dataSource();
      relation = p.createRelation(sqlContext, command.options());
    } else {
      SchemaRelationProvider p = (SchemaRelationProvider) command.dataSource();
      relation = p.createRelation(sqlContext, command.options(), command.schema());
    }
  } catch (Exception ex) {
    // Scala methods do not declare checked exceptions, so SQLException cannot be caught
    // directly here; catch broadly and test the type instead.
    if (ex instanceof SQLException) {
      // This can happen on SparkListenerSQLExecutionStart, for example for sqlite, when the
      // database does not exist yet - it will be created as part of command execution.
      // Still, we can just ignore it on start, because it will work on end.
      // See SparkReadWriteIntegTest.testReadFromFileWriteToJdbc
      log.warn("Can't create relation: ", ex);
      return Collections.emptyList();
    }
    throw ex;
  }

  LogicalRelation logicalRelation =
      new LogicalRelation(
          relation, relation.schema().toAttributes(), Option.empty(), command.isStreaming());

  return delegate(
          context.getOutputDatasetQueryPlanVisitors(), context.getOutputDatasetBuilders(), event)
      .applyOrElse(
          logicalRelation,
          ScalaConversionUtils.toScalaFn((lp) -> Collections.<OutputDataset>emptyList()))
      .stream()
      .map(
          ds -> {
            // The previous implementation copied the additional properties into an immutable
            // map and wrote them straight back into the same map. That round trip never
            // changed the contents, but it dereferenced the map without a null check and
            // therefore threw a NullPointerException when no additional properties were set.
            // It has been removed.
            if (SaveMode.Overwrite == command.mode()) {
              // Rebuild the whole dataset with a LifecycleStateChange facet added.
              // NOTE(review): only the facets passed explicitly below are carried over; any
              // additional properties on the original facets appear not to be copied -
              // confirm this is intended.
              OpenLineage.DatasetFacets facets =
                  context
                      .getOpenLineage()
                      .newDatasetFacets(
                          ds.getFacets().getDocumentation(),
                          ds.getFacets().getDataSource(),
                          ds.getFacets().getVersion(),
                          ds.getFacets().getSchema(),
                          context
                              .getOpenLineage()
                              .newLifecycleStateChangeDatasetFacet(
                                  OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange
                                      .OVERWRITE,
                                  null));
              return context
                  .getOpenLineage()
                  .newOutputDataset(ds.getNamespace(), ds.getName(), facets, ds.getOutputFacets());
            }
            return ds;
          })
      .collect(Collectors.toList());
}
Aggregations