Use of org.apache.spark.sql.execution.datasources.LogicalRelation in project OpenLineage by OpenLineage.
From the apply method of the SaveIntoDataSourceCommandVisitor class.
@Override
public List<OpenLineage.OutputDataset> apply(SparkListenerEvent event) {
  BaseRelation relation;
  SaveIntoDataSourceCommand command =
      (SaveIntoDataSourceCommand) context.getQueryExecution().get().optimizedPlan();

  // Kafka is handled explicitly, as other implementations of CreatableRelationProvider
  // may not be handled in the generic way below.
  if (KafkaRelationVisitor.isKafkaSource(command.dataSource())) {
    return KafkaRelationVisitor.createKafkaDatasets(
        outputDataset(),
        command.dataSource(),
        command.options(),
        command.mode(),
        command.schema());
  }

  // Delta writes carry their target location in the "path" option.
  if (command.dataSource().getClass().getName().contains("DeltaDataSource")) {
    if (command.options().contains("path")) {
      URI uri = URI.create(command.options().get("path").get());
      return Collections.singletonList(
          outputDataset().getDataset(PathUtils.fromURI(uri, "file"), command.schema()));
    }
  }

  SQLContext sqlContext = context.getSparkSession().get().sqlContext();
  try {
    if (command.dataSource() instanceof RelationProvider) {
      RelationProvider p = (RelationProvider) command.dataSource();
      relation = p.createRelation(sqlContext, command.options());
    } else {
      SchemaRelationProvider p = (SchemaRelationProvider) command.dataSource();
      relation = p.createRelation(sqlContext, command.options(), command.schema());
    }
  } catch (Exception ex) {
    // Scala does not declare checked exceptions, so detect the SQLException explicitly.
    if (ex instanceof SQLException) {
      // This can happen on SparkListenerSQLExecutionStart, e.g. for SQLite, when the
      // database does not exist yet - it is created as part of command execution.
      // It is safe to ignore on start because the relation resolves on end;
      // see SparkReadWriteIntegTest.testReadFromFileWriteToJdbc.
      log.warn("Can't create relation: ", ex);
      return Collections.emptyList();
    }
    throw ex;
  }

  LogicalRelation logicalRelation =
      new LogicalRelation(
          relation, relation.schema().toAttributes(), Option.empty(), command.isStreaming());

  return delegate(
          context.getOutputDatasetQueryPlanVisitors(), context.getOutputDatasetBuilders(), event)
      .applyOrElse(
          logicalRelation,
          ScalaConversionUtils.toScalaFn((lp) -> Collections.<OutputDataset>emptyList()))
      .stream()
      .map(
          ds -> {
            // Collect the dataset's existing additional facets.
            Builder<String, OpenLineage.DatasetFacet> facetsMap =
                ImmutableMap.<String, OpenLineage.DatasetFacet>builder();
            if (ds.getFacets().getAdditionalProperties() != null) {
              facetsMap.putAll(ds.getFacets().getAdditionalProperties());
            }
            ds.getFacets().getAdditionalProperties().putAll(facetsMap.build());
            if (SaveMode.Overwrite == command.mode()) {
              // Rebuild the whole dataset with a LifecycleStateChange facet added.
              OpenLineage.DatasetFacets facets =
                  context
                      .getOpenLineage()
                      .newDatasetFacets(
                          ds.getFacets().getDocumentation(),
                          ds.getFacets().getDataSource(),
                          ds.getFacets().getVersion(),
                          ds.getFacets().getSchema(),
                          context
                              .getOpenLineage()
                              .newLifecycleStateChangeDatasetFacet(
                                  OpenLineage.LifecycleStateChangeDatasetFacet
                                      .LifecycleStateChange.OVERWRITE,
                                  null));
              return context
                  .getOpenLineage()
                  .newOutputDataset(
                      ds.getNamespace(), ds.getName(), facets, ds.getOutputFacets());
            }
            return ds;
          })
      .collect(Collectors.toList());
}
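For context, a minimal sketch of the kind of write that Spark plans as a SaveIntoDataSourceCommand and that flows through the visitor above. The target path and session setup are hypothetical, and the delta format assumes the Delta Lake artifact is on the classpath:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class DeltaOverwriteSketch {
  public static void main(String[] args) {
    SparkSession spark =
        SparkSession.builder().master("local[*]").appName("delta-overwrite-sketch").getOrCreate();

    Dataset<Row> df = spark.range(10).toDF("id");

    // Spark plans this as SaveIntoDataSourceCommand(DeltaDataSource, ...), so the
    // visitor above resolves the output dataset from the "path" option; because the
    // mode is Overwrite, it also attaches a LifecycleStateChange OVERWRITE facet.
    df.write()
        .format("delta")
        .option("path", "/tmp/delta/events") // hypothetical target location
        .mode(SaveMode.Overwrite)
        .save();

    spark.stop();
  }
}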
Use of org.apache.spark.sql.execution.datasources.LogicalRelation in project OpenLineage by OpenLineage.
From the bigQuerySupplier method of the BigQueryNodeVisitor class.
private Optional<Supplier<BigQueryRelation>> bigQuerySupplier(LogicalPlan plan) {
  // SaveIntoDataSourceCommand is a special case because it references a
  // CreatableRelationProvider. Every other write instance references a
  // LogicalRelation(BigQueryRelation, _, _, _).
  SQLContext sqlContext = context.getSparkSession().get().sqlContext();
  if (plan instanceof SaveIntoDataSourceCommand) {
    SaveIntoDataSourceCommand saveCommand = (SaveIntoDataSourceCommand) plan;
    CreatableRelationProvider relationProvider = saveCommand.dataSource();
    if (relationProvider instanceof BigQueryRelationProvider) {
      return Optional.of(
          () ->
              (BigQueryRelation)
                  ((BigQueryRelationProvider) relationProvider)
                      .createRelation(sqlContext, saveCommand.options(), saveCommand.schema()));
    }
  } else if (plan instanceof LogicalRelation
      && ((LogicalRelation) plan).relation() instanceof BigQueryRelation) {
    return Optional.of(() -> (BigQueryRelation) ((LogicalRelation) plan).relation());
  }
  return Optional.empty();
}
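The Supplier indirection defers the potentially expensive createRelation call until a caller has decided the plan really targets BigQuery. A hedged sketch of how a caller might consume it (hypothetical consuming code; the actual visitor methods are elided in this excerpt):

bigQuerySupplier(plan)
    .ifPresent(
        supplier -> {
          BigQueryRelation relation = supplier.get(); // createRelation runs only here
          // ... map the relation to an OpenLineage dataset (elided)
        });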
Use of org.apache.spark.sql.execution.datasources.LogicalRelation in project OpenLineage by OpenLineage.
From the testApplyForHadoopFsRelation method of the LogicalRelationDatasetBuilderTest class.
@Test
void testApplyForHadoopFsRelation() {
  HadoopFsRelation hadoopFsRelation = mock(HadoopFsRelation.class);
  LogicalRelation logicalRelation = mock(LogicalRelation.class);
  Configuration hadoopConfig = mock(Configuration.class);
  SparkContext sparkContext = mock(SparkContext.class);
  FileIndex fileIndex = mock(FileIndex.class);
  OpenLineage openLineage = mock(OpenLineage.class);
  SessionState sessionState = mock(SessionState.class);
  Path p1 = new Path("/tmp/path1");
  Path p2 = new Path("/tmp/path2");

  when(logicalRelation.relation()).thenReturn(hadoopFsRelation);
  when(openLineageContext.getSparkContext()).thenReturn(sparkContext);
  when(openLineageContext.getSparkSession()).thenReturn(Optional.of(session));
  when(openLineageContext.getOpenLineage()).thenReturn(openLineage);
  when(openLineage.newDatasetFacetsBuilder()).thenReturn(new OpenLineage.DatasetFacetsBuilder());
  when(session.sessionState()).thenReturn(sessionState);
  when(sessionState.newHadoopConfWithOptions(any())).thenReturn(hadoopConfig);
  when(hadoopFsRelation.location()).thenReturn(fileIndex);
  when(fileIndex.rootPaths())
      .thenReturn(
          scala.collection.JavaConverters.collectionAsScalaIterableConverter(
                  Arrays.asList(p1, p2))
              .asScala()
              .toSeq());

  try (MockedStatic<PlanUtils> mocked = mockStatic(PlanUtils.class)) {
    // Both root paths resolve to the same parent directory, so the builder
    // should deduplicate them into a single dataset.
    when(PlanUtils.getDirectoryPath(p1, hadoopConfig)).thenReturn(new Path("/tmp"));
    when(PlanUtils.getDirectoryPath(p2, hadoopConfig)).thenReturn(new Path("/tmp"));

    List<OpenLineage.Dataset> datasets = builder.apply(logicalRelation);

    assertEquals(1, datasets.size());
    OpenLineage.Dataset ds = datasets.get(0);
    assertEquals("/tmp", ds.getName());
  }
}
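The two stubbed root paths resolve to the same parent directory, which is what the single-dataset assertion pins down. A sketch of that deduplication, assuming PlanUtils.getDirectoryPath behaves as the mocks above stipulate:

// Distinct root paths that share a parent directory collapse to one dataset name.
Set<Path> directories = new HashSet<>();
for (Path root : Arrays.asList(new Path("/tmp/path1"), new Path("/tmp/path2"))) {
  directories.add(PlanUtils.getDirectoryPath(root, hadoopConfig));
}
// directories == {"/tmp"} -> exactly one dataset, named "/tmp"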
Use of org.apache.spark.sql.execution.datasources.LogicalRelation in project OpenLineage by OpenLineage.
From the testSQLDWRelationBadJdbcUrl method of the SQLDWDatabricksVisitorTest class.
@Test
void testSQLDWRelationBadJdbcUrl() {
  String inputName = "dbo.mytable";
  // Malformed: the URL lacks the "jdbc:" prefix, so the visitor should emit no datasets.
  String inputJdbcUrl = "sqlserver://MYTESTSERVER.database.windows.net:1433;database=MYTESTDB";

  // Instantiate a mock SQL DW relation wrapped in a LogicalRelation.
  LogicalRelation lr =
      new LogicalRelation(
          new MockSqlDWBaseRelation(inputName, inputJdbcUrl),
          Seq$.MODULE$
              .<AttributeReference>newBuilder()
              .$plus$eq(
                  new AttributeReference(
                      "name",
                      StringType$.MODULE$,
                      false,
                      null,
                      ExprId.apply(1L),
                      Seq$.MODULE$.<String>empty()))
              .result(),
          Option.empty(),
          false);

  TestSqlDWDatabricksVisitor visitor =
      new TestSqlDWDatabricksVisitor(
          SparkAgentTestExtension.newContext(session),
          DatasetFactory.output(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI)));

  List<OpenLineage.Dataset> datasets = visitor.apply(lr);
  assertEquals(0, datasets.size());
}
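Compared with the positive test below, the only difference is the missing jdbc: scheme prefix, so no dataset namespace can be derived. A hypothetical guard illustrating the behavior the test asserts (not project API):

// Hypothetical validation sketch: only well-formed JDBC URLs yield a namespace.
private static boolean isJdbcUrl(String url) {
  return url != null && url.startsWith("jdbc:");
}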
Use of org.apache.spark.sql.execution.datasources.LogicalRelation in project OpenLineage by OpenLineage.
From the testSQLDWRelation method of the SQLDWDatabricksVisitorTest class.
@Test
void testSQLDWRelation() {
  String inputName = "\"dbo\".\"table1\"";
  String inputJdbcUrl =
      "jdbc:sqlserver://MYTESTSERVER.database.windows.net:1433;database=MYTESTDB";
  String expectedName = "dbo.table1";
  String expectedNamespace =
      "sqlserver://MYTESTSERVER.database.windows.net:1433;database=MYTESTDB;";

  // Instantiate a mock SQL DW relation wrapped in a LogicalRelation.
  LogicalRelation lr =
      new LogicalRelation(
          new MockSqlDWBaseRelation(inputName, inputJdbcUrl),
          Seq$.MODULE$
              .<AttributeReference>newBuilder()
              .$plus$eq(
                  new AttributeReference(
                      "name",
                      StringType$.MODULE$,
                      false,
                      null,
                      ExprId.apply(1L),
                      Seq$.MODULE$.<String>empty()))
              .result(),
          Option.empty(),
          false);

  TestSqlDWDatabricksVisitor visitor =
      new TestSqlDWDatabricksVisitor(
          SparkAgentTestExtension.newContext(session),
          DatasetFactory.output(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI)));

  List<OpenLineage.Dataset> datasets = visitor.apply(lr);
  assertEquals(1, datasets.size());
  OpenLineage.Dataset ds = datasets.get(0);
  assertEquals(expectedNamespace, ds.getNamespace());
  assertEquals(expectedName, ds.getName());
}
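Read together with the previous test, this pins down the normalization: the jdbc: prefix is dropped from the namespace and quoting is stripped from the table name. A sketch of that mapping matching the asserted values (hypothetical one-liners; the real visitor may normalize differently):

String namespace = inputJdbcUrl.replaceFirst("^jdbc:", "") + ";";
// -> "sqlserver://MYTESTSERVER.database.windows.net:1433;database=MYTESTDB;"
String name = inputName.replace("\"", "");
// -> "dbo.table1"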