Use of org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions in project OpenLineage by OpenLineage.
In class OptimizedCreateHiveTableAsSelectCommandVisitorTest, method testOptimizedCreateHiveTableAsSelectCommand:
@Test
void testOptimizedCreateHiveTableAsSelectCommand() {
OptimizedCreateHiveTableAsSelectCommandVisitor visitor =
    new OptimizedCreateHiveTableAsSelectCommandVisitor(SparkAgentTestExtension.newContext(session));
OptimizedCreateHiveTableAsSelectCommand command =
    new OptimizedCreateHiveTableAsSelectCommand(
        SparkUtils.catalogTable(
            TableIdentifier$.MODULE$.apply("tablename", Option.apply("db")),
            CatalogTableType.EXTERNAL(),
            CatalogStorageFormat$.MODULE$.apply(
                Option.apply(URI.create("s3://bucket/directory")), null, null, null, false, Map$.MODULE$.empty()),
            new StructType(
                new StructField[] {
                  new StructField("key", IntegerType$.MODULE$, false, new Metadata(new HashMap<>())),
                  new StructField("value", StringType$.MODULE$, false, new Metadata(new HashMap<>()))
                })),
        new LogicalRelation(
            new JDBCRelation(
                new StructType(
                    new StructField[] {
                      new StructField("key", IntegerType$.MODULE$, false, null),
                      new StructField("value", StringType$.MODULE$, false, null)
                    }),
                new Partition[] {},
                new JDBCOptions(
                    "",
                    "temp",
                    scala.collection.immutable.Map$.MODULE$
                        .newBuilder()
                        .$plus$eq(Tuple2.apply("driver", Driver.class.getName()))
                        .result()),
                session),
            Seq$.MODULE$
                .<AttributeReference>newBuilder()
                .$plus$eq(new AttributeReference("key", IntegerType$.MODULE$, false, null, ExprId.apply(1L), Seq$.MODULE$.<String>empty()))
                .$plus$eq(new AttributeReference("value", StringType$.MODULE$, false, null, ExprId.apply(2L), Seq$.MODULE$.<String>empty()))
                .result(),
            Option.empty(),
            false),
        ScalaConversionUtils.fromList(Arrays.asList("key", "value")),
        SaveMode.Overwrite);
assertThat(visitor.isDefinedAt(command)).isTrue();
List<OpenLineage.OutputDataset> datasets = visitor.apply(command);
assertEquals(1, datasets.size());
OpenLineage.OutputDataset outputDataset = datasets.get(0);
assertEquals(
    OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.OVERWRITE,
    outputDataset.getFacets().getLifecycleStateChange().getLifecycleStateChange());
assertEquals("directory", outputDataset.getName());
assertEquals("s3://bucket", outputDataset.getNamespace());
}
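The scala.collection.immutable.Map builder chain passed to the JDBCOptions constructor above recurs throughout these snippets. A small helper can hide the $plus$eq/Tuple2 noise; this scalaMap method is a hypothetical convenience, not part of OpenLineage, sketched only to make such calls easier to read:

import scala.Tuple2;
import scala.collection.immutable.Map;
import scala.collection.immutable.Map$;

// Hypothetical helper: builds the single-entry scala.collection.immutable.Map
// that the JDBCOptions constructor expects.
static Map<String, String> scalaMap(String key, String value) {
  return Map$.MODULE$
      .<String, String>newBuilder()
      .$plus$eq(Tuple2.apply(key, value))
      .result();
}

// Usage: new JDBCOptions("", "temp", scalaMap("driver", Driver.class.getName()))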
Use of org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions in project OpenLineage by OpenLineage.
In class LogicalPlanSerializerTest, method testSerializeLogicalPlan:
@Test
public void testSerializeLogicalPlan() throws IOException {
String jdbcUrl = "jdbc:postgresql://postgreshost:5432/sparkdata";
String sparkTableName = "my_spark_table";
scala.collection.immutable.Map<String, String> map =
    (scala.collection.immutable.Map<String, String>)
        Map$.MODULE$
            .<String, String>newBuilder()
            .$plus$eq(Tuple2.apply("driver", Driver.class.getName()))
            .result();
JDBCRelation relation =
    new JDBCRelation(
        new StructType(
            new StructField[] {new StructField("name", StringType$.MODULE$, false, Metadata.empty())}),
        new Partition[] {},
        new JDBCOptions(jdbcUrl, sparkTableName, map),
        mock(SparkSession.class));
LogicalRelation logicalRelation =
    new LogicalRelation(
        relation,
        Seq$.MODULE$
            .<AttributeReference>newBuilder()
            .$plus$eq(new AttributeReference("name", StringType$.MODULE$, false, Metadata.empty(), ExprId.apply(1L), Seq$.MODULE$.<String>empty()))
            .result(),
        Option.empty(),
        false);
Aggregate aggregate = new Aggregate(Seq$.MODULE$.<Expression>empty(), Seq$.MODULE$.<NamedExpression>empty(), logicalRelation);
Map<String, Object> aggregateActualNode = objectMapper.readValue(logicalPlanSerializer.serialize(aggregate), mapTypeReference);
Map<String, Object> logicalRelationActualNode = objectMapper.readValue(logicalPlanSerializer.serialize(logicalRelation), mapTypeReference);
Path expectedAggregateNodePath = Paths.get("src", "test", "resources", "test_data", "serde", "aggregate-node.json");
Path logicalRelationNodePath = Paths.get("src", "test", "resources", "test_data", "serde", "logicalrelation-node.json");
Map<String, Object> expectedAggregateNode = objectMapper.readValue(expectedAggregateNodePath.toFile(), mapTypeReference);
Map<String, Object> expectedLogicalRelationNode = objectMapper.readValue(logicalRelationNodePath.toFile(), mapTypeReference);
assertThat(aggregateActualNode).satisfies(new MatchesMapRecursively(expectedAggregateNode));
assertThat(logicalRelationActualNode).satisfies(new MatchesMapRecursively(expectedLogicalRelationNode));
}
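The test above references objectMapper, mapTypeReference, and logicalPlanSerializer without showing their setup. A plausible reconstruction of those fixtures (the exact declarations in the OpenLineage test class may differ) is:

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Map;

// Assumed fixtures, not shown in the excerpt above.
private static final ObjectMapper objectMapper = new ObjectMapper();
private static final TypeReference<Map<String, Object>> mapTypeReference =
    new TypeReference<Map<String, Object>>() {};
private static final LogicalPlanSerializer logicalPlanSerializer = new LogicalPlanSerializer();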
Use of org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions in project OpenLineage by OpenLineage.
In class JdbcHandler, method getDatasetIdentifier:
@SneakyThrows
@Override
public DatasetIdentifier getDatasetIdentifier(
    SparkSession session, TableCatalog tableCatalog, Identifier identifier, Map<String, String> properties) {
JDBCTableCatalog catalog = (JDBCTableCatalog) tableCatalog;
JDBCOptions options = (JDBCOptions) FieldUtils.readField(catalog, "options", true);
String name =
    Stream.concat(Arrays.stream(identifier.namespace()), Stream.of(identifier.name()))
        .collect(Collectors.joining("."));
return new DatasetIdentifier(name, JdbcUtils.sanitizeJdbcUrl(options.url()));
}
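JdbcUtils.sanitizeJdbcUrl derives the dataset namespace from the JDBC URL. Judging from JdbcHandlerTest below, it at least strips the jdbc: scheme prefix; a minimal sketch of that step (the real implementation may also remove credentials and other connection noise) is:

// Minimal sketch of the prefix stripping observed in JdbcHandlerTest below;
// not the actual JdbcUtils implementation.
static String stripJdbcPrefix(String url) {
  return url.replaceFirst("^jdbc:", "");
}

// stripJdbcPrefix("jdbc:postgresql://postgreshost:5432") => "postgresql://postgreshost:5432"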
Use of org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions in project OpenLineage by OpenLineage.
In class CreateHiveTableAsSelectCommandVisitorTest, method testCreateHiveTableAsSelectCommand:
@Test
void testCreateHiveTableAsSelectCommand() {
CreateHiveTableAsSelectCommandVisitor visitor =
    new CreateHiveTableAsSelectCommandVisitor(
        OpenLineageContext.builder()
            .sparkSession(Optional.of(session))
            .sparkContext(session.sparkContext())
            .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
            .build());
CreateHiveTableAsSelectCommand command =
    new CreateHiveTableAsSelectCommand(
        SparkUtils.catalogTable(
            TableIdentifier$.MODULE$.apply("tablename", Option.apply("db")),
            CatalogTableType.EXTERNAL(),
            CatalogStorageFormat$.MODULE$.apply(
                Option.apply(URI.create("s3://bucket/directory")), null, null, null, false, Map$.MODULE$.empty()),
            new StructType(
                new StructField[] {
                  new StructField("key", IntegerType$.MODULE$, false, new Metadata(new HashMap<>())),
                  new StructField("value", StringType$.MODULE$, false, new Metadata(new HashMap<>()))
                })),
        new LogicalRelation(
            new JDBCRelation(
                new StructType(
                    new StructField[] {
                      new StructField("key", IntegerType$.MODULE$, false, null),
                      new StructField("value", StringType$.MODULE$, false, null)
                    }),
                new Partition[] {},
                new JDBCOptions(
                    "",
                    "temp",
                    scala.collection.immutable.Map$.MODULE$
                        .newBuilder()
                        .$plus$eq(Tuple2.apply("driver", Driver.class.getName()))
                        .result()),
                session),
            Seq$.MODULE$
                .<AttributeReference>newBuilder()
                .$plus$eq(new AttributeReference("key", IntegerType$.MODULE$, false, null, ExprId.apply(1L), Seq$.MODULE$.<String>empty()))
                .$plus$eq(new AttributeReference("value", StringType$.MODULE$, false, null, ExprId.apply(2L), Seq$.MODULE$.<String>empty()))
                .result(),
            Option.empty(),
            false),
        ScalaConversionUtils.fromList(Arrays.asList("key", "value")),
        SaveMode.Overwrite);
assertThat(visitor.isDefinedAt(command)).isTrue();
List<OpenLineage.OutputDataset> datasets = visitor.apply(command);
assertEquals(1, datasets.size());
OpenLineage.OutputDataset outputDataset = datasets.get(0);
assertEquals(
    OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.CREATE,
    outputDataset.getFacets().getLifecycleStateChange().getLifecycleStateChange());
assertEquals("directory", outputDataset.getName());
assertEquals("s3://bucket", outputDataset.getNamespace());
}
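Note the contrast with the optimized variant earlier: for the same SaveMode.Overwrite command, OptimizedCreateHiveTableAsSelectCommandVisitor reports lifecycle state OVERWRITE, while this visitor reports CREATE.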
Use of org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions in project OpenLineage by OpenLineage.
In class JdbcHandlerTest, method testGetDatasetIdentifier:
@Test
@SneakyThrows
public void testGetDatasetIdentifier() {
JdbcHandler handler = new JdbcHandler();
JDBCTableCatalog tableCatalog = new JDBCTableCatalog();
JDBCOptions options = mock(JDBCOptions.class);
when(options.url()).thenReturn("jdbc:postgresql://postgreshost:5432");
FieldUtils.writeField(tableCatalog, "options", options, true);
DatasetIdentifier datasetIdentifier =
    handler.getDatasetIdentifier(
        mock(SparkSession.class),
        tableCatalog,
        Identifier.of(new String[] {"database", "schema"}, "table"),
        new HashMap<>());
assertEquals("database.schema.table", datasetIdentifier.getName());
assertEquals("postgresql://postgreshost:5432", datasetIdentifier.getNamespace());
}
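The FieldUtils.writeField call mirrors the FieldUtils.readField call in JdbcHandler.getDatasetIdentifier above: reflection is used on both sides because JDBCTableCatalog does not expose its JDBCOptions publicly.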